/* Feature switch: tested by the `#ifdef USE_INVALID_CODE_SCHEME` sections of
 * this file, the first of which directly follows. */
32 #define USE_INVALID_CODE_SCHEME
34 #ifdef USE_INVALID_CODE_SCHEME
/* Two reserved 32-bit values, both with bit 31 set.
 * NOTE(review): the names suggest they stand for the bytes 0xfe and 0xff --
 * confirm against the encode/decode code that uses them. */
36 #define INVALID_CODE_FE 0xfffffffe
37 #define INVALID_CODE_FF 0xffffffff
/* 0x7fffffff: every bit set except bit 31.
 * NOTE(review): presumably the largest code value treated as valid -- confirm
 * at the use sites. */
38 #define VALID_CODE_LIMIT 0x7fffffff
/* utf8_islead(c): nonzero when the top two bits of c are NOT binary 10, i.e.
 * when c does not have the form 10xxxxxx (a UTF-8 trailing byte).
 * The masked value is cast to UChar before the comparison; c is evaluated
 * exactly once. */
41 #define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
49 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
50 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
51 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
57 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
58 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
59 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
70 static const signed char trans[][0x100] = {
72 A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
73 A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
74 A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
75 A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
76 A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
77 A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
78 A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
79 A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
80 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
81 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
82 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
83 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
84 F,
F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
85 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
86 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3,
87 5, 6, 6, 6, 7,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F, F
90 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
91 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
92 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
93 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
94 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
95 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
96 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
97 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
98 A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
99 A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
100 A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
101 A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
A,
102 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
103 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
104 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
105 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F, F
108 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
109 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
110 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
111 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
112 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
113 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
114 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
115 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
116 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
117 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
118 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
119 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
120 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
121 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
122 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
123 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F, F
126 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
127 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
128 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
129 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
130 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
131 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
132 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
133 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
137 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
138 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
139 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
140 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
141 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F, F
144 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
145 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
146 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
147 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
148 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
149 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
150 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
151 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
152 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
153 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
154 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
155 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
156 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
157 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
158 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
159 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F, F
162 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
163 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
164 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
165 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
166 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
167 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
168 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
169 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
170 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
171 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
172 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
173 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
174 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
175 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
176 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
177 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F, F
180 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
181 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
182 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
183 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
184 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
185 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
186 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
187 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
188 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
189 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
190 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
191 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
192 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
193 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
194 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
195 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F, F
198 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
199 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
200 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
201 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
202 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
203 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
204 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
205 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
206 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
207 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
208 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
209 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
210 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
211 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
212 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
213 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F, F
222 int firstbyte = *p++;
224 s =
trans[0][firstbyte];
248 if (*p == 0x0a)
return 1;
250 #ifdef USE_UNICODE_ALL_LINE_TERMINATORS
251 #ifndef USE_CRNL_AS_LINE_TERMINATOR
252 if (*p == 0x0d)
return 1;
255 if (*(p+1) == 0x85 && *p == 0xc2)
258 if ((*(p+2) == 0xa8 || *(p+2) == 0xa9)
259 && *(p+1) == 0x80 && *p == 0xe2)
275 len =
enclen(enc, p, end);
279 n = c & ((1 << (6 -
len)) - 1);
282 n = (n << 6) | (c & ((1 << 6) - 1));
287 #ifdef USE_INVALID_CODE_SCHEME
299 if ((code & 0xffffff80) == 0)
return 1;
300 else if ((code & 0xfffff800) == 0)
return 2;
301 else if ((code & 0xffff0000) == 0)
return 3;
302 else if ((code & 0xffe00000) == 0)
return 4;
303 else if ((code & 0xfc000000) == 0)
return 5;
304 else if ((code & 0x80000000) == 0)
return 6;
305 #ifdef USE_INVALID_CODE_SCHEME
/* Build one byte of the form 10xxxxxx from six bits of `code`:
 *   UTF8_TRAILS(code, shift) uses the six bits starting at bit `shift`
 *                            (code is shifted right by `shift`, then masked
 *                            with 0x3f);
 *   UTF8_TRAIL0(code)        uses the lowest six bits (same result as
 *                            UTF8_TRAILS(code, 0)).
 * In both cases 0x80 is OR-ed in and the result is cast to UChar. */
316 #define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80)
317 #define UTF8_TRAIL0(code) (UChar )(((code) & 0x3f) | 0x80)
319 if ((code & 0xffffff80) == 0) {
326 if ((code & 0xfffff800) == 0) {
327 *p++ = (
UChar )(((code>>6)& 0x1f) | 0xc0);
329 else if ((code & 0xffff0000) == 0) {
330 *p++ = (
UChar )(((code>>12) & 0x0f) | 0xe0);
333 else if ((code & 0xffe00000) == 0) {
334 *p++ = (
UChar )(((code>>18) & 0x07) | 0xf0);
338 else if ((code & 0xfc000000) == 0) {
339 *p++ = (
UChar )(((code>>24) & 0x03) | 0xf8);
344 else if ((code & 0x80000000) == 0) {
345 *p++ = (
UChar )(((code>>30) & 0x01) | 0xfc);
351 #ifdef USE_INVALID_CODE_SCHEME
366 return (
int)(p -
buf);
377 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
412 if (s <= start)
return (
UChar* )
s;
int onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED)
static OnigCodePoint mbc_to_code(const UChar *p, const UChar *end, OnigEncoding enc)
unsigned int OnigCodePoint
#define OnigEncodingDefine(f, n)
int onigenc_always_true_is_allowed_reverse_match(const UChar *s ARG_UNUSED, const UChar *end ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE
static int mbc_case_fold(OnigCaseFoldType flag, const UChar **pp, const UChar *end, UChar *fold, OnigEncoding enc)
static UChar * left_adjust_char_head(const UChar *start, const UChar *s, const UChar *end, OnigEncoding enc ARG_UNUSED)
#define UTF8_TRAILS(code, shift)
#define ONIGENC_IS_MBC_ASCII(p)
static int mbc_enc_len(const UChar *p, const UChar *e, OnigEncoding enc ARG_UNUSED)
unsigned int OnigCaseFoldType
static int is_mbc_newline(const UChar *p, const UChar *end, OnigEncoding enc)
static const int EncLen_UTF8[]
static int code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
#define UTF8_TRAIL0(code)
#define ENC_ALIAS(name, orig)
static const signed char trans[][0x100]
int onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar *name, UChar *end)
static int get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint *ranges[], OnigEncoding enc ARG_UNUSED)
int onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint *ranges[])
int onigenc_unicode_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, const UChar **pp, const UChar *end, UChar *fold)
#define enclen(enc, p, e)
unsigned char buf[MIME_BUF_SIZE]
int onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void *arg, OnigEncoding enc ARG_UNUSED)
#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n)
#define ONIGENC_CASE_FOLD_TURKISH_AZERI
#define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n)
register unsigned int len
static int code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
#define ONIGENC_CONSTRUCT_MBCLEN_INVALID()
#define ENC_REPLICATE(name, orig)
int onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar *p, const OnigUChar *end, OnigCaseFoldCodeItem items[])
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c)
static int get_case_fold_codes_by_str(OnigCaseFoldType flag, const OnigUChar *p, const OnigUChar *end, OnigCaseFoldCodeItem items[], OnigEncoding enc)