Ruby  1.9.3p551(2014-11-13revision48407)
regcomp.c
Go to the documentation of this file.
1 /**********************************************************************
2  regcomp.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in the
15  * documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include "regparse.h"
31 
33 
34 extern OnigCaseFoldType
36 {
38 }
39 
40 extern int
42 {
43  OnigDefaultCaseFoldFlag = case_fold_flag;
44  return 0;
45 }
46 
47 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
48 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
49 #endif
50 
51 static UChar*
53 {
54  ptrdiff_t len = end - s;
55 
56  if (len > 0) {
57  UChar* r = (UChar* )xmalloc(len + 1);
59  xmemcpy(r, s, len);
60  r[len] = (UChar )0;
61  return r;
62  }
63  else return NULL;
64 }
65 
66 static void
68 {
69  Node c;
70  c = *a; *a = *b; *b = c;
71 
72  if (NTYPE(a) == NT_STR) {
73  StrNode* sn = NSTR(a);
74  if (sn->capa == 0) {
75  size_t len = sn->end - sn->s;
76  sn->s = sn->buf;
77  sn->end = sn->s + len;
78  }
79  }
80 
81  if (NTYPE(b) == NT_STR) {
82  StrNode* sn = NSTR(b);
83  if (sn->capa == 0) {
84  size_t len = sn->end - sn->s;
85  sn->s = sn->buf;
86  sn->end = sn->s + len;
87  }
88  }
89 }
90 
91 static OnigDistance
93 {
96  else {
97  if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
98  else return ONIG_INFINITE_DISTANCE;
99  }
100 }
101 
102 static OnigDistance
104 {
105  if (m == 0) return 0;
106 
107  if (d < ONIG_INFINITE_DISTANCE / m)
108  return d * m;
109  else
110  return ONIG_INFINITE_DISTANCE;
111 }
112 
113 static int
115 {
116  int i;
117  for (i = 0; i < (int )BITSET_SIZE; i++) {
118  if (bs[i] != 0) return 0;
119  }
120  return 1;
121 }
122 
123 #ifdef ONIG_DEBUG
124 static int
125 onig_is_prelude(void)
126 {
127  return !rb_const_defined(rb_cThread, rb_intern_const("MUTEX_FOR_THREAD_EXCLUSIVE"));
128 }
129 
130 static int
131 bitset_on_num(BitSetRef bs)
132 {
133  int i, n;
134 
135  n = 0;
136  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
137  if (BITSET_AT(bs, i)) n++;
138  }
139  return n;
140 }
141 #endif
142 
143 extern int
145 {
146  if (size <= 0) {
147  size = 0;
148  buf->p = NULL;
149  }
150  else {
151  buf->p = (UChar* )xmalloc(size);
152  if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
153  }
154 
155  buf->alloc = (unsigned int)size;
156  buf->used = 0;
157  return 0;
158 }
159 
160 
161 #ifdef USE_SUBEXP_CALL
162 
163 static int
164 unset_addr_list_init(UnsetAddrList* uslist, int size)
165 {
166  UnsetAddr* p;
167 
168  p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
170  uslist->num = 0;
171  uslist->alloc = size;
172  uslist->us = p;
173  return 0;
174 }
175 
176 static void
177 unset_addr_list_end(UnsetAddrList* uslist)
178 {
179  if (IS_NOT_NULL(uslist->us))
180  xfree(uslist->us);
181 }
182 
183 static int
184 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
185 {
186  UnsetAddr* p;
187  int size;
188 
189  if (uslist->num >= uslist->alloc) {
190  size = uslist->alloc * 2;
191  p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
193  uslist->alloc = size;
194  uslist->us = p;
195  }
196 
197  uslist->us[uslist->num].offset = offset;
198  uslist->us[uslist->num].target = node;
199  uslist->num++;
200  return 0;
201 }
202 #endif /* USE_SUBEXP_CALL */
203 
204 
205 static int
206 add_opcode(regex_t* reg, int opcode)
207 {
208  BBUF_ADD1(reg, opcode);
209  return 0;
210 }
211 
212 #ifdef USE_COMBINATION_EXPLOSION_CHECK
213 static int
214 add_state_check_num(regex_t* reg, int num)
215 {
217 
218  BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
219  return 0;
220 }
221 #endif
222 
223 static int
224 add_rel_addr(regex_t* reg, int addr)
225 {
226  RelAddrType ra = (RelAddrType )addr;
227 
228  BBUF_ADD(reg, &ra, SIZE_RELADDR);
229  return 0;
230 }
231 
232 static int
233 add_abs_addr(regex_t* reg, int addr)
234 {
235  AbsAddrType ra = (AbsAddrType )addr;
236 
237  BBUF_ADD(reg, &ra, SIZE_ABSADDR);
238  return 0;
239 }
240 
241 static int
243 {
244  LengthType l = (LengthType )len;
245 
246  BBUF_ADD(reg, &l, SIZE_LENGTH);
247  return 0;
248 }
249 
250 static int
251 add_mem_num(regex_t* reg, int num)
252 {
253  MemNumType n = (MemNumType )num;
254 
255  BBUF_ADD(reg, &n, SIZE_MEMNUM);
256  return 0;
257 }
258 
259 static int
260 add_pointer(regex_t* reg, void* addr)
261 {
262  PointerType ptr = (PointerType )addr;
263 
264  BBUF_ADD(reg, &ptr, SIZE_POINTER);
265  return 0;
266 }
267 
268 static int
270 {
271  BBUF_ADD(reg, &option, SIZE_OPTION);
272  return 0;
273 }
274 
275 static int
276 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
277 {
278  int r;
279 
280  r = add_opcode(reg, opcode);
281  if (r) return r;
282  r = add_rel_addr(reg, addr);
283  return r;
284 }
285 
286 static int
288 {
289  BBUF_ADD(reg, bytes, len);
290  return 0;
291 }
292 
293 static int
295 {
296  BBUF_ADD(reg, bs, SIZE_BITSET);
297  return 0;
298 }
299 
300 static int
301 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
302 {
303  int r;
304 
305  r = add_opcode(reg, opcode);
306  if (r) return r;
307  r = add_option(reg, option);
308  return r;
309 }
310 
311 static int compile_length_tree(Node* node, regex_t* reg);
312 static int compile_tree(Node* node, regex_t* reg);
313 
314 
315 #define IS_NEED_STR_LEN_OP_EXACT(op) \
316  ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
317  (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
318 
319 static int
320 select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case)
321 {
322  int op;
323 
324  if (ignore_case) {
325  switch (str_len) {
326  case 1: op = OP_EXACT1_IC; break;
327  default: op = OP_EXACTN_IC; break;
328  }
329  }
330  else {
331  switch (mb_len) {
332  case 1:
333  switch (str_len) {
334  case 1: op = OP_EXACT1; break;
335  case 2: op = OP_EXACT2; break;
336  case 3: op = OP_EXACT3; break;
337  case 4: op = OP_EXACT4; break;
338  case 5: op = OP_EXACT5; break;
339  default: op = OP_EXACTN; break;
340  }
341  break;
342 
343  case 2:
344  switch (str_len) {
345  case 1: op = OP_EXACTMB2N1; break;
346  case 2: op = OP_EXACTMB2N2; break;
347  case 3: op = OP_EXACTMB2N3; break;
348  default: op = OP_EXACTMB2N; break;
349  }
350  break;
351 
352  case 3:
353  op = OP_EXACTMB3N;
354  break;
355 
356  default:
357  op = OP_EXACTMBN;
358  break;
359  }
360  }
361  return op;
362 }
363 
364 static int
365 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
366 {
367  int r;
368  int saved_num_null_check = reg->num_null_check;
369 
370  if (empty_info != 0) {
372  if (r) return r;
373  r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
374  if (r) return r;
375  reg->num_null_check++;
376  }
377 
378  r = compile_tree(node, reg);
379  if (r) return r;
380 
381  if (empty_info != 0) {
382  if (empty_info == NQ_TARGET_IS_EMPTY)
383  r = add_opcode(reg, OP_NULL_CHECK_END);
384  else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
386  else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
388 
389  if (r) return r;
390  r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
391  }
392  return r;
393 }
394 
395 #ifdef USE_SUBEXP_CALL
396 static int
397 compile_call(CallNode* node, regex_t* reg)
398 {
399  int r;
400 
401  r = add_opcode(reg, OP_CALL);
402  if (r) return r;
403  r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
404  node->target);
405  if (r) return r;
406  r = add_abs_addr(reg, 0 /*dummy addr.*/);
407  return r;
408 }
409 #endif
410 
411 static int
412 compile_tree_n_times(Node* node, int n, regex_t* reg)
413 {
414  int i, r;
415 
416  for (i = 0; i < n; i++) {
417  r = compile_tree(node, reg);
418  if (r) return r;
419  }
420  return 0;
421 }
422 
423 static int
425  regex_t* reg ARG_UNUSED, int ignore_case)
426 {
427  int len;
428  int op = select_str_opcode(mb_len, str_len, ignore_case);
429 
430  len = SIZE_OPCODE;
431 
432  if (op == OP_EXACTMBN) len += SIZE_LENGTH;
433  if (IS_NEED_STR_LEN_OP_EXACT(op))
434  len += SIZE_LENGTH;
435 
436  len += mb_len * str_len;
437  return len;
438 }
439 
440 static int
441 add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
442  regex_t* reg, int ignore_case)
443 {
444  int op = select_str_opcode(mb_len, str_len, ignore_case);
445  add_opcode(reg, op);
446 
447  if (op == OP_EXACTMBN)
448  add_length(reg, mb_len);
449 
450  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
451  if (op == OP_EXACTN_IC)
452  add_length(reg, mb_len * str_len);
453  else
454  add_length(reg, str_len);
455  }
456 
457  add_bytes(reg, s, mb_len * str_len);
458  return 0;
459 }
460 
461 
462 static int
464 {
465  int rlen, r, len, prev_len, slen, ambig;
466  OnigEncoding enc = reg->enc;
467  UChar *p, *prev;
468  StrNode* sn;
469 
470  sn = NSTR(node);
471  if (sn->end <= sn->s)
472  return 0;
473 
474  ambig = NSTRING_IS_AMBIG(node);
475 
476  p = prev = sn->s;
477  prev_len = enclen(enc, p, sn->end);
478  p += prev_len;
479  slen = 1;
480  rlen = 0;
481 
482  for (; p < sn->end; ) {
483  len = enclen(enc, p, sn->end);
484  if (len == prev_len) {
485  slen++;
486  }
487  else {
488  r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
489  rlen += r;
490  prev = p;
491  slen = 1;
492  prev_len = len;
493  }
494  p += len;
495  }
496  r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
497  rlen += r;
498  return rlen;
499 }
500 
501 static int
503 {
504  if (sn->end <= sn->s)
505  return 0;
506 
507  return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
508 }
509 
510 static int
512 {
513  int r, len, prev_len, slen, ambig;
514  OnigEncoding enc = reg->enc;
515  UChar *p, *prev, *end;
516  StrNode* sn;
517 
518  sn = NSTR(node);
519  if (sn->end <= sn->s)
520  return 0;
521 
522  end = sn->end;
523  ambig = NSTRING_IS_AMBIG(node);
524 
525  p = prev = sn->s;
526  prev_len = enclen(enc, p, end);
527  p += prev_len;
528  slen = 1;
529 
530  for (; p < end; ) {
531  len = enclen(enc, p, end);
532  if (len == prev_len) {
533  slen++;
534  }
535  else {
536  r = add_compile_string(prev, prev_len, slen, reg, ambig);
537  if (r) return r;
538 
539  prev = p;
540  slen = 1;
541  prev_len = len;
542  }
543 
544  p += len;
545  }
546  return add_compile_string(prev, prev_len, slen, reg, ambig);
547 }
548 
549 static int
551 {
552  if (sn->end <= sn->s)
553  return 0;
554 
555  return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
556 }
557 
558 static int
560 {
561 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
562  add_length(reg, mbuf->used);
563  return add_bytes(reg, mbuf->p, mbuf->used);
564 #else
565  int r, pad_size;
567 
568  GET_ALIGNMENT_PAD_SIZE(p, pad_size);
569  add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
570  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
571 
572  r = add_bytes(reg, mbuf->p, mbuf->used);
573 
574  /* padding for return value from compile_length_cclass_node() to be fix. */
575  pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
576  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
577  return r;
578 #endif
579 }
580 
581 static int
583 {
584  int len;
585 
586  if (IS_NCCLASS_SHARE(cc)) {
587  len = SIZE_OPCODE + SIZE_POINTER;
588  return len;
589  }
590 
591  if (IS_NULL(cc->mbuf)) {
592  len = SIZE_OPCODE + SIZE_BITSET;
593  }
594  else {
595  if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
596  len = SIZE_OPCODE;
597  }
598  else {
599  len = SIZE_OPCODE + SIZE_BITSET;
600  }
601 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
602  len += SIZE_LENGTH + cc->mbuf->used;
603 #else
604  len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
605 #endif
606  }
607 
608  return len;
609 }
610 
611 static int
613 {
614  int r;
615 
616  if (IS_NCCLASS_SHARE(cc)) {
618  r = add_pointer(reg, cc);
619  return r;
620  }
621 
622  if (IS_NULL(cc->mbuf)) {
623  if (IS_NCCLASS_NOT(cc))
625  else
626  add_opcode(reg, OP_CCLASS);
627 
628  r = add_bitset(reg, cc->bs);
629  }
630  else {
631  if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
632  if (IS_NCCLASS_NOT(cc))
634  else
635  add_opcode(reg, OP_CCLASS_MB);
636 
637  r = add_multi_byte_cclass(cc->mbuf, reg);
638  }
639  else {
640  if (IS_NCCLASS_NOT(cc))
642  else
644 
645  r = add_bitset(reg, cc->bs);
646  if (r) return r;
647  r = add_multi_byte_cclass(cc->mbuf, reg);
648  }
649  }
650 
651  return r;
652 }
653 
654 static int
655 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
656 {
657 #define REPEAT_RANGE_ALLOC 4
658 
660 
661  if (reg->repeat_range_alloc == 0) {
664  reg->repeat_range = p;
666  }
667  else if (reg->repeat_range_alloc <= id) {
668  int n;
671  sizeof(OnigRepeatRange) * n);
673  reg->repeat_range = p;
674  reg->repeat_range_alloc = n;
675  }
676  else {
677  p = reg->repeat_range;
678  }
679 
680  p[id].lower = lower;
681  p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
682  return 0;
683 }
684 
685 static int
686 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
687  regex_t* reg)
688 {
689  int r;
690  int num_repeat = reg->num_repeat;
691 
692  r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
693  if (r) return r;
694  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
695  reg->num_repeat++;
696  if (r) return r;
697  r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
698  if (r) return r;
699 
700  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
701  if (r) return r;
702 
703  r = compile_tree_empty_check(qn->target, reg, empty_info);
704  if (r) return r;
705 
706  if (
707 #ifdef USE_SUBEXP_CALL
708  reg->num_call > 0 ||
709 #endif
712  }
713  else {
715  }
716  if (r) return r;
717  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
718  return r;
719 }
720 
721 static int
723 {
724  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
725  NTYPE(qn->target) == NT_CANY)
726  return 1;
727  else
728  return 0;
729 }
730 
731 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
732 #define CKN_ON (ckn > 0)
733 
734 #ifdef USE_COMBINATION_EXPLOSION_CHECK
735 
736 static int
738 {
739  int len, mod_tlen, cklen;
740  int ckn;
741  int infinite = IS_REPEAT_INFINITE(qn->upper);
742  int empty_info = qn->target_empty_info;
743  int tlen = compile_length_tree(qn->target, reg);
744 
745  if (tlen < 0) return tlen;
746 
747  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
748 
749  cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
750 
751  /* anychar repeat */
752  if (NTYPE(qn->target) == NT_CANY) {
753  if (qn->greedy && infinite) {
754  if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
755  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
756  else
757  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
758  }
759  }
760 
761  if (empty_info != 0)
762  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
763  else
764  mod_tlen = tlen;
765 
766  if (infinite && qn->lower <= 1) {
767  if (qn->greedy) {
768  if (qn->lower == 1)
769  len = SIZE_OP_JUMP;
770  else
771  len = 0;
772 
773  len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
774  }
775  else {
776  if (qn->lower == 0)
777  len = SIZE_OP_JUMP;
778  else
779  len = 0;
780 
781  len += mod_tlen + SIZE_OP_PUSH + cklen;
782  }
783  }
784  else if (qn->upper == 0) {
785  if (qn->is_refered != 0) /* /(?<n>..){0}/ */
786  len = SIZE_OP_JUMP + tlen;
787  else
788  len = 0;
789  }
790  else if (qn->upper == 1 && qn->greedy) {
791  if (qn->lower == 0) {
792  if (CKN_ON) {
793  len = SIZE_OP_STATE_CHECK_PUSH + tlen;
794  }
795  else {
796  len = SIZE_OP_PUSH + tlen;
797  }
798  }
799  else {
800  len = tlen;
801  }
802  }
803  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
804  len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
805  }
806  else {
807  len = SIZE_OP_REPEAT_INC
808  + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
809  if (CKN_ON)
810  len += SIZE_OP_STATE_CHECK;
811  }
812 
813  return len;
814 }
815 
816 static int
818 {
819  int r, mod_tlen;
820  int ckn;
821  int infinite = IS_REPEAT_INFINITE(qn->upper);
822  int empty_info = qn->target_empty_info;
823  int tlen = compile_length_tree(qn->target, reg);
824 
825  if (tlen < 0) return tlen;
826 
827  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
828 
829  if (is_anychar_star_quantifier(qn)) {
830  r = compile_tree_n_times(qn->target, qn->lower, reg);
831  if (r) return r;
832  if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
833  if (IS_MULTILINE(reg->options))
835  else
837  if (r) return r;
838  if (CKN_ON) {
839  r = add_state_check_num(reg, ckn);
840  if (r) return r;
841  }
842 
843  return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
844  }
845  else {
846  if (IS_MULTILINE(reg->options)) {
847  r = add_opcode(reg, (CKN_ON ?
849  : OP_ANYCHAR_ML_STAR));
850  }
851  else {
852  r = add_opcode(reg, (CKN_ON ?
854  : OP_ANYCHAR_STAR));
855  }
856  if (r) return r;
857  if (CKN_ON)
858  r = add_state_check_num(reg, ckn);
859 
860  return r;
861  }
862  }
863 
864  if (empty_info != 0)
865  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
866  else
867  mod_tlen = tlen;
868 
869  if (infinite && qn->lower <= 1) {
870  if (qn->greedy) {
871  if (qn->lower == 1) {
872  r = add_opcode_rel_addr(reg, OP_JUMP,
873  (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
874  if (r) return r;
875  }
876 
877  if (CKN_ON) {
879  if (r) return r;
880  r = add_state_check_num(reg, ckn);
881  if (r) return r;
882  r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
883  }
884  else {
885  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
886  }
887  if (r) return r;
888  r = compile_tree_empty_check(qn->target, reg, empty_info);
889  if (r) return r;
890  r = add_opcode_rel_addr(reg, OP_JUMP,
891  -(mod_tlen + (int )SIZE_OP_JUMP
892  + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
893  }
894  else {
895  if (qn->lower == 0) {
896  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
897  if (r) return r;
898  }
899  r = compile_tree_empty_check(qn->target, reg, empty_info);
900  if (r) return r;
901  if (CKN_ON) {
903  if (r) return r;
904  r = add_state_check_num(reg, ckn);
905  if (r) return r;
906  r = add_rel_addr(reg,
907  -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
908  }
909  else
910  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
911  }
912  }
913  else if (qn->upper == 0) {
914  if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
915  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
916  if (r) return r;
917  r = compile_tree(qn->target, reg);
918  }
919  else
920  r = 0;
921  }
922  else if (qn->upper == 1 && qn->greedy) {
923  if (qn->lower == 0) {
924  if (CKN_ON) {
926  if (r) return r;
927  r = add_state_check_num(reg, ckn);
928  if (r) return r;
929  r = add_rel_addr(reg, tlen);
930  }
931  else {
932  r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
933  }
934  if (r) return r;
935  }
936 
937  r = compile_tree(qn->target, reg);
938  }
939  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
940  if (CKN_ON) {
942  if (r) return r;
943  r = add_state_check_num(reg, ckn);
944  if (r) return r;
945  r = add_rel_addr(reg, SIZE_OP_JUMP);
946  }
947  else {
949  }
950 
951  if (r) return r;
952  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
953  if (r) return r;
954  r = compile_tree(qn->target, reg);
955  }
956  else {
957  r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
958  if (CKN_ON) {
959  if (r) return r;
960  r = add_opcode(reg, OP_STATE_CHECK);
961  if (r) return r;
962  r = add_state_check_num(reg, ckn);
963  }
964  }
965  return r;
966 }
967 
968 #else /* USE_COMBINATION_EXPLOSION_CHECK */
969 
970 static int
972 {
973  int len, mod_tlen;
974  int infinite = IS_REPEAT_INFINITE(qn->upper);
975  int empty_info = qn->target_empty_info;
976  int tlen = compile_length_tree(qn->target, reg);
977 
978  if (tlen < 0) return tlen;
979 
980  /* anychar repeat */
981  if (NTYPE(qn->target) == NT_CANY) {
982  if (qn->greedy && infinite) {
983  if (IS_NOT_NULL(qn->next_head_exact))
984  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
985  else
986  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
987  }
988  }
989 
990  if (empty_info != 0)
991  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
992  else
993  mod_tlen = tlen;
994 
995  if (infinite &&
996  (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
997  if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
998  len = SIZE_OP_JUMP;
999  }
1000  else {
1001  len = tlen * qn->lower;
1002  }
1003 
1004  if (qn->greedy) {
1005  if (IS_NOT_NULL(qn->head_exact))
1006  len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1007  else if (IS_NOT_NULL(qn->next_head_exact))
1008  len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1009  else
1010  len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1011  }
1012  else
1013  len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1014  }
1015  else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1016  len = SIZE_OP_JUMP + tlen;
1017  }
1018  else if (!infinite && qn->greedy &&
1019  (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1021  len = tlen * qn->lower;
1022  len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1023  }
1024  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1025  len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1026  }
1027  else {
1028  len = SIZE_OP_REPEAT_INC
1029  + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1030  }
1031 
1032  return len;
1033 }
1034 
1035 static int
1037 {
1038  int i, r, mod_tlen;
1039  int infinite = IS_REPEAT_INFINITE(qn->upper);
1040  int empty_info = qn->target_empty_info;
1041  int tlen = compile_length_tree(qn->target, reg);
1042 
1043  if (tlen < 0) return tlen;
1044 
1045  if (is_anychar_star_quantifier(qn)) {
1046  r = compile_tree_n_times(qn->target, qn->lower, reg);
1047  if (r) return r;
1048  if (IS_NOT_NULL(qn->next_head_exact)) {
1049  if (IS_MULTILINE(reg->options))
1051  else
1053  if (r) return r;
1054  return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1055  }
1056  else {
1057  if (IS_MULTILINE(reg->options))
1058  return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1059  else
1060  return add_opcode(reg, OP_ANYCHAR_STAR);
1061  }
1062  }
1063 
1064  if (empty_info != 0)
1065  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1066  else
1067  mod_tlen = tlen;
1068 
1069  if (infinite &&
1070  (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1071  if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1072  if (qn->greedy) {
1073  if (IS_NOT_NULL(qn->head_exact))
1075  else if (IS_NOT_NULL(qn->next_head_exact))
1077  else
1079  }
1080  else {
1082  }
1083  if (r) return r;
1084  }
1085  else {
1086  r = compile_tree_n_times(qn->target, qn->lower, reg);
1087  if (r) return r;
1088  }
1089 
1090  if (qn->greedy) {
1091  if (IS_NOT_NULL(qn->head_exact)) {
1093  mod_tlen + SIZE_OP_JUMP);
1094  if (r) return r;
1095  add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1096  r = compile_tree_empty_check(qn->target, reg, empty_info);
1097  if (r) return r;
1098  r = add_opcode_rel_addr(reg, OP_JUMP,
1099  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1100  }
1101  else if (IS_NOT_NULL(qn->next_head_exact)) {
1103  mod_tlen + SIZE_OP_JUMP);
1104  if (r) return r;
1105  add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1106  r = compile_tree_empty_check(qn->target, reg, empty_info);
1107  if (r) return r;
1108  r = add_opcode_rel_addr(reg, OP_JUMP,
1109  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1110  }
1111  else {
1112  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1113  if (r) return r;
1114  r = compile_tree_empty_check(qn->target, reg, empty_info);
1115  if (r) return r;
1116  r = add_opcode_rel_addr(reg, OP_JUMP,
1117  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1118  }
1119  }
1120  else {
1121  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1122  if (r) return r;
1123  r = compile_tree_empty_check(qn->target, reg, empty_info);
1124  if (r) return r;
1125  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1126  }
1127  }
1128  else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1129  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1130  if (r) return r;
1131  r = compile_tree(qn->target, reg);
1132  }
1133  else if (!infinite && qn->greedy &&
1134  (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1136  int n = qn->upper - qn->lower;
1137 
1138  r = compile_tree_n_times(qn->target, qn->lower, reg);
1139  if (r) return r;
1140 
1141  for (i = 0; i < n; i++) {
1142  r = add_opcode_rel_addr(reg, OP_PUSH,
1143  (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1144  if (r) return r;
1145  r = compile_tree(qn->target, reg);
1146  if (r) return r;
1147  }
1148  }
1149  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1151  if (r) return r;
1152  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1153  if (r) return r;
1154  r = compile_tree(qn->target, reg);
1155  }
1156  else {
1157  r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1158  }
1159  return r;
1160 }
1161 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1162 
1163 static int
1165 {
1166  int tlen;
1167  OnigOptionType prev = reg->options;
1168 
1169  reg->options = node->option;
1170  tlen = compile_length_tree(node->target, reg);
1171  reg->options = prev;
1172 
1173  if (tlen < 0) return tlen;
1174 
1175  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1177  + tlen + SIZE_OP_SET_OPTION;
1178  }
1179  else
1180  return tlen;
1181 }
1182 
1183 static int
1185 {
1186  int r;
1187  OnigOptionType prev = reg->options;
1188 
1189  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1190  r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1191  if (r) return r;
1192  r = add_opcode_option(reg, OP_SET_OPTION, prev);
1193  if (r) return r;
1194  r = add_opcode(reg, OP_FAIL);
1195  if (r) return r;
1196  }
1197 
1198  reg->options = node->option;
1199  r = compile_tree(node->target, reg);
1200  reg->options = prev;
1201 
1202  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1203  if (r) return r;
1204  r = add_opcode_option(reg, OP_SET_OPTION, prev);
1205  }
1206  return r;
1207 }
1208 
1209 static int
1211 {
1212  int len;
1213  int tlen;
1214 
1215  if (node->type == ENCLOSE_OPTION)
1216  return compile_length_option_node(node, reg);
1217 
1218  if (node->target) {
1219  tlen = compile_length_tree(node->target, reg);
1220  if (tlen < 0) return tlen;
1221  }
1222  else
1223  tlen = 0;
1224 
1225  switch (node->type) {
1226  case ENCLOSE_MEMORY:
1227 #ifdef USE_SUBEXP_CALL
1228  if (IS_ENCLOSE_CALLED(node)) {
1229  len = SIZE_OP_MEMORY_START_PUSH + tlen
1231  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1232  len += (IS_ENCLOSE_RECURSION(node)
1234  else
1235  len += (IS_ENCLOSE_RECURSION(node)
1237  }
1238  else
1239 #endif
1240  {
1241  if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1243  else
1244  len = SIZE_OP_MEMORY_START;
1245 
1246  len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1248  }
1249  break;
1250 
1253  QtfrNode* qn = NQTFR(node->target);
1254  tlen = compile_length_tree(qn->target, reg);
1255  if (tlen < 0) return tlen;
1256 
1257  len = tlen * qn->lower
1258  + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1259  }
1260  else {
1262  }
1263  break;
1264 
1265  default:
1266  return ONIGERR_TYPE_BUG;
1267  break;
1268  }
1269 
1270  return len;
1271 }
1272 
1273 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1274 
1275 static int
1277 {
1278  int r, len;
1279 
1280  if (node->type == ENCLOSE_OPTION)
1281  return compile_option_node(node, reg);
1282 
1283  switch (node->type) {
1284  case ENCLOSE_MEMORY:
1285 #ifdef USE_SUBEXP_CALL
1286  if (IS_ENCLOSE_CALLED(node)) {
1287  r = add_opcode(reg, OP_CALL);
1288  if (r) return r;
1290  node->state |= NST_ADDR_FIXED;
1291  r = add_abs_addr(reg, (int )node->call_addr);
1292  if (r) return r;
1293  len = compile_length_tree(node->target, reg);
1295  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1296  len += (IS_ENCLOSE_RECURSION(node)
1298  else
1299  len += (IS_ENCLOSE_RECURSION(node)
1301 
1302  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1303  if (r) return r;
1304  }
1305 #endif
1306  if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1307  r = add_opcode(reg, OP_MEMORY_START_PUSH);
1308  else
1309  r = add_opcode(reg, OP_MEMORY_START);
1310  if (r) return r;
1311  r = add_mem_num(reg, node->regnum);
1312  if (r) return r;
1313  r = compile_tree(node->target, reg);
1314  if (r) return r;
1315 #ifdef USE_SUBEXP_CALL
1316  if (IS_ENCLOSE_CALLED(node)) {
1317  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1318  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1320  else
1321  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1323 
1324  if (r) return r;
1325  r = add_mem_num(reg, node->regnum);
1326  if (r) return r;
1327  r = add_opcode(reg, OP_RETURN);
1328  }
1329  else
1330 #endif
1331  {
1332  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1333  r = add_opcode(reg, OP_MEMORY_END_PUSH);
1334  else
1335  r = add_opcode(reg, OP_MEMORY_END);
1336  if (r) return r;
1337  r = add_mem_num(reg, node->regnum);
1338  }
1339  break;
1340 
1343  QtfrNode* qn = NQTFR(node->target);
1344  r = compile_tree_n_times(qn->target, qn->lower, reg);
1345  if (r) return r;
1346 
1347  len = compile_length_tree(qn->target, reg);
1348  if (len < 0) return len;
1349 
1351  if (r) return r;
1352  r = compile_tree(qn->target, reg);
1353  if (r) return r;
1354  r = add_opcode(reg, OP_POP);
1355  if (r) return r;
1356  r = add_opcode_rel_addr(reg, OP_JUMP,
1357  -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1358  }
1359  else {
1360  r = add_opcode(reg, OP_PUSH_STOP_BT);
1361  if (r) return r;
1362  r = compile_tree(node->target, reg);
1363  if (r) return r;
1364  r = add_opcode(reg, OP_POP_STOP_BT);
1365  }
1366  break;
1367 
1368  default:
1369  return ONIGERR_TYPE_BUG;
1370  break;
1371  }
1372 
1373  return r;
1374 }
1375 
1376 static int
1378 {
1379  int len;
1380  int tlen = 0;
1381 
1382  if (node->target) {
1383  tlen = compile_length_tree(node->target, reg);
1384  if (tlen < 0) return tlen;
1385  }
1386 
1387  switch (node->type) {
1388  case ANCHOR_PREC_READ:
1389  len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1390  break;
1391  case ANCHOR_PREC_READ_NOT:
1392  len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1393  break;
1394  case ANCHOR_LOOK_BEHIND:
1395  len = SIZE_OP_LOOK_BEHIND + tlen;
1396  break;
1399  break;
1400 
1401  default:
1402  len = SIZE_OPCODE;
1403  break;
1404  }
1405 
1406  return len;
1407 }
1408 
1409 static int
1411 {
1412  int r, len;
1413 
1414  switch (node->type) {
1415  case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1416  case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1417  case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1418  case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1419  case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1420  case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1421 
1422  case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break;
1423  case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
1424 #ifdef USE_WORD_BEGIN_END
1425  case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break;
1426  case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break;
1427 #endif
1428 
1429  case ANCHOR_PREC_READ:
1430  r = add_opcode(reg, OP_PUSH_POS);
1431  if (r) return r;
1432  r = compile_tree(node->target, reg);
1433  if (r) return r;
1434  r = add_opcode(reg, OP_POP_POS);
1435  break;
1436 
1437  case ANCHOR_PREC_READ_NOT:
1438  len = compile_length_tree(node->target, reg);
1439  if (len < 0) return len;
1441  if (r) return r;
1442  r = compile_tree(node->target, reg);
1443  if (r) return r;
1444  r = add_opcode(reg, OP_FAIL_POS);
1445  break;
1446 
1447  case ANCHOR_LOOK_BEHIND:
1448  {
1449  int n;
1450  r = add_opcode(reg, OP_LOOK_BEHIND);
1451  if (r) return r;
1452  if (node->char_len < 0) {
1453  r = get_char_length_tree(node->target, reg, &n);
1455  }
1456  else
1457  n = node->char_len;
1458  r = add_length(reg, n);
1459  if (r) return r;
1460  r = compile_tree(node->target, reg);
1461  }
1462  break;
1463 
1465  {
1466  int n;
1467  len = compile_length_tree(node->target, reg);
1470  if (r) return r;
1471  if (node->char_len < 0) {
1472  r = get_char_length_tree(node->target, reg, &n);
1474  }
1475  else
1476  n = node->char_len;
1477  r = add_length(reg, n);
1478  if (r) return r;
1479  r = compile_tree(node->target, reg);
1480  if (r) return r;
1482  }
1483  break;
1484 
1485  default:
1486  return ONIGERR_TYPE_BUG;
1487  break;
1488  }
1489 
1490  return r;
1491 }
1492 
1493 static int
1495 {
1496  int len, type, r;
1497 
1498  type = NTYPE(node);
1499  switch (type) {
1500  case NT_LIST:
1501  len = 0;
1502  do {
1503  r = compile_length_tree(NCAR(node), reg);
1504  if (r < 0) return r;
1505  len += r;
1506  } while (IS_NOT_NULL(node = NCDR(node)));
1507  r = len;
1508  break;
1509 
1510  case NT_ALT:
1511  {
1512  int n;
1513 
1514  n = r = 0;
1515  do {
1516  r += compile_length_tree(NCAR(node), reg);
1517  n++;
1518  } while (IS_NOT_NULL(node = NCDR(node)));
1519  r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1520  }
1521  break;
1522 
1523  case NT_STR:
1524  if (NSTRING_IS_RAW(node))
1525  r = compile_length_string_raw_node(NSTR(node), reg);
1526  else
1527  r = compile_length_string_node(node, reg);
1528  break;
1529 
1530  case NT_CCLASS:
1531  r = compile_length_cclass_node(NCCLASS(node), reg);
1532  break;
1533 
1534  case NT_CTYPE:
1535  case NT_CANY:
1536  r = SIZE_OPCODE;
1537  break;
1538 
1539  case NT_BREF:
1540  {
1541  BRefNode* br = NBREF(node);
1542 
1543 #ifdef USE_BACKREF_WITH_LEVEL
1544  if (IS_BACKREF_NEST_LEVEL(br)) {
1546  SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1547  }
1548  else
1549 #endif
1550  if (br->back_num == 1) {
1551  r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1553  }
1554  else {
1555  r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1556  }
1557  }
1558  break;
1559 
1560 #ifdef USE_SUBEXP_CALL
1561  case NT_CALL:
1562  r = SIZE_OP_CALL;
1563  break;
1564 #endif
1565 
1566  case NT_QTFR:
1567  r = compile_length_quantifier_node(NQTFR(node), reg);
1568  break;
1569 
1570  case NT_ENCLOSE:
1571  r = compile_length_enclose_node(NENCLOSE(node), reg);
1572  break;
1573 
1574  case NT_ANCHOR:
1575  r = compile_length_anchor_node(NANCHOR(node), reg);
1576  break;
1577 
1578  default:
1579  return ONIGERR_TYPE_BUG;
1580  break;
1581  }
1582 
1583  return r;
1584 }
1585 
1586 static int
1588 {
1589  int n, type, len, pos, r = 0;
1590 
1591  type = NTYPE(node);
1592  switch (type) {
1593  case NT_LIST:
1594  do {
1595  r = compile_tree(NCAR(node), reg);
1596  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1597  break;
1598 
1599  case NT_ALT:
1600  {
1601  Node* x = node;
1602  len = 0;
1603  do {
1604  len += compile_length_tree(NCAR(x), reg);
1605  if (NCDR(x) != NULL) {
1606  len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1607  }
1608  } while (IS_NOT_NULL(x = NCDR(x)));
1609  pos = reg->used + len; /* goal position */
1610 
1611  do {
1612  len = compile_length_tree(NCAR(node), reg);
1613  if (IS_NOT_NULL(NCDR(node))) {
1614  r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1615  if (r) break;
1616  }
1617  r = compile_tree(NCAR(node), reg);
1618  if (r) break;
1619  if (IS_NOT_NULL(NCDR(node))) {
1620  len = pos - (reg->used + SIZE_OP_JUMP);
1621  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1622  if (r) break;
1623  }
1624  } while (IS_NOT_NULL(node = NCDR(node)));
1625  }
1626  break;
1627 
1628  case NT_STR:
1629  if (NSTRING_IS_RAW(node))
1630  r = compile_string_raw_node(NSTR(node), reg);
1631  else
1632  r = compile_string_node(node, reg);
1633  break;
1634 
1635  case NT_CCLASS:
1636  r = compile_cclass_node(NCCLASS(node), reg);
1637  break;
1638 
1639  case NT_CTYPE:
1640  {
1641  int op;
1642 
1643  switch (NCTYPE(node)->ctype) {
1644  case ONIGENC_CTYPE_WORD:
1645  if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1646  else op = OP_WORD;
1647  break;
1648  default:
1649  return ONIGERR_TYPE_BUG;
1650  break;
1651  }
1652  r = add_opcode(reg, op);
1653  }
1654  break;
1655 
1656  case NT_CANY:
1657  if (IS_MULTILINE(reg->options))
1658  r = add_opcode(reg, OP_ANYCHAR_ML);
1659  else
1660  r = add_opcode(reg, OP_ANYCHAR);
1661  break;
1662 
1663  case NT_BREF:
1664  {
1665  BRefNode* br = NBREF(node);
1666 
1667 #ifdef USE_BACKREF_WITH_LEVEL
1668  if (IS_BACKREF_NEST_LEVEL(br)) {
1670  if (r) return r;
1671  r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1672  if (r) return r;
1673  r = add_length(reg, br->nest_level);
1674  if (r) return r;
1675 
1676  goto add_bacref_mems;
1677  }
1678  else
1679 #endif
1680  if (br->back_num == 1) {
1681  n = br->back_static[0];
1682  if (IS_IGNORECASE(reg->options)) {
1683  r = add_opcode(reg, OP_BACKREFN_IC);
1684  if (r) return r;
1685  r = add_mem_num(reg, n);
1686  }
1687  else {
1688  switch (n) {
1689  case 1: r = add_opcode(reg, OP_BACKREF1); break;
1690  case 2: r = add_opcode(reg, OP_BACKREF2); break;
1691  default:
1692  r = add_opcode(reg, OP_BACKREFN);
1693  if (r) return r;
1694  r = add_mem_num(reg, n);
1695  break;
1696  }
1697  }
1698  }
1699  else {
1700  int i;
1701  int* p;
1702 
1703  if (IS_IGNORECASE(reg->options)) {
1704  r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1705  }
1706  else {
1707  r = add_opcode(reg, OP_BACKREF_MULTI);
1708  }
1709  if (r) return r;
1710 
1711 #ifdef USE_BACKREF_WITH_LEVEL
1712  add_bacref_mems:
1713 #endif
1714  r = add_length(reg, br->back_num);
1715  if (r) return r;
1716  p = BACKREFS_P(br);
1717  for (i = br->back_num - 1; i >= 0; i--) {
1718  r = add_mem_num(reg, p[i]);
1719  if (r) return r;
1720  }
1721  }
1722  }
1723  break;
1724 
1725 #ifdef USE_SUBEXP_CALL
1726  case NT_CALL:
1727  r = compile_call(NCALL(node), reg);
1728  break;
1729 #endif
1730 
1731  case NT_QTFR:
1732  r = compile_quantifier_node(NQTFR(node), reg);
1733  break;
1734 
1735  case NT_ENCLOSE:
1736  r = compile_enclose_node(NENCLOSE(node), reg);
1737  break;
1738 
1739  case NT_ANCHOR:
1740  r = compile_anchor_node(NANCHOR(node), reg);
1741  break;
1742 
1743  default:
1744 #ifdef ONIG_DEBUG
1745  fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1746 #endif
1747  break;
1748  }
1749 
1750  return r;
1751 }
1752 
1753 #ifdef USE_NAMED_GROUP
1754 
1755 static int
1756 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1757 {
1758  int r = 0;
1759  Node* node = *plink;
1760 
1761  switch (NTYPE(node)) {
1762  case NT_LIST:
1763  case NT_ALT:
1764  do {
1765  r = noname_disable_map(&(NCAR(node)), map, counter);
1766  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1767  break;
1768 
1769  case NT_QTFR:
1770  {
1771  Node** ptarget = &(NQTFR(node)->target);
1772  Node* old = *ptarget;
1773  r = noname_disable_map(ptarget, map, counter);
1774  if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1775  onig_reduce_nested_quantifier(node, *ptarget);
1776  }
1777  }
1778  break;
1779 
1780  case NT_ENCLOSE:
1781  {
1782  EncloseNode* en = NENCLOSE(node);
1783  if (en->type == ENCLOSE_MEMORY) {
1784  if (IS_ENCLOSE_NAMED_GROUP(en)) {
1785  (*counter)++;
1786  map[en->regnum].new_val = *counter;
1787  en->regnum = *counter;
1788  r = noname_disable_map(&(en->target), map, counter);
1789  }
1790  else {
1791  *plink = en->target;
1792  en->target = NULL_NODE;
1793  onig_node_free(node);
1794  r = noname_disable_map(plink, map, counter);
1795  }
1796  }
1797  else
1798  r = noname_disable_map(&(en->target), map, counter);
1799  }
1800  break;
1801 
1802  case NT_ANCHOR:
1803  {
1804  AnchorNode* an = NANCHOR(node);
1805  switch (an->type) {
1806  case ANCHOR_PREC_READ:
1807  case ANCHOR_PREC_READ_NOT:
1808  case ANCHOR_LOOK_BEHIND:
1810  r = noname_disable_map(&(an->target), map, counter);
1811  break;
1812  }
1813  }
1814  break;
1815 
1816  default:
1817  break;
1818  }
1819 
1820  return r;
1821 }
1822 
1823 static int
1824 renumber_node_backref(Node* node, GroupNumRemap* map)
1825 {
1826  int i, pos, n, old_num;
1827  int *backs;
1828  BRefNode* bn = NBREF(node);
1829 
1830  if (! IS_BACKREF_NAME_REF(bn))
1832 
1833  old_num = bn->back_num;
1834  if (IS_NULL(bn->back_dynamic))
1835  backs = bn->back_static;
1836  else
1837  backs = bn->back_dynamic;
1838 
1839  for (i = 0, pos = 0; i < old_num; i++) {
1840  n = map[backs[i]].new_val;
1841  if (n > 0) {
1842  backs[pos] = n;
1843  pos++;
1844  }
1845  }
1846 
1847  bn->back_num = pos;
1848  return 0;
1849 }
1850 
1851 static int
1852 renumber_by_map(Node* node, GroupNumRemap* map)
1853 {
1854  int r = 0;
1855 
1856  switch (NTYPE(node)) {
1857  case NT_LIST:
1858  case NT_ALT:
1859  do {
1860  r = renumber_by_map(NCAR(node), map);
1861  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1862  break;
1863  case NT_QTFR:
1864  r = renumber_by_map(NQTFR(node)->target, map);
1865  break;
1866  case NT_ENCLOSE:
1867  r = renumber_by_map(NENCLOSE(node)->target, map);
1868  break;
1869 
1870  case NT_BREF:
1871  r = renumber_node_backref(node, map);
1872  break;
1873 
1874  case NT_ANCHOR:
1875  {
1876  AnchorNode* an = NANCHOR(node);
1877  switch (an->type) {
1878  case ANCHOR_PREC_READ:
1879  case ANCHOR_PREC_READ_NOT:
1880  case ANCHOR_LOOK_BEHIND:
1882  r = renumber_by_map(an->target, map);
1883  break;
1884  }
1885  }
1886  break;
1887 
1888  default:
1889  break;
1890  }
1891 
1892  return r;
1893 }
1894 
1895 static int
1896 numbered_ref_check(Node* node)
1897 {
1898  int r = 0;
1899 
1900  switch (NTYPE(node)) {
1901  case NT_LIST:
1902  case NT_ALT:
1903  do {
1904  r = numbered_ref_check(NCAR(node));
1905  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1906  break;
1907  case NT_QTFR:
1908  r = numbered_ref_check(NQTFR(node)->target);
1909  break;
1910  case NT_ENCLOSE:
1911  r = numbered_ref_check(NENCLOSE(node)->target);
1912  break;
1913 
1914  case NT_BREF:
1915  if (! IS_BACKREF_NAME_REF(NBREF(node)))
1917  break;
1918 
1919  default:
1920  break;
1921  }
1922 
1923  return r;
1924 }
1925 
1926 static int
1927 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
1928 {
1929  int r, i, pos, counter;
1930  BitStatusType loc;
1931  GroupNumRemap* map;
1932 
1933  map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
1935  for (i = 1; i <= env->num_mem; i++) {
1936  map[i].new_val = 0;
1937  }
1938  counter = 0;
1939  r = noname_disable_map(root, map, &counter);
1940  if (r != 0) return r;
1941 
1942  r = renumber_by_map(*root, map);
1943  if (r != 0) return r;
1944 
1945  for (i = 1, pos = 1; i <= env->num_mem; i++) {
1946  if (map[i].new_val > 0) {
1947  SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
1948  pos++;
1949  }
1950  }
1951 
1952  loc = env->capture_history;
1954  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
1955  if (BIT_STATUS_AT(loc, i)) {
1957  }
1958  }
1959 
1960  env->num_mem = env->num_named;
1961  reg->num_mem = env->num_named;
1962 
1963  return onig_renumber_name_table(reg, map);
1964 }
1965 #endif /* USE_NAMED_GROUP */
1966 
1967 #ifdef USE_SUBEXP_CALL
1968 static int
1969 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
1970 {
1971  int i, offset;
1972  EncloseNode* en;
1973  AbsAddrType addr;
1974 
1975  for (i = 0; i < uslist->num; i++) {
1976  en = NENCLOSE(uslist->us[i].target);
1977  if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
1978  addr = en->call_addr;
1979  offset = uslist->us[i].offset;
1980 
1981  BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
1982  }
1983  return 0;
1984 }
1985 #endif
1986 
1987 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
1988 static int
1989 quantifiers_memory_node_info(Node* node)
1990 {
1991  int r = 0;
1992 
1993  switch (NTYPE(node)) {
1994  case NT_LIST:
1995  case NT_ALT:
1996  {
1997  int v;
1998  do {
1999  v = quantifiers_memory_node_info(NCAR(node));
2000  if (v > r) r = v;
2001  } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2002  }
2003  break;
2004 
2005 #ifdef USE_SUBEXP_CALL
2006  case NT_CALL:
2007  if (IS_CALL_RECURSION(NCALL(node))) {
2008  return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2009  }
2010  else
2011  r = quantifiers_memory_node_info(NCALL(node)->target);
2012  break;
2013 #endif
2014 
2015  case NT_QTFR:
2016  {
2017  QtfrNode* qn = NQTFR(node);
2018  if (qn->upper != 0) {
2019  r = quantifiers_memory_node_info(qn->target);
2020  }
2021  }
2022  break;
2023 
2024  case NT_ENCLOSE:
2025  {
2026  EncloseNode* en = NENCLOSE(node);
2027  switch (en->type) {
2028  case ENCLOSE_MEMORY:
2029  return NQ_TARGET_IS_EMPTY_MEM;
2030  break;
2031 
2032  case ENCLOSE_OPTION:
2034  r = quantifiers_memory_node_info(en->target);
2035  break;
2036  default:
2037  break;
2038  }
2039  }
2040  break;
2041 
2042  case NT_BREF:
2043  case NT_STR:
2044  case NT_CTYPE:
2045  case NT_CCLASS:
2046  case NT_CANY:
2047  case NT_ANCHOR:
2048  default:
2049  break;
2050  }
2051 
2052  return r;
2053 }
2054 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2055 
2056 static int
2058 {
2059  OnigDistance tmin;
2060  int r = 0;
2061 
2062  *min = 0;
2063  switch (NTYPE(node)) {
2064  case NT_BREF:
2065  {
2066  int i;
2067  int* backs;
2068  Node** nodes = SCANENV_MEM_NODES(env);
2069  BRefNode* br = NBREF(node);
2070  if (br->state & NST_RECURSION) break;
2071 
2072  backs = BACKREFS_P(br);
2073  if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2074  r = get_min_match_length(nodes[backs[0]], min, env);
2075  if (r != 0) break;
2076  for (i = 1; i < br->back_num; i++) {
2077  if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2078  r = get_min_match_length(nodes[backs[i]], &tmin, env);
2079  if (r != 0) break;
2080  if (*min > tmin) *min = tmin;
2081  }
2082  }
2083  break;
2084 
2085 #ifdef USE_SUBEXP_CALL
2086  case NT_CALL:
2087  if (IS_CALL_RECURSION(NCALL(node))) {
2088  EncloseNode* en = NENCLOSE(NCALL(node)->target);
2089  if (IS_ENCLOSE_MIN_FIXED(en))
2090  *min = en->min_len;
2091  }
2092  else
2093  r = get_min_match_length(NCALL(node)->target, min, env);
2094  break;
2095 #endif
2096 
2097  case NT_LIST:
2098  do {
2099  r = get_min_match_length(NCAR(node), &tmin, env);
2100  if (r == 0) *min += tmin;
2101  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2102  break;
2103 
2104  case NT_ALT:
2105  {
2106  Node *x, *y;
2107  y = node;
2108  do {
2109  x = NCAR(y);
2110  r = get_min_match_length(x, &tmin, env);
2111  if (r != 0) break;
2112  if (y == node) *min = tmin;
2113  else if (*min > tmin) *min = tmin;
2114  } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2115  }
2116  break;
2117 
2118  case NT_STR:
2119  {
2120  StrNode* sn = NSTR(node);
2121  *min = sn->end - sn->s;
2122  }
2123  break;
2124 
2125  case NT_CTYPE:
2126  *min = 1;
2127  break;
2128 
2129  case NT_CCLASS:
2130  case NT_CANY:
2131  *min = 1;
2132  break;
2133 
2134  case NT_QTFR:
2135  {
2136  QtfrNode* qn = NQTFR(node);
2137 
2138  if (qn->lower > 0) {
2139  r = get_min_match_length(qn->target, min, env);
2140  if (r == 0)
2141  *min = distance_multiply(*min, qn->lower);
2142  }
2143  }
2144  break;
2145 
2146  case NT_ENCLOSE:
2147  {
2148  EncloseNode* en = NENCLOSE(node);
2149  switch (en->type) {
2150  case ENCLOSE_MEMORY:
2151 #ifdef USE_SUBEXP_CALL
2152  if (IS_ENCLOSE_MIN_FIXED(en))
2153  *min = en->min_len;
2154  else {
2155  r = get_min_match_length(en->target, min, env);
2156  if (r == 0) {
2157  en->min_len = *min;
2159  }
2160  }
2161  break;
2162 #endif
2163  case ENCLOSE_OPTION:
2165  r = get_min_match_length(en->target, min, env);
2166  break;
2167  }
2168  }
2169  break;
2170 
2171  case NT_ANCHOR:
2172  default:
2173  break;
2174  }
2175 
2176  return r;
2177 }
2178 
2179 static int
2181 {
2182  OnigDistance tmax;
2183  int r = 0;
2184 
2185  *max = 0;
2186  switch (NTYPE(node)) {
2187  case NT_LIST:
2188  do {
2189  r = get_max_match_length(NCAR(node), &tmax, env);
2190  if (r == 0)
2191  *max = distance_add(*max, tmax);
2192  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2193  break;
2194 
2195  case NT_ALT:
2196  do {
2197  r = get_max_match_length(NCAR(node), &tmax, env);
2198  if (r == 0 && *max < tmax) *max = tmax;
2199  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2200  break;
2201 
2202  case NT_STR:
2203  {
2204  StrNode* sn = NSTR(node);
2205  *max = sn->end - sn->s;
2206  }
2207  break;
2208 
2209  case NT_CTYPE:
2210  *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2211  break;
2212 
2213  case NT_CCLASS:
2214  case NT_CANY:
2215  *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2216  break;
2217 
2218  case NT_BREF:
2219  {
2220  int i;
2221  int* backs;
2222  Node** nodes = SCANENV_MEM_NODES(env);
2223  BRefNode* br = NBREF(node);
2224  if (br->state & NST_RECURSION) {
2225  *max = ONIG_INFINITE_DISTANCE;
2226  break;
2227  }
2228  backs = BACKREFS_P(br);
2229  for (i = 0; i < br->back_num; i++) {
2230  if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2231  r = get_max_match_length(nodes[backs[i]], &tmax, env);
2232  if (r != 0) break;
2233  if (*max < tmax) *max = tmax;
2234  }
2235  }
2236  break;
2237 
2238 #ifdef USE_SUBEXP_CALL
2239  case NT_CALL:
2240  if (! IS_CALL_RECURSION(NCALL(node)))
2241  r = get_max_match_length(NCALL(node)->target, max, env);
2242  else
2243  *max = ONIG_INFINITE_DISTANCE;
2244  break;
2245 #endif
2246 
2247  case NT_QTFR:
2248  {
2249  QtfrNode* qn = NQTFR(node);
2250 
2251  if (qn->upper != 0) {
2252  r = get_max_match_length(qn->target, max, env);
2253  if (r == 0 && *max != 0) {
2254  if (! IS_REPEAT_INFINITE(qn->upper))
2255  *max = distance_multiply(*max, qn->upper);
2256  else
2257  *max = ONIG_INFINITE_DISTANCE;
2258  }
2259  }
2260  }
2261  break;
2262 
2263  case NT_ENCLOSE:
2264  {
2265  EncloseNode* en = NENCLOSE(node);
2266  switch (en->type) {
2267  case ENCLOSE_MEMORY:
2268 #ifdef USE_SUBEXP_CALL
2269  if (IS_ENCLOSE_MAX_FIXED(en))
2270  *max = en->max_len;
2271  else {
2272  r = get_max_match_length(en->target, max, env);
2273  if (r == 0) {
2274  en->max_len = *max;
2276  }
2277  }
2278  break;
2279 #endif
2280  case ENCLOSE_OPTION:
2282  r = get_max_match_length(en->target, max, env);
2283  break;
2284  }
2285  }
2286  break;
2287 
2288  case NT_ANCHOR:
2289  default:
2290  break;
2291  }
2292 
2293  return r;
2294 }
2295 
2296 #define GET_CHAR_LEN_VARLEN -1
2297 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2298 
2299 /* fixed size pattern node only */
2300 static int
2301 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2302 {
2303  int tlen;
2304  int r = 0;
2305 
2306  level++;
2307  *len = 0;
2308  switch (NTYPE(node)) {
2309  case NT_LIST:
2310  do {
2311  r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2312  if (r == 0)
2313  *len = (int)distance_add(*len, tlen);
2314  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2315  break;
2316 
2317  case NT_ALT:
2318  {
2319  int tlen2;
2320  int varlen = 0;
2321 
2322  r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2323  while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2324  r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2325  if (r == 0) {
2326  if (tlen != tlen2)
2327  varlen = 1;
2328  }
2329  }
2330  if (r == 0) {
2331  if (varlen != 0) {
2332  if (level == 1)
2334  else
2335  r = GET_CHAR_LEN_VARLEN;
2336  }
2337  else
2338  *len = tlen;
2339  }
2340  }
2341  break;
2342 
2343  case NT_STR:
2344  {
2345  StrNode* sn = NSTR(node);
2346  UChar *s = sn->s;
2347  while (s < sn->end) {
2348  s += enclen(reg->enc, s, sn->end);
2349  (*len)++;
2350  }
2351  }
2352  break;
2353 
2354  case NT_QTFR:
2355  {
2356  QtfrNode* qn = NQTFR(node);
2357  if (qn->lower == qn->upper) {
2358  r = get_char_length_tree1(qn->target, reg, &tlen, level);
2359  if (r == 0)
2360  *len = (int)distance_multiply(tlen, qn->lower);
2361  }
2362  else
2363  r = GET_CHAR_LEN_VARLEN;
2364  }
2365  break;
2366 
2367 #ifdef USE_SUBEXP_CALL
2368  case NT_CALL:
2369  if (! IS_CALL_RECURSION(NCALL(node)))
2370  r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2371  else
2372  r = GET_CHAR_LEN_VARLEN;
2373  break;
2374 #endif
2375 
2376  case NT_CTYPE:
2377  *len = 1;
2378  break;
2379 
2380  case NT_CCLASS:
2381  case NT_CANY:
2382  *len = 1;
2383  break;
2384 
2385  case NT_ENCLOSE:
2386  {
2387  EncloseNode* en = NENCLOSE(node);
2388  switch (en->type) {
2389  case ENCLOSE_MEMORY:
2390 #ifdef USE_SUBEXP_CALL
2391  if (IS_ENCLOSE_CLEN_FIXED(en))
2392  *len = en->char_len;
2393  else {
2394  r = get_char_length_tree1(en->target, reg, len, level);
2395  if (r == 0) {
2396  en->char_len = *len;
2398  }
2399  }
2400  break;
2401 #endif
2402  case ENCLOSE_OPTION:
2404  r = get_char_length_tree1(en->target, reg, len, level);
2405  break;
2406  default:
2407  break;
2408  }
2409  }
2410  break;
2411 
2412  case NT_ANCHOR:
2413  break;
2414 
2415  default:
2416  r = GET_CHAR_LEN_VARLEN;
2417  break;
2418  }
2419 
2420  return r;
2421 }
2422 
2423 static int
2425 {
2426  return get_char_length_tree1(node, reg, len, 0);
2427 }
2428 
2429 /* x is not included y ==> 1 : 0 */
2430 static int
2432 {
2433  int i;
2434  OnigDistance len;
2436  UChar *p, c;
2437  int ytype;
2438 
2439  retry:
2440  ytype = NTYPE(y);
2441  switch (NTYPE(x)) {
2442  case NT_CTYPE:
2443  {
2444  switch (ytype) {
2445  case NT_CTYPE:
2446  if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2447  NCTYPE(y)->not != NCTYPE(x)->not)
2448  return 1;
2449  else
2450  return 0;
2451  break;
2452 
2453  case NT_CCLASS:
2454  swap:
2455  {
2456  Node* tmp;
2457  tmp = x; x = y; y = tmp;
2458  goto retry;
2459  }
2460  break;
2461 
2462  case NT_STR:
2463  goto swap;
2464  break;
2465 
2466  default:
2467  break;
2468  }
2469  }
2470  break;
2471 
2472  case NT_CCLASS:
2473  {
2474  CClassNode* xc = NCCLASS(x);
2475  switch (ytype) {
2476  case NT_CTYPE:
2477  switch (NCTYPE(y)->ctype) {
2478  case ONIGENC_CTYPE_WORD:
2479  if (NCTYPE(y)->not == 0) {
2480  if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2481  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2482  if (BITSET_AT(xc->bs, i)) {
2483  if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2484  }
2485  }
2486  return 1;
2487  }
2488  return 0;
2489  }
2490  else {
2491  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2492  if (! IS_CODE_SB_WORD(reg->enc, i)) {
2493  if (!IS_NCCLASS_NOT(xc)) {
2494  if (BITSET_AT(xc->bs, i))
2495  return 0;
2496  }
2497  else {
2498  if (! BITSET_AT(xc->bs, i))
2499  return 0;
2500  }
2501  }
2502  }
2503  return 1;
2504  }
2505  break;
2506 
2507  default:
2508  break;
2509  }
2510  break;
2511 
2512  case NT_CCLASS:
2513  {
2514  int v;
2515  CClassNode* yc = NCCLASS(y);
2516 
2517  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2518  v = BITSET_AT(xc->bs, i);
2519  if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2520  (v == 0 && IS_NCCLASS_NOT(xc))) {
2521  v = BITSET_AT(yc->bs, i);
2522  if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2523  (v == 0 && IS_NCCLASS_NOT(yc)))
2524  return 0;
2525  }
2526  }
2527  if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2528  (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2529  return 1;
2530  return 0;
2531  }
2532  break;
2533 
2534  case NT_STR:
2535  goto swap;
2536  break;
2537 
2538  default:
2539  break;
2540  }
2541  }
2542  break;
2543 
2544  case NT_STR:
2545  {
2546  StrNode* xs = NSTR(x);
2547  if (NSTRING_LEN(x) == 0)
2548  break;
2549 
2550  c = *(xs->s);
2551  switch (ytype) {
2552  case NT_CTYPE:
2553  switch (NCTYPE(y)->ctype) {
2554  case ONIGENC_CTYPE_WORD:
2555  if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2556  return NCTYPE(y)->not;
2557  else
2558  return !(NCTYPE(y)->not);
2559  break;
2560  default:
2561  break;
2562  }
2563  break;
2564 
2565  case NT_CCLASS:
2566  {
2567  CClassNode* cc = NCCLASS(y);
2568 
2569  code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2570  xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2571  return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2572  }
2573  break;
2574 
2575  case NT_STR:
2576  {
2577  UChar *q;
2578  StrNode* ys = NSTR(y);
2579  len = NSTRING_LEN(x);
2580  if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2581  if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2582  /* tiny version */
2583  return 0;
2584  }
2585  else {
2586  for (i = 0, p = ys->s, q = xs->s; (OnigDistance)i < len; i++, p++, q++) {
2587  if (*p != *q) return 1;
2588  }
2589  }
2590  }
2591  break;
2592 
2593  default:
2594  break;
2595  }
2596  }
2597  break;
2598 
2599  default:
2600  break;
2601  }
2602 
2603  return 0;
2604 }
2605 
2606 static Node*
2607 get_head_value_node(Node* node, int exact, regex_t* reg)
2608 {
2609  Node* n = NULL_NODE;
2610 
2611  switch (NTYPE(node)) {
2612  case NT_BREF:
2613  case NT_ALT:
2614  case NT_CANY:
2615 #ifdef USE_SUBEXP_CALL
2616  case NT_CALL:
2617 #endif
2618  break;
2619 
2620  case NT_CTYPE:
2621  case NT_CCLASS:
2622  if (exact == 0) {
2623  n = node;
2624  }
2625  break;
2626 
2627  case NT_LIST:
2628  n = get_head_value_node(NCAR(node), exact, reg);
2629  break;
2630 
2631  case NT_STR:
2632  {
2633  StrNode* sn = NSTR(node);
2634 
2635  if (sn->end <= sn->s)
2636  break;
2637 
2638  if (exact != 0 &&
2639  !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2640  }
2641  else {
2642  n = node;
2643  }
2644  }
2645  break;
2646 
2647  case NT_QTFR:
2648  {
2649  QtfrNode* qn = NQTFR(node);
2650  if (qn->lower > 0) {
2651  if (IS_NOT_NULL(qn->head_exact))
2652  n = qn->head_exact;
2653  else
2654  n = get_head_value_node(qn->target, exact, reg);
2655  }
2656  }
2657  break;
2658 
2659  case NT_ENCLOSE:
2660  {
2661  EncloseNode* en = NENCLOSE(node);
2662  switch (en->type) {
2663  case ENCLOSE_OPTION:
2664  {
2666 
2667  reg->options = NENCLOSE(node)->option;
2668  n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2669  reg->options = options;
2670  }
2671  break;
2672 
2673  case ENCLOSE_MEMORY:
2675  n = get_head_value_node(en->target, exact, reg);
2676  break;
2677  }
2678  }
2679  break;
2680 
2681  case NT_ANCHOR:
2682  if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2683  n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2684  break;
2685 
2686  default:
2687  break;
2688  }
2689 
2690  return n;
2691 }
2692 
2693 static int
2694 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2695 {
2696  int type, r = 0;
2697 
2698  type = NTYPE(node);
2699  if ((NTYPE2BIT(type) & type_mask) == 0)
2700  return 1;
2701 
2702  switch (type) {
2703  case NT_LIST:
2704  case NT_ALT:
2705  do {
2706  r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2707  anchor_mask);
2708  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2709  break;
2710 
2711  case NT_QTFR:
2712  r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2713  anchor_mask);
2714  break;
2715 
2716  case NT_ENCLOSE:
2717  {
2718  EncloseNode* en = NENCLOSE(node);
2719  if ((en->type & enclose_mask) == 0)
2720  return 1;
2721 
2722  r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2723  }
2724  break;
2725 
2726  case NT_ANCHOR:
2727  type = NANCHOR(node)->type;
2728  if ((type & anchor_mask) == 0)
2729  return 1;
2730 
2731  if (NANCHOR(node)->target)
2732  r = check_type_tree(NANCHOR(node)->target,
2733  type_mask, enclose_mask, anchor_mask);
2734  break;
2735 
2736  default:
2737  break;
2738  }
2739  return r;
2740 }
2741 
2742 #ifdef USE_SUBEXP_CALL
2743 
2744 #define RECURSION_EXIST 1
2745 #define RECURSION_INFINITE 2
2746 
2747 static int
2748 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2749 {
2750  int type;
2751  int r = 0;
2752 
2753  type = NTYPE(node);
2754  switch (type) {
2755  case NT_LIST:
2756  {
2757  Node *x;
2758  OnigDistance min;
2759  int ret;
2760 
2761  x = node;
2762  do {
2763  ret = subexp_inf_recursive_check(NCAR(x), env, head);
2764  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2765  r |= ret;
2766  if (head) {
2767  ret = get_min_match_length(NCAR(x), &min, env);
2768  if (ret != 0) return ret;
2769  if (min != 0) head = 0;
2770  }
2771  } while (IS_NOT_NULL(x = NCDR(x)));
2772  }
2773  break;
2774 
2775  case NT_ALT:
2776  {
2777  int ret;
2778  r = RECURSION_EXIST;
2779  do {
2780  ret = subexp_inf_recursive_check(NCAR(node), env, head);
2781  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2782  r &= ret;
2783  } while (IS_NOT_NULL(node = NCDR(node)));
2784  }
2785  break;
2786 
2787  case NT_QTFR:
2788  r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2789  if (r == RECURSION_EXIST) {
2790  if (NQTFR(node)->lower == 0) r = 0;
2791  }
2792  break;
2793 
2794  case NT_ANCHOR:
2795  {
2796  AnchorNode* an = NANCHOR(node);
2797  switch (an->type) {
2798  case ANCHOR_PREC_READ:
2799  case ANCHOR_PREC_READ_NOT:
2800  case ANCHOR_LOOK_BEHIND:
2802  r = subexp_inf_recursive_check(an->target, env, head);
2803  break;
2804  }
2805  }
2806  break;
2807 
2808  case NT_CALL:
2809  r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2810  break;
2811 
2812  case NT_ENCLOSE:
2813  if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2814  return 0;
2815  else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2816  return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2817  else {
2819  r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2821  }
2822  break;
2823 
2824  default:
2825  break;
2826  }
2827 
2828  return r;
2829 }
2830 
2831 static int
2832 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2833 {
2834  int type;
2835  int r = 0;
2836 
2837  type = NTYPE(node);
2838  switch (type) {
2839  case NT_LIST:
2840  case NT_ALT:
2841  do {
2842  r = subexp_inf_recursive_check_trav(NCAR(node), env);
2843  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2844  break;
2845 
2846  case NT_QTFR:
2847  r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2848  break;
2849 
2850  case NT_ANCHOR:
2851  {
2852  AnchorNode* an = NANCHOR(node);
2853  switch (an->type) {
2854  case ANCHOR_PREC_READ:
2855  case ANCHOR_PREC_READ_NOT:
2856  case ANCHOR_LOOK_BEHIND:
2858  r = subexp_inf_recursive_check_trav(an->target, env);
2859  break;
2860  }
2861  }
2862  break;
2863 
2864  case NT_ENCLOSE:
2865  {
2866  EncloseNode* en = NENCLOSE(node);
2867 
2868  if (IS_ENCLOSE_RECURSION(en)) {
2870  r = subexp_inf_recursive_check(en->target, env, 1);
2871  if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
2873  }
2874  r = subexp_inf_recursive_check_trav(en->target, env);
2875  }
2876 
2877  break;
2878 
2879  default:
2880  break;
2881  }
2882 
2883  return r;
2884 }
2885 
2886 static int
2887 subexp_recursive_check(Node* node)
2888 {
2889  int r = 0;
2890 
2891  switch (NTYPE(node)) {
2892  case NT_LIST:
2893  case NT_ALT:
2894  do {
2895  r |= subexp_recursive_check(NCAR(node));
2896  } while (IS_NOT_NULL(node = NCDR(node)));
2897  break;
2898 
2899  case NT_QTFR:
2900  r = subexp_recursive_check(NQTFR(node)->target);
2901  break;
2902 
2903  case NT_ANCHOR:
2904  {
2905  AnchorNode* an = NANCHOR(node);
2906  switch (an->type) {
2907  case ANCHOR_PREC_READ:
2908  case ANCHOR_PREC_READ_NOT:
2909  case ANCHOR_LOOK_BEHIND:
2911  r = subexp_recursive_check(an->target);
2912  break;
2913  }
2914  }
2915  break;
2916 
2917  case NT_CALL:
2918  r = subexp_recursive_check(NCALL(node)->target);
2919  if (r != 0) SET_CALL_RECURSION(node);
2920  break;
2921 
2922  case NT_ENCLOSE:
2923  if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2924  return 0;
2925  else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2926  return 1; /* recursion */
2927  else {
2929  r = subexp_recursive_check(NENCLOSE(node)->target);
2931  }
2932  break;
2933 
2934  default:
2935  break;
2936  }
2937 
2938  return r;
2939 }
2940 
2941 
2942 static int
2943 subexp_recursive_check_trav(Node* node, ScanEnv* env)
2944 {
2945 #define FOUND_CALLED_NODE 1
2946 
2947  int type;
2948  int r = 0;
2949 
2950  type = NTYPE(node);
2951  switch (type) {
2952  case NT_LIST:
2953  case NT_ALT:
2954  {
2955  int ret;
2956  do {
2957  ret = subexp_recursive_check_trav(NCAR(node), env);
2958  if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
2959  else if (ret < 0) return ret;
2960  } while (IS_NOT_NULL(node = NCDR(node)));
2961  }
2962  break;
2963 
2964  case NT_QTFR:
2965  r = subexp_recursive_check_trav(NQTFR(node)->target, env);
2966  if (NQTFR(node)->upper == 0) {
2967  if (r == FOUND_CALLED_NODE)
2968  NQTFR(node)->is_refered = 1;
2969  }
2970  break;
2971 
2972  case NT_ANCHOR:
2973  {
2974  AnchorNode* an = NANCHOR(node);
2975  switch (an->type) {
2976  case ANCHOR_PREC_READ:
2977  case ANCHOR_PREC_READ_NOT:
2978  case ANCHOR_LOOK_BEHIND:
2980  r = subexp_recursive_check_trav(an->target, env);
2981  break;
2982  }
2983  }
2984  break;
2985 
2986  case NT_ENCLOSE:
2987  {
2988  EncloseNode* en = NENCLOSE(node);
2989 
2990  if (! IS_ENCLOSE_RECURSION(en)) {
2991  if (IS_ENCLOSE_CALLED(en)) {
2993  r = subexp_recursive_check(en->target);
2994  if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
2996  }
2997  }
2998  r = subexp_recursive_check_trav(en->target, env);
2999  if (IS_ENCLOSE_CALLED(en))
3000  r |= FOUND_CALLED_NODE;
3001  }
3002  break;
3003 
3004  default:
3005  break;
3006  }
3007 
3008  return r;
3009 }
3010 
3011 static int
3012 setup_subexp_call(Node* node, ScanEnv* env)
3013 {
3014  int type;
3015  int r = 0;
3016 
3017  type = NTYPE(node);
3018  switch (type) {
3019  case NT_LIST:
3020  do {
3021  r = setup_subexp_call(NCAR(node), env);
3022  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3023  break;
3024 
3025  case NT_ALT:
3026  do {
3027  r = setup_subexp_call(NCAR(node), env);
3028  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3029  break;
3030 
3031  case NT_QTFR:
3032  r = setup_subexp_call(NQTFR(node)->target, env);
3033  break;
3034  case NT_ENCLOSE:
3035  r = setup_subexp_call(NENCLOSE(node)->target, env);
3036  break;
3037 
3038  case NT_CALL:
3039  {
3040  CallNode* cn = NCALL(node);
3041  Node** nodes = SCANENV_MEM_NODES(env);
3042 
3043  if (cn->group_num != 0) {
3044  int gnum = cn->group_num;
3045 
3046 #ifdef USE_NAMED_GROUP
3047  if (env->num_named > 0 &&
3051  }
3052 #endif
3053  if (gnum > env->num_mem) {
3057  }
3058 
3059 #ifdef USE_NAMED_GROUP
3060  set_call_attr:
3061 #endif
3062  cn->target = nodes[cn->group_num];
3063  if (IS_NULL(cn->target)) {
3067  }
3070  cn->unset_addr_list = env->unset_addr_list;
3071  }
3072 #ifdef USE_NAMED_GROUP
3073  else {
3074  int *refs;
3075 
3076  int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3077  &refs);
3078  if (n <= 0) {
3082  }
3083  else if (n > 1) {
3087  }
3088  else {
3089  cn->group_num = refs[0];
3090  goto set_call_attr;
3091  }
3092  }
3093 #endif
3094  }
3095  break;
3096 
3097  case NT_ANCHOR:
3098  {
3099  AnchorNode* an = NANCHOR(node);
3100 
3101  switch (an->type) {
3102  case ANCHOR_PREC_READ:
3103  case ANCHOR_PREC_READ_NOT:
3104  case ANCHOR_LOOK_BEHIND:
3106  r = setup_subexp_call(an->target, env);
3107  break;
3108  }
3109  }
3110  break;
3111 
3112  default:
3113  break;
3114  }
3115 
3116  return r;
3117 }
3118 #endif
3119 
3120 /* divide different length alternatives in look-behind.
3121  (?<=A|B) ==> (?<=A)|(?<=B)
3122  (?<!A|B) ==> (?<!A)(?<!B)
3123 */
3124 static int
3126 {
3127  Node *head, *np, *insert_node;
3128  AnchorNode* an = NANCHOR(node);
3129  int anc_type = an->type;
3130 
3131  head = an->target;
3132  np = NCAR(head);
3133  swap_node(node, head);
3134  NCAR(node) = head;
3135  NANCHOR(head)->target = np;
3136 
3137  np = node;
3138  while ((np = NCDR(np)) != NULL_NODE) {
3139  insert_node = onig_node_new_anchor(anc_type);
3140  CHECK_NULL_RETURN_MEMERR(insert_node);
3141  NANCHOR(insert_node)->target = NCAR(np);
3142  NCAR(np) = insert_node;
3143  }
3144 
3145  if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3146  np = node;
3147  do {
3148  SET_NTYPE(np, NT_LIST); /* alt -> list */
3149  } while ((np = NCDR(np)) != NULL_NODE);
3150  }
3151  return 0;
3152 }
3153 
3154 static int
3156 {
3157  int r, len;
3158  AnchorNode* an = NANCHOR(node);
3159 
3160  r = get_char_length_tree(an->target, reg, &len);
3161  if (r == 0)
3162  an->char_len = len;
3163  else if (r == GET_CHAR_LEN_VARLEN)
3165  else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3168  else
3170  }
3171 
3172  return r;
3173 }
3174 
3175 static int
3176 next_setup(Node* node, Node* next_node, regex_t* reg)
3177 {
3178  int type;
3179 
3180  retry:
3181  type = NTYPE(node);
3182  if (type == NT_QTFR) {
3183  QtfrNode* qn = NQTFR(node);
3184  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3185 #ifdef USE_QTFR_PEEK_NEXT
3186  Node* n = get_head_value_node(next_node, 1, reg);
3187  /* '\0': for UTF-16BE etc... */
3188  if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3189  qn->next_head_exact = n;
3190  }
3191 #endif
3192  /* automatic posseivation a*b ==> (?>a*)b */
3193  if (qn->lower <= 1) {
3194  int ttype = NTYPE(qn->target);
3195  if (IS_NODE_TYPE_SIMPLE(ttype)) {
3196  Node *x, *y;
3197  x = get_head_value_node(qn->target, 0, reg);
3198  if (IS_NOT_NULL(x)) {
3199  y = get_head_value_node(next_node, 0, reg);
3200  if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3204  swap_node(node, en);
3205  NENCLOSE(node)->target = en;
3206  }
3207  }
3208  }
3209  }
3210  }
3211  }
3212  else if (type == NT_ENCLOSE) {
3213  EncloseNode* en = NENCLOSE(node);
3214  if (en->type == ENCLOSE_MEMORY) {
3215  node = en->target;
3216  goto retry;
3217  }
3218  }
3219  return 0;
3220 }
3221 
3222 
3223 static int
3225 {
3227  UChar *sbuf, *ebuf, *sp;
3228  int r, i, len;
3229  OnigDistance sbuf_size;
3230  StrNode* sn = NSTR(node);
3231 
3232  end = sn->end;
3233  sbuf_size = (end - sn->s) * 2;
3234  sbuf = (UChar* )xmalloc(sbuf_size);
3236  ebuf = sbuf + sbuf_size;
3237 
3238  sp = sbuf;
3239  p = sn->s;
3240  while (p < end) {
3241  len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3242  q = buf;
3243  for (i = 0; i < len; i++) {
3244  if (sp >= ebuf) {
3245  sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3247  sp = sbuf + sbuf_size;
3248  sbuf_size *= 2;
3249  ebuf = sbuf + sbuf_size;
3250  }
3251 
3252  *sp++ = buf[i];
3253  }
3254  }
3255 
3256  r = onig_node_str_set(node, sbuf, sp);
3257  if (r != 0) {
3258  xfree(sbuf);
3259  return r;
3260  }
3261 
3262  xfree(sbuf);
3263  return 0;
3264 }
3265 
3266 static int
3268  regex_t* reg)
3269 {
3270  int r;
3271  Node *node;
3272 
3273  node = onig_node_new_str(s, end);
3274  if (IS_NULL(node)) return ONIGERR_MEMORY;
3275 
3276  r = update_string_node_case_fold(reg, node);
3277  if (r != 0) {
3278  onig_node_free(node);
3279  return r;
3280  }
3281 
3282  NSTRING_SET_AMBIG(node);
3284  *rnode = node;
3285  return 0;
3286 }
3287 
3288 static int
3290  UChar *p, int slen, UChar *end,
3291  regex_t* reg, Node **rnode)
3292 {
3293  int r, i, j, len, varlen;
3294  Node *anode, *var_anode, *snode, *xnode, *an;
3296 
3297  *rnode = var_anode = NULL_NODE;
3298 
3299  varlen = 0;
3300  for (i = 0; i < item_num; i++) {
3301  if (items[i].byte_len != slen) {
3302  varlen = 1;
3303  break;
3304  }
3305  }
3306 
3307  if (varlen != 0) {
3308  *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3309  if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3310 
3311  xnode = onig_node_new_list(NULL, NULL);
3312  if (IS_NULL(xnode)) goto mem_err;
3313  NCAR(var_anode) = xnode;
3314 
3316  if (IS_NULL(anode)) goto mem_err;
3317  NCAR(xnode) = anode;
3318  }
3319  else {
3320  *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3321  if (IS_NULL(anode)) return ONIGERR_MEMORY;
3322  }
3323 
3324  snode = onig_node_new_str(p, p + slen);
3325  if (IS_NULL(snode)) goto mem_err;
3326 
3327  NCAR(anode) = snode;
3328 
3329  for (i = 0; i < item_num; i++) {
3330  snode = onig_node_new_str(NULL, NULL);
3331  if (IS_NULL(snode)) goto mem_err;
3332 
3333  for (j = 0; j < items[i].code_len; j++) {
3334  len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3335  if (len < 0) {
3336  r = len;
3337  goto mem_err2;
3338  }
3339 
3340  r = onig_node_str_cat(snode, buf, buf + len);
3341  if (r != 0) goto mem_err2;
3342  }
3343 
3345  if (IS_NULL(an)) {
3346  goto mem_err2;
3347  }
3348 
3349  if (items[i].byte_len != slen) {
3350  Node *rem;
3351  UChar *q = p + items[i].byte_len;
3352 
3353  if (q < end) {
3354  r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3355  if (r != 0) {
3356  onig_node_free(an);
3357  goto mem_err2;
3358  }
3359 
3360  xnode = onig_node_list_add(NULL_NODE, snode);
3361  if (IS_NULL(xnode)) {
3362  onig_node_free(an);
3363  onig_node_free(rem);
3364  goto mem_err2;
3365  }
3366  if (IS_NULL(onig_node_list_add(xnode, rem))) {
3367  onig_node_free(an);
3368  onig_node_free(xnode);
3369  onig_node_free(rem);
3370  goto mem_err;
3371  }
3372 
3373  NCAR(an) = xnode;
3374  }
3375  else {
3376  NCAR(an) = snode;
3377  }
3378 
3379  NCDR(var_anode) = an;
3380  var_anode = an;
3381  }
3382  else {
3383  NCAR(an) = snode;
3384  NCDR(anode) = an;
3385  anode = an;
3386  }
3387  }
3388 
3389  return varlen;
3390 
3391  mem_err2:
3392  onig_node_free(snode);
3393 
3394  mem_err:
3395  onig_node_free(*rnode);
3396 
3397  return ONIGERR_MEMORY;
3398 }
3399 
3400 static int
3402 {
3403 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3404 
3405  int r, n, len, alt_num;
3406  UChar *start, *end, *p;
3407  Node *top_root, *root, *snode, *prev_node;
3409  StrNode* sn = NSTR(node);
3410 
3411  if (NSTRING_IS_AMBIG(node)) return 0;
3412 
3413  start = sn->s;
3414  end = sn->end;
3415  if (start >= end) return 0;
3416 
3417  r = 0;
3418  top_root = root = prev_node = snode = NULL_NODE;
3419  alt_num = 1;
3420  p = start;
3421  while (p < end) {
3423  p, end, items);
3424  if (n < 0) {
3425  r = n;
3426  goto err;
3427  }
3428 
3429  len = enclen(reg->enc, p, end);
3430 
3431  if (n == 0) {
3432  if (IS_NULL(snode)) {
3433  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3434  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3435  if (IS_NULL(root)) {
3436  onig_node_free(prev_node);
3437  goto mem_err;
3438  }
3439  }
3440 
3441  prev_node = snode = onig_node_new_str(NULL, NULL);
3442  if (IS_NULL(snode)) goto mem_err;
3443  if (IS_NOT_NULL(root)) {
3444  if (IS_NULL(onig_node_list_add(root, snode))) {
3445  onig_node_free(snode);
3446  goto mem_err;
3447  }
3448  }
3449  }
3450 
3451  r = onig_node_str_cat(snode, p, p + len);
3452  if (r != 0) goto err;
3453  }
3454  else {
3455  alt_num *= (n + 1);
3456  if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3457 
3458  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3459  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3460  if (IS_NULL(root)) {
3461  onig_node_free(prev_node);
3462  goto mem_err;
3463  }
3464  }
3465 
3466  r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3467  if (r < 0) goto mem_err;
3468  if (r == 1) {
3469  if (IS_NULL(root)) {
3470  top_root = prev_node;
3471  }
3472  else {
3473  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3474  onig_node_free(prev_node);
3475  goto mem_err;
3476  }
3477  }
3478 
3479  root = NCAR(prev_node);
3480  }
3481  else { /* r == 0 */
3482  if (IS_NOT_NULL(root)) {
3483  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3484  onig_node_free(prev_node);
3485  goto mem_err;
3486  }
3487  }
3488  }
3489 
3490  snode = NULL_NODE;
3491  }
3492 
3493  p += len;
3494  }
3495 
3496  if (p < end) {
3497  Node *srem;
3498 
3499  r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3500  if (r != 0) goto mem_err;
3501 
3502  if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3503  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3504  if (IS_NULL(root)) {
3505  onig_node_free(srem);
3506  onig_node_free(prev_node);
3507  goto mem_err;
3508  }
3509  }
3510 
3511  if (IS_NULL(root)) {
3512  prev_node = srem;
3513  }
3514  else {
3515  if (IS_NULL(onig_node_list_add(root, srem))) {
3516  onig_node_free(srem);
3517  goto mem_err;
3518  }
3519  }
3520  }
3521 
3522  /* ending */
3523  top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3524  swap_node(node, top_root);
3525  onig_node_free(top_root);
3526  return 0;
3527 
3528  mem_err:
3529  r = ONIGERR_MEMORY;
3530 
3531  err:
3532  onig_node_free(top_root);
3533  return r;
3534 }
3535 
3536 
3537 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3538 
3539 #define CEC_THRES_NUM_BIG_REPEAT 512
3540 #define CEC_INFINITE_NUM 0x7fffffff
3541 
3542 #define CEC_IN_INFINITE_REPEAT (1<<0)
3543 #define CEC_IN_FINITE_REPEAT (1<<1)
3544 #define CEC_CONT_BIG_REPEAT (1<<2)
3545 
3546 static int
3547 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3548 {
3549  int type;
3550  int r = state;
3551 
3552  type = NTYPE(node);
3553  switch (type) {
3554  case NT_LIST:
3555  {
3556  Node* prev = NULL_NODE;
3557  do {
3558  r = setup_comb_exp_check(NCAR(node), r, env);
3559  prev = NCAR(node);
3560  } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3561  }
3562  break;
3563 
3564  case NT_ALT:
3565  {
3566  int ret;
3567  do {
3568  ret = setup_comb_exp_check(NCAR(node), state, env);
3569  r |= ret;
3570  } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3571  }
3572  break;
3573 
3574  case NT_QTFR:
3575  {
3576  int child_state = state;
3577  int add_state = 0;
3578  QtfrNode* qn = NQTFR(node);
3579  Node* target = qn->target;
3580  int var_num;
3581 
3582  if (! IS_REPEAT_INFINITE(qn->upper)) {
3583  if (qn->upper > 1) {
3584  /* {0,1}, {1,1} are allowed */
3585  child_state |= CEC_IN_FINITE_REPEAT;
3586 
3587  /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3588  if (env->backrefed_mem == 0) {
3589  if (NTYPE(qn->target) == NT_ENCLOSE) {
3590  EncloseNode* en = NENCLOSE(qn->target);
3591  if (en->type == ENCLOSE_MEMORY) {
3592  if (NTYPE(en->target) == NT_QTFR) {
3593  QtfrNode* q = NQTFR(en->target);
3594  if (IS_REPEAT_INFINITE(q->upper)
3595  && q->greedy == qn->greedy) {
3596  qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3597  if (qn->upper == 1)
3598  child_state = state;
3599  }
3600  }
3601  }
3602  }
3603  }
3604  }
3605  }
3606 
3607  if (state & CEC_IN_FINITE_REPEAT) {
3608  qn->comb_exp_check_num = -1;
3609  }
3610  else {
3611  if (IS_REPEAT_INFINITE(qn->upper)) {
3612  var_num = CEC_INFINITE_NUM;
3613  child_state |= CEC_IN_INFINITE_REPEAT;
3614  }
3615  else {
3616  var_num = qn->upper - qn->lower;
3617  }
3618 
3619  if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3620  add_state |= CEC_CONT_BIG_REPEAT;
3621 
3622  if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3623  ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3624  var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3625  if (qn->comb_exp_check_num == 0) {
3626  env->num_comb_exp_check++;
3627  qn->comb_exp_check_num = env->num_comb_exp_check;
3628  if (env->curr_max_regnum > env->comb_exp_max_regnum)
3629  env->comb_exp_max_regnum = env->curr_max_regnum;
3630  }
3631  }
3632  }
3633 
3634  r = setup_comb_exp_check(target, child_state, env);
3635  r |= add_state;
3636  }
3637  break;
3638 
3639  case NT_ENCLOSE:
3640  {
3641  EncloseNode* en = NENCLOSE(node);
3642 
3643  switch (en->type) {
3644  case ENCLOSE_MEMORY:
3645  {
3646  if (env->curr_max_regnum < en->regnum)
3647  env->curr_max_regnum = en->regnum;
3648 
3649  r = setup_comb_exp_check(en->target, state, env);
3650  }
3651  break;
3652 
3653  default:
3654  r = setup_comb_exp_check(en->target, state, env);
3655  break;
3656  }
3657  }
3658  break;
3659 
3660 #ifdef USE_SUBEXP_CALL
3661  case NT_CALL:
3662  if (IS_CALL_RECURSION(NCALL(node)))
3663  env->has_recursion = 1;
3664  else
3665  r = setup_comb_exp_check(NCALL(node)->target, state, env);
3666  break;
3667 #endif
3668 
3669  default:
3670  break;
3671  }
3672 
3673  return r;
3674 }
3675 #endif
3676 
3677 #define IN_ALT (1<<0)
3678 #define IN_NOT (1<<1)
3679 #define IN_REPEAT (1<<2)
3680 #define IN_VAR_REPEAT (1<<3)
3681 
3682 /* setup_tree does the following work.
3683  1. check empty loop. (set qn->target_empty_info)
3684  2. expand ignore-case in char class.
3685  3. set memory status bit flags. (reg->mem_stats)
3686  4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3687  5. find invalid patterns in look-behind.
3688  6. expand repeated string.
3689  */
3690 static int
3691 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3692 {
3693  int type;
3694  int r = 0;
3695 
3696 restart:
3697  type = NTYPE(node);
3698  switch (type) {
3699  case NT_LIST:
3700  {
3701  Node* prev = NULL_NODE;
3702  do {
3703  r = setup_tree(NCAR(node), reg, state, env);
3704  if (IS_NOT_NULL(prev) && r == 0) {
3705  r = next_setup(prev, NCAR(node), reg);
3706  }
3707  prev = NCAR(node);
3708  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3709  }
3710  break;
3711 
3712  case NT_ALT:
3713  do {
3714  r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3715  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3716  break;
3717 
3718  case NT_CCLASS:
3719  break;
3720 
3721  case NT_STR:
3722  if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3723  r = expand_case_fold_string(node, reg);
3724  }
3725  break;
3726 
3727  case NT_CTYPE:
3728  case NT_CANY:
3729  break;
3730 
3731 #ifdef USE_SUBEXP_CALL
3732  case NT_CALL:
3733  break;
3734 #endif
3735 
3736  case NT_BREF:
3737  {
3738  int i;
3739  int* p;
3740  Node** nodes = SCANENV_MEM_NODES(env);
3741  BRefNode* br = NBREF(node);
3742  p = BACKREFS_P(br);
3743  for (i = 0; i < br->back_num; i++) {
3744  if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
3745  BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3746  BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3747 #ifdef USE_BACKREF_WITH_LEVEL
3748  if (IS_BACKREF_NEST_LEVEL(br)) {
3749  BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3750  }
3751 #endif
3752  SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3753  }
3754  }
3755  break;
3756 
3757  case NT_QTFR:
3758  {
3759  OnigDistance d;
3760  QtfrNode* qn = NQTFR(node);
3761  Node* target = qn->target;
3762 
3763  if ((state & IN_REPEAT) != 0) {
3764  qn->state |= NST_IN_REPEAT;
3765  }
3766 
3767  if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3768  r = get_min_match_length(target, &d, env);
3769  if (r) break;
3770  if (d == 0) {
3772 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3773  r = quantifiers_memory_node_info(target);
3774  if (r < 0) break;
3775  if (r > 0) {
3776  qn->target_empty_info = r;
3777  }
3778 #endif
3779 #if 0
3780  r = get_max_match_length(target, &d, env);
3781  if (r == 0 && d == 0) {
3782  /* ()* ==> ()?, ()+ ==> () */
3783  qn->upper = 1;
3784  if (qn->lower > 1) qn->lower = 1;
3785  if (NTYPE(target) == NT_STR) {
3786  qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
3787  }
3788  }
3789 #endif
3790  }
3791  }
3792 
3793  state |= IN_REPEAT;
3794  if (qn->lower != qn->upper)
3795  state |= IN_VAR_REPEAT;
3796  r = setup_tree(target, reg, state, env);
3797  if (r) break;
3798 
3799  /* expand string */
3800 #define EXPAND_STRING_MAX_LENGTH 100
3801  if (NTYPE(target) == NT_STR) {
3802  if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
3803  qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
3804  OnigDistance len = NSTRING_LEN(target);
3805  StrNode* sn = NSTR(target);
3806 
3807  if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
3808  int i, n = qn->lower;
3809  onig_node_conv_to_str_node(node, NSTR(target)->flag);
3810  for (i = 0; i < n; i++) {
3811  r = onig_node_str_cat(node, sn->s, sn->end);
3812  if (r) break;
3813  }
3814  onig_node_free(target);
3815  break; /* break case NT_QTFR: */
3816  }
3817  }
3818  }
3819 
3820 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
3821  if (qn->greedy && (qn->target_empty_info != 0)) {
3822  if (NTYPE(target) == NT_QTFR) {
3823  QtfrNode* tqn = NQTFR(target);
3824  if (IS_NOT_NULL(tqn->head_exact)) {
3825  qn->head_exact = tqn->head_exact;
3826  tqn->head_exact = NULL;
3827  }
3828  }
3829  else {
3830  qn->head_exact = get_head_value_node(qn->target, 1, reg);
3831  }
3832  }
3833 #endif
3834  }
3835  break;
3836 
3837  case NT_ENCLOSE:
3838  {
3839  EncloseNode* en = NENCLOSE(node);
3840 
3841  switch (en->type) {
3842  case ENCLOSE_OPTION:
3843  {
3845  reg->options = NENCLOSE(node)->option;
3846  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
3847  reg->options = options;
3848  }
3849  break;
3850 
3851  case ENCLOSE_MEMORY:
3852  if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
3854  /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
3855  }
3856  r = setup_tree(en->target, reg, state, env);
3857  break;
3858 
3860  {
3861  Node* target = en->target;
3862  r = setup_tree(target, reg, state, env);
3863  if (NTYPE(target) == NT_QTFR) {
3864  QtfrNode* tqn = NQTFR(target);
3865  if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
3866  tqn->greedy != 0) { /* (?>a*), a*+ etc... */
3867  int qtype = NTYPE(tqn->target);
3868  if (IS_NODE_TYPE_SIMPLE(qtype))
3870  }
3871  }
3872  }
3873  break;
3874  }
3875  }
3876  break;
3877 
3878  case NT_ANCHOR:
3879  {
3880  AnchorNode* an = NANCHOR(node);
3881 
3882  switch (an->type) {
3883  case ANCHOR_PREC_READ:
3884  r = setup_tree(an->target, reg, state, env);
3885  break;
3886  case ANCHOR_PREC_READ_NOT:
3887  r = setup_tree(an->target, reg, (state | IN_NOT), env);
3888  break;
3889 
3890 /* allowed node types in look-behind */
3891 #define ALLOWED_TYPE_IN_LB \
3892  ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
3893  BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
3894 
3895 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY )
3896 #define ALLOWED_ENCLOSE_IN_LB_NOT 0
3897 
3898 #define ALLOWED_ANCHOR_IN_LB \
3899 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3900 #define ALLOWED_ANCHOR_IN_LB_NOT \
3901 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3902 
3903  case ANCHOR_LOOK_BEHIND:
3904  {
3907  if (r < 0) return r;
3908  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3909  r = setup_look_behind(node, reg, env);
3910  if (r != 0) return r;
3911  if (NTYPE(node) != NT_ANCHOR) goto restart;
3912  r = setup_tree(an->target, reg, state, env);
3913  }
3914  break;
3915 
3917  {
3920  if (r < 0) return r;
3921  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3922  r = setup_look_behind(node, reg, env);
3923  if (r != 0) return r;
3924  if (NTYPE(node) != NT_ANCHOR) goto restart;
3925  r = setup_tree(an->target, reg, (state | IN_NOT), env);
3926  }
3927  break;
3928  }
3929  }
3930  break;
3931 
3932  default:
3933  break;
3934  }
3935 
3936  return r;
3937 }
3938 
3939 /* set skip map for Boyer-Moor search */
3940 static int
3942  UChar skip[], int** int_skip)
3943 {
3944  OnigDistance i, len;
3945 
3946  len = end - s;
3947  if (len < ONIG_CHAR_TABLE_SIZE) {
3948  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
3949 
3950  for (i = 0; i < len - 1; i++)
3951  skip[s[i]] = len - 1 - i;
3952  }
3953  else {
3954  if (IS_NULL(*int_skip)) {
3955  *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
3956  if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
3957  }
3958  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int)len;
3959 
3960  for (i = 0; i < len - 1; i++)
3961  (*int_skip)[s[i]] = (int)(len - 1 - i);
3962  }
3963  return 0;
3964 }
3965 
3966 #define OPT_EXACT_MAXLEN 24
3967 
3968 typedef struct {
3969  OnigDistance min; /* min byte length */
3970  OnigDistance max; /* max byte length */
3971 } MinMaxLen;
3972 
3973 typedef struct {
3979 } OptEnv;
3980 
3981 typedef struct {
3984 } OptAncInfo;
3985 
3986 typedef struct {
3987  MinMaxLen mmd; /* info position */
3989 
3992  int len;
3994 } OptExactInfo;
3995 
3996 typedef struct {
3997  MinMaxLen mmd; /* info position */
3999 
4000  int value; /* weighted value */
4002 } OptMapInfo;
4003 
4004 typedef struct {
4006 
4008  OptExactInfo exb; /* boundary */
4009  OptExactInfo exm; /* middle */
4010  OptExactInfo expr; /* prec read (?=...) */
4011 
4012  OptMapInfo map; /* boundary */
4013 } NodeOptInfo;
4014 
4015 
4016 static int
4018 {
4019  static const short int ByteValTable[] = {
4020  5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4021  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4022  12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4023  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4024  5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4025  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4026  5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4027  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4028  };
4029 
4030  if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
4031  if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4032  return 20;
4033  else
4034  return (int )ByteValTable[i];
4035  }
4036  else
4037  return 4; /* Take it easy. */
4038 }
4039 
4040 static int
4042 {
4043  /* 1000 / (min-max-dist + 1) */
4044  static const short int dist_vals[] = {
4045  1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4046  91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4047  48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4048  32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4049  24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4050  20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4051  16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4052  14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4053  12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4054  11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4055  };
4056 
4057  OnigDistance d;
4058 
4059  if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4060 
4061  d = mm->max - mm->min;
4062  if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
4063  /* return dist_vals[d] * 16 / (mm->min + 12); */
4064  return (int )dist_vals[d];
4065  else
4066  return 1;
4067 }
4068 
4069 static int
4071 {
4072  if (v2 <= 0) return -1;
4073  if (v1 <= 0) return 1;
4074 
4075  v1 *= distance_value(d1);
4076  v2 *= distance_value(d2);
4077 
4078  if (v2 > v1) return 1;
4079  if (v2 < v1) return -1;
4080 
4081  if (d2->min < d1->min) return 1;
4082  if (d2->min > d1->min) return -1;
4083  return 0;
4084 }
4085 
4086 static int
4088 {
4089  return (a->min == b->min && a->max == b->max) ? 1 : 0;
4090 }
4091 
4092 
4093 static void
4095 {
4096  mml->min = min;
4097  mml->max = max;
4098 }
4099 
4100 static void
4102 {
4103  mml->min = mml->max = 0;
4104 }
4105 
4106 static void
4108 {
4109  to->min = from->min;
4110  to->max = from->max;
4111 }
4112 
4113 static void
4115 {
4116  to->min = distance_add(to->min, from->min);
4117  to->max = distance_add(to->max, from->max);
4118 }
4119 
4120 #if 0
4121 static void
4122 add_len_mml(MinMaxLen* to, OnigDistance len)
4123 {
4124  to->min = distance_add(to->min, len);
4125  to->max = distance_add(to->max, len);
4126 }
4127 #endif
4128 
4129 static void
4131 {
4132  if (to->min > from->min) to->min = from->min;
4133  if (to->max < from->max) to->max = from->max;
4134 }
4135 
4136 static void
4138 {
4139  *to = *from;
4140 }
4141 
4142 static void
4144 {
4145  anc->left_anchor = 0;
4146  anc->right_anchor = 0;
4147 }
4148 
4149 static void
4151 {
4152  *to = *from;
4153 }
4154 
4155 static void
4157  OnigDistance left_len, OnigDistance right_len)
4158 {
4159  clear_opt_anc_info(to);
4160 
4161  to->left_anchor = left->left_anchor;
4162  if (left_len == 0) {
4163  to->left_anchor |= right->left_anchor;
4164  }
4165 
4166  to->right_anchor = right->right_anchor;
4167  if (right_len == 0) {
4168  to->right_anchor |= left->right_anchor;
4169  }
4170 }
4171 
4172 static int
4174 {
4175  if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4176  anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4177  anc == ANCHOR_PREC_READ_NOT)
4178  return 0;
4179 
4180  return 1;
4181 }
4182 
4183 static int
4185 {
4186  if ((to->left_anchor & anc) != 0) return 1;
4187 
4188  return ((to->right_anchor & anc) != 0 ? 1 : 0);
4189 }
4190 
4191 static void
4193 {
4194  if (is_left_anchor(anc))
4195  to->left_anchor |= anc;
4196  else
4197  to->right_anchor |= anc;
4198 }
4199 
4200 static void
4202 {
4203  if (is_left_anchor(anc))
4204  to->left_anchor &= ~anc;
4205  else
4206  to->right_anchor &= ~anc;
4207 }
4208 
4209 static void
4211 {
4212  to->left_anchor &= add->left_anchor;
4213  to->right_anchor &= add->right_anchor;
4214 }
4215 
4216 static int
4218 {
4219  return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4220 }
4221 
4222 static void
4224 {
4225  clear_mml(&ex->mmd);
4226  clear_opt_anc_info(&ex->anc);
4227  ex->reach_end = 0;
4228  ex->ignore_case = 0;
4229  ex->len = 0;
4230  ex->s[0] = '\0';
4231 }
4232 
4233 static void
4235 {
4236  *to = *from;
4237 }
4238 
4239 static void
4241 {
4242  int i, j, len;
4243  UChar *p, *end;
4244  OptAncInfo tanc;
4245 
4246  if (! to->ignore_case && add->ignore_case) {
4247  if (to->len >= add->len) return ; /* avoid */
4248 
4249  to->ignore_case = 1;
4250  }
4251 
4252  p = add->s;
4253  end = p + add->len;
4254  for (i = to->len; p < end; ) {
4255  len = enclen(enc, p, end);
4256  if (i + len > OPT_EXACT_MAXLEN) break;
4257  for (j = 0; j < len && p < end; j++)
4258  to->s[i++] = *p++;
4259  }
4260 
4261  to->len = i;
4262  to->reach_end = (p == end ? add->reach_end : 0);
4263 
4264  concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4265  if (! to->reach_end) tanc.right_anchor = 0;
4266  copy_opt_anc_info(&to->anc, &tanc);
4267 }
4268 
4269 static void
4271  int raw ARG_UNUSED, OnigEncoding enc)
4272 {
4273  int i, j, len;
4274  UChar *p;
4275 
4276  for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4277  len = enclen(enc, p, end);
4278  if (i + len > OPT_EXACT_MAXLEN) break;
4279  for (j = 0; j < len && p < end; j++)
4280  to->s[i++] = *p++;
4281  }
4282 
4283  to->len = i;
4284 }
4285 
4286 static void
4288 {
4289  int i, j, len;
4290 
4291  if (add->len == 0 || to->len == 0) {
4293  return ;
4294  }
4295 
4296  if (! is_equal_mml(&to->mmd, &add->mmd)) {
4298  return ;
4299  }
4300 
4301  for (i = 0; i < to->len && i < add->len; ) {
4302  if (to->s[i] != add->s[i]) break;
4303  len = enclen(env->enc, to->s + i, to->s + to->len);
4304 
4305  for (j = 1; j < len; j++) {
4306  if (to->s[i+j] != add->s[i+j]) break;
4307  }
4308  if (j < len) break;
4309  i += len;
4310  }
4311 
4312  if (! add->reach_end || i < add->len || i < to->len) {
4313  to->reach_end = 0;
4314  }
4315  to->len = i;
4316  to->ignore_case |= add->ignore_case;
4317 
4318  alt_merge_opt_anc_info(&to->anc, &add->anc);
4319  if (! to->reach_end) to->anc.right_anchor = 0;
4320 }
4321 
4322 static void
4324 {
4325  int v1, v2;
4326 
4327  v1 = now->len;
4328  v2 = alt->len;
4329 
4330  if (v2 == 0) {
4331  return ;
4332  }
4333  else if (v1 == 0) {
4334  copy_opt_exact_info(now, alt);
4335  return ;
4336  }
4337  else if (v1 <= 2 && v2 <= 2) {
4338  /* ByteValTable[x] is big value --> low price */
4339  v2 = map_position_value(enc, now->s[0]);
4340  v1 = map_position_value(enc, alt->s[0]);
4341 
4342  if (now->len > 1) v1 += 5;
4343  if (alt->len > 1) v2 += 5;
4344  }
4345 
4346  if (now->ignore_case == 0) v1 *= 2;
4347  if (alt->ignore_case == 0) v2 *= 2;
4348 
4349  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4350  copy_opt_exact_info(now, alt);
4351 }
4352 
4353 static void
4355 {
4356  static const OptMapInfo clean_info = {
4357  {0, 0}, {0, 0}, 0,
4358  {
4359  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4360  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4361  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4362  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4363  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4364  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4365  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4366  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4367  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4368  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4369  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4370  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4371  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4372  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4373  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4374  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4375  }
4376  };
4377 
4378  xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4379 }
4380 
4381 static void
4383 {
4384  *to = *from;
4385 }
4386 
4387 static void
4389 {
4390  if (map->map[c] == 0) {
4391  map->map[c] = 1;
4392  map->value += map_position_value(enc, c);
4393  }
4394 }
4395 
4396 static int
4398  OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4399 {
4402  int i, n;
4403 
4404  add_char_opt_map_info(map, p[0], enc);
4405 
4406  case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4407  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4408  if (n < 0) return n;
4409 
4410  for (i = 0; i < n; i++) {
4411  ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4412  add_char_opt_map_info(map, buf[0], enc);
4413  }
4414 
4415  return 0;
4416 }
4417 
4418 static void
4420 {
4421  const int z = 1<<15; /* 32768: something big value */
4422 
4423  int v1, v2;
4424 
4425  if (alt->value == 0) return ;
4426  if (now->value == 0) {
4427  copy_opt_map_info(now, alt);
4428  return ;
4429  }
4430 
4431  v1 = z / now->value;
4432  v2 = z / alt->value;
4433  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4434  copy_opt_map_info(now, alt);
4435 }
4436 
4437 static int
4439 {
4440 #define COMP_EM_BASE 20
4441  int ve, vm;
4442 
4443  if (m->value <= 0) return -1;
4444 
4445  ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
4446  vm = COMP_EM_BASE * 5 * 2 / m->value;
4447  return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4448 }
4449 
4450 static void
4452 {
4453  int i, val;
4454 
4455  /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4456  if (to->value == 0) return ;
4457  if (add->value == 0 || to->mmd.max < add->mmd.min) {
4458  clear_opt_map_info(to);
4459  return ;
4460  }
4461 
4462  alt_merge_mml(&to->mmd, &add->mmd);
4463 
4464  val = 0;
4465  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4466  if (add->map[i])
4467  to->map[i] = 1;
4468 
4469  if (to->map[i])
4470  val += map_position_value(enc, i);
4471  }
4472  to->value = val;
4473 
4474  alt_merge_opt_anc_info(&to->anc, &add->anc);
4475 }
4476 
4477 static void
4479 {
4480  copy_mml(&(opt->exb.mmd), mmd);
4481  copy_mml(&(opt->expr.mmd), mmd);
4482  copy_mml(&(opt->map.mmd), mmd);
4483 }
4484 
4485 static void
4487 {
4488  clear_mml(&opt->len);
4489  clear_opt_anc_info(&opt->anc);
4490  clear_opt_exact_info(&opt->exb);
4491  clear_opt_exact_info(&opt->exm);
4492  clear_opt_exact_info(&opt->expr);
4493  clear_opt_map_info(&opt->map);
4494 }
4495 
4496 static void
4498 {
4499  *to = *from;
4500 }
4501 
4502 static void
4504 {
4505  int exb_reach, exm_reach;
4506  OptAncInfo tanc;
4507 
4508  concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4509  copy_opt_anc_info(&to->anc, &tanc);
4510 
4511  if (add->exb.len > 0 && to->len.max == 0) {
4512  concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4513  to->len.max, add->len.max);
4514  copy_opt_anc_info(&add->exb.anc, &tanc);
4515  }
4516 
4517  if (add->map.value > 0 && to->len.max == 0) {
4518  if (add->map.mmd.max == 0)
4519  add->map.anc.left_anchor |= to->anc.left_anchor;
4520  }
4521 
4522  exb_reach = to->exb.reach_end;
4523  exm_reach = to->exm.reach_end;
4524 
4525  if (add->len.max != 0)
4526  to->exb.reach_end = to->exm.reach_end = 0;
4527 
4528  if (add->exb.len > 0) {
4529  if (exb_reach) {
4530  concat_opt_exact_info(&to->exb, &add->exb, enc);
4531  clear_opt_exact_info(&add->exb);
4532  }
4533  else if (exm_reach) {
4534  concat_opt_exact_info(&to->exm, &add->exb, enc);
4535  clear_opt_exact_info(&add->exb);
4536  }
4537  }
4538  select_opt_exact_info(enc, &to->exm, &add->exb);
4539  select_opt_exact_info(enc, &to->exm, &add->exm);
4540 
4541  if (to->expr.len > 0) {
4542  if (add->len.max > 0) {
4543  if (to->expr.len > (int )add->len.max)
4544  to->expr.len = (int)add->len.max;
4545 
4546  if (to->expr.mmd.max == 0)
4547  select_opt_exact_info(enc, &to->exb, &to->expr);
4548  else
4549  select_opt_exact_info(enc, &to->exm, &to->expr);
4550  }
4551  }
4552  else if (add->expr.len > 0) {
4553  copy_opt_exact_info(&to->expr, &add->expr);
4554  }
4555 
4556  select_opt_map_info(&to->map, &add->map);
4557 
4558  add_mml(&to->len, &add->len);
4559 }
4560 
4561 static void
4563 {
4564  alt_merge_opt_anc_info (&to->anc, &add->anc);
4565  alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4566  alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4567  alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4568  alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4569 
4570  alt_merge_mml(&to->len, &add->len);
4571 }
4572 
4573 
4574 #define MAX_NODE_OPT_INFO_REF_COUNT 5
4575 
4576 static int
4578 {
4579  int type;
4580  int r = 0;
4581 
4582  clear_node_opt_info(opt);
4583  set_bound_node_opt_info(opt, &env->mmd);
4584 
4585  type = NTYPE(node);
4586  switch (type) {
4587  case NT_LIST:
4588  {
4589  OptEnv nenv;
4590  NodeOptInfo nopt;
4591  Node* nd = node;
4592 
4593  copy_opt_env(&nenv, env);
4594  do {
4595  r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4596  if (r == 0) {
4597  add_mml(&nenv.mmd, &nopt.len);
4598  concat_left_node_opt_info(env->enc, opt, &nopt);
4599  }
4600  } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4601  }
4602  break;
4603 
4604  case NT_ALT:
4605  {
4606  NodeOptInfo nopt;
4607  Node* nd = node;
4608 
4609  do {
4610  r = optimize_node_left(NCAR(nd), &nopt, env);
4611  if (r == 0) {
4612  if (nd == node) copy_node_opt_info(opt, &nopt);
4613  else alt_merge_node_opt_info(opt, &nopt, env);
4614  }
4615  } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4616  }
4617  break;
4618 
4619  case NT_STR:
4620  {
4621  StrNode* sn = NSTR(node);
4622  OnigDistance slen = sn->end - sn->s;
4623  int is_raw = NSTRING_IS_RAW(node);
4624 
4625  if (! NSTRING_IS_AMBIG(node)) {
4626  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4627  NSTRING_IS_RAW(node), env->enc);
4628  if (slen > 0) {
4629  add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4630  }
4631  set_mml(&opt->len, slen, slen);
4632  }
4633  else {
4634  OnigDistance max;
4635 
4636  if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4637  int n = onigenc_strlen(env->enc, sn->s, sn->end);
4638  max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
4639  }
4640  else {
4641  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4642  is_raw, env->enc);
4643  opt->exb.ignore_case = 1;
4644 
4645  if (slen > 0) {
4646  r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
4647  env->enc, env->case_fold_flag);
4648  if (r != 0) break;
4649  }
4650 
4651  max = slen;
4652  }
4653 
4654  set_mml(&opt->len, slen, max);
4655  }
4656 
4657  if ((OnigDistance)opt->exb.len == slen)
4658  opt->exb.reach_end = 1;
4659  }
4660  break;
4661 
4662  case NT_CCLASS:
4663  {
4664  int i, z;
4665  CClassNode* cc = NCCLASS(node);
4666 
4667  /* no need to check ignore case. (setted in setup_tree()) */
4668 
4669  if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
4670  OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4672 
4673  set_mml(&opt->len, min, max);
4674  }
4675  else {
4676  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4677  z = BITSET_AT(cc->bs, i);
4678  if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
4679  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4680  }
4681  }
4682  set_mml(&opt->len, 1, 1);
4683  }
4684  }
4685  break;
4686 
4687  case NT_CTYPE:
4688  {
4689  int i, min, max;
4690 
4691  max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4692 
4693  if (max == 1) {
4694  min = 1;
4695 
4696  switch (NCTYPE(node)->ctype) {
4697  case ONIGENC_CTYPE_WORD:
4698  if (NCTYPE(node)->not != 0) {
4699  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4700  if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
4701  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4702  }
4703  }
4704  }
4705  else {
4706  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4707  if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
4708  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4709  }
4710  }
4711  }
4712  break;
4713  }
4714  }
4715  else {
4716  min = ONIGENC_MBC_MINLEN(env->enc);
4717  }
4718  set_mml(&opt->len, min, max);
4719  }
4720  break;
4721 
4722  case NT_CANY:
4723  {
4724  OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4726  set_mml(&opt->len, min, max);
4727  }
4728  break;
4729 
4730  case NT_ANCHOR:
4731  switch (NANCHOR(node)->type) {
4732  case ANCHOR_BEGIN_BUF:
4733  case ANCHOR_BEGIN_POSITION:
4734  case ANCHOR_BEGIN_LINE:
4735  case ANCHOR_END_BUF:
4736  case ANCHOR_SEMI_END_BUF:
4737  case ANCHOR_END_LINE:
4738  case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
4739  add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
4740  break;
4741 
4742  case ANCHOR_PREC_READ:
4743  {
4744  NodeOptInfo nopt;
4745 
4746  r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
4747  if (r == 0) {
4748  if (nopt.exb.len > 0)
4749  copy_opt_exact_info(&opt->expr, &nopt.exb);
4750  else if (nopt.exm.len > 0)
4751  copy_opt_exact_info(&opt->expr, &nopt.exm);
4752 
4753  opt->expr.reach_end = 0;
4754 
4755  if (nopt.map.value > 0)
4756  copy_opt_map_info(&opt->map, &nopt.map);
4757  }
4758  }
4759  break;
4760 
4761  case ANCHOR_PREC_READ_NOT:
4763  break;
4764  }
4765  break;
4766 
4767  case NT_BREF:
4768  {
4769  int i;
4770  int* backs;
4771  OnigDistance min, max, tmin, tmax;
4772  Node** nodes = SCANENV_MEM_NODES(env->scan_env);
4773  BRefNode* br = NBREF(node);
4774 
4775  if (br->state & NST_RECURSION) {
4776  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
4777  break;
4778  }
4779  backs = BACKREFS_P(br);
4780  r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
4781  if (r != 0) break;
4782  r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
4783  if (r != 0) break;
4784  for (i = 1; i < br->back_num; i++) {
4785  r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
4786  if (r != 0) break;
4787  r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
4788  if (r != 0) break;
4789  if (min > tmin) min = tmin;
4790  if (max < tmax) max = tmax;
4791  }
4792  if (r == 0) set_mml(&opt->len, min, max);
4793  }
4794  break;
4795 
4796 #ifdef USE_SUBEXP_CALL
4797  case NT_CALL:
4798  if (IS_CALL_RECURSION(NCALL(node)))
4799  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
4800  else {
4801  OnigOptionType save = env->options;
4802  env->options = NENCLOSE(NCALL(node)->target)->option;
4803  r = optimize_node_left(NCALL(node)->target, opt, env);
4804  env->options = save;
4805  }
4806  break;
4807 #endif
4808 
4809  case NT_QTFR:
4810  {
4811  int i;
4812  OnigDistance min, max;
4813  NodeOptInfo nopt;
4814  QtfrNode* qn = NQTFR(node);
4815 
4816  r = optimize_node_left(qn->target, &nopt, env);
4817  if (r) break;
4818 
4819  if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
4820  if (env->mmd.max == 0 &&
4821  NTYPE(qn->target) == NT_CANY && qn->greedy) {
4822  if (IS_MULTILINE(env->options))
4824  else
4826  }
4827  }
4828  else {
4829  if (qn->lower > 0) {
4830  copy_node_opt_info(opt, &nopt);
4831  if (nopt.exb.len > 0) {
4832  if (nopt.exb.reach_end) {
4833  for (i = 2; i <= qn->lower &&
4834  ! is_full_opt_exact_info(&opt->exb); i++) {
4835  concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
4836  }
4837  if (i < qn->lower) {
4838  opt->exb.reach_end = 0;
4839  }
4840  }
4841  }
4842 
4843  if (qn->lower != qn->upper) {
4844  opt->exb.reach_end = 0;
4845  opt->exm.reach_end = 0;
4846  }
4847  if (qn->lower > 1)
4848  opt->exm.reach_end = 0;
4849  }
4850  }
4851 
4852  min = distance_multiply(nopt.len.min, qn->lower);
4853  if (IS_REPEAT_INFINITE(qn->upper))
4854  max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
4855  else
4856  max = distance_multiply(nopt.len.max, qn->upper);
4857 
4858  set_mml(&opt->len, min, max);
4859  }
4860  break;
4861 
4862  case NT_ENCLOSE:
4863  {
4864  EncloseNode* en = NENCLOSE(node);
4865 
4866  switch (en->type) {
4867  case ENCLOSE_OPTION:
4868  {
4869  OnigOptionType save = env->options;
4870 
4871  env->options = en->option;
4872  r = optimize_node_left(en->target, opt, env);
4873  env->options = save;
4874  }
4875  break;
4876 
4877  case ENCLOSE_MEMORY:
4878 #ifdef USE_SUBEXP_CALL
4879  en->opt_count++;
4881  OnigDistance min, max;
4882 
4883  min = 0;
4884  max = ONIG_INFINITE_DISTANCE;
4885  if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
4886  if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
4887  set_mml(&opt->len, min, max);
4888  }
4889  else
4890 #endif
4891  {
4892  r = optimize_node_left(en->target, opt, env);
4893 
4895  if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
4897  }
4898  }
4899  break;
4900 
4902  r = optimize_node_left(en->target, opt, env);
4903  break;
4904  }
4905  }
4906  break;
4907 
4908  default:
4909 #ifdef ONIG_DEBUG
4910  if (!onig_is_prelude()) fprintf(stderr, "optimize_node_left: undefined node type %d\n",
4911  NTYPE(node));
4912 #endif
4913  r = ONIGERR_TYPE_BUG;
4914  break;
4915  }
4916 
4917  return r;
4918 }
4919 
4920 static int
4922 {
4923  int r;
4924 
4925  if (e->len == 0) return 0;
4926 
4927  if (e->ignore_case) {
4928  reg->exact = (UChar* )xmalloc(e->len);
4930  xmemcpy(reg->exact, e->s, e->len);
4931  reg->exact_end = reg->exact + e->len;
4933  }
4934  else {
4935  int allow_reverse;
4936 
4937  reg->exact = str_dup(e->s, e->s + e->len);
4939  reg->exact_end = reg->exact + e->len;
4940 
4941  allow_reverse =
4943 
4944  if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
4945  r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
4946  reg->map, &(reg->int_map));
4947  if (r) return r;
4948 
4949  reg->optimize = (allow_reverse != 0
4951  }
4952  else {
4954  }
4955  }
4956 
4957  reg->dmin = e->mmd.min;
4958  reg->dmax = e->mmd.max;
4959 
4960  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
4961  reg->threshold_len = (int)(reg->dmin + (reg->exact_end - reg->exact));
4962  }
4963 
4964  return 0;
4965 }
4966 
4967 static void
4969 {
4970  int i;
4971 
4972  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4973  reg->map[i] = m->map[i];
4974 
4975  reg->optimize = ONIG_OPTIMIZE_MAP;
4976  reg->dmin = m->mmd.min;
4977  reg->dmax = m->mmd.max;
4978 
4979  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
4980  reg->threshold_len = (int)(reg->dmin + 1);
4981  }
4982 }
4983 
4984 static void
4986 {
4987  reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
4988  reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
4989 }
4990 
4991 #ifdef ONIG_DEBUG
4992 static void print_optimize_info(FILE* f, regex_t* reg);
4993 #endif
4994 
4995 static int
4997 {
4998 
4999  int r;
5000  NodeOptInfo opt;
5001  OptEnv env;
5002 
5003  env.enc = reg->enc;
5004  env.options = reg->options;
5005  env.case_fold_flag = reg->case_fold_flag;
5006  env.scan_env = scan_env;
5007  clear_mml(&env.mmd);
5008 
5009  r = optimize_node_left(node, &opt, &env);
5010  if (r) return r;
5011 
5012  reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5015 
5017 
5018  if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5019  reg->anchor_dmin = opt.len.min;
5020  reg->anchor_dmax = opt.len.max;
5021  }
5022 
5023  if (opt.exb.len > 0 || opt.exm.len > 0) {
5024  select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5025  if (opt.map.value > 0 &&
5026  comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5027  goto set_map;
5028  }
5029  else {
5030  r = set_optimize_exact_info(reg, &opt.exb);
5031  set_sub_anchor(reg, &opt.exb.anc);
5032  }
5033  }
5034  else if (opt.map.value > 0) {
5035  set_map:
5036  set_optimize_map_info(reg, &opt.map);
5037  set_sub_anchor(reg, &opt.map.anc);
5038  }
5039  else {
5041  if (opt.len.max == 0)
5043  }
5044 
5045 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5046  if (!onig_is_prelude()) print_optimize_info(stderr, reg);
5047 #endif
5048  return r;
5049 }
5050 
5051 static void
5053 {
5055  reg->anchor = 0;
5056  reg->anchor_dmin = 0;
5057  reg->anchor_dmax = 0;
5058  reg->sub_anchor = 0;
5059  reg->exact_end = (UChar* )NULL;
5060  reg->threshold_len = 0;
5061  if (IS_NOT_NULL(reg->exact)) {
5062  xfree(reg->exact);
5063  reg->exact = (UChar* )NULL;
5064  }
5065 }
5066 
5067 #ifdef ONIG_DEBUG
5068 
5069 static void print_enc_string(FILE* fp, OnigEncoding enc,
5070  const UChar *s, const UChar *end)
5071 {
5072  fprintf(fp, "\nPATTERN: /");
5073 
5074  if (ONIGENC_MBC_MINLEN(enc) > 1) {
5075  const UChar *p;
5077 
5078  p = s;
5079  while (p < end) {
5080  code = ONIGENC_MBC_TO_CODE(enc, p, end);
5081  if (code >= 0x80) {
5082  fprintf(fp, " 0x%04x ", (int )code);
5083  }
5084  else {
5085  fputc((int )code, fp);
5086  }
5087 
5088  p += enclen(enc, p, end);
5089  }
5090  }
5091  else {
5092  while (s < end) {
5093  fputc((int )*s, fp);
5094  s++;
5095  }
5096  }
5097 
5098  fprintf(fp, "/ (%s)\n", enc->name);
5099 }
5100 
5101 static void
5102 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5103 {
5104  if (a == ONIG_INFINITE_DISTANCE)
5105  fputs("inf", f);
5106  else
5107  fprintf(f, "(%"PRIuSIZE")", a);
5108 
5109  fputs("-", f);
5110 
5111  if (b == ONIG_INFINITE_DISTANCE)
5112  fputs("inf", f);
5113  else
5114  fprintf(f, "(%"PRIuSIZE")", b);
5115 }
5116 
5117 static void
5118 print_anchor(FILE* f, int anchor)
5119 {
5120  int q = 0;
5121 
5122  fprintf(f, "[");
5123 
5124  if (anchor & ANCHOR_BEGIN_BUF) {
5125  fprintf(f, "begin-buf");
5126  q = 1;
5127  }
5128  if (anchor & ANCHOR_BEGIN_LINE) {
5129  if (q) fprintf(f, ", ");
5130  q = 1;
5131  fprintf(f, "begin-line");
5132  }
5133  if (anchor & ANCHOR_BEGIN_POSITION) {
5134  if (q) fprintf(f, ", ");
5135  q = 1;
5136  fprintf(f, "begin-pos");
5137  }
5138  if (anchor & ANCHOR_END_BUF) {
5139  if (q) fprintf(f, ", ");
5140  q = 1;
5141  fprintf(f, "end-buf");
5142  }
5143  if (anchor & ANCHOR_SEMI_END_BUF) {
5144  if (q) fprintf(f, ", ");
5145  q = 1;
5146  fprintf(f, "semi-end-buf");
5147  }
5148  if (anchor & ANCHOR_END_LINE) {
5149  if (q) fprintf(f, ", ");
5150  q = 1;
5151  fprintf(f, "end-line");
5152  }
5153  if (anchor & ANCHOR_ANYCHAR_STAR) {
5154  if (q) fprintf(f, ", ");
5155  q = 1;
5156  fprintf(f, "anychar-star");
5157  }
5158  if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5159  if (q) fprintf(f, ", ");
5160  fprintf(f, "anychar-star-pl");
5161  }
5162 
5163  fprintf(f, "]");
5164 }
5165 
5166 static void
5167 print_optimize_info(FILE* f, regex_t* reg)
5168 {
5169  static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5170  "EXACT_IC", "MAP" };
5171 
5172  fprintf(f, "optimize: %s\n", on[reg->optimize]);
5173  fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5174  if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5175  print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5176  fprintf(f, "\n");
5177 
5178  if (reg->optimize) {
5179  fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5180  fprintf(f, "\n");
5181  }
5182  fprintf(f, "\n");
5183 
5184  if (reg->exact) {
5185  UChar *p;
5186  fprintf(f, "exact: [");
5187  for (p = reg->exact; p < reg->exact_end; p++) {
5188  fputc(*p, f);
5189  }
5190  fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
5191  }
5192  else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5193  int c, i, n = 0;
5194 
5195  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5196  if (reg->map[i]) n++;
5197 
5198  fprintf(f, "map: n=%d\n", n);
5199  if (n > 0) {
5200  c = 0;
5201  fputc('[', f);
5202  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5203  if (reg->map[i] != 0) {
5204  if (c > 0) fputs(", ", f);
5205  c++;
5206  if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5208  fputc(i, f);
5209  else
5210  fprintf(f, "%d", i);
5211  }
5212  }
5213  fprintf(f, "]\n");
5214  }
5215  }
5216 }
5217 #endif /* ONIG_DEBUG */
5218 
5219 
5220 extern void
5222 {
5223  if (IS_NOT_NULL(reg)) {
5224  if (IS_NOT_NULL(reg->p)) xfree(reg->p);
5225  if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
5226  if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
5228  if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
5229  if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
5230 
5231 #ifdef USE_NAMED_GROUP
5232  onig_names_free(reg);
5233 #endif
5234  }
5235 }
5236 
5237 extern void
5239 {
5240  if (IS_NOT_NULL(reg)) {
5241  onig_free_body(reg);
5242  xfree(reg);
5243  }
5244 }
5245 
5246 size_t
5248 {
5249  size_t size = sizeof(regex_t);
5250  if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5251  if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5252  if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5253  if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5254  if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5255  if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5256 
5257  return size;
5258 }
5259 
5260 #define REGEX_TRANSFER(to,from) do {\
5261  (to)->state = ONIG_STATE_MODIFY;\
5262  onig_free_body(to);\
5263  xmemcpy(to, from, sizeof(regex_t));\
5264  xfree(from);\
5265 } while (0)
5266 
5267 extern void
5269 {
5271  REGEX_TRANSFER(to, from);
5273 }
5274 
5275 #define REGEX_CHAIN_HEAD(reg) do {\
5276  while (IS_NOT_NULL((reg)->chain)) {\
5277  (reg) = (reg)->chain;\
5278  }\
5279 } while (0)
5280 
5281 extern void
5283 {
5285  REGEX_CHAIN_HEAD(to);
5286  to->chain = add;
5288 }
5289 
5290 extern void
5292 {
5293  regex_t *head, *prev;
5294 
5295  prev = reg;
5296  head = prev->chain;
5297  if (IS_NOT_NULL(head)) {
5298  reg->state = ONIG_STATE_MODIFY;
5299  while (IS_NOT_NULL(head->chain)) {
5300  prev = head;
5301  head = head->chain;
5302  }
5303  prev->chain = (regex_t* )NULL;
5304  REGEX_TRANSFER(reg, head);
5305  }
5306 }
5307 
5308 #ifdef ONIG_DEBUG
5309 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
5310 #endif
5311 #ifdef ONIG_DEBUG_PARSE_TREE
5312 static void print_tree P_((FILE* f, Node* node));
5313 #endif
5314 
5315 extern int
5316 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5317  OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5318 {
5319 #define COMPILE_INIT_SIZE 20
5320 
5321  int r;
5322  OnigDistance init_size;
5323  Node* root;
5324  ScanEnv scan_env = {0};
5325 #ifdef USE_SUBEXP_CALL
5326  UnsetAddrList uslist;
5327 #endif
5328 
5329  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5330 
5331  scan_env.sourcefile = sourcefile;
5332  scan_env.sourceline = sourceline;
5333  reg->state = ONIG_STATE_COMPILING;
5334 
5335 #ifdef ONIG_DEBUG
5336  if (!onig_is_prelude()) print_enc_string(stderr, reg->enc, pattern, pattern_end);
5337 #endif
5338 
5339  if (reg->alloc == 0) {
5340  init_size = (pattern_end - pattern) * 2;
5341  if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5342  r = BBUF_INIT(reg, init_size);
5343  if (r != 0) goto end;
5344  }
5345  else
5346  reg->used = 0;
5347 
5348  reg->num_mem = 0;
5349  reg->num_repeat = 0;
5350  reg->num_null_check = 0;
5351  reg->repeat_range_alloc = 0;
5352  reg->repeat_range = (OnigRepeatRange* )NULL;
5353 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5354  reg->num_comb_exp_check = 0;
5355 #endif
5356 
5357  r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5358  if (r != 0) goto err;
5359 
5360 #ifdef ONIG_DEBUG_PARSE_TREE
5361 # if 0
5362  fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5363  if (!onig_is_prelude()) {
5364  print_tree(stderr, root);
5365  }
5366 # endif
5367 #endif
5368 
5369 #ifdef USE_NAMED_GROUP
5370  /* mixed use named group and no-named group */
5371  if (scan_env.num_named > 0 &&
5374  if (scan_env.num_named != scan_env.num_mem)
5375  r = disable_noname_group_capture(&root, reg, &scan_env);
5376  else
5377  r = numbered_ref_check(root);
5378 
5379  if (r != 0) goto err;
5380  }
5381 #endif
5382 
5383 #ifdef USE_SUBEXP_CALL
5384  if (scan_env.num_call > 0) {
5385  r = unset_addr_list_init(&uslist, scan_env.num_call);
5386  if (r != 0) goto err;
5387  scan_env.unset_addr_list = &uslist;
5388  r = setup_subexp_call(root, &scan_env);
5389  if (r != 0) goto err_unset;
5390  r = subexp_recursive_check_trav(root, &scan_env);
5391  if (r < 0) goto err_unset;
5392  r = subexp_inf_recursive_check_trav(root, &scan_env);
5393  if (r != 0) goto err_unset;
5394 
5395  reg->num_call = scan_env.num_call;
5396  }
5397  else
5398  reg->num_call = 0;
5399 #endif
5400 
5401  r = setup_tree(root, reg, 0, &scan_env);
5402  if (r != 0) goto err_unset;
5403 
5404 #ifdef ONIG_DEBUG_PARSE_TREE
5405  if (!onig_is_prelude()) print_tree(stderr, root);
5406 #endif
5407 
5408  reg->capture_history = scan_env.capture_history;
5409  reg->bt_mem_start = scan_env.bt_mem_start;
5410  reg->bt_mem_start |= reg->capture_history;
5411  if (IS_FIND_CONDITION(reg->options))
5413  else {
5414  reg->bt_mem_end = scan_env.bt_mem_end;
5415  reg->bt_mem_end |= reg->capture_history;
5416  }
5417 
5418 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5419  if (scan_env.backrefed_mem == 0
5420 #ifdef USE_SUBEXP_CALL
5421  || scan_env.num_call == 0
5422 #endif
5423  ) {
5424  setup_comb_exp_check(root, 0, &scan_env);
5425 #ifdef USE_SUBEXP_CALL
5426  if (scan_env.has_recursion != 0) {
5427  scan_env.num_comb_exp_check = 0;
5428  }
5429  else
5430 #endif
5431  if (scan_env.comb_exp_max_regnum > 0) {
5432  int i;
5433  for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5434  if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5435  scan_env.num_comb_exp_check = 0;
5436  break;
5437  }
5438  }
5439  }
5440  }
5441 
5442  reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5443 #endif
5444 
5445  clear_optimize_info(reg);
5446 #ifndef ONIG_DONT_OPTIMIZE
5447  r = set_optimize_info_from_tree(root, reg, &scan_env);
5448  if (r != 0) goto err_unset;
5449 #endif
5450 
5451  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5452  xfree(scan_env.mem_nodes_dynamic);
5453  scan_env.mem_nodes_dynamic = (Node** )NULL;
5454  }
5455 
5456  r = compile_tree(root, reg);
5457  if (r == 0) {
5458  r = add_opcode(reg, OP_END);
5459 #ifdef USE_SUBEXP_CALL
5460  if (scan_env.num_call > 0) {
5461  r = unset_addr_list_fix(&uslist, reg);
5462  unset_addr_list_end(&uslist);
5463  if (r) goto err;
5464  }
5465 #endif
5466 
5467  if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5469  else {
5470  if (reg->bt_mem_start != 0)
5472  else
5474  }
5475  }
5476 #ifdef USE_SUBEXP_CALL
5477  else if (scan_env.num_call > 0) {
5478  unset_addr_list_end(&uslist);
5479  }
5480 #endif
5481  onig_node_free(root);
5482 
5483 #ifdef ONIG_DEBUG_COMPILE
5484 #ifdef USE_NAMED_GROUP
5485  if (!onig_is_prelude()) onig_print_names(stderr, reg);
5486 #endif
5487  if (!onig_is_prelude()) print_compiled_byte_code_list(stderr, reg);
5488 #endif
5489 
5490  end:
5491  reg->state = ONIG_STATE_NORMAL;
5492  return r;
5493 
5494  err_unset:
5495 #ifdef USE_SUBEXP_CALL
5496  if (scan_env.num_call > 0) {
5497  unset_addr_list_end(&uslist);
5498  }
5499 #endif
5500  err:
5501  if (IS_NOT_NULL(scan_env.error)) {
5502  if (IS_NOT_NULL(einfo)) {
5503  einfo->enc = scan_env.enc;
5504  einfo->par = scan_env.error;
5505  einfo->par_end = scan_env.error_end;
5506  }
5507  }
5508 
5509  onig_node_free(root);
5510  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5511  xfree(scan_env.mem_nodes_dynamic);
5512  return r;
5513 }
5514 
5515 #ifdef USE_RECOMPILE_API
5516 extern int
5517 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5518  OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
5520 {
5521  int r;
5522  regex_t *new_reg;
5523 
5524  r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
5525  if (r) return r;
5526  if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
5527  onig_transfer(reg, new_reg);
5528  }
5529  else {
5530  onig_chain_link_add(reg, new_reg);
5531  }
5532  return 0;
5533 }
5534 #endif
5535 
5536 static int onig_inited = 0;
5537 
5538 extern int
5540  OnigCaseFoldType case_fold_flag,
5541  OnigEncoding enc, const OnigSyntaxType* syntax)
5542 {
5543  if (! onig_inited)
5544  onig_init();
5545 
5546  if (IS_NULL(reg))
5547  return ONIGERR_INVALID_ARGUMENT;
5548 
5549  if (ONIGENC_IS_UNDEF(enc))
5551 
5555  }
5556 
5557  (reg)->state = ONIG_STATE_MODIFY;
5558 
5559  if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5560  option |= syntax->options;
5561  option &= ~ONIG_OPTION_SINGLELINE;
5562  }
5563  else
5564  option |= syntax->options;
5565 
5566  (reg)->enc = enc;
5567  (reg)->options = option;
5568  (reg)->syntax = syntax;
5569  (reg)->optimize = 0;
5570  (reg)->exact = (UChar* )NULL;
5571  (reg)->int_map = (int* )NULL;
5572  (reg)->int_map_backward = (int* )NULL;
5573  (reg)->chain = (regex_t* )NULL;
5574 
5575  (reg)->p = (UChar* )NULL;
5576  (reg)->alloc = 0;
5577  (reg)->used = 0;
5578  (reg)->name_table = (void* )NULL;
5579 
5580  (reg)->case_fold_flag = case_fold_flag;
5581  return 0;
5582 }
5583 
5584 extern int
5585 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5586  const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5587  OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5588 {
5589  int r;
5590 
5591  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5592  if (r) return r;
5593 
5594  r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0);
5595  return r;
5596 }
5597 
5598 extern int
5599 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5600  OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
5601  OnigErrorInfo* einfo)
5602 {
5603  int r;
5604 
5605  *reg = (regex_t* )xmalloc(sizeof(regex_t));
5606  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5607 
5608  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5609  if (r) goto err;
5610 
5611  r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0);
5612  if (r) {
5613  err:
5614  onig_free(*reg);
5615  *reg = NULL;
5616  }
5617  return r;
5618 }
5619 
5620 
5621 extern int
5623 {
5624  if (onig_inited != 0)
5625  return 0;
5626 
5629 
5630  onig_inited = 1;
5631 
5632  onigenc_init();
5633  /* onigenc_set_default_caseconv_table((UChar* )0); */
5634 
5635 #ifdef ONIG_DEBUG_STATISTICS
5636  onig_statistics_init();
5637 #endif
5638 
5640  return 0;
5641 }
5642 
5643 
5644 extern int
5646 {
5648 
5649 #ifdef ONIG_DEBUG_STATISTICS
5650  if (!onig_is_prelude()) onig_print_statistics(stderr);
5651 #endif
5652 
5653 #ifdef USE_SHARED_CCLASS_TABLE
5655 #endif
5656 
5657 #ifdef USE_PARSE_TREE_NODE_RECYCLE
5659 #endif
5660 
5661  onig_inited = 0;
5662 
5665  return 0;
5666 }
5667 
5668 extern int
5670 {
5671  OnigCodePoint n, *data;
5672  OnigCodePoint low, high, x;
5673 
5674  GET_CODE_POINT(n, p);
5675  data = (OnigCodePoint* )p;
5676  data++;
5677 
5678  for (low = 0, high = n; low < high; ) {
5679  x = (low + high) >> 1;
5680  if (code > data[x * 2 + 1])
5681  low = x + 1;
5682  else
5683  high = x;
5684  }
5685 
5686  return ((low < n && code >= data[low * 2]) ? 1 : 0);
5687 }
5688 
5689 extern int
5691 {
5692  int found;
5693 
5694  if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
5695  if (IS_NULL(cc->mbuf)) {
5696  found = 0;
5697  }
5698  else {
5699  found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
5700  }
5701  }
5702  else {
5703  found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
5704  }
5705 
5706  if (IS_NCCLASS_NOT(cc))
5707  return !found;
5708  else
5709  return found;
5710 }
5711 
5712 extern int
5714 {
5715  int len;
5716 
5717  if (ONIGENC_MBC_MINLEN(enc) > 1) {
5718  len = 2;
5719  }
5720  else {
5721  len = ONIGENC_CODE_TO_MBCLEN(enc, code);
5722  }
5723  return onig_is_code_in_cc_len(len, code, cc);
5724 }
5725 
5726 
5727 #ifdef ONIG_DEBUG
5728 
5729 /* arguments type */
5730 #define ARG_SPECIAL -1
5731 #define ARG_NON 0
5732 #define ARG_RELADDR 1
5733 #define ARG_ABSADDR 2
5734 #define ARG_LENGTH 3
5735 #define ARG_MEMNUM 4
5736 #define ARG_OPTION 5
5737 #define ARG_STATE_CHECK 6
5738 
5739 OnigOpInfoType OnigOpInfo[] = {
5740  { OP_FINISH, "finish", ARG_NON },
5741  { OP_END, "end", ARG_NON },
5742  { OP_EXACT1, "exact1", ARG_SPECIAL },
5743  { OP_EXACT2, "exact2", ARG_SPECIAL },
5744  { OP_EXACT3, "exact3", ARG_SPECIAL },
5745  { OP_EXACT4, "exact4", ARG_SPECIAL },
5746  { OP_EXACT5, "exact5", ARG_SPECIAL },
5747  { OP_EXACTN, "exactn", ARG_SPECIAL },
5748  { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
5749  { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
5750  { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
5751  { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
5752  { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
5753  { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
5754  { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
5755  { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
5756  { OP_CCLASS, "cclass", ARG_SPECIAL },
5757  { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
5758  { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
5759  { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
5760  { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
5761  { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
5762  { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
5763  { OP_ANYCHAR, "anychar", ARG_NON },
5764  { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
5765  { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
5766  { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
5767  { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
5768  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
5769  { OP_WORD, "word", ARG_NON },
5770  { OP_NOT_WORD, "not-word", ARG_NON },
5771  { OP_WORD_BOUND, "word-bound", ARG_NON },
5772  { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
5773  { OP_WORD_BEGIN, "word-begin", ARG_NON },
5774  { OP_WORD_END, "word-end", ARG_NON },
5775  { OP_BEGIN_BUF, "begin-buf", ARG_NON },
5776  { OP_END_BUF, "end-buf", ARG_NON },
5777  { OP_BEGIN_LINE, "begin-line", ARG_NON },
5778  { OP_END_LINE, "end-line", ARG_NON },
5779  { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
5780  { OP_BEGIN_POSITION, "begin-position", ARG_NON },
5781  { OP_BACKREF1, "backref1", ARG_NON },
5782  { OP_BACKREF2, "backref2", ARG_NON },
5783  { OP_BACKREFN, "backrefn", ARG_MEMNUM },
5784  { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
5785  { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
5786  { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
5787  { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
5788  { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
5789  { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
5790  { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
5791  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
5792  { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
5793  { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
5794  { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
5795  { OP_SET_OPTION, "set-option", ARG_OPTION },
5796  { OP_FAIL, "fail", ARG_NON },
5797  { OP_JUMP, "jump", ARG_RELADDR },
5798  { OP_PUSH, "push", ARG_RELADDR },
5799  { OP_POP, "pop", ARG_NON },
5800  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
5801  { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
5802  { OP_REPEAT, "repeat", ARG_SPECIAL },
5803  { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
5804  { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
5805  { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
5806  { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
5807  { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
5808  { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
5809  { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
5810  { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
5811  { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
5812  { OP_PUSH_POS, "push-pos", ARG_NON },
5813  { OP_POP_POS, "pop-pos", ARG_NON },
5814  { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
5815  { OP_FAIL_POS, "fail-pos", ARG_NON },
5816  { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
5817  { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
5818  { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
5819  { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
5820  { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
5821  { OP_CALL, "call", ARG_ABSADDR },
5822  { OP_RETURN, "return", ARG_NON },
5823  { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
5824  { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
5825  { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
5826  { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
5828  "state-check-anychar-ml*", ARG_STATE_CHECK },
5829  { -1, "", ARG_NON }
5830 };
5831 
5832 static const char*
5833 op2name(int opcode)
5834 {
5835  int i;
5836 
5837  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
5838  if (opcode == OnigOpInfo[i].opcode)
5839  return OnigOpInfo[i].name;
5840  }
5841  return "";
5842 }
5843 
5844 static int
5845 op2arg_type(int opcode)
5846 {
5847  int i;
5848 
5849  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
5850  if (opcode == OnigOpInfo[i].opcode)
5851  return OnigOpInfo[i].arg_type;
5852  }
5853  return ARG_SPECIAL;
5854 }
5855 
5856 static void
5857 Indent(FILE* f, int indent)
5858 {
5859  int i;
5860  for (i = 0; i < indent; i++) putc(' ', f);
5861 }
5862 
5863 static void
5864 p_string(FILE* f, int len, UChar* s)
5865 {
5866  fputs(":", f);
5867  while (len-- > 0) { fputc(*s++, f); }
5868 }
5869 
5870 static void
5871 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
5872 {
5873  int x = len * mb_len;
5874 
5875  fprintf(f, ":%d:", len);
5876  while (x-- > 0) { fputc(*s++, f); }
5877 }
5878 
5879 extern void
5880 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
5881  OnigEncoding enc)
5882 {
5883  int i, n, arg_type;
5884  RelAddrType addr;
5885  LengthType len;
5886  MemNumType mem;
5887  StateCheckNumType scn;
5889  UChar *q;
5890 
5891  fprintf(f, "[%s", op2name(*bp));
5892  arg_type = op2arg_type(*bp);
5893  if (arg_type != ARG_SPECIAL) {
5894  bp++;
5895  switch (arg_type) {
5896  case ARG_NON:
5897  break;
5898  case ARG_RELADDR:
5899  GET_RELADDR_INC(addr, bp);
5900  fprintf(f, ":(%d)", addr);
5901  break;
5902  case ARG_ABSADDR:
5903  GET_ABSADDR_INC(addr, bp);
5904  fprintf(f, ":(%d)", addr);
5905  break;
5906  case ARG_LENGTH:
5907  GET_LENGTH_INC(len, bp);
5908  fprintf(f, ":%d", len);
5909  break;
5910  case ARG_MEMNUM:
5911  mem = *((MemNumType* )bp);
5912  bp += SIZE_MEMNUM;
5913  fprintf(f, ":%d", mem);
5914  break;
5915  case ARG_OPTION:
5916  {
5917  OnigOptionType option = *((OnigOptionType* )bp);
5918  bp += SIZE_OPTION;
5919  fprintf(f, ":%d", option);
5920  }
5921  break;
5922 
5923  case ARG_STATE_CHECK:
5924  scn = *((StateCheckNumType* )bp);
5925  bp += SIZE_STATE_CHECK_NUM;
5926  fprintf(f, ":%d", scn);
5927  break;
5928  }
5929  }
5930  else {
5931  switch (*bp++) {
5932  case OP_EXACT1:
5935  p_string(f, 1, bp++); break;
5936  case OP_EXACT2:
5937  p_string(f, 2, bp); bp += 2; break;
5938  case OP_EXACT3:
5939  p_string(f, 3, bp); bp += 3; break;
5940  case OP_EXACT4:
5941  p_string(f, 4, bp); bp += 4; break;
5942  case OP_EXACT5:
5943  p_string(f, 5, bp); bp += 5; break;
5944  case OP_EXACTN:
5945  GET_LENGTH_INC(len, bp);
5946  p_len_string(f, len, 1, bp);
5947  bp += len;
5948  break;
5949 
5950  case OP_EXACTMB2N1:
5951  p_string(f, 2, bp); bp += 2; break;
5952  case OP_EXACTMB2N2:
5953  p_string(f, 4, bp); bp += 4; break;
5954  case OP_EXACTMB2N3:
5955  p_string(f, 6, bp); bp += 6; break;
5956  case OP_EXACTMB2N:
5957  GET_LENGTH_INC(len, bp);
5958  p_len_string(f, len, 2, bp);
5959  bp += len * 2;
5960  break;
5961  case OP_EXACTMB3N:
5962  GET_LENGTH_INC(len, bp);
5963  p_len_string(f, len, 3, bp);
5964  bp += len * 3;
5965  break;
5966  case OP_EXACTMBN:
5967  {
5968  int mb_len;
5969 
5970  GET_LENGTH_INC(mb_len, bp);
5971  GET_LENGTH_INC(len, bp);
5972  fprintf(f, ":%d:%d:", mb_len, len);
5973  n = len * mb_len;
5974  while (n-- > 0) { fputc(*bp++, f); }
5975  }
5976  break;
5977 
5978  case OP_EXACT1_IC:
5979  len = enclen(enc, bp, bpend);
5980  p_string(f, len, bp);
5981  bp += len;
5982  break;
5983  case OP_EXACTN_IC:
5984  GET_LENGTH_INC(len, bp);
5985  p_len_string(f, len, 1, bp);
5986  bp += len;
5987  break;
5988 
5989  case OP_CCLASS:
5990  n = bitset_on_num((BitSetRef )bp);
5991  bp += SIZE_BITSET;
5992  fprintf(f, ":%d", n);
5993  break;
5994 
5995  case OP_CCLASS_NOT:
5996  n = bitset_on_num((BitSetRef )bp);
5997  bp += SIZE_BITSET;
5998  fprintf(f, ":%d", n);
5999  break;
6000 
6001  case OP_CCLASS_MB:
6002  case OP_CCLASS_MB_NOT:
6003  GET_LENGTH_INC(len, bp);
6004  q = bp;
6005 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6006  ALIGNMENT_RIGHT(q);
6007 #endif
6008  GET_CODE_POINT(code, q);
6009  bp += len;
6010  fprintf(f, ":%d:%d", (int )code, len);
6011  break;
6012 
6013  case OP_CCLASS_MIX:
6014  case OP_CCLASS_MIX_NOT:
6015  n = bitset_on_num((BitSetRef )bp);
6016  bp += SIZE_BITSET;
6017  GET_LENGTH_INC(len, bp);
6018  q = bp;
6019 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6020  ALIGNMENT_RIGHT(q);
6021 #endif
6022  GET_CODE_POINT(code, q);
6023  bp += len;
6024  fprintf(f, ":%d:%d:%d", n, (int )code, len);
6025  break;
6026 
6027  case OP_CCLASS_NODE:
6028  {
6029  CClassNode *cc;
6030 
6031  GET_POINTER_INC(cc, bp);
6032  n = bitset_on_num(cc->bs);
6033  fprintf(f, ":%"PRIuPTR":%d", (uintptr_t)cc, n);
6034  }
6035  break;
6036 
6037  case OP_BACKREFN_IC:
6038  mem = *((MemNumType* )bp);
6039  bp += SIZE_MEMNUM;
6040  fprintf(f, ":%d", mem);
6041  break;
6042 
6043  case OP_BACKREF_MULTI_IC:
6044  case OP_BACKREF_MULTI:
6045  fputs(" ", f);
6046  GET_LENGTH_INC(len, bp);
6047  for (i = 0; i < len; i++) {
6048  GET_MEMNUM_INC(mem, bp);
6049  if (i > 0) fputs(", ", f);
6050  fprintf(f, "%d", mem);
6051  }
6052  break;
6053 
6054  case OP_BACKREF_WITH_LEVEL:
6055  {
6056  OnigOptionType option;
6057  LengthType level;
6058 
6059  GET_OPTION_INC(option, bp);
6060  fprintf(f, ":%d", option);
6061  GET_LENGTH_INC(level, bp);
6062  fprintf(f, ":%d", level);
6063 
6064  fputs(" ", f);
6065  GET_LENGTH_INC(len, bp);
6066  for (i = 0; i < len; i++) {
6067  GET_MEMNUM_INC(mem, bp);
6068  if (i > 0) fputs(", ", f);
6069  fprintf(f, "%d", mem);
6070  }
6071  }
6072  break;
6073 
6074  case OP_REPEAT:
6075  case OP_REPEAT_NG:
6076  {
6077  mem = *((MemNumType* )bp);
6078  bp += SIZE_MEMNUM;
6079  addr = *((RelAddrType* )bp);
6080  bp += SIZE_RELADDR;
6081  fprintf(f, ":%d:%d", mem, addr);
6082  }
6083  break;
6084 
6086  case OP_PUSH_IF_PEEK_NEXT:
6087  addr = *((RelAddrType* )bp);
6088  bp += SIZE_RELADDR;
6089  fprintf(f, ":(%d)", addr);
6090  p_string(f, 1, bp);
6091  bp += 1;
6092  break;
6093 
6094  case OP_LOOK_BEHIND:
6095  GET_LENGTH_INC(len, bp);
6096  fprintf(f, ":%d", len);
6097  break;
6098 
6100  GET_RELADDR_INC(addr, bp);
6101  GET_LENGTH_INC(len, bp);
6102  fprintf(f, ":%d:(%d)", len, addr);
6103  break;
6104 
6105  case OP_STATE_CHECK_PUSH:
6107  scn = *((StateCheckNumType* )bp);
6108  bp += SIZE_STATE_CHECK_NUM;
6109  addr = *((RelAddrType* )bp);
6110  bp += SIZE_RELADDR;
6111  fprintf(f, ":%d:(%d)", scn, addr);
6112  break;
6113 
6114  default:
6115  fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6116  *--bp);
6117  }
6118  }
6119  fputs("]", f);
6120  if (nextp) *nextp = bp;
6121 }
6122 
6123 static void
6124 print_compiled_byte_code_list(FILE* f, regex_t* reg)
6125 {
6126  int ncode;
6127  UChar* bp = reg->p;
6128  UChar* end = reg->p + reg->used;
6129 
6130  fprintf(f, "code length: %d\n", reg->used);
6131 
6132  ncode = -1;
6133  while (bp < end) {
6134  ncode++;
6135  if (bp > reg->p) {
6136  if (ncode % 5 == 0)
6137  fprintf(f, "\n%ld:", bp-reg->p);
6138  else
6139  fprintf(f, " %ld:", bp-reg->p);
6140  }
6141  onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6142  }
6143 
6144  fprintf(f, "\n");
6145 }
6146 
6147 static void
6148 print_indent_tree(FILE* f, Node* node, int indent)
6149 {
6150  int i, type, container_p = 0;
6151  int add = 3;
6152  UChar* p;
6153 
6154  Indent(f, indent);
6155  if (IS_NULL(node)) {
6156  fprintf(f, "ERROR: null node!!!\n");
6157  exit (0);
6158  }
6159 
6160  type = NTYPE(node);
6161  switch (type) {
6162  case NT_LIST:
6163  case NT_ALT:
6164  if (NTYPE(node) == NT_LIST)
6165  fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t)node);
6166  else
6167  fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t)node);
6168 
6169  print_indent_tree(f, NCAR(node), indent + add);
6170  while (IS_NOT_NULL(node = NCDR(node))) {
6171  if (NTYPE(node) != type) {
6172  fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6173  exit(0);
6174  }
6175  print_indent_tree(f, NCAR(node), indent + add);
6176  }
6177  break;
6178 
6179  case NT_STR:
6180  fprintf(f, "<string%s:%"PRIxPTR">",
6181  (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t)node);
6182  for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6183  if (*p >= 0x20 && *p < 0x7f)
6184  fputc(*p, f);
6185  else {
6186  fprintf(f, " 0x%02x", *p);
6187  }
6188  }
6189  break;
6190 
6191  case NT_CCLASS:
6192  fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t)node);
6193  if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
6194  if (NCCLASS(node)->mbuf) {
6195  BBuf* bbuf = NCCLASS(node)->mbuf;
6196  for (i = 0; i < (int)bbuf->used; i++) {
6197  if (i > 0) fprintf(f, ",");
6198  fprintf(f, "%0x", bbuf->p[i]);
6199  }
6200  }
6201  break;
6202 
6203  case NT_CTYPE:
6204  fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t)node);
6205  switch (NCTYPE(node)->ctype) {
6206  case ONIGENC_CTYPE_WORD:
6207  if (NCTYPE(node)->not != 0)
6208  fputs("not word", f);
6209  else
6210  fputs("word", f);
6211  break;
6212 
6213  default:
6214  fprintf(f, "ERROR: undefined ctype.\n");
6215  exit(0);
6216  }
6217  break;
6218 
6219  case NT_CANY:
6220  fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t)node);
6221  break;
6222 
6223  case NT_ANCHOR:
6224  fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t)node);
6225  switch (NANCHOR(node)->type) {
6226  case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6227  case ANCHOR_END_BUF: fputs("end buf", f); break;
6228  case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6229  case ANCHOR_END_LINE: fputs("end line", f); break;
6230  case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6231  case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6232 
6233  case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
6234  case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
6235 #ifdef USE_WORD_BEGIN_END
6236  case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6237  case ANCHOR_WORD_END: fputs("word end", f); break;
6238 #endif
6239  case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
6240  case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
6241  case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
6242  case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6243 
6244  default:
6245  fprintf(f, "ERROR: undefined anchor type.\n");
6246  break;
6247  }
6248  break;
6249 
6250  case NT_BREF:
6251  {
6252  int* p;
6253  BRefNode* br = NBREF(node);
6254  p = BACKREFS_P(br);
6255  fprintf(f, "<backref:%"PRIxPTR">", (intptr_t)node);
6256  for (i = 0; i < br->back_num; i++) {
6257  if (i > 0) fputs(", ", f);
6258  fprintf(f, "%d", p[i]);
6259  }
6260  }
6261  break;
6262 
6263 #ifdef USE_SUBEXP_CALL
6264  case NT_CALL:
6265  {
6266  CallNode* cn = NCALL(node);
6267  fprintf(f, "<call:%"PRIxPTR">", (intptr_t)node);
6268  p_string(f, cn->name_end - cn->name, cn->name);
6269  }
6270  break;
6271 #endif
6272 
6273  case NT_QTFR:
6274  fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t)node,
6275  NQTFR(node)->lower, NQTFR(node)->upper,
6276  (NQTFR(node)->greedy ? "" : "?"));
6277  print_indent_tree(f, NQTFR(node)->target, indent + add);
6278  break;
6279 
6280  case NT_ENCLOSE:
6281  fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node);
6282  switch (NENCLOSE(node)->type) {
6283  case ENCLOSE_OPTION:
6284  fprintf(f, "option:%d\n", NENCLOSE(node)->option);
6285  print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6286  break;
6287  case ENCLOSE_MEMORY:
6288  fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6289  break;
6291  fprintf(f, "stop-bt");
6292  break;
6293 
6294  default:
6295  break;
6296  }
6297  fprintf(f, "\n");
6298  print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6299  break;
6300 
6301  default:
6302  fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6303  break;
6304  }
6305 
6306  if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6307  type != NT_ENCLOSE)
6308  fprintf(f, "\n");
6309 
6310  if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6311 
6312  fflush(f);
6313 }
6314 #endif /* ONIG_DEBUG */
6315 
6316 #ifdef ONIG_DEBUG_PARSE_TREE
6317 static void
6318 print_tree(FILE* f, Node* node)
6319 {
6320  print_indent_tree(f, node, 0);
6321 }
6322 #endif
#define SIZE_OP_SET_OPTION_PUSH
Definition: regint.h:631
void onig_transfer(regex_t *to, regex_t *from)
Definition: regcomp.c:5268
#define ANCHOR_ANYCHAR_STAR_ML
Definition: regint.h:471
#define SIZE_OP_MEMORY_END_PUSH_REC
Definition: regint.h:636
#define IS_DYNAMIC_OPTION(option)
Definition: regint.h:336
#define NSTRING_SET_AMBIG(node)
Definition: regparse.h:106
#define BIT_STATUS_AT(stats, n)
Definition: regint.h:295
#define IS_ENCLOSE_CALLED(en)
Definition: regparse.h:142
void onig_scan_env_set_error_string(ScanEnv *env, int ecode ARG_UNUSED, UChar *arg, UChar *arg_end)
Definition: regparse.c:5681
int onig_new_without_alloc(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType *syntax, OnigErrorInfo *einfo)
Definition: regcomp.c:5585
static int add_bitset(regex_t *reg, BitSetRef bs)
Definition: regcomp.c:294
static void concat_opt_exact_info_str(OptExactInfo *to, UChar *s, UChar *end, int raw ARG_UNUSED, OnigEncoding enc)
Definition: regcomp.c:4270
int is_refered
Definition: regparse.h:183
unsigned int OnigCodePoint
Definition: oniguruma.h:111
#define IS_NULL(p)
Definition: regint.h:240
ssize_t n
Definition: bigdecimal.c:5519
#define ENCLOSE_MEMORY
Definition: regparse.h:91
#define ONIGENC_CASE_FOLD_DEFAULT
Definition: oniguruma.h:127
unsigned int alloc
Definition: regint.h:377
int onig_new(regex_t **reg, const UChar *pattern, const UChar *pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType *syntax, OnigErrorInfo *einfo)
Definition: regcomp.c:5599
VP_EXPORT int
Definition: bigdecimal.c:4911
#define OPT_EXACT_MAXLEN
Definition: regcomp.c:3966
int * int_map_backward
Definition: oniguruma.h:664
#define IS_REPEAT_INFINITE(n)
Definition: regint.h:342
UChar * end
Definition: regparse.h:167
int onig_free_node_list(void)
Definition: regparse.c:1099
#define ALLOWED_ANCHOR_IN_LB_NOT
#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
Definition: oniguruma.h:553
int onig_node_str_cat(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1430
int regnum
Definition: regparse.h:193
Node * onig_node_list_add(Node *list, Node *x)
Definition: regparse.c:1247
#define IS_SYNTAX_BV(syn, bvm)
Definition: regparse.h:321
static void alt_merge_opt_exact_info(OptExactInfo *to, OptExactInfo *add, OptEnv *env)
Definition: regcomp.c:4287
static int get_char_length_tree(Node *node, regex_t *reg, int *len)
Definition: regcomp.c:2424
code
Definition: tcltklib.c:3375
static void concat_opt_anc_info(OptAncInfo *to, OptAncInfo *left, OptAncInfo *right, OnigDistance left_len, OnigDistance right_len)
Definition: regcomp.c:4156
struct _Node * target
Definition: regparse.h:223
#define ONIGENC_IS_CODE_WORD(enc, code)
Definition: oniguruma.h:298
#define NSTRING_IS_RAW(node)
Definition: regparse.h:109
void onig_print_compiled_byte_code(FILE *f, UChar *bp, UChar *bpend, UChar **nextp, OnigEncoding enc)
#define WORD_ALIGNMENT_SIZE
Definition: regint.h:261
#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
Definition: oniguruma.h:131
#define BIT_STATUS_ON_AT(stats, n)
Definition: regint.h:298
#define ONIG_OPTION_SINGLELINE
Definition: oniguruma.h:353
#define ONIG_OPTIMIZE_EXACT
Definition: regint.h:283
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION
#define ANCHOR_END_BUF
Definition: regint.h:457
static int compile_length_quantifier_node(QtfrNode *qn, regex_t *reg)
Definition: regcomp.c:971
#define SIZE_OPCODE
Definition: regint.h:587
#define BBUF_ADD1(buf, byte)
Definition: regint.h:419
#define d1
parser parser_yylval val
Definition: ripper.c:14289
#define ANCHOR_WORD_BEGIN
Definition: regint.h:463
#define SINGLE_BYTE_SIZE
Definition: regint.h:346
#define SIZE_MEMNUM
Definition: regint.h:591
#define IS_ENCLOSE_RECURSION(en)
Definition: regparse.h:144
#define SIZE_OP_REPEAT_INC
Definition: regint.h:624
int num_call
Definition: regparse.h:296
#define NSTRING_IS_AMBIG(node)
Definition: regparse.h:110
#define BBUF_WRITE(buf, pos, bytes, n)
Definition: regint.h:404
static int add_multi_byte_cclass(BBuf *mbuf, regex_t *reg)
Definition: regcomp.c:559
if(len<=MAX_WORD_LENGTH &&len >=MIN_WORD_LENGTH)
Definition: name2ctype.h:23841
static int get_char_length_tree1(Node *node, regex_t *reg, int *len, int level)
Definition: regcomp.c:2301
MinMaxLen len
Definition: regcomp.c:4005
OnigUChar * par_end
Definition: oniguruma.h:605
unsigned int OnigCaseFoldType
Definition: oniguruma.h:117
#define ONIGENC_MBC_CASE_FOLD(enc, flag, pp, end, buf)
Definition: oniguruma.h:230
static void swap_node(Node *a, Node *b)
Definition: regcomp.c:67
int sourceline
Definition: regparse.h:315
unsigned int bt_mem_end
Definition: oniguruma.h:642
#define REGEX_CHAIN_HEAD(reg)
Definition: regcomp.c:5275
#define NST_CALLED
Definition: regparse.h:131
#define IS_ENCLOSE_MAX_FIXED(en)
Definition: regparse.h:148
static int comp_distance_value(MinMaxLen *d1, MinMaxLen *d2, int v1, int v2)
Definition: regcomp.c:4070
#define SCANENV_MEM_NODES(senv)
Definition: regparse.h:278
size_t onig_memsize(const regex_t *reg)
Definition: regcomp.c:5247
#define THREAD_SYSTEM_END
Definition: regint.h:104
static int add_bytes(regex_t *reg, UChar *bytes, OnigDistance len)
Definition: regcomp.c:287
#define IS_CODE_SB_WORD(enc, code)
Definition: regint.h:781
int onig_names_free(regex_t *reg)
Definition: regparse.c:479
static int set_bm_skip(UChar *s, UChar *end, OnigEncoding enc ARG_UNUSED, UChar skip[], int **int_skip)
Definition: regcomp.c:3941
#define ANCHOR_BEGIN_LINE
Definition: regint.h:455
VALUE target
Definition: tcltklib.c:5521
#define SIZE_OP_CALL
Definition: regint.h:646
static void select_opt_exact_info(OnigEncoding enc, OptExactInfo *now, OptExactInfo *alt)
Definition: regcomp.c:4323
int onig_is_in_code_range(const UChar *p, OnigCodePoint code)
Definition: regcomp.c:5669
#define QUANTIFIER_EXPAND_LIMIT_SIZE
Definition: regcomp.c:731
int ret
Definition: tcltklib.c:276
int nest_level
Definition: regparse.h:235
SYMID SyckParser * p
Definition: yaml2byte.c:119
UChar * error
Definition: regparse.h:293
int onig_node_str_set(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1466
MinMaxLen mmd
Definition: regcomp.c:3997
OnigCaseFoldType onig_get_default_case_fold_flag(void)
Definition: regcomp.c:35
#define SIZE_OP_POP_POS
Definition: regint.h:628
#define IS_NCCLASS_NOT(nd)
Definition: regint.h:706
#define ONIGENC_IS_CODE_PRINT(enc, code)
Definition: oniguruma.h:276
int reach_end
Definition: regcomp.c:3990
static void concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo *to, NodeOptInfo *add)
Definition: regcomp.c:4503
#define bp()
Definition: debug.h:27
#define GET_CHAR_LEN_VARLEN
Definition: regcomp.c:2296
static OnigDistance distance_add(OnigDistance d1, OnigDistance d2)
Definition: regcomp.c:92
#define NENCLOSE(node)
Definition: regparse.h:78
VALUE enc
Definition: tcltklib.c:10402
OptExactInfo exm
Definition: regcomp.c:4009
static int compile_length_string_raw_node(StrNode *sn, regex_t *reg)
Definition: regcomp.c:502
int target_empty_info
Definition: regparse.h:180
UChar * s
Definition: regparse.h:166
#define IS_ENCLOSE_MIN_FIXED(en)
Definition: regparse.h:147
OnigDistance anchor_dmax
Definition: oniguruma.h:658
#define ANCHOR_END_LINE
Definition: regint.h:459
#define ANCHOR_PREC_READ
Definition: regint.h:465
int opt_count
Definition: regparse.h:201
UnsetAddrList * unset_addr_list
Definition: regparse.h:298
#define IS_BACKREF_NEST_LEVEL(bn)
Definition: regparse.h:158
#define NT_QTFR
Definition: regparse.h:44
#define SIZE_ABSADDR
Definition: regint.h:589
#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS
Definition: oniguruma.h:561
static int map_position_value(OnigEncoding enc, int i)
Definition: regcomp.c:4017
struct _Node * next_head_exact
Definition: regparse.h:182
int onigenc_strlen(OnigEncoding enc, const UChar *p, const UChar *end)
Definition: regenc.c:122
static void set_optimize_map_info(regex_t *reg, OptMapInfo *m)
Definition: regcomp.c:4968
#define CKN_ON
Definition: regcomp.c:732
#define GET_CODE_POINT(code, p)
Definition: regint.h:609
#define ONIGERR_NEVER_ENDING_RECURSION
Definition: oniguruma.h:554
Node * onig_node_new_alt(Node *left, Node *right)
Definition: regparse.c:1265
#define NQ_TARGET_IS_EMPTY_MEM
Definition: regparse.h:119
void * PointerType
Definition: regint.h:585
int rb_const_defined(VALUE, ID)
Definition: variable.c:1847
unsigned char map[ONIG_CHAR_TABLE_SIZE]
Definition: oniguruma.h:662
#define IS_CALL_RECURSION(cn)
Definition: regparse.h:155
#define BIT_STATUS_ON_AT_SIMPLE(stats, n)
Definition: regint.h:305
#define ALLOWED_ANCHOR_IN_LB
#define ARG_UNUSED
static UChar * str_dup(UChar *s, UChar *end)
Definition: regcomp.c:52
#define ONIGENC_CODE_TO_MBC_MAXLEN
Definition: oniguruma.h:186
#define REPEAT_RANGE_ALLOC
static int optimize_node_left(Node *node, NodeOptInfo *opt, OptEnv *env)
Definition: regcomp.c:4577
int onig_init(void)
Definition: regcomp.c:5622
r
Definition: bigdecimal.c:1154
static int compile_string_node(Node *node, regex_t *reg)
Definition: regcomp.c:511
#define ONIG_OPTIMIZE_EXACT_IC
Definition: regint.h:286
#define NST_MEM_BACKREFED
Definition: regparse.h:128
Definition: regint.h:541
static int add_opcode_rel_addr(regex_t *reg, int opcode, int addr)
Definition: regcomp.c:276
#define BACKREFS_P(br)
Definition: regparse.h:114
#define GET_OPTION_INC(option, p)
Definition: regint.h:604
#define NST_RECURSION
Definition: regparse.h:130
#define ONIGERR_TYPE_BUG
Definition: oniguruma.h:502
unsigned char * p
Definition: oniguruma.h:630
#define GET_LENGTH_INC(len, p)
Definition: regint.h:601
#define ANCHOR_BEGIN_POSITION
Definition: regint.h:456
UnsetAddr * us
Definition: regparse.h:214
#define ONIG_STATE_MODIFY
Definition: oniguruma.h:623
OnigCaseFoldType case_fold_flag
Definition: regcomp.c:3977
void onig_chain_link_add(regex_t *to, regex_t *add)
Definition: regcomp.c:5282
regex_t * reg
Definition: regparse.h:295
#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN
Definition: oniguruma.h:533
#define NST_ADDR_FIXED
Definition: regparse.h:132
OptAncInfo anc
Definition: regcomp.c:3988
int left_anchor
Definition: regcomp.c:3982
unsigned int OnigOptionType
Definition: oniguruma.h:344
OnigDistance anchor_dmin
Definition: oniguruma.h:657
static int set_optimize_exact_info(regex_t *reg, OptExactInfo *e)
Definition: regcomp.c:4921
UChar buf[NODE_STR_BUF_SIZE]
Definition: regparse.h:170
#define ONIGENC_CTYPE_WORD
Definition: oniguruma.h:203
#define ANCHOR_NOT_WORD_BOUND
Definition: regint.h:462
int num_named
Definition: regparse.h:302
unsigned int bt_mem_start
Definition: oniguruma.h:641
MinMaxLen mmd
Definition: regcomp.c:3987
int char_len
Definition: regparse.h:200
#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, acs)
Definition: oniguruma.h:238
unsigned int BitStatusType
Definition: regint.h:290
#define BIT_STATUS_ON_ALL(stats)
Definition: regint.h:294
unsigned char * exact
Definition: oniguruma.h:660
OptAncInfo anc
Definition: regcomp.c:4007
#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
Definition: regint.h:618
static int add_opcode_option(regex_t *reg, int opcode, OnigOptionType option)
Definition: regcomp.c:301
#define SIZE_OP_FAIL
Definition: regint.h:632
flag
Definition: tcltklib.c:2039
static int compile_tree_empty_check(Node *node, regex_t *reg, int empty_info)
Definition: regcomp.c:365
VALUE einfo
Definition: tcltklib.c:843
d
Definition: strlcat.c:58
#define SIZE_STATE_CHECK_NUM
Definition: regint.h:592
#define IS_ENCLOSE_CLEN_FIXED(en)
Definition: regparse.h:149
#define NBREF(node)
Definition: regparse.h:76
#define NT_ALT
Definition: regparse.h:48
#define IN_ALT
Definition: regcomp.c:3677
#define ALLOWED_ENCLOSE_IN_LB
void onig_free_body(regex_t *reg)
Definition: regcomp.c:5221
#define COMPILE_INIT_SIZE
#define BIT_STATUS_CLEAR(stats)
Definition: regint.h:293
#define SIZE_OP_MEMORY_START
Definition: regint.h:633
UnsetAddrList * unset_addr_list
Definition: regparse.h:224
#define SIZE_OPTION
Definition: regint.h:594
static int compile_cclass_node(CClassNode *cc, regex_t *reg)
Definition: regcomp.c:612
struct _Node * target
Definition: regparse.h:241
int group_num
Definition: regparse.h:220
static int setup_tree(Node *node, regex_t *reg, int state, ScanEnv *env)
Definition: regcomp.c:3691
static int compile_quantifier_node(QtfrNode *qn, regex_t *reg)
Definition: regcomp.c:1036
#define NCAR(node)
Definition: regparse.h:83
#define COMP_EM_BASE
Real * b
Definition: bigdecimal.c:1140
#define xalloca
Definition: regint.h:176
int onig_compile(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigErrorInfo *einfo, const char *sourcefile, int sourceline)
Definition: regcomp.c:5316
#define SIZE_OP_PUSH_STOP_BT
Definition: regint.h:639
Definition: regint.h:374
struct _Node * head_exact
Definition: regparse.h:181
static int add_option(regex_t *reg, OnigOptionType option)
Definition: regcomp.c:269
#define ALLOWED_ENCLOSE_IN_LB_NOT
#define IS_IGNORECASE(option)
Definition: regint.h:321
void onig_chain_reduce(regex_t *reg)
Definition: regcomp.c:5291
BitSet bs
Definition: regint.h:718
static int is_full_opt_exact_info(OptExactInfo *ex)
Definition: regcomp.c:4217
int state
Definition: regparse.h:231
OnigRepeatRange * repeat_range
Definition: oniguruma.h:645
#define GET_MEMNUM_INC(num, p)
Definition: regint.h:602
static int set_optimize_info_from_tree(Node *node, regex_t *reg, ScanEnv *scan_env)
Definition: regcomp.c:4996
#define STACK_POP_LEVEL_MEM_START
Definition: regint.h:278
static int add_rel_addr(regex_t *reg, int addr)
Definition: regcomp.c:224
#define enclen(enc, p, e)
int RelAddrType
Definition: regint.h:579
#define STACK_POP_LEVEL_ALL
Definition: regint.h:279
const char * sourcefile
Definition: regparse.h:314
static int compile_length_option_node(EncloseNode *node, regex_t *reg)
Definition: regcomp.c:1164
int onig_bbuf_init(BBuf *buf, OnigDistance size)
Definition: regcomp.c:144
n NULL
Definition: yaml2byte.c:134
#define THREAD_ATOMIC_END
Definition: regint.h:106
#define SIZE_OP_NULL_CHECK_START
Definition: regint.h:641
static void add_mml(MinMaxLen *to, MinMaxLen *from)
Definition: regcomp.c:4114
#define putc(_c, _stream)
Definition: win32.h:133
void * data
Definition: yaml2byte.c:131
#define IS_NCCLASS_SHARE(nd)
Definition: regint.h:707
#define ONIG_IS_OPTION_ON(options, option)
Definition: oniguruma.h:367
BDIGIT m
Definition: bigdecimal.c:4946
static int compile_length_tree(Node *node, regex_t *reg)
Definition: regcomp.c:1494
static int check_type_tree(Node *node, int type_mask, int enclose_mask, int anchor_mask)
Definition: regcomp.c:2694
int capa
Definition: regparse.h:169
#define GET_CHAR_LEN_TOP_ALT_VARLEN
Definition: regcomp.c:2297
OnigOptionType options
Definition: regcomp.c:3976
#define SIZE_OP_POP
Definition: regint.h:621
OnigCaseFoldType case_fold_flag
Definition: oniguruma.h:650
#define MAX_NODE_OPT_INFO_REF_COUNT
Definition: regcomp.c:4574
static void set_mml(MinMaxLen *mml, OnigDistance min, OnigDistance max)
Definition: regcomp.c:4094
OnigDistance min_len
Definition: regparse.h:198
int AbsAddrType
Definition: regint.h:580
#define ONIG_CHAR_TABLE_SIZE
Definition: oniguruma.h:617
int onigenc_init(void)
Definition: regenc.c:35
static void clear_opt_exact_info(OptExactInfo *ex)
Definition: regcomp.c:4223
#define ONIG_OPTION_NEGATE_SINGLELINE
Definition: oniguruma.h:356
#define BBUF_INIT(buf, size)
Definition: regint.h:380
#define ONIG_STATE_COMPILING
Definition: oniguruma.h:622
#define SET_ENCLOSE_STATUS(node, f)
Definition: regparse.h:139
const OnigSyntaxType * syntax
Definition: regparse.h:286
#define ONIG_STATE_NORMAL
Definition: oniguruma.h:620
static void copy_mml(MinMaxLen *to, MinMaxLen *from)
Definition: regcomp.c:4107
#define IS_MULTILINE(option)
Definition: regint.h:320
static void alt_merge_mml(MinMaxLen *to, MinMaxLen *from)
Definition: regcomp.c:4130
static int expand_case_fold_string(Node *node, regex_t *reg)
Definition: regcomp.c:3401
#define ALIGNMENT_RIGHT(addr)
Definition: regint.h:269
static int is_equal_mml(MinMaxLen *a, MinMaxLen *b)
Definition: regcomp.c:4087
#define ONIGERR_INVALID_BACKREF
Definition: oniguruma.h:544
#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag)
Definition: regint.h:338
static int divide_look_behind_alternatives(Node *node)
Definition: regcomp.c:3125
OptAncInfo anc
Definition: regcomp.c:3998
#define add(x, y)
#define SIZE_OP_PUSH_LOOK_BEHIND_NOT
Definition: regint.h:644
#define NQ_TARGET_IS_EMPTY_REC
Definition: regparse.h:120
#define BITSET_AT(bs, pos)
Definition: regint.h:368
int onig_free_shared_cclass_table(void)
Definition: regparse.c:5015
int * back_dynamic
Definition: regparse.h:234
int lower
Definition: regparse.h:177
#define NCCLASS(node)
Definition: regparse.h:74
#define xmemcpy
Definition: regint.h:169
static int distance_value(MinMaxLen *mm)
Definition: regcomp.c:4041
#define CHECK_NULL_RETURN_MEMERR(p)
Definition: regint.h:243
Node * onig_node_new_enclose(int type)
Definition: regparse.c:1401
#define NCTYPE(node)
Definition: regparse.h:75
int onig_reg_init(regex_t *reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType *syntax)
Definition: regcomp.c:5539
#define ONIG_OPTIMIZE_MAP
Definition: regint.h:287
#define SIZE_OP_MEMORY_END_PUSH
Definition: regint.h:635
#define SIZE_OP_RETURN
Definition: regint.h:647
#define NQ_TARGET_IS_EMPTY
Definition: regparse.h:118
static void clear_node_opt_info(NodeOptInfo *opt)
Definition: regcomp.c:4486
static int compile_tree_n_times(Node *node, int n, regex_t *reg)
Definition: regcomp.c:412
static void alt_merge_opt_anc_info(OptAncInfo *to, OptAncInfo *add)
Definition: regcomp.c:4210
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:3913
short int StateCheckNumType
Definition: regint.h:584
#define NT_LIST
Definition: regparse.h:47
struct re_pattern_buffer * chain
Definition: oniguruma.h:669
static void clear_optimize_info(regex_t *reg)
Definition: regcomp.c:5052
int err
Definition: win32.c:78
#define NT_ANCHOR
Definition: regparse.h:46
#define ANCHOR_WORD_END
Definition: regint.h:464
#define IN_REPEAT
Definition: regcomp.c:3679
#define SIZE_OP_ANYCHAR_STAR
Definition: regint.h:617
#define ALLOWED_TYPE_IN_LB
#define GET_ALIGNMENT_PAD_SIZE(addr, pad_size)
Definition: regint.h:263
#define SIZE_LENGTH
Definition: regint.h:590
int upper
Definition: regparse.h:178
#define NST_CLEN_FIXED
Definition: regparse.h:125
static int compile_length_string_node(Node *node, regex_t *reg)
Definition: regcomp.c:463
static void copy_opt_anc_info(OptAncInfo *to, OptAncInfo *from)
Definition: regcomp.c:4150
static int update_string_node_case_fold(regex_t *reg, Node *node)
Definition: regcomp.c:3224
#define ONIGERR_UNDEFINED_NAME_REFERENCE
Definition: oniguruma.h:550
#define EXPAND_STRING_MAX_LENGTH
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
Definition: oniguruma.h:265
#define SIZE_OP_MEMORY_END
Definition: regint.h:637
#define SIZE_OP_MEMORY_START_PUSH
Definition: regint.h:634
void onig_node_conv_to_str_node(Node *node, int flag)
Definition: regparse.c:1482
#define NT_STR
Definition: regparse.h:39
gz level
Definition: zlib.c:2025
int type
Definition: regparse.h:240
#define NQTFR(node)
Definition: regparse.h:77
static void alt_merge_node_opt_info(NodeOptInfo *to, NodeOptInfo *add, OptEnv *env)
Definition: regcomp.c:4562
const int id
Definition: nkf.c:209
#define SIZE_BITSET
Definition: regint.h:358
#define THREAD_ATOMIC_START
Definition: regint.h:105
#define ONIGENC_IS_MBC_WORD(enc, s, end)
Definition: oniguruma.h:224
#define ONIGENC_CASE_FOLD_MIN
Definition: oniguruma.h:126
#define TRUE
Definition: nkf.h:186
struct _Node * target
Definition: regparse.h:208
static int compile_anchor_node(AnchorNode *node, regex_t *reg)
Definition: regcomp.c:1410
MinMaxLen mmd
Definition: regcomp.c:3974
Bits * BitSetRef
Definition: regint.h:356
#define SIZE_OP_PUSH_IF_PEEK_NEXT
Definition: regint.h:623
#define ONIGENC_MBC_CASE_FOLD_MAXLEN
Definition: oniguruma.h:187
OnigDistance max_len
Definition: regparse.h:199
n anchor
Definition: yaml2byte.c:134
#define NST_MARK2
Definition: regparse.h:127
#define NT_CANY
Definition: regparse.h:42
register char * s
Definition: os2.c:56
ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag
Definition: oniguruma.h:119
UChar * name_end
Definition: regparse.h:222
OnigEncoding enc
Definition: regcomp.c:3975
int onig_name_to_group_numbers(regex_t *reg, const UChar *name, const UChar *name_end, int **nums)
Definition: regparse.c:837
BBuf * mbuf
Definition: regint.h:719
int LengthType
Definition: regint.h:581
OptMapInfo map
Definition: regcomp.c:4012
#define ANCHOR_END_BUF_MASK
Definition: regparse.h:89
int right_anchor
Definition: regcomp.c:3983
static int get_max_match_length(Node *node, OnigDistance *max, ScanEnv *env)
Definition: regcomp.c:2180
#define IS_QUANTIFIER_IN_REPEAT(qn)
Definition: regparse.h:159
static int expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p, int slen, UChar *end, regex_t *reg, Node **rnode)
Definition: regcomp.c:3289
static int add_compile_string_length(UChar *s ARG_UNUSED, int mb_len, OnigDistance str_len, regex_t *reg ARG_UNUSED, int ignore_case)
Definition: regcomp.c:424
int onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:5713
unsigned int uintptr_t
Definition: win32.h:96
#define SIZE_OP_JUMP
Definition: regint.h:619
#define NTYPE(node)
Definition: regparse.h:70
static int is_anychar_star_quantifier(QtfrNode *qn)
Definition: regcomp.c:722
#define ANCHOR_LOOK_BEHIND_NOT
Definition: regint.h:468
int state
Definition: regparse.h:175
int type
Definition: tcltklib.c:107
BitStatusType backrefed_mem
Definition: regparse.h:290
#define P_(args)
Definition: oniguruma.h:70
int ignore_case
Definition: regcomp.c:3991
const char * name
Definition: oniguruma.h:158
#define NCALL(node)
Definition: regparse.h:81
#define SIZE_OP_NULL_CHECK_END
Definition: regint.h:642
static int compile_length_anchor_node(AnchorNode *node, regex_t *reg)
Definition: regcomp.c:1377
OnigEncoding enc
Definition: oniguruma.h:603
static int expand_case_fold_make_rem_string(Node **rnode, UChar *s, UChar *end, regex_t *reg)
Definition: regcomp.c:3267
RUBY_EXTERN VALUE rb_cThread
Definition: ruby.h:1279
int num_mem
Definition: regparse.h:300
ssize_t ex
Definition: bigdecimal.c:4948
static void alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo *to, OptMapInfo *add)
Definition: regcomp.c:4451
#define ONIGENC_CODE_TO_MBC(enc, code, buf)
Definition: oniguruma.h:266
OnigDistance max
Definition: regcomp.c:3970
Node * onig_node_new_list(Node *left, Node *right)
Definition: regparse.c:1241
int intptr_t
Definition: win32.h:88
#define NSTRING_IS_DONT_GET_OPT_INFO(node)
Definition: regparse.h:111
#define IS_ENCLOSE_ADDR_FIXED(en)
Definition: regparse.h:143
static int add_pointer(regex_t *reg, void *addr)
Definition: regcomp.c:260
#define IS_BACKREF_NAME_REF(bn)
Definition: regparse.h:157
static int is_set_opt_anc_info(OptAncInfo *to, int anc)
Definition: regcomp.c:4184
static Node * get_head_value_node(Node *node, int exact, regex_t *reg)
Definition: regcomp.c:2607
#define NT_CCLASS
Definition: regparse.h:40
#define SIZE_OP_POP_STOP_BT
Definition: regint.h:640
static int setup_look_behind(Node *node, regex_t *reg, ScanEnv *env)
Definition: regcomp.c:3155
#define ONIGERR_PARSER_BUG
Definition: oniguruma.h:503
#define ANCHOR_SEMI_END_BUF
Definition: regint.h:458
#define ONIG_OPTIMIZE_NONE
Definition: regint.h:282
register unsigned int len
Definition: name2ctype.h:22210
unsigned int used
Definition: oniguruma.h:631
#define xfree
Definition: defines.h:69
Node * onig_node_new_str(const UChar *s, const UChar *end)
Definition: regparse.c:1524
#define alloc(type)
Definition: st.c:69
Definition: regint.h:476
#define IS_ENCLOSE_MARK1(en)
Definition: regparse.h:145
#define ONIG_INFINITE_DISTANCE
Definition: oniguruma.h:115
UChar * error_end
Definition: regparse.h:294
#define SIZE_OP_FAIL_LOOK_BEHIND_NOT
Definition: regint.h:645
Node ** mem_nodes_dynamic
Definition: regparse.h:306
static void set_sub_anchor(regex_t *reg, OptAncInfo *anc)
Definition: regcomp.c:4985
UChar s[OPT_EXACT_MAXLEN]
Definition: regcomp.c:3993
#define ONIG_OPTION_CAPTURE_GROUP
Definition: oniguruma.h:358
static int add_opcode(regex_t *reg, int opcode)
Definition: regcomp.c:206
#define ONIGENC_MBC_TO_CODE(enc, p, end)
Definition: oniguruma.h:264
static void copy_node_opt_info(NodeOptInfo *to, NodeOptInfo *from)
Definition: regcomp.c:4497
BitStatusType bt_mem_end
Definition: regparse.h:289
return ptr
Definition: tcltklib.c:780
VpDivd * c
Definition: bigdecimal.c:1163
#define ONIGENC_MBC_MINLEN(enc)
Definition: oniguruma.h:262
#define ENCLOSE_STOP_BACKTRACK
Definition: regparse.h:93
state
Definition: gb18030.c:213
#define USE_SUBEXP_CALL
Definition: regint.h:59
static int is_left_anchor(int anc)
Definition: regcomp.c:4173
#define UChar
Definition: oniguruma.h:107
OnigEncoding enc
Definition: regparse.h:285
void onig_node_free(Node *node)
Definition: regparse.c:1016
#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
Definition: oniguruma.h:545
#define NANCHOR(node)
Definition: regparse.h:79
#define NST_STOP_BT_SIMPLE_REPEAT
Definition: regparse.h:129
static int add_char_amb_opt_map_info(OptMapInfo *map, UChar *p, UChar *end, OnigEncoding enc, OnigCaseFoldType case_fold_flag)
Definition: regcomp.c:4397
gz end
Definition: zlib.c:2033
#define ANCHOR_ANYCHAR_STAR
Definition: regint.h:470
#define NT_BREF
Definition: regparse.h:43
#define SIZE_RELADDR
Definition: regint.h:588
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]
Definition: regcomp.c:48
OnigDistance min
Definition: regcomp.c:3969
#define NST_IN_REPEAT
Definition: regparse.h:135
#define IS_NEED_STR_LEN_OP_EXACT(op)
Definition: regcomp.c:315
#define NULL_NODE(parser, node)
Definition: gram.c:106
static int compile_string_raw_node(StrNode *sn, regex_t *reg)
Definition: regcomp.c:550
#define STACK_POP_LEVEL_FREE
Definition: regint.h:277
static void add_char_opt_map_info(OptMapInfo *map, UChar c, OnigEncoding enc)
Definition: regcomp.c:4388
#define CHECK_NULL_RETURN(p)
Definition: regint.h:242
#define SIZE_OP_FAIL_POS
Definition: regint.h:629
unsigned char * exact_end
Definition: oniguruma.h:661
static int add_length(regex_t *reg, OnigDistance len)
Definition: regcomp.c:242
OnigDistance dmax
Definition: oniguruma.h:666
static void copy_opt_map_info(OptMapInfo *to, OptMapInfo *from)
Definition: regcomp.c:4382
static void clear_opt_anc_info(OptAncInfo *anc)
Definition: regcomp.c:4143
int size
Definition: encoding.c:51
#define ONIGENC_IS_UNDEF(enc)
Definition: oniguruma.h:219
static int compile_tree(Node *node, regex_t *reg)
Definition: regcomp.c:1587
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, s, end)
Definition: oniguruma.h:232
unsigned int used
Definition: regint.h:376
#define SIZE_OP_SET_OPTION
Definition: regint.h:630
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
Definition: oniguruma.h:469
OnigRegexType regex_t
Definition: oniguruma.h:675
#define GET_RELADDR_INC(addr, p)
Definition: regint.h:599
unsigned int alloc
Definition: oniguruma.h:632
#define NST_MARK1
Definition: regparse.h:126
#define ONIG_MAX_CAPTURE_HISTORY_GROUP
Definition: oniguruma.h:568
#define xmalloc
Definition: defines.h:64
static int onig_inited
Definition: regcomp.c:5536
static int add_compile_string(UChar *s, int mb_len, OnigDistance str_len, regex_t *reg, int ignore_case)
Definition: regcomp.c:441
#define ANCHOR_PREC_READ_NOT
Definition: regint.h:466
size_t OnigDistance
Definition: oniguruma.h:113
#define IN_NOT
Definition: regcomp.c:3678
void onig_reduce_nested_quantifier(Node *pnode, Node *cnode)
Definition: regparse.c:2236
return
Definition: name2ctype.h:23857
#define ENCLOSE_OPTION
Definition: regparse.h:92
char * start
Definition: yaml2byte.c:126
static int is_not_included(Node *x, Node *y, regex_t *reg)
Definition: regcomp.c:2431
#define NST_MIN_FIXED
Definition: regparse.h:123
unsigned int capture_history
Definition: oniguruma.h:640
#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en)
Definition: regparse.h:150
#define ONIGERR_MEMORY
Definition: oniguruma.h:501
OptExactInfo exb
Definition: regcomp.c:4008
#define IS_FIND_CONDITION(option)
Definition: regint.h:325
#define THREAD_SYSTEM_INIT
Definition: regint.h:103
UChar * p
Definition: regint.h:375
static OnigDistance distance_multiply(OnigDistance d, int m)
Definition: regcomp.c:103
#define PRIuSIZE
Definition: ruby.h:173
#define NSTR(node)
Definition: regparse.h:73
static void clear_opt_map_info(OptMapInfo *map)
Definition: regcomp.c:4354
static void select_opt_map_info(OptMapInfo *now, OptMapInfo *alt)
Definition: regcomp.c:4419
#define SIZE_OP_MEMORY_END_REC
Definition: regint.h:638
OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN]
Definition: oniguruma.h:142
#define ANCHOR_ANYCHAR_STAR_MASK
Definition: regparse.h:88
OptExactInfo expr
Definition: regcomp.c:4010
static void concat_opt_exact_info(OptExactInfo *to, OptExactInfo *add, OnigEncoding enc)
Definition: regcomp.c:4240
static void set_bound_node_opt_info(NodeOptInfo *opt, MinMaxLen *mmd)
Definition: regcomp.c:4478
OnigDistance dmin
Definition: oniguruma.h:665
#define ONIGENC_MBC_MAXLEN_DIST(enc)
Definition: oniguruma.h:261
static int get_min_match_length(Node *node, OnigDistance *min, ScanEnv *env)
Definition: regcomp.c:2057
OnigOptionType option
Definition: regparse.h:283
#define CLEAR_ENCLOSE_STATUS(node, f)
Definition: regparse.h:140
static int compile_range_repeat_node(QtfrNode *qn, int target_len, int empty_info, regex_t *reg)
Definition: regcomp.c:686
static int compile_enclose_node(EncloseNode *node, regex_t *reg)
Definition: regcomp.c:1276
struct _Node * target
Definition: regparse.h:176
#define NTYPE2BIT(type)
Definition: regparse.h:52
#define SIZE_POINTER
Definition: regint.h:596
int greedy
Definition: regparse.h:179
#define ONIG_OPTION_DONT_CAPTURE_GROUP
Definition: oniguruma.h:357
#define ONIGERR_INVALID_ARGUMENT
Definition: oniguruma.h:511
int value
Definition: regcomp.c:4000
int onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:5690
int onig_parse_make_tree(Node **root, const UChar *pattern, const UChar *end, regex_t *reg, ScanEnv *env)
Definition: regparse.c:5654
#define NT_CALL
Definition: regparse.h:49
#define xrealloc
Definition: defines.h:67
#define SIZE_OP_PUSH_POS
Definition: regint.h:626
#define NT_CTYPE
Definition: regparse.h:41
#define ONIG_OPTION_IGNORECASE
Definition: oniguruma.h:350
#define NST_MAX_FIXED
Definition: regparse.h:124
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV
Definition: regint.h:285
static int compile_length_cclass_node(CClassNode *cc, regex_t *reg)
Definition: regcomp.c:582
#define ANCHOR_BEGIN_BUF
Definition: regint.h:454
#define IN_VAR_REPEAT
Definition: regcomp.c:3680
BDIGIT e
Definition: bigdecimal.c:4946
OnigEncoding enc
Definition: oniguruma.h:647
#define SIZE_OP_PUSH_OR_JUMP_EXACT1
Definition: regint.h:622
static void copy_opt_env(OptEnv *to, OptEnv *from)
Definition: regcomp.c:4137
static void copy_opt_exact_info(OptExactInfo *to, OptExactInfo *from)
Definition: regcomp.c:4234
#define IS_NOT_NULL(p)
Definition: regint.h:241
options
Definition: tcltklib.c:4470
static int entry_repeat_range(regex_t *reg, int id, int lower, int upper)
Definition: regcomp.c:655
#define IS_NODE_TYPE_SIMPLE(type)
Definition: regparse.h:66
int onig_end(void)
Definition: regcomp.c:5645
#define ANCHOR_LOOK_BEHIND
Definition: regint.h:467
short int MemNumType
Definition: regint.h:583
#define IS_ENCLOSE_MARK2(en)
Definition: regparse.h:146
static int add_abs_addr(regex_t *reg, int addr)
Definition: regcomp.c:233
static int add_mem_num(regex_t *reg, int num)
Definition: regcomp.c:251
#define GET_POINTER_INC(ptr, p)
Definition: regint.h:605
#define NSTRING_LEN(node)
Definition: regparse.h:103
#define IS_ENCLOSE_NAMED_GROUP(en)
Definition: regparse.h:152
static void clear_mml(MinMaxLen *mml)
Definition: regcomp.c:4101
int char_len
Definition: regparse.h:242
UChar * name
Definition: regparse.h:221
#define rb_intern_const(str)
Definition: ruby.h:1141
struct _Node * target
Definition: regparse.h:195
static void remove_opt_anc_info(OptAncInfo *to, int anc)
Definition: regcomp.c:4201
UChar map[ONIG_CHAR_TABLE_SIZE]
Definition: regcomp.c:4001
BitStatusType bt_mem_start
Definition: regparse.h:288
ssize_t i
Definition: bigdecimal.c:5519
#define NSTRING_SET_DONT_GET_OPT_INFO(node)
Definition: regparse.h:107
#define SIZE_OP_PUSH_POS_NOT
Definition: regint.h:627
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED
Definition: oniguruma.h:508
ScanEnv * scan_env
Definition: regcomp.c:3978
#define ANCHOR_WORD_BOUND
Definition: regint.h:461
static int comp_opt_exact_or_map_info(OptExactInfo *e, OptMapInfo *m)
Definition: regcomp.c:4438
BDIGIT v
Definition: bigdecimal.c:5520
OnigOptionType option
Definition: regparse.h:194
AbsAddrType call_addr
Definition: regparse.h:196
#define ONIG_OPTIMIZE_EXACT_BM
Definition: regint.h:284
OnigOptionType options
Definition: oniguruma.h:648
#define env
int onig_renumber_name_table(regex_t *reg, GroupNumRemap *map)
Definition: regparse.c:565
q
Definition: tcltklib.c:2957
#define SIZE_OP_PUSH
Definition: regint.h:620
static int next_setup(Node *node, Node *next_node, regex_t *reg)
Definition: regcomp.c:3176
void onig_free(regex_t *reg)
Definition: regcomp.c:5238
static int compile_length_enclose_node(EncloseNode *node, regex_t *reg)
Definition: regcomp.c:1210
int back_num
Definition: regparse.h:232
#define NCDR(node)
Definition: regparse.h:84
Real * a
Definition: bigdecimal.c:1140
OnigOptionType options
Definition: oniguruma.h:374
#define GET_ABSADDR_INC(addr, p)
Definition: regint.h:600
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
Definition: oniguruma.h:468
static int compile_option_node(EncloseNode *node, regex_t *reg)
Definition: regcomp.c:1184
#define SET_CALL_RECURSION(node)
Definition: regparse.h:154
#define BBUF_GET_OFFSET_POS(buf)
Definition: regint.h:421
#define SET_NTYPE(node, ntype)
Definition: regparse.h:71
#define REGEX_TRANSFER(to, from)
Definition: regcomp.c:5260
int offset
Definition: regparse.h:207
static int bitset_is_empty(BitSetRef bs)
Definition: regcomp.c:114
#define NT_ENCLOSE
Definition: regparse.h:45
int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
Definition: regcomp.c:41
static void add_opt_anc_info(OptAncInfo *to, int anc)
Definition: regcomp.c:4192
#define ONIGERR_UNDEFINED_GROUP_REFERENCE
Definition: oniguruma.h:551
#define ONIG_STATE(reg)
Definition: oniguruma.h:625
OnigUChar * par
Definition: oniguruma.h:604
#define BBUF_GET_ADD_ADDRESS(buf)
Definition: regint.h:420
#define SIZE_OP_LOOK_BEHIND
Definition: regint.h:643
#define BBUF_ADD(buf, bytes, n)
Definition: regint.h:418
static int select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case)
Definition: regcomp.c:320
BitStatusType capture_history
Definition: regparse.h:287
#define ONIGENC_MBC_MAXLEN(enc)
Definition: oniguruma.h:260
#define BITSET_SIZE
Definition: regint.h:348
int back_static[NODE_BACKREFS_SIZE]
Definition: regparse.h:233
Node * onig_node_new_anchor(int type)
Definition: regparse.c:1277