Ruby  2.5.0dev(2017-10-22revision60238)
regcomp.c
Go to the documentation of this file.
1 /**********************************************************************
2  regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6  * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include "regparse.h"
32 
34 
35 extern OnigCaseFoldType
37 {
39 }
40 
41 extern int
43 {
44  OnigDefaultCaseFoldFlag = case_fold_flag;
45  return 0;
46 }
47 
48 
49 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
51 #endif
52 
53 #if 0
54 static UChar*
55 str_dup(UChar* s, UChar* end)
56 {
57  ptrdiff_t len = end - s;
58 
59  if (len > 0) {
60  UChar* r = (UChar* )xmalloc(len + 1);
62  xmemcpy(r, s, len);
63  r[len] = (UChar )0;
64  return r;
65  }
66  else return NULL;
67 }
68 #endif
69 
70 static void
71 swap_node(Node* a, Node* b)
72 {
73  Node c;
74  c = *a; *a = *b; *b = c;
75 
76  if (NTYPE(a) == NT_STR) {
77  StrNode* sn = NSTR(a);
78  if (sn->capa == 0) {
79  size_t len = sn->end - sn->s;
80  sn->s = sn->buf;
81  sn->end = sn->s + len;
82  }
83  }
84 
85  if (NTYPE(b) == NT_STR) {
86  StrNode* sn = NSTR(b);
87  if (sn->capa == 0) {
88  size_t len = sn->end - sn->s;
89  sn->s = sn->buf;
90  sn->end = sn->s + len;
91  }
92  }
93 }
94 
95 static OnigDistance
96 distance_add(OnigDistance d1, OnigDistance d2)
97 {
100  else {
101  if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
102  else return ONIG_INFINITE_DISTANCE;
103  }
104 }
105 
106 static OnigDistance
107 distance_multiply(OnigDistance d, int m)
108 {
109  if (m == 0) return 0;
110 
111  if (d < ONIG_INFINITE_DISTANCE / m)
112  return d * m;
113  else
114  return ONIG_INFINITE_DISTANCE;
115 }
116 
117 static int
118 bitset_is_empty(BitSetRef bs)
119 {
120  int i;
121  for (i = 0; i < BITSET_SIZE; i++) {
122  if (bs[i] != 0) return 0;
123  }
124  return 1;
125 }
126 
127 #ifdef ONIG_DEBUG
128 static int
129 bitset_on_num(BitSetRef bs)
130 {
131  int i, n;
132 
133  n = 0;
134  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135  if (BITSET_AT(bs, i)) n++;
136  }
137  return n;
138 }
139 #endif
140 
141 extern int
143 {
144  if (size <= 0) {
145  size = 0;
146  buf->p = NULL;
147  }
148  else {
149  buf->p = (UChar* )xmalloc(size);
150  if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
151  }
152 
153  buf->alloc = (unsigned int )size;
154  buf->used = 0;
155  return 0;
156 }
157 
158 
159 #ifdef USE_SUBEXP_CALL
160 
161 static int
162 unset_addr_list_init(UnsetAddrList* uslist, int size)
163 {
164  UnsetAddr* p;
165 
166  p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
168  uslist->num = 0;
169  uslist->alloc = size;
170  uslist->us = p;
171  return 0;
172 }
173 
174 static void
175 unset_addr_list_end(UnsetAddrList* uslist)
176 {
177  if (IS_NOT_NULL(uslist->us))
178  xfree(uslist->us);
179 }
180 
181 static int
182 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
183 {
184  UnsetAddr* p;
185  int size;
186 
187  if (uslist->num >= uslist->alloc) {
188  size = uslist->alloc * 2;
189  p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
191  uslist->alloc = size;
192  uslist->us = p;
193  }
194 
195  uslist->us[uslist->num].offset = offset;
196  uslist->us[uslist->num].target = node;
197  uslist->num++;
198  return 0;
199 }
200 #endif /* USE_SUBEXP_CALL */
201 
202 
203 static int
204 add_opcode(regex_t* reg, int opcode)
205 {
206  BBUF_ADD1(reg, opcode);
207  return 0;
208 }
209 
210 #ifdef USE_COMBINATION_EXPLOSION_CHECK
211 static int
212 add_state_check_num(regex_t* reg, int num)
213 {
215 
216  BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
217  return 0;
218 }
219 #endif
220 
221 static int
222 add_rel_addr(regex_t* reg, int addr)
223 {
224  RelAddrType ra = (RelAddrType )addr;
225 
226  BBUF_ADD(reg, &ra, SIZE_RELADDR);
227  return 0;
228 }
229 
230 static int
231 add_abs_addr(regex_t* reg, int addr)
232 {
233  AbsAddrType ra = (AbsAddrType )addr;
234 
235  BBUF_ADD(reg, &ra, SIZE_ABSADDR);
236  return 0;
237 }
238 
239 static int
240 add_length(regex_t* reg, OnigDistance len)
241 {
242  LengthType l = (LengthType )len;
243 
244  BBUF_ADD(reg, &l, SIZE_LENGTH);
245  return 0;
246 }
247 
248 static int
249 add_mem_num(regex_t* reg, int num)
250 {
251  MemNumType n = (MemNumType )num;
252 
253  BBUF_ADD(reg, &n, SIZE_MEMNUM);
254  return 0;
255 }
256 
257 #if 0
258 static int
259 add_pointer(regex_t* reg, void* addr)
260 {
261  PointerType ptr = (PointerType )addr;
262 
263  BBUF_ADD(reg, &ptr, SIZE_POINTER);
264  return 0;
265 }
266 #endif
267 
268 static int
269 add_option(regex_t* reg, OnigOptionType option)
270 {
271  BBUF_ADD(reg, &option, SIZE_OPTION);
272  return 0;
273 }
274 
275 static int
276 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
277 {
278  int r;
279 
280  r = add_opcode(reg, opcode);
281  if (r) return r;
282  r = add_rel_addr(reg, addr);
283  return r;
284 }
285 
286 static int
287 add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
288 {
289  BBUF_ADD(reg, bytes, len);
290  return 0;
291 }
292 
293 static int
294 add_bitset(regex_t* reg, BitSetRef bs)
295 {
296  BBUF_ADD(reg, bs, SIZE_BITSET);
297  return 0;
298 }
299 
300 static int
301 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
302 {
303  int r;
304 
305  r = add_opcode(reg, opcode);
306  if (r) return r;
307  r = add_option(reg, option);
308  return r;
309 }
310 
311 static int compile_length_tree(Node* node, regex_t* reg);
312 static int compile_tree(Node* node, regex_t* reg);
313 
314 
315 #define IS_NEED_STR_LEN_OP_EXACT(op) \
316  ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
317  (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
318 
319 static int
320 select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
321 {
322  int op;
323  OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
324 
325  if (ignore_case) {
326  switch (str_len) {
327  case 1: op = OP_EXACT1_IC; break;
328  default: op = OP_EXACTN_IC; break;
329  }
330  }
331  else {
332  switch (mb_len) {
333  case 1:
334  switch (str_len) {
335  case 1: op = OP_EXACT1; break;
336  case 2: op = OP_EXACT2; break;
337  case 3: op = OP_EXACT3; break;
338  case 4: op = OP_EXACT4; break;
339  case 5: op = OP_EXACT5; break;
340  default: op = OP_EXACTN; break;
341  }
342  break;
343 
344  case 2:
345  switch (str_len) {
346  case 1: op = OP_EXACTMB2N1; break;
347  case 2: op = OP_EXACTMB2N2; break;
348  case 3: op = OP_EXACTMB2N3; break;
349  default: op = OP_EXACTMB2N; break;
350  }
351  break;
352 
353  case 3:
354  op = OP_EXACTMB3N;
355  break;
356 
357  default:
358  op = OP_EXACTMBN;
359  break;
360  }
361  }
362  return op;
363 }
364 
365 static int
366 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
367 {
368  int r;
369  int saved_num_null_check = reg->num_null_check;
370 
371  if (empty_info != 0) {
372  r = add_opcode(reg, OP_NULL_CHECK_START);
373  if (r) return r;
374  r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
375  if (r) return r;
376  reg->num_null_check++;
377  }
378 
379  r = compile_tree(node, reg);
380  if (r) return r;
381 
382  if (empty_info != 0) {
383  if (empty_info == NQ_TARGET_IS_EMPTY)
384  r = add_opcode(reg, OP_NULL_CHECK_END);
385  else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
386  r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
387  else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
388  r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
389 
390  if (r) return r;
391  r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
392  }
393  return r;
394 }
395 
396 #ifdef USE_SUBEXP_CALL
397 static int
398 compile_call(CallNode* node, regex_t* reg)
399 {
400  int r;
401 
402  r = add_opcode(reg, OP_CALL);
403  if (r) return r;
404  r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
405  node->target);
406  if (r) return r;
407  r = add_abs_addr(reg, 0 /*dummy addr.*/);
408  return r;
409 }
410 #endif
411 
412 static int
413 compile_tree_n_times(Node* node, int n, regex_t* reg)
414 {
415  int i, r;
416 
417  for (i = 0; i < n; i++) {
418  r = compile_tree(node, reg);
419  if (r) return r;
420  }
421  return 0;
422 }
423 
424 static int
425 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
426  regex_t* reg ARG_UNUSED, int ignore_case)
427 {
428  int len;
429  int op = select_str_opcode(mb_len, byte_len, ignore_case);
430 
431  len = SIZE_OPCODE;
432 
433  if (op == OP_EXACTMBN) len += SIZE_LENGTH;
434  if (IS_NEED_STR_LEN_OP_EXACT(op))
435  len += SIZE_LENGTH;
436 
437  len += (int )byte_len;
438  return len;
439 }
440 
441 static int
442 add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
443  regex_t* reg, int ignore_case)
444 {
445  int op = select_str_opcode(mb_len, byte_len, ignore_case);
446  add_opcode(reg, op);
447 
448  if (op == OP_EXACTMBN)
449  add_length(reg, mb_len);
450 
451  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
452  if (op == OP_EXACTN_IC)
453  add_length(reg, byte_len);
454  else
455  add_length(reg, byte_len / mb_len);
456  }
457 
458  add_bytes(reg, s, byte_len);
459  return 0;
460 }
461 
462 
463 static int
464 compile_length_string_node(Node* node, regex_t* reg)
465 {
466  int rlen, r, len, prev_len, blen, ambig;
467  OnigEncoding enc = reg->enc;
468  UChar *p, *prev;
469  StrNode* sn;
470 
471  sn = NSTR(node);
472  if (sn->end <= sn->s)
473  return 0;
474 
475  ambig = NSTRING_IS_AMBIG(node);
476 
477  p = prev = sn->s;
478  prev_len = enclen(enc, p, sn->end);
479  p += prev_len;
480  blen = prev_len;
481  rlen = 0;
482 
483  for (; p < sn->end; ) {
484  len = enclen(enc, p, sn->end);
485  if (len == prev_len || ambig) {
486  blen += len;
487  }
488  else {
489  r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
490  rlen += r;
491  prev = p;
492  blen = len;
493  prev_len = len;
494  }
495  p += len;
496  }
497  r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
498  rlen += r;
499  return rlen;
500 }
501 
502 static int
503 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
504 {
505  if (sn->end <= sn->s)
506  return 0;
507 
508  return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
509 }
510 
511 static int
512 compile_string_node(Node* node, regex_t* reg)
513 {
514  int r, len, prev_len, blen, ambig;
515  OnigEncoding enc = reg->enc;
516  UChar *p, *prev, *end;
517  StrNode* sn;
518 
519  sn = NSTR(node);
520  if (sn->end <= sn->s)
521  return 0;
522 
523  end = sn->end;
524  ambig = NSTRING_IS_AMBIG(node);
525 
526  p = prev = sn->s;
527  prev_len = enclen(enc, p, end);
528  p += prev_len;
529  blen = prev_len;
530 
531  for (; p < end; ) {
532  len = enclen(enc, p, end);
533  if (len == prev_len || ambig) {
534  blen += len;
535  }
536  else {
537  r = add_compile_string(prev, prev_len, blen, reg, ambig);
538  if (r) return r;
539 
540  prev = p;
541  blen = len;
542  prev_len = len;
543  }
544 
545  p += len;
546  }
547  return add_compile_string(prev, prev_len, blen, reg, ambig);
548 }
549 
550 static int
551 compile_string_raw_node(StrNode* sn, regex_t* reg)
552 {
553  if (sn->end <= sn->s)
554  return 0;
555 
556  return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
557 }
558 
559 static int
560 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
561 {
562 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
563  add_length(reg, mbuf->used);
564  return add_bytes(reg, mbuf->p, mbuf->used);
565 #else
566  int r, pad_size;
568 
569  GET_ALIGNMENT_PAD_SIZE(p, pad_size);
570  add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
571  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
572 
573  r = add_bytes(reg, mbuf->p, mbuf->used);
574 
575  /* padding for return value from compile_length_cclass_node() to be fix. */
576  pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
577  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
578  return r;
579 #endif
580 }
581 
582 static int
583 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
584 {
585  int len;
586 
587  if (IS_NULL(cc->mbuf)) {
588  len = SIZE_OPCODE + SIZE_BITSET;
589  }
590  else {
591  if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
592  len = SIZE_OPCODE;
593  }
594  else {
595  len = SIZE_OPCODE + SIZE_BITSET;
596  }
597 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
598  len += SIZE_LENGTH + cc->mbuf->used;
599 #else
600  len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
601 #endif
602  }
603 
604  return len;
605 }
606 
607 static int
608 compile_cclass_node(CClassNode* cc, regex_t* reg)
609 {
610  int r;
611 
612  if (IS_NULL(cc->mbuf)) {
613  if (IS_NCCLASS_NOT(cc))
614  add_opcode(reg, OP_CCLASS_NOT);
615  else
616  add_opcode(reg, OP_CCLASS);
617 
618  r = add_bitset(reg, cc->bs);
619  }
620  else {
621  if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
622  if (IS_NCCLASS_NOT(cc))
623  add_opcode(reg, OP_CCLASS_MB_NOT);
624  else
625  add_opcode(reg, OP_CCLASS_MB);
626 
627  r = add_multi_byte_cclass(cc->mbuf, reg);
628  }
629  else {
630  if (IS_NCCLASS_NOT(cc))
631  add_opcode(reg, OP_CCLASS_MIX_NOT);
632  else
633  add_opcode(reg, OP_CCLASS_MIX);
634 
635  r = add_bitset(reg, cc->bs);
636  if (r) return r;
637  r = add_multi_byte_cclass(cc->mbuf, reg);
638  }
639  }
640 
641  return r;
642 }
643 
644 static int
645 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
646 {
647 #define REPEAT_RANGE_ALLOC 4
648 
649  OnigRepeatRange* p;
650 
651  if (reg->repeat_range_alloc == 0) {
654  reg->repeat_range = p;
656  }
657  else if (reg->repeat_range_alloc <= id) {
658  int n;
661  sizeof(OnigRepeatRange) * n);
663  reg->repeat_range = p;
664  reg->repeat_range_alloc = n;
665  }
666  else {
667  p = reg->repeat_range;
668  }
669 
670  p[id].lower = lower;
671  p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
672  return 0;
673 }
674 
675 static int
676 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
677  regex_t* reg)
678 {
679  int r;
680  int num_repeat = reg->num_repeat;
681 
682  r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
683  if (r) return r;
684  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
685  reg->num_repeat++;
686  if (r) return r;
687  r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
688  if (r) return r;
689 
690  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
691  if (r) return r;
692 
693  r = compile_tree_empty_check(qn->target, reg, empty_info);
694  if (r) return r;
695 
696  if (
697 #ifdef USE_SUBEXP_CALL
698  reg->num_call > 0 ||
699 #endif
701  r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
702  }
703  else {
704  r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
705  }
706  if (r) return r;
707  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
708  return r;
709 }
710 
711 static int
712 is_anychar_star_quantifier(QtfrNode* qn)
713 {
714  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
715  NTYPE(qn->target) == NT_CANY)
716  return 1;
717  else
718  return 0;
719 }
720 
721 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
722 #define CKN_ON (ckn > 0)
723 
724 #ifdef USE_COMBINATION_EXPLOSION_CHECK
725 
726 static int
727 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
728 {
729  int len, mod_tlen, cklen;
730  int ckn;
731  int infinite = IS_REPEAT_INFINITE(qn->upper);
732  int empty_info = qn->target_empty_info;
733  int tlen = compile_length_tree(qn->target, reg);
734 
735  if (tlen < 0) return tlen;
736 
737  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
738 
739  cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
740 
741  /* anychar repeat */
742  if (NTYPE(qn->target) == NT_CANY) {
743  if (qn->greedy && infinite) {
744  if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
745  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
746  else
747  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
748  }
749  }
750 
751  if (empty_info != 0)
752  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
753  else
754  mod_tlen = tlen;
755 
756  if (infinite && qn->lower <= 1) {
757  if (qn->greedy) {
758  if (qn->lower == 1)
759  len = SIZE_OP_JUMP;
760  else
761  len = 0;
762 
763  len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
764  }
765  else {
766  if (qn->lower == 0)
767  len = SIZE_OP_JUMP;
768  else
769  len = 0;
770 
771  len += mod_tlen + SIZE_OP_PUSH + cklen;
772  }
773  }
774  else if (qn->upper == 0) {
775  if (qn->is_refered != 0) /* /(?<n>..){0}/ */
776  len = SIZE_OP_JUMP + tlen;
777  else
778  len = 0;
779  }
780  else if (qn->upper == 1 && qn->greedy) {
781  if (qn->lower == 0) {
782  if (CKN_ON) {
783  len = SIZE_OP_STATE_CHECK_PUSH + tlen;
784  }
785  else {
786  len = SIZE_OP_PUSH + tlen;
787  }
788  }
789  else {
790  len = tlen;
791  }
792  }
793  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
794  len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
795  }
796  else {
797  len = SIZE_OP_REPEAT_INC
798  + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
799  if (CKN_ON)
800  len += SIZE_OP_STATE_CHECK;
801  }
802 
803  return len;
804 }
805 
806 static int
807 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
808 {
809  int r, mod_tlen;
810  int ckn;
811  int infinite = IS_REPEAT_INFINITE(qn->upper);
812  int empty_info = qn->target_empty_info;
813  int tlen = compile_length_tree(qn->target, reg);
814 
815  if (tlen < 0) return tlen;
816 
817  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
818 
819  if (is_anychar_star_quantifier(qn)) {
820  r = compile_tree_n_times(qn->target, qn->lower, reg);
821  if (r) return r;
822  if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
823  if (IS_MULTILINE(reg->options))
824  r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
825  else
826  r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
827  if (r) return r;
828  if (CKN_ON) {
829  r = add_state_check_num(reg, ckn);
830  if (r) return r;
831  }
832 
833  return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
834  }
835  else {
836  if (IS_MULTILINE(reg->options)) {
837  r = add_opcode(reg, (CKN_ON ?
839  : OP_ANYCHAR_ML_STAR));
840  }
841  else {
842  r = add_opcode(reg, (CKN_ON ?
844  : OP_ANYCHAR_STAR));
845  }
846  if (r) return r;
847  if (CKN_ON)
848  r = add_state_check_num(reg, ckn);
849 
850  return r;
851  }
852  }
853 
854  if (empty_info != 0)
855  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
856  else
857  mod_tlen = tlen;
858 
859  if (infinite && qn->lower <= 1) {
860  if (qn->greedy) {
861  if (qn->lower == 1) {
862  r = add_opcode_rel_addr(reg, OP_JUMP,
863  (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
864  if (r) return r;
865  }
866 
867  if (CKN_ON) {
868  r = add_opcode(reg, OP_STATE_CHECK_PUSH);
869  if (r) return r;
870  r = add_state_check_num(reg, ckn);
871  if (r) return r;
872  r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
873  }
874  else {
875  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
876  }
877  if (r) return r;
878  r = compile_tree_empty_check(qn->target, reg, empty_info);
879  if (r) return r;
880  r = add_opcode_rel_addr(reg, OP_JUMP,
881  -(mod_tlen + (int )SIZE_OP_JUMP
882  + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
883  }
884  else {
885  if (qn->lower == 0) {
886  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
887  if (r) return r;
888  }
889  r = compile_tree_empty_check(qn->target, reg, empty_info);
890  if (r) return r;
891  if (CKN_ON) {
892  r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
893  if (r) return r;
894  r = add_state_check_num(reg, ckn);
895  if (r) return r;
896  r = add_rel_addr(reg,
897  -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
898  }
899  else
900  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
901  }
902  }
903  else if (qn->upper == 0) {
904  if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
905  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
906  if (r) return r;
907  r = compile_tree(qn->target, reg);
908  }
909  else
910  r = 0;
911  }
912  else if (qn->upper == 1 && qn->greedy) {
913  if (qn->lower == 0) {
914  if (CKN_ON) {
915  r = add_opcode(reg, OP_STATE_CHECK_PUSH);
916  if (r) return r;
917  r = add_state_check_num(reg, ckn);
918  if (r) return r;
919  r = add_rel_addr(reg, tlen);
920  }
921  else {
922  r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
923  }
924  if (r) return r;
925  }
926 
927  r = compile_tree(qn->target, reg);
928  }
929  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
930  if (CKN_ON) {
931  r = add_opcode(reg, OP_STATE_CHECK_PUSH);
932  if (r) return r;
933  r = add_state_check_num(reg, ckn);
934  if (r) return r;
935  r = add_rel_addr(reg, SIZE_OP_JUMP);
936  }
937  else {
938  r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
939  }
940 
941  if (r) return r;
942  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
943  if (r) return r;
944  r = compile_tree(qn->target, reg);
945  }
946  else {
947  r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
948  if (CKN_ON) {
949  if (r) return r;
950  r = add_opcode(reg, OP_STATE_CHECK);
951  if (r) return r;
952  r = add_state_check_num(reg, ckn);
953  }
954  }
955  return r;
956 }
957 
958 #else /* USE_COMBINATION_EXPLOSION_CHECK */
959 
960 static int
961 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
962 {
963  int len, mod_tlen;
964  int infinite = IS_REPEAT_INFINITE(qn->upper);
965  int empty_info = qn->target_empty_info;
966  int tlen = compile_length_tree(qn->target, reg);
967 
968  if (tlen < 0) return tlen;
969 
970  /* anychar repeat */
971  if (NTYPE(qn->target) == NT_CANY) {
972  if (qn->greedy && infinite) {
973  if (IS_NOT_NULL(qn->next_head_exact))
974  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
975  else
976  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
977  }
978  }
979 
980  if (empty_info != 0)
981  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
982  else
983  mod_tlen = tlen;
984 
985  if (infinite &&
986  (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
987  if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
988  len = SIZE_OP_JUMP;
989  }
990  else {
991  len = tlen * qn->lower;
992  }
993 
994  if (qn->greedy) {
995 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
996  if (IS_NOT_NULL(qn->head_exact))
997  len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
998  else
999 #endif
1000  if (IS_NOT_NULL(qn->next_head_exact))
1001  len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1002  else
1003  len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1004  }
1005  else
1006  len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1007  }
1008  else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1009  len = SIZE_OP_JUMP + tlen;
1010  }
1011  else if (!infinite && qn->greedy &&
1012  (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1014  len = tlen * qn->lower;
1015  len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1016  }
1017  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1018  len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1019  }
1020  else {
1021  len = SIZE_OP_REPEAT_INC
1022  + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1023  }
1024 
1025  return len;
1026 }
1027 
1028 static int
1029 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1030 {
1031  int i, r, mod_tlen;
1032  int infinite = IS_REPEAT_INFINITE(qn->upper);
1033  int empty_info = qn->target_empty_info;
1034  int tlen = compile_length_tree(qn->target, reg);
1035 
1036  if (tlen < 0) return tlen;
1037 
1038  if (is_anychar_star_quantifier(qn)) {
1039  r = compile_tree_n_times(qn->target, qn->lower, reg);
1040  if (r) return r;
1041  if (IS_NOT_NULL(qn->next_head_exact)) {
1042  if (IS_MULTILINE(reg->options))
1043  r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1044  else
1045  r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1046  if (r) return r;
1047  return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1048  }
1049  else {
1050  if (IS_MULTILINE(reg->options))
1051  return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1052  else
1053  return add_opcode(reg, OP_ANYCHAR_STAR);
1054  }
1055  }
1056 
1057  if (empty_info != 0)
1058  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1059  else
1060  mod_tlen = tlen;
1061 
1062  if (infinite &&
1063  (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1064  if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1065  if (qn->greedy) {
1066 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
1067  if (IS_NOT_NULL(qn->head_exact))
1068  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1069  else
1070 #endif
1071  if (IS_NOT_NULL(qn->next_head_exact))
1072  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1073  else
1074  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1075  }
1076  else {
1077  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1078  }
1079  if (r) return r;
1080  }
1081  else {
1082  r = compile_tree_n_times(qn->target, qn->lower, reg);
1083  if (r) return r;
1084  }
1085 
1086  if (qn->greedy) {
1087 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
1088  if (IS_NOT_NULL(qn->head_exact)) {
1089  r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1090  mod_tlen + SIZE_OP_JUMP);
1091  if (r) return r;
1092  add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1093  r = compile_tree_empty_check(qn->target, reg, empty_info);
1094  if (r) return r;
1095  r = add_opcode_rel_addr(reg, OP_JUMP,
1096  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1097  }
1098  else
1099 #endif
1100  if (IS_NOT_NULL(qn->next_head_exact)) {
1101  r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1102  mod_tlen + SIZE_OP_JUMP);
1103  if (r) return r;
1104  add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1105  r = compile_tree_empty_check(qn->target, reg, empty_info);
1106  if (r) return r;
1107  r = add_opcode_rel_addr(reg, OP_JUMP,
1108  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1109  }
1110  else {
1111  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1112  if (r) return r;
1113  r = compile_tree_empty_check(qn->target, reg, empty_info);
1114  if (r) return r;
1115  r = add_opcode_rel_addr(reg, OP_JUMP,
1116  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1117  }
1118  }
1119  else {
1120  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1121  if (r) return r;
1122  r = compile_tree_empty_check(qn->target, reg, empty_info);
1123  if (r) return r;
1124  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1125  }
1126  }
1127  else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1128  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1129  if (r) return r;
1130  r = compile_tree(qn->target, reg);
1131  }
1132  else if (!infinite && qn->greedy &&
1133  (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1135  int n = qn->upper - qn->lower;
1136 
1137  r = compile_tree_n_times(qn->target, qn->lower, reg);
1138  if (r) return r;
1139 
1140  for (i = 0; i < n; i++) {
1141  r = add_opcode_rel_addr(reg, OP_PUSH,
1142  (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1143  if (r) return r;
1144  r = compile_tree(qn->target, reg);
1145  if (r) return r;
1146  }
1147  }
1148  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1149  r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1150  if (r) return r;
1151  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1152  if (r) return r;
1153  r = compile_tree(qn->target, reg);
1154  }
1155  else {
1156  r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1157  }
1158  return r;
1159 }
1160 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1161 
1162 static int
1163 compile_length_option_node(EncloseNode* node, regex_t* reg)
1164 {
1165  int tlen;
1166  OnigOptionType prev = reg->options;
1167 
1168  reg->options = node->option;
1169  tlen = compile_length_tree(node->target, reg);
1170  reg->options = prev;
1171 
1172  if (tlen < 0) return tlen;
1173 
1174  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1176  + tlen + SIZE_OP_SET_OPTION;
1177  }
1178  else
1179  return tlen;
1180 }
1181 
1182 static int
1183 compile_option_node(EncloseNode* node, regex_t* reg)
1184 {
1185  int r;
1186  OnigOptionType prev = reg->options;
1187 
1188  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1189  r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1190  if (r) return r;
1191  r = add_opcode_option(reg, OP_SET_OPTION, prev);
1192  if (r) return r;
1193  r = add_opcode(reg, OP_FAIL);
1194  if (r) return r;
1195  }
1196 
1197  reg->options = node->option;
1198  r = compile_tree(node->target, reg);
1199  reg->options = prev;
1200 
1201  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1202  if (r) return r;
1203  r = add_opcode_option(reg, OP_SET_OPTION, prev);
1204  }
1205  return r;
1206 }
1207 
1208 static int
1209 compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1210 {
1211  int len;
1212  int tlen;
1213 
1214  if (node->type == ENCLOSE_OPTION)
1215  return compile_length_option_node(node, reg);
1216 
1217  if (node->target) {
1218  tlen = compile_length_tree(node->target, reg);
1219  if (tlen < 0) return tlen;
1220  }
1221  else
1222  tlen = 0;
1223 
1224  switch (node->type) {
1225  case ENCLOSE_MEMORY:
1226 #ifdef USE_SUBEXP_CALL
1227  if (IS_ENCLOSE_CALLED(node)) {
1228  len = SIZE_OP_MEMORY_START_PUSH + tlen
1230  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1231  len += (IS_ENCLOSE_RECURSION(node)
1233  else
1234  len += (IS_ENCLOSE_RECURSION(node)
1236  }
1237  else if (IS_ENCLOSE_RECURSION(node)) {
1239  len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1241  }
1242  else
1243 #endif
1244  {
1245  if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1247  else
1248  len = SIZE_OP_MEMORY_START;
1249 
1250  len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1252  }
1253  break;
1254 
1257  QtfrNode* qn = NQTFR(node->target);
1258  tlen = compile_length_tree(qn->target, reg);
1259  if (tlen < 0) return tlen;
1260 
1261  len = tlen * qn->lower
1262  + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1263  }
1264  else {
1266  }
1267  break;
1268 
1269  case ENCLOSE_CONDITION:
1270  len = SIZE_OP_CONDITION;
1271  if (NTYPE(node->target) == NT_ALT) {
1272  Node* x = node->target;
1273 
1274  tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1275  if (tlen < 0) return tlen;
1276  len += tlen + SIZE_OP_JUMP;
1277  if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1278  x = NCDR(x);
1279  tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1280  if (tlen < 0) return tlen;
1281  len += tlen;
1282  if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1283  }
1284  else {
1285  return ONIGERR_PARSER_BUG;
1286  }
1287  break;
1288 
1289  case ENCLOSE_ABSENT:
1291  break;
1292 
1293  default:
1294  return ONIGERR_TYPE_BUG;
1295  break;
1296  }
1297 
1298  return len;
1299 }
1300 
1301 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1302 
1303 static int
1304 compile_enclose_node(EncloseNode* node, regex_t* reg)
1305 {
1306  int r, len;
1307 
1308  if (node->type == ENCLOSE_OPTION)
1309  return compile_option_node(node, reg);
1310 
1311  switch (node->type) {
1312  case ENCLOSE_MEMORY:
1313 #ifdef USE_SUBEXP_CALL
1314  if (IS_ENCLOSE_CALLED(node)) {
1315  r = add_opcode(reg, OP_CALL);
1316  if (r) return r;
1318  node->state |= NST_ADDR_FIXED;
1319  r = add_abs_addr(reg, (int )node->call_addr);
1320  if (r) return r;
1321  len = compile_length_tree(node->target, reg);
1323  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1324  len += (IS_ENCLOSE_RECURSION(node)
1326  else
1327  len += (IS_ENCLOSE_RECURSION(node)
1329 
1330  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1331  if (r) return r;
1332  }
1333 #endif
1334  if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1335  r = add_opcode(reg, OP_MEMORY_START_PUSH);
1336  else
1337  r = add_opcode(reg, OP_MEMORY_START);
1338  if (r) return r;
1339  r = add_mem_num(reg, node->regnum);
1340  if (r) return r;
1341  r = compile_tree(node->target, reg);
1342  if (r) return r;
1343 #ifdef USE_SUBEXP_CALL
1344  if (IS_ENCLOSE_CALLED(node)) {
1345  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1346  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1348  else
1349  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1351 
1352  if (r) return r;
1353  r = add_mem_num(reg, node->regnum);
1354  if (r) return r;
1355  r = add_opcode(reg, OP_RETURN);
1356  }
1357  else if (IS_ENCLOSE_RECURSION(node)) {
1358  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1359  r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1360  else
1361  r = add_opcode(reg, OP_MEMORY_END_REC);
1362  if (r) return r;
1363  r = add_mem_num(reg, node->regnum);
1364  }
1365  else
1366 #endif
1367  {
1368  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1369  r = add_opcode(reg, OP_MEMORY_END_PUSH);
1370  else
1371  r = add_opcode(reg, OP_MEMORY_END);
1372  if (r) return r;
1373  r = add_mem_num(reg, node->regnum);
1374  }
1375  break;
1376 
1379  QtfrNode* qn = NQTFR(node->target);
1380  r = compile_tree_n_times(qn->target, qn->lower, reg);
1381  if (r) return r;
1382 
1383  len = compile_length_tree(qn->target, reg);
1384  if (len < 0) return len;
1385 
1386  r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1387  if (r) return r;
1388  r = compile_tree(qn->target, reg);
1389  if (r) return r;
1390  r = add_opcode(reg, OP_POP);
1391  if (r) return r;
1392  r = add_opcode_rel_addr(reg, OP_JUMP,
1393  -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1394  }
1395  else {
1396  r = add_opcode(reg, OP_PUSH_STOP_BT);
1397  if (r) return r;
1398  r = compile_tree(node->target, reg);
1399  if (r) return r;
1400  r = add_opcode(reg, OP_POP_STOP_BT);
1401  }
1402  break;
1403 
1404  case ENCLOSE_CONDITION:
1405  r = add_opcode(reg, OP_CONDITION);
1406  if (r) return r;
1407  r = add_mem_num(reg, node->regnum);
1408  if (r) return r;
1409 
1410  if (NTYPE(node->target) == NT_ALT) {
1411  Node* x = node->target;
1412  int len2;
1413 
1414  len = compile_length_tree(NCAR(x), reg); /* yes-node */
1415  if (len < 0) return len;
1416  if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1417  x = NCDR(x);
1418  len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1419  if (len2 < 0) return len2;
1420  if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1421 
1422  x = node->target;
1423  r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1424  if (r) return r;
1425  r = compile_tree(NCAR(x), reg); /* yes-node */
1426  if (r) return r;
1427  r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1428  if (r) return r;
1429  x = NCDR(x);
1430  r = compile_tree(NCAR(x), reg); /* no-node */
1431  }
1432  else {
1433  return ONIGERR_PARSER_BUG;
1434  }
1435  break;
1436 
1437  case ENCLOSE_ABSENT:
1438  len = compile_length_tree(node->target, reg);
1439  if (len < 0) return len;
1440 
1441  r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1442  if (r) return r;
1443  r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
1444  if (r) return r;
1445  r = compile_tree(node->target, reg);
1446  if (r) return r;
1447  r = add_opcode(reg, OP_ABSENT_END);
1448  break;
1449 
1450  default:
1451  return ONIGERR_TYPE_BUG;
1452  break;
1453  }
1454 
1455  return r;
1456 }
1457 
1458 static int
1459 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1460 {
1461  int len;
1462  int tlen = 0;
1463 
1464  if (node->target) {
1465  tlen = compile_length_tree(node->target, reg);
1466  if (tlen < 0) return tlen;
1467  }
1468 
1469  switch (node->type) {
1470  case ANCHOR_PREC_READ:
1471  len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1472  break;
1473  case ANCHOR_PREC_READ_NOT:
1474  len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1475  break;
1476  case ANCHOR_LOOK_BEHIND:
1477  len = SIZE_OP_LOOK_BEHIND + tlen;
1478  break;
1481  break;
1482 
1483  default:
1484  len = SIZE_OPCODE;
1485  break;
1486  }
1487 
1488  return len;
1489 }
1490 
1491 static int
1492 compile_anchor_node(AnchorNode* node, regex_t* reg)
1493 {
1494  int r, len;
1495 
1496  switch (node->type) {
1497  case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1498  case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1499  case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1500  case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1501  case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1502  case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1503 
1504  case ANCHOR_WORD_BOUND:
1505  if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1506  else r = add_opcode(reg, OP_WORD_BOUND);
1507  break;
1508  case ANCHOR_NOT_WORD_BOUND:
1509  if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1510  else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1511  break;
1512 #ifdef USE_WORD_BEGIN_END
1513  case ANCHOR_WORD_BEGIN:
1514  if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1515  else r = add_opcode(reg, OP_WORD_BEGIN);
1516  break;
1517  case ANCHOR_WORD_END:
1518  if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1519  else r = add_opcode(reg, OP_WORD_END);
1520  break;
1521 #endif
1522  case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
1523 
1524  case ANCHOR_PREC_READ:
1525  r = add_opcode(reg, OP_PUSH_POS);
1526  if (r) return r;
1527  r = compile_tree(node->target, reg);
1528  if (r) return r;
1529  r = add_opcode(reg, OP_POP_POS);
1530  break;
1531 
1532  case ANCHOR_PREC_READ_NOT:
1533  len = compile_length_tree(node->target, reg);
1534  if (len < 0) return len;
1535  r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1536  if (r) return r;
1537  r = compile_tree(node->target, reg);
1538  if (r) return r;
1539  r = add_opcode(reg, OP_FAIL_POS);
1540  break;
1541 
1542  case ANCHOR_LOOK_BEHIND:
1543  {
1544  int n;
1545  r = add_opcode(reg, OP_LOOK_BEHIND);
1546  if (r) return r;
1547  if (node->char_len < 0) {
1548  r = get_char_length_tree(node->target, reg, &n);
1550  }
1551  else
1552  n = node->char_len;
1553  r = add_length(reg, n);
1554  if (r) return r;
1555  r = compile_tree(node->target, reg);
1556  }
1557  break;
1558 
1560  {
1561  int n;
1562  len = compile_length_tree(node->target, reg);
1563  r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1565  if (r) return r;
1566  if (node->char_len < 0) {
1567  r = get_char_length_tree(node->target, reg, &n);
1569  }
1570  else
1571  n = node->char_len;
1572  r = add_length(reg, n);
1573  if (r) return r;
1574  r = compile_tree(node->target, reg);
1575  if (r) return r;
1576  r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1577  }
1578  break;
1579 
1580  default:
1581  return ONIGERR_TYPE_BUG;
1582  break;
1583  }
1584 
1585  return r;
1586 }
1587 
1588 static int
1589 compile_length_tree(Node* node, regex_t* reg)
1590 {
1591  int len, type, r;
1592 
1593  type = NTYPE(node);
1594  switch (type) {
1595  case NT_LIST:
1596  len = 0;
1597  do {
1598  r = compile_length_tree(NCAR(node), reg);
1599  if (r < 0) return r;
1600  len += r;
1601  } while (IS_NOT_NULL(node = NCDR(node)));
1602  r = len;
1603  break;
1604 
1605  case NT_ALT:
1606  {
1607  int n = 0;
1608  len = 0;
1609  do {
1610  r = compile_length_tree(NCAR(node), reg);
1611  if (r < 0) return r;
1612  len += r;
1613  n++;
1614  } while (IS_NOT_NULL(node = NCDR(node)));
1615  r = len;
1616  r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1617  }
1618  break;
1619 
1620  case NT_STR:
1621  if (NSTRING_IS_RAW(node))
1622  r = compile_length_string_raw_node(NSTR(node), reg);
1623  else
1624  r = compile_length_string_node(node, reg);
1625  break;
1626 
1627  case NT_CCLASS:
1628  r = compile_length_cclass_node(NCCLASS(node), reg);
1629  break;
1630 
1631  case NT_CTYPE:
1632  case NT_CANY:
1633  r = SIZE_OPCODE;
1634  break;
1635 
1636  case NT_BREF:
1637  {
1638  BRefNode* br = NBREF(node);
1639 
1640 #ifdef USE_BACKREF_WITH_LEVEL
1641  if (IS_BACKREF_NEST_LEVEL(br)) {
1643  SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1644  }
1645  else
1646 #endif
1647  if (br->back_num == 1) {
1648  r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1650  }
1651  else {
1652  r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1653  }
1654  }
1655  break;
1656 
1657 #ifdef USE_SUBEXP_CALL
1658  case NT_CALL:
1659  r = SIZE_OP_CALL;
1660  break;
1661 #endif
1662 
1663  case NT_QTFR:
1664  r = compile_length_quantifier_node(NQTFR(node), reg);
1665  break;
1666 
1667  case NT_ENCLOSE:
1668  r = compile_length_enclose_node(NENCLOSE(node), reg);
1669  break;
1670 
1671  case NT_ANCHOR:
1672  r = compile_length_anchor_node(NANCHOR(node), reg);
1673  break;
1674 
1675  default:
1676  return ONIGERR_TYPE_BUG;
1677  break;
1678  }
1679 
1680  return r;
1681 }
1682 
1683 static int
1684 compile_tree(Node* node, regex_t* reg)
1685 {
1686  int n, type, len, pos, r = 0;
1687 
1688  type = NTYPE(node);
1689  switch (type) {
1690  case NT_LIST:
1691  do {
1692  r = compile_tree(NCAR(node), reg);
1693  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1694  break;
1695 
1696  case NT_ALT:
1697  {
1698  Node* x = node;
1699  len = 0;
1700  do {
1701  len += compile_length_tree(NCAR(x), reg);
1702  if (NCDR(x) != NULL) {
1703  len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1704  }
1705  } while (IS_NOT_NULL(x = NCDR(x)));
1706  pos = reg->used + len; /* goal position */
1707 
1708  do {
1709  len = compile_length_tree(NCAR(node), reg);
1710  if (IS_NOT_NULL(NCDR(node))) {
1711  r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1712  if (r) break;
1713  }
1714  r = compile_tree(NCAR(node), reg);
1715  if (r) break;
1716  if (IS_NOT_NULL(NCDR(node))) {
1717  len = pos - (reg->used + SIZE_OP_JUMP);
1718  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1719  if (r) break;
1720  }
1721  } while (IS_NOT_NULL(node = NCDR(node)));
1722  }
1723  break;
1724 
1725  case NT_STR:
1726  if (NSTRING_IS_RAW(node))
1727  r = compile_string_raw_node(NSTR(node), reg);
1728  else
1729  r = compile_string_node(node, reg);
1730  break;
1731 
1732  case NT_CCLASS:
1733  r = compile_cclass_node(NCCLASS(node), reg);
1734  break;
1735 
1736  case NT_CTYPE:
1737  {
1738  int op;
1739 
1740  switch (NCTYPE(node)->ctype) {
1741  case ONIGENC_CTYPE_WORD:
1742  if (NCTYPE(node)->ascii_range != 0) {
1743  if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1744  else op = OP_ASCII_WORD;
1745  }
1746  else {
1747  if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1748  else op = OP_WORD;
1749  }
1750  break;
1751  default:
1752  return ONIGERR_TYPE_BUG;
1753  break;
1754  }
1755  r = add_opcode(reg, op);
1756  }
1757  break;
1758 
1759  case NT_CANY:
1760  if (IS_MULTILINE(reg->options))
1761  r = add_opcode(reg, OP_ANYCHAR_ML);
1762  else
1763  r = add_opcode(reg, OP_ANYCHAR);
1764  break;
1765 
1766  case NT_BREF:
1767  {
1768  BRefNode* br = NBREF(node);
1769 
1770 #ifdef USE_BACKREF_WITH_LEVEL
1771  if (IS_BACKREF_NEST_LEVEL(br)) {
1772  r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1773  if (r) return r;
1774  r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1775  if (r) return r;
1776  r = add_length(reg, br->nest_level);
1777  if (r) return r;
1778 
1779  goto add_bacref_mems;
1780  }
1781  else
1782 #endif
1783  if (br->back_num == 1) {
1784  n = br->back_static[0];
1785  if (IS_IGNORECASE(reg->options)) {
1786  r = add_opcode(reg, OP_BACKREFN_IC);
1787  if (r) return r;
1788  r = add_mem_num(reg, n);
1789  }
1790  else {
1791  switch (n) {
1792  case 1: r = add_opcode(reg, OP_BACKREF1); break;
1793  case 2: r = add_opcode(reg, OP_BACKREF2); break;
1794  default:
1795  r = add_opcode(reg, OP_BACKREFN);
1796  if (r) return r;
1797  r = add_mem_num(reg, n);
1798  break;
1799  }
1800  }
1801  }
1802  else {
1803  int i;
1804  int* p;
1805 
1806  if (IS_IGNORECASE(reg->options)) {
1807  r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1808  }
1809  else {
1810  r = add_opcode(reg, OP_BACKREF_MULTI);
1811  }
1812  if (r) return r;
1813 
1814 #ifdef USE_BACKREF_WITH_LEVEL
1815  add_bacref_mems:
1816 #endif
1817  r = add_length(reg, br->back_num);
1818  if (r) return r;
1819  p = BACKREFS_P(br);
1820  for (i = br->back_num - 1; i >= 0; i--) {
1821  r = add_mem_num(reg, p[i]);
1822  if (r) return r;
1823  }
1824  }
1825  }
1826  break;
1827 
1828 #ifdef USE_SUBEXP_CALL
1829  case NT_CALL:
1830  r = compile_call(NCALL(node), reg);
1831  break;
1832 #endif
1833 
1834  case NT_QTFR:
1835  r = compile_quantifier_node(NQTFR(node), reg);
1836  break;
1837 
1838  case NT_ENCLOSE:
1839  r = compile_enclose_node(NENCLOSE(node), reg);
1840  break;
1841 
1842  case NT_ANCHOR:
1843  r = compile_anchor_node(NANCHOR(node), reg);
1844  break;
1845 
1846  default:
1847 #ifdef ONIG_DEBUG
1848  fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1849 #endif
1850  break;
1851  }
1852 
1853  return r;
1854 }
1855 
1856 #ifdef USE_NAMED_GROUP
1857 
1858 static int
1859 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1860 {
1861  int r = 0;
1862  Node* node = *plink;
1863 
1864  switch (NTYPE(node)) {
1865  case NT_LIST:
1866  case NT_ALT:
1867  do {
1868  r = noname_disable_map(&(NCAR(node)), map, counter);
1869  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1870  break;
1871 
1872  case NT_QTFR:
1873  {
1874  Node** ptarget = &(NQTFR(node)->target);
1875  Node* old = *ptarget;
1876  r = noname_disable_map(ptarget, map, counter);
1877  if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1878  onig_reduce_nested_quantifier(node, *ptarget);
1879  }
1880  }
1881  break;
1882 
1883  case NT_ENCLOSE:
1884  {
1885  EncloseNode* en = NENCLOSE(node);
1886  if (en->type == ENCLOSE_MEMORY) {
1887  if (IS_ENCLOSE_NAMED_GROUP(en)) {
1888  (*counter)++;
1889  map[en->regnum].new_val = *counter;
1890  en->regnum = *counter;
1891  }
1892  else if (en->regnum != 0) {
1893  *plink = en->target;
1894  en->target = NULL_NODE;
1895  onig_node_free(node);
1896  r = noname_disable_map(plink, map, counter);
1897  break;
1898  }
1899  }
1900  r = noname_disable_map(&(en->target), map, counter);
1901  }
1902  break;
1903 
1904  case NT_ANCHOR:
1905  if (NANCHOR(node)->target)
1906  r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1907  break;
1908 
1909  default:
1910  break;
1911  }
1912 
1913  return r;
1914 }
1915 
1916 static int
1917 renumber_node_backref(Node* node, GroupNumRemap* map)
1918 {
1919  int i, pos, n, old_num;
1920  int *backs;
1921  BRefNode* bn = NBREF(node);
1922 
1923  if (! IS_BACKREF_NAME_REF(bn))
1925 
1926  old_num = bn->back_num;
1927  if (IS_NULL(bn->back_dynamic))
1928  backs = bn->back_static;
1929  else
1930  backs = bn->back_dynamic;
1931 
1932  for (i = 0, pos = 0; i < old_num; i++) {
1933  n = map[backs[i]].new_val;
1934  if (n > 0) {
1935  backs[pos] = n;
1936  pos++;
1937  }
1938  }
1939 
1940  bn->back_num = pos;
1941  return 0;
1942 }
1943 
1944 static int
1945 renumber_by_map(Node* node, GroupNumRemap* map)
1946 {
1947  int r = 0;
1948 
1949  switch (NTYPE(node)) {
1950  case NT_LIST:
1951  case NT_ALT:
1952  do {
1953  r = renumber_by_map(NCAR(node), map);
1954  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1955  break;
1956  case NT_QTFR:
1957  r = renumber_by_map(NQTFR(node)->target, map);
1958  break;
1959  case NT_ENCLOSE:
1960  {
1961  EncloseNode* en = NENCLOSE(node);
1962  if (en->type == ENCLOSE_CONDITION)
1963  en->regnum = map[en->regnum].new_val;
1964  r = renumber_by_map(en->target, map);
1965  }
1966  break;
1967 
1968  case NT_BREF:
1969  r = renumber_node_backref(node, map);
1970  break;
1971 
1972  case NT_ANCHOR:
1973  if (NANCHOR(node)->target)
1974  r = renumber_by_map(NANCHOR(node)->target, map);
1975  break;
1976 
1977  default:
1978  break;
1979  }
1980 
1981  return r;
1982 }
1983 
1984 static int
1985 numbered_ref_check(Node* node)
1986 {
1987  int r = 0;
1988 
1989  switch (NTYPE(node)) {
1990  case NT_LIST:
1991  case NT_ALT:
1992  do {
1993  r = numbered_ref_check(NCAR(node));
1994  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1995  break;
1996  case NT_QTFR:
1997  r = numbered_ref_check(NQTFR(node)->target);
1998  break;
1999  case NT_ENCLOSE:
2000  r = numbered_ref_check(NENCLOSE(node)->target);
2001  break;
2002 
2003  case NT_BREF:
2004  if (! IS_BACKREF_NAME_REF(NBREF(node)))
2006  break;
2007 
2008  case NT_ANCHOR:
2009  if (NANCHOR(node)->target)
2010  r = numbered_ref_check(NANCHOR(node)->target);
2011  break;
2012 
2013  default:
2014  break;
2015  }
2016 
2017  return r;
2018 }
2019 
2020 static int
2021 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2022 {
2023  int r, i, pos, counter;
2024  BitStatusType loc;
2025  GroupNumRemap* map;
2026 
2027  map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
2029  for (i = 1; i <= env->num_mem; i++) {
2030  map[i].new_val = 0;
2031  }
2032  counter = 0;
2033  r = noname_disable_map(root, map, &counter);
2034  if (r != 0) return r;
2035 
2036  r = renumber_by_map(*root, map);
2037  if (r != 0) return r;
2038 
2039  for (i = 1, pos = 1; i <= env->num_mem; i++) {
2040  if (map[i].new_val > 0) {
2041  SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2042  pos++;
2043  }
2044  }
2045 
2046  loc = env->capture_history;
2048  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2049  if (BIT_STATUS_AT(loc, i)) {
2051  }
2052  }
2053 
2054  env->num_mem = env->num_named;
2055  reg->num_mem = env->num_named;
2056 
2057  return onig_renumber_name_table(reg, map);
2058 }
2059 #endif /* USE_NAMED_GROUP */
2060 
2061 #ifdef USE_SUBEXP_CALL
2062 static int
2063 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
2064 {
2065  int i, offset;
2066  EncloseNode* en;
2067  AbsAddrType addr;
2068 
2069  for (i = 0; i < uslist->num; i++) {
2070  en = NENCLOSE(uslist->us[i].target);
2071  if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2072  addr = en->call_addr;
2073  offset = uslist->us[i].offset;
2074 
2075  BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2076  }
2077  return 0;
2078 }
2079 #endif
2080 
2081 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2082 static int
2083 quantifiers_memory_node_info(Node* node)
2084 {
2085  int r = 0;
2086 
2087  switch (NTYPE(node)) {
2088  case NT_LIST:
2089  case NT_ALT:
2090  {
2091  int v;
2092  do {
2093  v = quantifiers_memory_node_info(NCAR(node));
2094  if (v > r) r = v;
2095  } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2096  }
2097  break;
2098 
2099 # ifdef USE_SUBEXP_CALL
2100  case NT_CALL:
2101  if (IS_CALL_RECURSION(NCALL(node))) {
2102  return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2103  }
2104  else
2105  r = quantifiers_memory_node_info(NCALL(node)->target);
2106  break;
2107 # endif
2108 
2109  case NT_QTFR:
2110  {
2111  QtfrNode* qn = NQTFR(node);
2112  if (qn->upper != 0) {
2113  r = quantifiers_memory_node_info(qn->target);
2114  }
2115  }
2116  break;
2117 
2118  case NT_ENCLOSE:
2119  {
2120  EncloseNode* en = NENCLOSE(node);
2121  switch (en->type) {
2122  case ENCLOSE_MEMORY:
2123  return NQ_TARGET_IS_EMPTY_MEM;
2124  break;
2125 
2126  case ENCLOSE_OPTION:
2128  case ENCLOSE_CONDITION:
2129  case ENCLOSE_ABSENT:
2130  r = quantifiers_memory_node_info(en->target);
2131  break;
2132  default:
2133  break;
2134  }
2135  }
2136  break;
2137 
2138  case NT_BREF:
2139  case NT_STR:
2140  case NT_CTYPE:
2141  case NT_CCLASS:
2142  case NT_CANY:
2143  case NT_ANCHOR:
2144  default:
2145  break;
2146  }
2147 
2148  return r;
2149 }
2150 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2151 
2152 static int
2153 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2154 {
2155  OnigDistance tmin;
2156  int r = 0;
2157 
2158  *min = 0;
2159  switch (NTYPE(node)) {
2160  case NT_BREF:
2161  {
2162  int i;
2163  int* backs;
2164  Node** nodes = SCANENV_MEM_NODES(env);
2165  BRefNode* br = NBREF(node);
2166  if (br->state & NST_RECURSION) break;
2167 
2168  backs = BACKREFS_P(br);
2169  if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2170  r = get_min_match_length(nodes[backs[0]], min, env);
2171  if (r != 0) break;
2172  for (i = 1; i < br->back_num; i++) {
2173  if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2174  r = get_min_match_length(nodes[backs[i]], &tmin, env);
2175  if (r != 0) break;
2176  if (*min > tmin) *min = tmin;
2177  }
2178  }
2179  break;
2180 
2181 #ifdef USE_SUBEXP_CALL
2182  case NT_CALL:
2183  if (IS_CALL_RECURSION(NCALL(node))) {
2184  EncloseNode* en = NENCLOSE(NCALL(node)->target);
2185  if (IS_ENCLOSE_MIN_FIXED(en))
2186  *min = en->min_len;
2187  }
2188  else
2189  r = get_min_match_length(NCALL(node)->target, min, env);
2190  break;
2191 #endif
2192 
2193  case NT_LIST:
2194  do {
2195  r = get_min_match_length(NCAR(node), &tmin, env);
2196  if (r == 0) *min += tmin;
2197  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2198  break;
2199 
2200  case NT_ALT:
2201  {
2202  Node *x, *y;
2203  y = node;
2204  do {
2205  x = NCAR(y);
2206  r = get_min_match_length(x, &tmin, env);
2207  if (r != 0) break;
2208  if (y == node) *min = tmin;
2209  else if (*min > tmin) *min = tmin;
2210  } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2211  }
2212  break;
2213 
2214  case NT_STR:
2215  {
2216  StrNode* sn = NSTR(node);
2217  *min = sn->end - sn->s;
2218  }
2219  break;
2220 
2221  case NT_CTYPE:
2222  *min = 1;
2223  break;
2224 
2225  case NT_CCLASS:
2226  case NT_CANY:
2227  *min = 1;
2228  break;
2229 
2230  case NT_QTFR:
2231  {
2232  QtfrNode* qn = NQTFR(node);
2233 
2234  if (qn->lower > 0) {
2235  r = get_min_match_length(qn->target, min, env);
2236  if (r == 0)
2237  *min = distance_multiply(*min, qn->lower);
2238  }
2239  }
2240  break;
2241 
2242  case NT_ENCLOSE:
2243  {
2244  EncloseNode* en = NENCLOSE(node);
2245  switch (en->type) {
2246  case ENCLOSE_MEMORY:
2247  if (IS_ENCLOSE_MIN_FIXED(en))
2248  *min = en->min_len;
2249  else {
2250  if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2251  *min = 0; /* recursive */
2252  else {
2254  r = get_min_match_length(en->target, min, env);
2256  if (r == 0) {
2257  en->min_len = *min;
2259  }
2260  }
2261  }
2262  break;
2263 
2264  case ENCLOSE_OPTION:
2266  case ENCLOSE_CONDITION:
2267  r = get_min_match_length(en->target, min, env);
2268  break;
2269 
2270  case ENCLOSE_ABSENT:
2271  break;
2272  }
2273  }
2274  break;
2275 
2276  case NT_ANCHOR:
2277  default:
2278  break;
2279  }
2280 
2281  return r;
2282 }
2283 
2284 static int
2285 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2286 {
2287  OnigDistance tmax;
2288  int r = 0;
2289 
2290  *max = 0;
2291  switch (NTYPE(node)) {
2292  case NT_LIST:
2293  do {
2294  r = get_max_match_length(NCAR(node), &tmax, env);
2295  if (r == 0)
2296  *max = distance_add(*max, tmax);
2297  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2298  break;
2299 
2300  case NT_ALT:
2301  do {
2302  r = get_max_match_length(NCAR(node), &tmax, env);
2303  if (r == 0 && *max < tmax) *max = tmax;
2304  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2305  break;
2306 
2307  case NT_STR:
2308  {
2309  StrNode* sn = NSTR(node);
2310  *max = sn->end - sn->s;
2311  }
2312  break;
2313 
2314  case NT_CTYPE:
2315  *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2316  break;
2317 
2318  case NT_CCLASS:
2319  case NT_CANY:
2320  *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2321  break;
2322 
2323  case NT_BREF:
2324  {
2325  int i;
2326  int* backs;
2327  Node** nodes = SCANENV_MEM_NODES(env);
2328  BRefNode* br = NBREF(node);
2329  if (br->state & NST_RECURSION) {
2330  *max = ONIG_INFINITE_DISTANCE;
2331  break;
2332  }
2333  backs = BACKREFS_P(br);
2334  for (i = 0; i < br->back_num; i++) {
2335  if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2336  r = get_max_match_length(nodes[backs[i]], &tmax, env);
2337  if (r != 0) break;
2338  if (*max < tmax) *max = tmax;
2339  }
2340  }
2341  break;
2342 
2343 #ifdef USE_SUBEXP_CALL
2344  case NT_CALL:
2345  if (! IS_CALL_RECURSION(NCALL(node)))
2346  r = get_max_match_length(NCALL(node)->target, max, env);
2347  else
2348  *max = ONIG_INFINITE_DISTANCE;
2349  break;
2350 #endif
2351 
2352  case NT_QTFR:
2353  {
2354  QtfrNode* qn = NQTFR(node);
2355 
2356  if (qn->upper != 0) {
2357  r = get_max_match_length(qn->target, max, env);
2358  if (r == 0 && *max != 0) {
2359  if (! IS_REPEAT_INFINITE(qn->upper))
2360  *max = distance_multiply(*max, qn->upper);
2361  else
2362  *max = ONIG_INFINITE_DISTANCE;
2363  }
2364  }
2365  }
2366  break;
2367 
2368  case NT_ENCLOSE:
2369  {
2370  EncloseNode* en = NENCLOSE(node);
2371  switch (en->type) {
2372  case ENCLOSE_MEMORY:
2373  if (IS_ENCLOSE_MAX_FIXED(en))
2374  *max = en->max_len;
2375  else {
2376  if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2377  *max = ONIG_INFINITE_DISTANCE;
2378  else {
2380  r = get_max_match_length(en->target, max, env);
2382  if (r == 0) {
2383  en->max_len = *max;
2385  }
2386  }
2387  }
2388  break;
2389 
2390  case ENCLOSE_OPTION:
2392  case ENCLOSE_CONDITION:
2393  r = get_max_match_length(en->target, max, env);
2394  break;
2395 
2396  case ENCLOSE_ABSENT:
2397  break;
2398  }
2399  }
2400  break;
2401 
2402  case NT_ANCHOR:
2403  default:
2404  break;
2405  }
2406 
2407  return r;
2408 }
2409 
2410 #define GET_CHAR_LEN_VARLEN -1
2411 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2412 
2413 /* fixed size pattern node only */
2414 static int
2415 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2416 {
2417  int tlen;
2418  int r = 0;
2419 
2420  level++;
2421  *len = 0;
2422  switch (NTYPE(node)) {
2423  case NT_LIST:
2424  do {
2425  r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2426  if (r == 0)
2427  *len = (int )distance_add(*len, tlen);
2428  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2429  break;
2430 
2431  case NT_ALT:
2432  {
2433  int tlen2;
2434  int varlen = 0;
2435 
2436  r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2437  while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2438  r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2439  if (r == 0) {
2440  if (tlen != tlen2)
2441  varlen = 1;
2442  }
2443  }
2444  if (r == 0) {
2445  if (varlen != 0) {
2446  if (level == 1)
2448  else
2449  r = GET_CHAR_LEN_VARLEN;
2450  }
2451  else
2452  *len = tlen;
2453  }
2454  }
2455  break;
2456 
2457  case NT_STR:
2458  {
2459  StrNode* sn = NSTR(node);
2460  UChar *s = sn->s;
2461  while (s < sn->end) {
2462  s += enclen(reg->enc, s, sn->end);
2463  (*len)++;
2464  }
2465  }
2466  break;
2467 
2468  case NT_QTFR:
2469  {
2470  QtfrNode* qn = NQTFR(node);
2471  if (qn->lower == qn->upper) {
2472  r = get_char_length_tree1(qn->target, reg, &tlen, level);
2473  if (r == 0)
2474  *len = (int )distance_multiply(tlen, qn->lower);
2475  }
2476  else
2477  r = GET_CHAR_LEN_VARLEN;
2478  }
2479  break;
2480 
2481 #ifdef USE_SUBEXP_CALL
2482  case NT_CALL:
2483  if (! IS_CALL_RECURSION(NCALL(node)))
2484  r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2485  else
2486  r = GET_CHAR_LEN_VARLEN;
2487  break;
2488 #endif
2489 
2490  case NT_CTYPE:
2491  *len = 1;
2492  break;
2493 
2494  case NT_CCLASS:
2495  case NT_CANY:
2496  *len = 1;
2497  break;
2498 
2499  case NT_ENCLOSE:
2500  {
2501  EncloseNode* en = NENCLOSE(node);
2502  switch (en->type) {
2503  case ENCLOSE_MEMORY:
2504 #ifdef USE_SUBEXP_CALL
2505  if (IS_ENCLOSE_CLEN_FIXED(en))
2506  *len = en->char_len;
2507  else {
2508  r = get_char_length_tree1(en->target, reg, len, level);
2509  if (r == 0) {
2510  en->char_len = *len;
2512  }
2513  }
2514  break;
2515 #endif
2516  case ENCLOSE_OPTION:
2518  case ENCLOSE_CONDITION:
2519  r = get_char_length_tree1(en->target, reg, len, level);
2520  break;
2521  case ENCLOSE_ABSENT:
2522  default:
2523  break;
2524  }
2525  }
2526  break;
2527 
2528  case NT_ANCHOR:
2529  break;
2530 
2531  default:
2532  r = GET_CHAR_LEN_VARLEN;
2533  break;
2534  }
2535 
2536  return r;
2537 }
2538 
2539 static int
2540 get_char_length_tree(Node* node, regex_t* reg, int* len)
2541 {
2542  return get_char_length_tree1(node, reg, len, 0);
2543 }
2544 
2545 /* x is not included y ==> 1 : 0 */
2546 static int
2547 is_not_included(Node* x, Node* y, regex_t* reg)
2548 {
2549  int i;
2550  OnigDistance len;
2551  OnigCodePoint code;
2552  UChar *p;
2553  int ytype;
2554 
2555  retry:
2556  ytype = NTYPE(y);
2557  switch (NTYPE(x)) {
2558  case NT_CTYPE:
2559  {
2560  switch (ytype) {
2561  case NT_CTYPE:
2562  if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2563  NCTYPE(y)->not != NCTYPE(x)->not &&
2564  NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2565  return 1;
2566  else
2567  return 0;
2568  break;
2569 
2570  case NT_CCLASS:
2571  swap:
2572  {
2573  Node* tmp;
2574  tmp = x; x = y; y = tmp;
2575  goto retry;
2576  }
2577  break;
2578 
2579  case NT_STR:
2580  goto swap;
2581  break;
2582 
2583  default:
2584  break;
2585  }
2586  }
2587  break;
2588 
2589  case NT_CCLASS:
2590  {
2591  CClassNode* xc = NCCLASS(x);
2592  switch (ytype) {
2593  case NT_CTYPE:
2594  switch (NCTYPE(y)->ctype) {
2595  case ONIGENC_CTYPE_WORD:
2596  if (NCTYPE(y)->not == 0) {
2597  if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2598  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2599  if (BITSET_AT(xc->bs, i)) {
2600  if (NCTYPE(y)->ascii_range) {
2601  if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2602  }
2603  else {
2604  if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2605  }
2606  }
2607  }
2608  return 1;
2609  }
2610  return 0;
2611  }
2612  else {
2613  if (IS_NOT_NULL(xc->mbuf)) return 0;
2614  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2615  int is_word;
2616  if (NCTYPE(y)->ascii_range)
2617  is_word = IS_CODE_SB_WORD(reg->enc, i);
2618  else
2619  is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2620  if (! is_word) {
2621  if (!IS_NCCLASS_NOT(xc)) {
2622  if (BITSET_AT(xc->bs, i))
2623  return 0;
2624  }
2625  else {
2626  if (! BITSET_AT(xc->bs, i))
2627  return 0;
2628  }
2629  }
2630  }
2631  return 1;
2632  }
2633  break;
2634 
2635  default:
2636  break;
2637  }
2638  break;
2639 
2640  case NT_CCLASS:
2641  {
2642  int v;
2643  CClassNode* yc = NCCLASS(y);
2644 
2645  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2646  v = BITSET_AT(xc->bs, i);
2647  if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2648  (v == 0 && IS_NCCLASS_NOT(xc))) {
2649  v = BITSET_AT(yc->bs, i);
2650  if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2651  (v == 0 && IS_NCCLASS_NOT(yc)))
2652  return 0;
2653  }
2654  }
2655  if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2656  (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2657  return 1;
2658  return 0;
2659  }
2660  break;
2661 
2662  case NT_STR:
2663  goto swap;
2664  break;
2665 
2666  default:
2667  break;
2668  }
2669  }
2670  break;
2671 
2672  case NT_STR:
2673  {
2674  StrNode* xs = NSTR(x);
2675  if (NSTRING_LEN(x) == 0)
2676  break;
2677 
2678  switch (ytype) {
2679  case NT_CTYPE:
2680  switch (NCTYPE(y)->ctype) {
2681  case ONIGENC_CTYPE_WORD:
2682  if (NCTYPE(y)->ascii_range) {
2683  if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2684  return NCTYPE(y)->not;
2685  else
2686  return !(NCTYPE(y)->not);
2687  }
2688  else {
2689  if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2690  return NCTYPE(y)->not;
2691  else
2692  return !(NCTYPE(y)->not);
2693  }
2694  break;
2695  default:
2696  break;
2697  }
2698  break;
2699 
2700  case NT_CCLASS:
2701  {
2702  CClassNode* cc = NCCLASS(y);
2703 
2704  code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2705  xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2706  return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2707  }
2708  break;
2709 
2710  case NT_STR:
2711  {
2712  UChar *q;
2713  StrNode* ys = NSTR(y);
2714  len = NSTRING_LEN(x);
2715  if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2716  if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2717  /* tiny version */
2718  return 0;
2719  }
2720  else {
2721  for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2722  if (*p != *q) return 1;
2723  }
2724  }
2725  }
2726  break;
2727 
2728  default:
2729  break;
2730  }
2731  }
2732  break;
2733 
2734  default:
2735  break;
2736  }
2737 
2738  return 0;
2739 }
2740 
2741 static Node*
2742 get_head_value_node(Node* node, int exact, regex_t* reg)
2743 {
2744  Node* n = NULL_NODE;
2745 
2746  switch (NTYPE(node)) {
2747  case NT_BREF:
2748  case NT_ALT:
2749  case NT_CANY:
2750 #ifdef USE_SUBEXP_CALL
2751  case NT_CALL:
2752 #endif
2753  break;
2754 
2755  case NT_CTYPE:
2756  case NT_CCLASS:
2757  if (exact == 0) {
2758  n = node;
2759  }
2760  break;
2761 
2762  case NT_LIST:
2763  n = get_head_value_node(NCAR(node), exact, reg);
2764  break;
2765 
2766  case NT_STR:
2767  {
2768  StrNode* sn = NSTR(node);
2769 
2770  if (sn->end <= sn->s)
2771  break;
2772 
2773  if (exact != 0 &&
2774  !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2775  }
2776  else {
2777  n = node;
2778  }
2779  }
2780  break;
2781 
2782  case NT_QTFR:
2783  {
2784  QtfrNode* qn = NQTFR(node);
2785  if (qn->lower > 0) {
2786 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
2787  if (IS_NOT_NULL(qn->head_exact))
2788  n = qn->head_exact;
2789  else
2790 #endif
2791  n = get_head_value_node(qn->target, exact, reg);
2792  }
2793  }
2794  break;
2795 
2796  case NT_ENCLOSE:
2797  {
2798  EncloseNode* en = NENCLOSE(node);
2799  switch (en->type) {
2800  case ENCLOSE_OPTION:
2801  {
2802  OnigOptionType options = reg->options;
2803 
2804  reg->options = NENCLOSE(node)->option;
2805  n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2806  reg->options = options;
2807  }
2808  break;
2809 
2810  case ENCLOSE_MEMORY:
2812  case ENCLOSE_CONDITION:
2813  n = get_head_value_node(en->target, exact, reg);
2814  break;
2815 
2816  case ENCLOSE_ABSENT:
2817  break;
2818  }
2819  }
2820  break;
2821 
2822  case NT_ANCHOR:
2823  if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2824  n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2825  break;
2826 
2827  default:
2828  break;
2829  }
2830 
2831  return n;
2832 }
2833 
2834 static int
2835 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2836 {
2837  int type, r = 0;
2838 
2839  type = NTYPE(node);
2840  if ((NTYPE2BIT(type) & type_mask) == 0)
2841  return 1;
2842 
2843  switch (type) {
2844  case NT_LIST:
2845  case NT_ALT:
2846  do {
2847  r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2848  anchor_mask);
2849  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2850  break;
2851 
2852  case NT_QTFR:
2853  r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2854  anchor_mask);
2855  break;
2856 
2857  case NT_ENCLOSE:
2858  {
2859  EncloseNode* en = NENCLOSE(node);
2860  if ((en->type & enclose_mask) == 0)
2861  return 1;
2862 
2863  r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2864  }
2865  break;
2866 
2867  case NT_ANCHOR:
2868  type = NANCHOR(node)->type;
2869  if ((type & anchor_mask) == 0)
2870  return 1;
2871 
2872  if (NANCHOR(node)->target)
2873  r = check_type_tree(NANCHOR(node)->target,
2874  type_mask, enclose_mask, anchor_mask);
2875  break;
2876 
2877  default:
2878  break;
2879  }
2880  return r;
2881 }
2882 
2883 #ifdef USE_SUBEXP_CALL
2884 
2885 # define RECURSION_EXIST 1
2886 # define RECURSION_INFINITE 2
2887 
2888 static int
2889 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2890 {
2891  int type;
2892  int r = 0;
2893 
2894  type = NTYPE(node);
2895  switch (type) {
2896  case NT_LIST:
2897  {
2898  Node *x;
2899  OnigDistance min;
2900  int ret;
2901 
2902  x = node;
2903  do {
2904  ret = subexp_inf_recursive_check(NCAR(x), env, head);
2905  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2906  r |= ret;
2907  if (head) {
2908  ret = get_min_match_length(NCAR(x), &min, env);
2909  if (ret != 0) return ret;
2910  if (min != 0) head = 0;
2911  }
2912  } while (IS_NOT_NULL(x = NCDR(x)));
2913  }
2914  break;
2915 
2916  case NT_ALT:
2917  {
2918  int ret;
2919  r = RECURSION_EXIST;
2920  do {
2921  ret = subexp_inf_recursive_check(NCAR(node), env, head);
2922  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2923  r &= ret;
2924  } while (IS_NOT_NULL(node = NCDR(node)));
2925  }
2926  break;
2927 
2928  case NT_QTFR:
2929  r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2930  if (r == RECURSION_EXIST) {
2931  if (NQTFR(node)->lower == 0) r = 0;
2932  }
2933  break;
2934 
2935  case NT_ANCHOR:
2936  {
2937  AnchorNode* an = NANCHOR(node);
2938  switch (an->type) {
2939  case ANCHOR_PREC_READ:
2940  case ANCHOR_PREC_READ_NOT:
2941  case ANCHOR_LOOK_BEHIND:
2943  r = subexp_inf_recursive_check(an->target, env, head);
2944  break;
2945  }
2946  }
2947  break;
2948 
2949  case NT_CALL:
2950  r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2951  break;
2952 
2953  case NT_ENCLOSE:
2954  if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2955  return 0;
2956  else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2957  return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2958  else {
2960  r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2962  }
2963  break;
2964 
2965  default:
2966  break;
2967  }
2968 
2969  return r;
2970 }
2971 
2972 static int
2973 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2974 {
2975  int type;
2976  int r = 0;
2977 
2978  type = NTYPE(node);
2979  switch (type) {
2980  case NT_LIST:
2981  case NT_ALT:
2982  do {
2983  r = subexp_inf_recursive_check_trav(NCAR(node), env);
2984  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2985  break;
2986 
2987  case NT_QTFR:
2988  r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2989  break;
2990 
2991  case NT_ANCHOR:
2992  {
2993  AnchorNode* an = NANCHOR(node);
2994  switch (an->type) {
2995  case ANCHOR_PREC_READ:
2996  case ANCHOR_PREC_READ_NOT:
2997  case ANCHOR_LOOK_BEHIND:
2999  r = subexp_inf_recursive_check_trav(an->target, env);
3000  break;
3001  }
3002  }
3003  break;
3004 
3005  case NT_ENCLOSE:
3006  {
3007  EncloseNode* en = NENCLOSE(node);
3008 
3009  if (IS_ENCLOSE_RECURSION(en)) {
3011  r = subexp_inf_recursive_check(en->target, env, 1);
3012  if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
3014  }
3015  r = subexp_inf_recursive_check_trav(en->target, env);
3016  }
3017 
3018  break;
3019 
3020  default:
3021  break;
3022  }
3023 
3024  return r;
3025 }
3026 
3027 static int
3028 subexp_recursive_check(Node* node)
3029 {
3030  int r = 0;
3031 
3032  switch (NTYPE(node)) {
3033  case NT_LIST:
3034  case NT_ALT:
3035  do {
3036  r |= subexp_recursive_check(NCAR(node));
3037  } while (IS_NOT_NULL(node = NCDR(node)));
3038  break;
3039 
3040  case NT_QTFR:
3041  r = subexp_recursive_check(NQTFR(node)->target);
3042  break;
3043 
3044  case NT_ANCHOR:
3045  {
3046  AnchorNode* an = NANCHOR(node);
3047  switch (an->type) {
3048  case ANCHOR_PREC_READ:
3049  case ANCHOR_PREC_READ_NOT:
3050  case ANCHOR_LOOK_BEHIND:
3052  r = subexp_recursive_check(an->target);
3053  break;
3054  }
3055  }
3056  break;
3057 
3058  case NT_CALL:
3059  r = subexp_recursive_check(NCALL(node)->target);
3060  if (r != 0) SET_CALL_RECURSION(node);
3061  break;
3062 
3063  case NT_ENCLOSE:
3064  if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3065  return 0;
3066  else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3067  return 1; /* recursion */
3068  else {
3070  r = subexp_recursive_check(NENCLOSE(node)->target);
3072  }
3073  break;
3074 
3075  default:
3076  break;
3077  }
3078 
3079  return r;
3080 }
3081 
3082 
3083 static int
3084 subexp_recursive_check_trav(Node* node, ScanEnv* env)
3085 {
3086 # define FOUND_CALLED_NODE 1
3087 
3088  int type;
3089  int r = 0;
3090 
3091  type = NTYPE(node);
3092  switch (type) {
3093  case NT_LIST:
3094  case NT_ALT:
3095  {
3096  int ret;
3097  do {
3098  ret = subexp_recursive_check_trav(NCAR(node), env);
3099  if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3100  else if (ret < 0) return ret;
3101  } while (IS_NOT_NULL(node = NCDR(node)));
3102  }
3103  break;
3104 
3105  case NT_QTFR:
3106  r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3107  if (NQTFR(node)->upper == 0) {
3108  if (r == FOUND_CALLED_NODE)
3109  NQTFR(node)->is_refered = 1;
3110  }
3111  break;
3112 
3113  case NT_ANCHOR:
3114  {
3115  AnchorNode* an = NANCHOR(node);
3116  switch (an->type) {
3117  case ANCHOR_PREC_READ:
3118  case ANCHOR_PREC_READ_NOT:
3119  case ANCHOR_LOOK_BEHIND:
3121  r = subexp_recursive_check_trav(an->target, env);
3122  break;
3123  }
3124  }
3125  break;
3126 
3127  case NT_ENCLOSE:
3128  {
3129  EncloseNode* en = NENCLOSE(node);
3130 
3131  if (! IS_ENCLOSE_RECURSION(en)) {
3132  if (IS_ENCLOSE_CALLED(en)) {
3134  r = subexp_recursive_check(en->target);
3135  if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3137  }
3138  }
3139  r = subexp_recursive_check_trav(en->target, env);
3140  if (IS_ENCLOSE_CALLED(en))
3141  r |= FOUND_CALLED_NODE;
3142  }
3143  break;
3144 
3145  default:
3146  break;
3147  }
3148 
3149  return r;
3150 }
3151 
3152 static int
3153 setup_subexp_call(Node* node, ScanEnv* env)
3154 {
3155  int type;
3156  int r = 0;
3157 
3158  type = NTYPE(node);
3159  switch (type) {
3160  case NT_LIST:
3161  do {
3162  r = setup_subexp_call(NCAR(node), env);
3163  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3164  break;
3165 
3166  case NT_ALT:
3167  do {
3168  r = setup_subexp_call(NCAR(node), env);
3169  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3170  break;
3171 
3172  case NT_QTFR:
3173  r = setup_subexp_call(NQTFR(node)->target, env);
3174  break;
3175  case NT_ENCLOSE:
3176  r = setup_subexp_call(NENCLOSE(node)->target, env);
3177  break;
3178 
3179  case NT_CALL:
3180  {
3181  CallNode* cn = NCALL(node);
3182  Node** nodes = SCANENV_MEM_NODES(env);
3183 
3184  if (cn->group_num != 0) {
3185  int gnum = cn->group_num;
3186 
3187 # ifdef USE_NAMED_GROUP
3188  if (env->num_named > 0 &&
3192  }
3193 # endif
3194  if (gnum > env->num_mem) {
3198  }
3199 
3200 # ifdef USE_NAMED_GROUP
3201  set_call_attr:
3202 # endif
3203  cn->target = nodes[cn->group_num];
3204  if (IS_NULL(cn->target)) {
3208  }
3211  cn->unset_addr_list = env->unset_addr_list;
3212  }
3213 # ifdef USE_NAMED_GROUP
3214 # ifdef USE_PERL_SUBEXP_CALL
3215  else if (cn->name == cn->name_end) {
3216  goto set_call_attr;
3217  }
3218 # endif
3219  else {
3220  int *refs;
3221 
3222  int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3223  &refs);
3224  if (n <= 0) {
3228  }
3229  else if (n > 1 &&
3234  }
3235  else {
3236  cn->group_num = refs[0];
3237  goto set_call_attr;
3238  }
3239  }
3240 # endif
3241  }
3242  break;
3243 
3244  case NT_ANCHOR:
3245  {
3246  AnchorNode* an = NANCHOR(node);
3247 
3248  switch (an->type) {
3249  case ANCHOR_PREC_READ:
3250  case ANCHOR_PREC_READ_NOT:
3251  case ANCHOR_LOOK_BEHIND:
3253  r = setup_subexp_call(an->target, env);
3254  break;
3255  }
3256  }
3257  break;
3258 
3259  default:
3260  break;
3261  }
3262 
3263  return r;
3264 }
3265 #endif
3266 
3267 /* divide different length alternatives in look-behind.
3268  (?<=A|B) ==> (?<=A)|(?<=B)
3269  (?<!A|B) ==> (?<!A)(?<!B)
3270 */
3271 static int
3272 divide_look_behind_alternatives(Node* node)
3273 {
3274  Node *head, *np, *insert_node;
3275  AnchorNode* an = NANCHOR(node);
3276  int anc_type = an->type;
3277 
3278  head = an->target;
3279  np = NCAR(head);
3280  swap_node(node, head);
3281  NCAR(node) = head;
3282  NANCHOR(head)->target = np;
3283 
3284  np = node;
3285  while ((np = NCDR(np)) != NULL_NODE) {
3286  insert_node = onig_node_new_anchor(anc_type);
3287  CHECK_NULL_RETURN_MEMERR(insert_node);
3288  NANCHOR(insert_node)->target = NCAR(np);
3289  NCAR(np) = insert_node;
3290  }
3291 
3292  if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3293  np = node;
3294  do {
3295  SET_NTYPE(np, NT_LIST); /* alt -> list */
3296  } while ((np = NCDR(np)) != NULL_NODE);
3297  }
3298  return 0;
3299 }
3300 
3301 static int
3302 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3303 {
3304  int r, len;
3305  AnchorNode* an = NANCHOR(node);
3306 
3307  r = get_char_length_tree(an->target, reg, &len);
3308  if (r == 0)
3309  an->char_len = len;
3310  else if (r == GET_CHAR_LEN_VARLEN)
3312  else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3314  r = divide_look_behind_alternatives(node);
3315  else
3317  }
3318 
3319  return r;
3320 }
3321 
3322 static int
3323 next_setup(Node* node, Node* next_node, regex_t* reg)
3324 {
3325  int type;
3326 
3327  retry:
3328  type = NTYPE(node);
3329  if (type == NT_QTFR) {
3330  QtfrNode* qn = NQTFR(node);
3331  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3332 #ifdef USE_QTFR_PEEK_NEXT
3333  Node* n = get_head_value_node(next_node, 1, reg);
3334  /* '\0': for UTF-16BE etc... */
3335  if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3336  qn->next_head_exact = n;
3337  }
3338 #endif
3339  /* automatic possessification a*b ==> (?>a*)b */
3340  if (qn->lower <= 1) {
3341  int ttype = NTYPE(qn->target);
3342  if (IS_NODE_TYPE_SIMPLE(ttype)) {
3343  Node *x, *y;
3344  x = get_head_value_node(qn->target, 0, reg);
3345  if (IS_NOT_NULL(x)) {
3346  y = get_head_value_node(next_node, 0, reg);
3347  if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3351  swap_node(node, en);
3352  NENCLOSE(node)->target = en;
3353  }
3354  }
3355  }
3356  }
3357  }
3358  }
3359  else if (type == NT_ENCLOSE) {
3360  EncloseNode* en = NENCLOSE(node);
3361  if (en->type == ENCLOSE_MEMORY) {
3362  node = en->target;
3363  goto retry;
3364  }
3365  }
3366  return 0;
3367 }
3368 
3369 
3370 static int
3371 update_string_node_case_fold(regex_t* reg, Node *node)
3372 {
3374  UChar *sbuf, *ebuf, *sp;
3375  int r, i, len;
3376  OnigDistance sbuf_size;
3377  StrNode* sn = NSTR(node);
3378 
3379  end = sn->end;
3380  sbuf_size = (end - sn->s) * 2;
3381  sbuf = (UChar* )xmalloc(sbuf_size);
3383  ebuf = sbuf + sbuf_size;
3384 
3385  sp = sbuf;
3386  p = sn->s;
3387  while (p < end) {
3388  len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3389  for (i = 0; i < len; i++) {
3390  if (sp >= ebuf) {
3391  UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3392  if (IS_NULL(p)) {
3393  xfree(sbuf);
3394  return ONIGERR_MEMORY;
3395  }
3396  sbuf = p;
3397  sp = sbuf + sbuf_size;
3398  sbuf_size *= 2;
3399  ebuf = sbuf + sbuf_size;
3400  }
3401 
3402  *sp++ = buf[i];
3403  }
3404  }
3405 
3406  r = onig_node_str_set(node, sbuf, sp);
3407 
3408  xfree(sbuf);
3409  return r;
3410 }
3411 
3412 static int
3413 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3414  regex_t* reg)
3415 {
3416  int r;
3417  Node *node;
3418 
3419  node = onig_node_new_str(s, end);
3420  if (IS_NULL(node)) return ONIGERR_MEMORY;
3421 
3422  r = update_string_node_case_fold(reg, node);
3423  if (r != 0) {
3424  onig_node_free(node);
3425  return r;
3426  }
3427 
3428  NSTRING_SET_AMBIG(node);
3430  *rnode = node;
3431  return 0;
3432 }
3433 
3434 static int
3435 is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
3436  int slen)
3437 {
3438  int i;
3439 
3440  for (i = 0; i < item_num; i++) {
3441  if (items[i].byte_len != slen) {
3442  return 1;
3443  }
3444  if (items[i].code_len != 1) {
3445  return 1;
3446  }
3447  }
3448  return 0;
3449 }
3450 
3451 static int
3452 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3453  UChar *p, int slen, UChar *end,
3454  regex_t* reg, Node **rnode)
3455 {
3456  int r, i, j, len, varlen;
3457  Node *anode, *var_anode, *snode, *xnode, *an;
3459 
3460  *rnode = var_anode = NULL_NODE;
3461 
3462  varlen = 0;
3463  for (i = 0; i < item_num; i++) {
3464  if (items[i].byte_len != slen) {
3465  varlen = 1;
3466  break;
3467  }
3468  }
3469 
3470  if (varlen != 0) {
3471  *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3472  if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3473 
3474  xnode = onig_node_new_list(NULL, NULL);
3475  if (IS_NULL(xnode)) goto mem_err;
3476  NCAR(var_anode) = xnode;
3477 
3479  if (IS_NULL(anode)) goto mem_err;
3480  NCAR(xnode) = anode;
3481  }
3482  else {
3483  *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3484  if (IS_NULL(anode)) return ONIGERR_MEMORY;
3485  }
3486 
3487  snode = onig_node_new_str(p, p + slen);
3488  if (IS_NULL(snode)) goto mem_err;
3489 
3490  NCAR(anode) = snode;
3491 
3492  for (i = 0; i < item_num; i++) {
3493  snode = onig_node_new_str(NULL, NULL);
3494  if (IS_NULL(snode)) goto mem_err;
3495 
3496  for (j = 0; j < items[i].code_len; j++) {
3497  len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3498  if (len < 0) {
3499  r = len;
3500  goto mem_err2;
3501  }
3502 
3503  r = onig_node_str_cat(snode, buf, buf + len);
3504  if (r != 0) goto mem_err2;
3505  }
3506 
3508  if (IS_NULL(an)) {
3509  goto mem_err2;
3510  }
3511 
3512  if (items[i].byte_len != slen) {
3513  Node *rem;
3514  UChar *q = p + items[i].byte_len;
3515 
3516  if (q < end) {
3517  r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3518  if (r != 0) {
3519  onig_node_free(an);
3520  goto mem_err2;
3521  }
3522 
3523  xnode = onig_node_list_add(NULL_NODE, snode);
3524  if (IS_NULL(xnode)) {
3525  onig_node_free(an);
3526  onig_node_free(rem);
3527  goto mem_err2;
3528  }
3529  if (IS_NULL(onig_node_list_add(xnode, rem))) {
3530  onig_node_free(an);
3531  onig_node_free(xnode);
3532  onig_node_free(rem);
3533  goto mem_err;
3534  }
3535 
3536  NCAR(an) = xnode;
3537  }
3538  else {
3539  NCAR(an) = snode;
3540  }
3541 
3542  NCDR(var_anode) = an;
3543  var_anode = an;
3544  }
3545  else {
3546  NCAR(an) = snode;
3547  NCDR(anode) = an;
3548  anode = an;
3549  }
3550  }
3551 
3552  return varlen;
3553 
3554  mem_err2:
3555  onig_node_free(snode);
3556 
3557  mem_err:
3558  onig_node_free(*rnode);
3559 
3560  return ONIGERR_MEMORY;
3561 }
3562 
3563 static int
3564 expand_case_fold_string(Node* node, regex_t* reg)
3565 {
3566 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3567 
3568  int r, n, len, alt_num;
3569  int varlen = 0;
3570  UChar *start, *end, *p;
3571  Node *top_root, *root, *snode, *prev_node;
3573  StrNode* sn = NSTR(node);
3574 
3575  if (NSTRING_IS_AMBIG(node)) return 0;
3576 
3577  start = sn->s;
3578  end = sn->end;
3579  if (start >= end) return 0;
3580 
3581  r = 0;
3582  top_root = root = prev_node = snode = NULL_NODE;
3583  alt_num = 1;
3584  p = start;
3585  while (p < end) {
3587  p, end, items);
3588  if (n < 0) {
3589  r = n;
3590  goto err;
3591  }
3592 
3593  len = enclen(reg->enc, p, end);
3594 
3595  varlen = is_case_fold_variable_len(n, items, len);
3596  if (n == 0 || varlen == 0) {
3597  if (IS_NULL(snode)) {
3598  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3599  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3600  if (IS_NULL(root)) {
3601  onig_node_free(prev_node);
3602  goto mem_err;
3603  }
3604  }
3605 
3606  prev_node = snode = onig_node_new_str(NULL, NULL);
3607  if (IS_NULL(snode)) goto mem_err;
3608  if (IS_NOT_NULL(root)) {
3609  if (IS_NULL(onig_node_list_add(root, snode))) {
3610  onig_node_free(snode);
3611  goto mem_err;
3612  }
3613  }
3614  }
3615 
3616  r = onig_node_str_cat(snode, p, p + len);
3617  if (r != 0) goto err;
3618  }
3619  else {
3620  alt_num *= (n + 1);
3621  if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3622 
3623  if (IS_NOT_NULL(snode)) {
3624  r = update_string_node_case_fold(reg, snode);
3625  if (r == 0) {
3626  NSTRING_SET_AMBIG(snode);
3627  }
3628  }
3629  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3630  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3631  if (IS_NULL(root)) {
3632  onig_node_free(prev_node);
3633  goto mem_err;
3634  }
3635  }
3636 
3637  r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3638  if (r < 0) goto mem_err;
3639  if (r == 1) {
3640  if (IS_NULL(root)) {
3641  top_root = prev_node;
3642  }
3643  else {
3644  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3645  onig_node_free(prev_node);
3646  goto mem_err;
3647  }
3648  }
3649 
3650  root = NCAR(prev_node);
3651  }
3652  else { /* r == 0 */
3653  if (IS_NOT_NULL(root)) {
3654  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3655  onig_node_free(prev_node);
3656  goto mem_err;
3657  }
3658  }
3659  }
3660 
3661  snode = NULL_NODE;
3662  }
3663 
3664  p += len;
3665  }
3666  if (IS_NOT_NULL(snode)) {
3667  r = update_string_node_case_fold(reg, snode);
3668  if (r == 0) {
3669  NSTRING_SET_AMBIG(snode);
3670  }
3671  }
3672 
3673  if (p < end) {
3674  Node *srem;
3675 
3676  r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3677  if (r != 0) goto mem_err;
3678 
3679  if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3680  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3681  if (IS_NULL(root)) {
3682  onig_node_free(srem);
3683  onig_node_free(prev_node);
3684  goto mem_err;
3685  }
3686  }
3687 
3688  if (IS_NULL(root)) {
3689  prev_node = srem;
3690  }
3691  else {
3692  if (IS_NULL(onig_node_list_add(root, srem))) {
3693  onig_node_free(srem);
3694  goto mem_err;
3695  }
3696  }
3697  }
3698 
3699  /* ending */
3700  top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3701  swap_node(node, top_root);
3702  onig_node_free(top_root);
3703  return 0;
3704 
3705  mem_err:
3706  r = ONIGERR_MEMORY;
3707 
3708  err:
3709  onig_node_free(top_root);
3710  return r;
3711 }
3712 
3713 
3714 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3715 
3716 # define CEC_THRES_NUM_BIG_REPEAT 512
3717 # define CEC_INFINITE_NUM 0x7fffffff
3718 
3719 # define CEC_IN_INFINITE_REPEAT (1<<0)
3720 # define CEC_IN_FINITE_REPEAT (1<<1)
3721 # define CEC_CONT_BIG_REPEAT (1<<2)
3722 
3723 static int
3724 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3725 {
3726  int type;
3727  int r = state;
3728 
3729  type = NTYPE(node);
3730  switch (type) {
3731  case NT_LIST:
3732  {
3733  Node* prev = NULL_NODE;
3734  do {
3735  r = setup_comb_exp_check(NCAR(node), r, env);
3736  prev = NCAR(node);
3737  } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3738  }
3739  break;
3740 
3741  case NT_ALT:
3742  {
3743  int ret;
3744  do {
3745  ret = setup_comb_exp_check(NCAR(node), state, env);
3746  r |= ret;
3747  } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3748  }
3749  break;
3750 
3751  case NT_QTFR:
3752  {
3753  int child_state = state;
3754  int add_state = 0;
3755  QtfrNode* qn = NQTFR(node);
3756  Node* target = qn->target;
3757  int var_num;
3758 
3759  if (! IS_REPEAT_INFINITE(qn->upper)) {
3760  if (qn->upper > 1) {
3761  /* {0,1}, {1,1} are allowed */
3762  child_state |= CEC_IN_FINITE_REPEAT;
3763 
3764  /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3765  if (env->backrefed_mem == 0) {
3766  if (NTYPE(qn->target) == NT_ENCLOSE) {
3767  EncloseNode* en = NENCLOSE(qn->target);
3768  if (en->type == ENCLOSE_MEMORY) {
3769  if (NTYPE(en->target) == NT_QTFR) {
3770  QtfrNode* q = NQTFR(en->target);
3771  if (IS_REPEAT_INFINITE(q->upper)
3772  && q->greedy == qn->greedy) {
3773  qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3774  if (qn->upper == 1)
3775  child_state = state;
3776  }
3777  }
3778  }
3779  }
3780  }
3781  }
3782  }
3783 
3784  if (state & CEC_IN_FINITE_REPEAT) {
3785  qn->comb_exp_check_num = -1;
3786  }
3787  else {
3788  if (IS_REPEAT_INFINITE(qn->upper)) {
3789  var_num = CEC_INFINITE_NUM;
3790  child_state |= CEC_IN_INFINITE_REPEAT;
3791  }
3792  else {
3793  var_num = qn->upper - qn->lower;
3794  }
3795 
3796  if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3797  add_state |= CEC_CONT_BIG_REPEAT;
3798 
3799  if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3800  ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3801  var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3802  if (qn->comb_exp_check_num == 0) {
3803  env->num_comb_exp_check++;
3804  qn->comb_exp_check_num = env->num_comb_exp_check;
3805  if (env->curr_max_regnum > env->comb_exp_max_regnum)
3806  env->comb_exp_max_regnum = env->curr_max_regnum;
3807  }
3808  }
3809  }
3810 
3811  r = setup_comb_exp_check(target, child_state, env);
3812  r |= add_state;
3813  }
3814  break;
3815 
3816  case NT_ENCLOSE:
3817  {
3818  EncloseNode* en = NENCLOSE(node);
3819 
3820  switch (en->type) {
3821  case ENCLOSE_MEMORY:
3822  {
3823  if (env->curr_max_regnum < en->regnum)
3824  env->curr_max_regnum = en->regnum;
3825 
3826  r = setup_comb_exp_check(en->target, state, env);
3827  }
3828  break;
3829 
3830  default:
3831  r = setup_comb_exp_check(en->target, state, env);
3832  break;
3833  }
3834  }
3835  break;
3836 
3837 # ifdef USE_SUBEXP_CALL
3838  case NT_CALL:
3839  if (IS_CALL_RECURSION(NCALL(node)))
3840  env->has_recursion = 1;
3841  else
3842  r = setup_comb_exp_check(NCALL(node)->target, state, env);
3843  break;
3844 # endif
3845 
3846  default:
3847  break;
3848  }
3849 
3850  return r;
3851 }
3852 #endif
3853 
3854 #define IN_ALT (1<<0)
3855 #define IN_NOT (1<<1)
3856 #define IN_REPEAT (1<<2)
3857 #define IN_VAR_REPEAT (1<<3)
3858 #define IN_CALL (1<<4)
3859 #define IN_RECCALL (1<<5)
3860 
3861 /* setup_tree does the following work.
3862  1. check empty loop. (set qn->target_empty_info)
3863  2. expand ignore-case in char class.
3864  3. set memory status bit flags. (reg->mem_stats)
3865  4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3866  5. find invalid patterns in look-behind.
3867  6. expand repeated string.
3868  */
3869 static int
3870 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3871 {
3872  int type;
3873  int r = 0;
3874 
3875 restart:
3876  type = NTYPE(node);
3877  switch (type) {
3878  case NT_LIST:
3879  {
3880  Node* prev = NULL_NODE;
3881  do {
3882  r = setup_tree(NCAR(node), reg, state, env);
3883  if (IS_NOT_NULL(prev) && r == 0) {
3884  r = next_setup(prev, NCAR(node), reg);
3885  }
3886  prev = NCAR(node);
3887  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3888  }
3889  break;
3890 
3891  case NT_ALT:
3892  do {
3893  r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3894  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3895  break;
3896 
3897  case NT_CCLASS:
3898  break;
3899 
3900  case NT_STR:
3901  if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3902  r = expand_case_fold_string(node, reg);
3903  }
3904  break;
3905 
3906  case NT_CTYPE:
3907  case NT_CANY:
3908  break;
3909 
3910 #ifdef USE_SUBEXP_CALL
3911  case NT_CALL:
3912  break;
3913 #endif
3914 
3915  case NT_BREF:
3916  {
3917  int i;
3918  int* p;
3919  Node** nodes = SCANENV_MEM_NODES(env);
3920  BRefNode* br = NBREF(node);
3921  p = BACKREFS_P(br);
3922  for (i = 0; i < br->back_num; i++) {
3923  if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
3924  BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3925  BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3926 #ifdef USE_BACKREF_WITH_LEVEL
3927  if (IS_BACKREF_NEST_LEVEL(br)) {
3928  BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3929  }
3930 #endif
3931  SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3932  }
3933  }
3934  break;
3935 
3936  case NT_QTFR:
3937  {
3938  OnigDistance d;
3939  QtfrNode* qn = NQTFR(node);
3940  Node* target = qn->target;
3941 
3942  if ((state & IN_REPEAT) != 0) {
3943  qn->state |= NST_IN_REPEAT;
3944  }
3945 
3946  if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3947  r = get_min_match_length(target, &d, env);
3948  if (r) break;
3949  if (d == 0) {
3951 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3952  r = quantifiers_memory_node_info(target);
3953  if (r < 0) break;
3954  if (r > 0) {
3955  qn->target_empty_info = r;
3956  }
3957 #endif
3958 #if 0
3959  r = get_max_match_length(target, &d, env);
3960  if (r == 0 && d == 0) {
3961  /* ()* ==> ()?, ()+ ==> () */
3962  qn->upper = 1;
3963  if (qn->lower > 1) qn->lower = 1;
3964  if (NTYPE(target) == NT_STR) {
3965  qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
3966  }
3967  }
3968 #endif
3969  }
3970  }
3971 
3972  state |= IN_REPEAT;
3973  if (qn->lower != qn->upper)
3974  state |= IN_VAR_REPEAT;
3975  r = setup_tree(target, reg, state, env);
3976  if (r) break;
3977 
3978  /* expand string */
3979 #define EXPAND_STRING_MAX_LENGTH 100
3980  if (NTYPE(target) == NT_STR) {
3981  if (qn->lower > 1) {
3982  int i, n = qn->lower;
3983  OnigDistance len = NSTRING_LEN(target);
3984  StrNode* sn = NSTR(target);
3985  Node* np;
3986 
3987  np = onig_node_new_str(sn->s, sn->end);
3988  if (IS_NULL(np)) return ONIGERR_MEMORY;
3989  NSTR(np)->flag = sn->flag;
3990 
3991  for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
3992  r = onig_node_str_cat(np, sn->s, sn->end);
3993  if (r) {
3994  onig_node_free(np);
3995  return r;
3996  }
3997  }
3998  if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
3999  Node *np1, *np2;
4000 
4001  qn->lower -= i;
4002  if (! IS_REPEAT_INFINITE(qn->upper))
4003  qn->upper -= i;
4004 
4005  np1 = onig_node_new_list(np, NULL);
4006  if (IS_NULL(np1)) {
4007  onig_node_free(np);
4008  return ONIGERR_MEMORY;
4009  }
4010  swap_node(np1, node);
4011  np2 = onig_node_list_add(node, np1);
4012  if (IS_NULL(np2)) {
4013  onig_node_free(np1);
4014  return ONIGERR_MEMORY;
4015  }
4016  }
4017  else {
4018  swap_node(np, node);
4019  onig_node_free(np);
4020  }
4021  break; /* break case NT_QTFR: */
4022  }
4023  }
4024 
4025 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
4026  if (qn->greedy && (qn->target_empty_info != 0)) {
4027  if (NTYPE(target) == NT_QTFR) {
4028  QtfrNode* tqn = NQTFR(target);
4029  if (IS_NOT_NULL(tqn->head_exact)) {
4030  qn->head_exact = tqn->head_exact;
4031  tqn->head_exact = NULL;
4032  }
4033  }
4034  else {
4035  qn->head_exact = get_head_value_node(qn->target, 1, reg);
4036  }
4037  }
4038 #endif
4039  }
4040  break;
4041 
4042  case NT_ENCLOSE:
4043  {
4044  EncloseNode* en = NENCLOSE(node);
4045 
4046  switch (en->type) {
4047  case ENCLOSE_OPTION:
4048  {
4049  OnigOptionType options = reg->options;
4050  reg->options = NENCLOSE(node)->option;
4051  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4052  reg->options = options;
4053  }
4054  break;
4055 
4056  case ENCLOSE_MEMORY:
4057  if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4059  /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4060  }
4061  if (IS_ENCLOSE_CALLED(en))
4062  state |= IN_CALL;
4063  if (IS_ENCLOSE_RECURSION(en))
4064  state |= IN_RECCALL;
4065  else if ((state & IN_RECCALL) != 0)
4066  SET_CALL_RECURSION(node);
4067  r = setup_tree(en->target, reg, state, env);
4068  break;
4069 
4071  {
4072  Node* target = en->target;
4073  r = setup_tree(target, reg, state, env);
4074  if (NTYPE(target) == NT_QTFR) {
4075  QtfrNode* tqn = NQTFR(target);
4076  if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4077  tqn->greedy != 0) { /* (?>a*), a*+ etc... */
4078  int qtype = NTYPE(tqn->target);
4079  if (IS_NODE_TYPE_SIMPLE(qtype))
4081  }
4082  }
4083  }
4084  break;
4085 
4086  case ENCLOSE_CONDITION:
4087 #ifdef USE_NAMED_GROUP
4088  if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4089  env->num_named > 0 &&
4093  }
4094 #endif
4095  if (NENCLOSE(node)->regnum > env->num_mem)
4096  return ONIGERR_INVALID_BACKREF;
4097  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4098  break;
4099 
4100  case ENCLOSE_ABSENT:
4101  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4102  break;
4103  }
4104  }
4105  break;
4106 
4107  case NT_ANCHOR:
4108  {
4109  AnchorNode* an = NANCHOR(node);
4110 
4111  switch (an->type) {
4112  case ANCHOR_PREC_READ:
4113  r = setup_tree(an->target, reg, state, env);
4114  break;
4115  case ANCHOR_PREC_READ_NOT:
4116  r = setup_tree(an->target, reg, (state | IN_NOT), env);
4117  break;
4118 
4119 /* allowed node types in look-behind */
4120 #define ALLOWED_TYPE_IN_LB \
4121  ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4122  BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4123 
4124 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4125 #define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4126 
4127 #define ALLOWED_ANCHOR_IN_LB \
4128 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4129  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4130  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4131  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4132 #define ALLOWED_ANCHOR_IN_LB_NOT \
4133 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4134  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4135  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4136  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4137 
4138  case ANCHOR_LOOK_BEHIND:
4139  {
4140  r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4142  if (r < 0) return r;
4143  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4144  if (NTYPE(node) != NT_ANCHOR) goto restart;
4145  r = setup_tree(an->target, reg, state, env);
4146  if (r != 0) return r;
4147  r = setup_look_behind(node, reg, env);
4148  }
4149  break;
4150 
4152  {
4153  r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4155  if (r < 0) return r;
4156  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4157  if (NTYPE(node) != NT_ANCHOR) goto restart;
4158  r = setup_tree(an->target, reg, (state | IN_NOT), env);
4159  if (r != 0) return r;
4160  r = setup_look_behind(node, reg, env);
4161  }
4162  break;
4163  }
4164  }
4165  break;
4166 
4167  default:
4168  break;
4169  }
4170 
4171  return r;
4172 }
4173 
4174 #ifndef USE_SUNDAY_QUICK_SEARCH
4175 /* set skip map for Boyer-Moore search */
4176 static int
4177 set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4178  UChar skip[], int** int_skip, int ignore_case)
4179 {
4180  OnigDistance i, len;
4181  int clen, flen, n, j, k;
4184  OnigEncoding enc = reg->enc;
4185 
4186  len = end - s;
4187  if (len < ONIG_CHAR_TABLE_SIZE) {
4188  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
4189 
4190  n = 0;
4191  for (i = 0; i < len - 1; i += clen) {
4192  p = s + i;
4193  if (ignore_case)
4195  p, end, items);
4196  clen = enclen(enc, p, end);
4197  if (p + clen > end)
4198  clen = (int )(end - p);
4199 
4200  for (j = 0; j < n; j++) {
4201  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4202  return 1; /* different length isn't supported. */
4203  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4204  if (flen != clen)
4205  return 1; /* different length isn't supported. */
4206  }
4207  for (j = 0; j < clen; j++) {
4208  skip[s[i + j]] = (UChar )(len - 1 - i - j);
4209  for (k = 0; k < n; k++) {
4210  skip[buf[k][j]] = (UChar )(len - 1 - i - j);
4211  }
4212  }
4213  }
4214  }
4215  else {
4216 # if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4217  /* This should not happen. */
4218  return ONIGERR_TYPE_BUG;
4219 # else
4220  if (IS_NULL(*int_skip)) {
4221  *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4222  if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4223  }
4224  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
4225 
4226  n = 0;
4227  for (i = 0; i < len - 1; i += clen) {
4228  p = s + i;
4229  if (ignore_case)
4231  p, end, items);
4232  clen = enclen(enc, p, end);
4233  if (p + clen > end)
4234  clen = (int )(end - p);
4235 
4236  for (j = 0; j < n; j++) {
4237  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4238  return 1; /* different length isn't supported. */
4239  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4240  if (flen != clen)
4241  return 1; /* different length isn't supported. */
4242  }
4243  for (j = 0; j < clen; j++) {
4244  (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
4245  for (k = 0; k < n; k++) {
4246  (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
4247  }
4248  }
4249  }
4250 # endif
4251  }
4252  return 0;
4253 }
4254 
4255 #else /* USE_SUNDAY_QUICK_SEARCH */
4256 
4257 /* set skip map for Sunday's quick search */
4258 static int
4259 set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4260  UChar skip[], int** int_skip, int ignore_case)
4261 {
4262  OnigDistance i, len;
4263  int clen, flen, n, j, k;
4266  OnigEncoding enc = reg->enc;
4267 
4268  len = end - s;
4269  if (len < ONIG_CHAR_TABLE_SIZE) {
4270  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
4271 
4272  n = 0;
4273  for (i = 0; i < len; i += clen) {
4274  p = s + i;
4275  if (ignore_case)
4277  p, end, items);
4278  clen = enclen(enc, p, end);
4279  if (p + clen > end)
4280  clen = (int )(end - p);
4281 
4282  for (j = 0; j < n; j++) {
4283  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4284  return 1; /* different length isn't supported. */
4285  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4286  if (flen != clen)
4287  return 1; /* different length isn't supported. */
4288  }
4289  for (j = 0; j < clen; j++) {
4290  skip[s[i + j]] = (UChar )(len - i - j);
4291  for (k = 0; k < n; k++) {
4292  skip[buf[k][j]] = (UChar )(len - i - j);
4293  }
4294  }
4295  }
4296  }
4297  else {
4298 # if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4299  /* This should not happen. */
4300  return ONIGERR_TYPE_BUG;
4301 # else
4302  if (IS_NULL(*int_skip)) {
4303  *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4304  if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4305  }
4306  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
4307 
4308  n = 0;
4309  for (i = 0; i < len; i += clen) {
4310  p = s + i;
4311  if (ignore_case)
4313  p, end, items);
4314  clen = enclen(enc, p, end);
4315  if (p + clen > end)
4316  clen = (int )(end - p);
4317 
4318  for (j = 0; j < n; j++) {
4319  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4320  return 1; /* different length isn't supported. */
4321  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4322  if (flen != clen)
4323  return 1; /* different length isn't supported. */
4324  }
4325  for (j = 0; j < clen; j++) {
4326  (*int_skip)[s[i + j]] = (int )(len - i - j);
4327  for (k = 0; k < n; k++) {
4328  (*int_skip)[buf[k][j]] = (int )(len - i - j);
4329  }
4330  }
4331  }
4332 # endif
4333  }
4334  return 0;
4335 }
4336 #endif /* USE_SUNDAY_QUICK_SEARCH */
4337 
4338 typedef struct {
4339  OnigDistance min; /* min byte length */
4340  OnigDistance max; /* max byte length */
4341 } MinMaxLen;
4342 
4343 typedef struct {
4349 } OptEnv;
4350 
4351 typedef struct {
4354 } OptAncInfo;
4355 
4356 typedef struct {
4357  MinMaxLen mmd; /* info position */
4359 
4361  int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
4362  int len;
4364 } OptExactInfo;
4365 
4366 typedef struct {
4367  MinMaxLen mmd; /* info position */
4369 
4370  int value; /* weighted value */
4372 } OptMapInfo;
4373 
4374 typedef struct {
4376 
4378  OptExactInfo exb; /* boundary */
4379  OptExactInfo exm; /* middle */
4380  OptExactInfo expr; /* prec read (?=...) */
4381 
4382  OptMapInfo map; /* boundary */
4383 } NodeOptInfo;
4384 
4385 
4386 static int
4387 map_position_value(OnigEncoding enc, int i)
4388 {
4389  static const short int ByteValTable[] = {
4390  5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4391  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4392  12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4393  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4394  5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4395  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4396  5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4397  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4398  };
4399 
4400  if (i < numberof(ByteValTable)) {
4401  if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4402  return 20;
4403  else
4404  return (int )ByteValTable[i];
4405  }
4406  else
4407  return 4; /* Take it easy. */
4408 }
4409 
4410 static int
4411 distance_value(MinMaxLen* mm)
4412 {
4413  /* 1000 / (min-max-dist + 1) */
4414  static const short int dist_vals[] = {
4415  1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4416  91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4417  48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4418  32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4419  24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4420  20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4421  16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4422  14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4423  12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4424  11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4425  };
4426 
4427  OnigDistance d;
4428 
4429  if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4430 
4431  d = mm->max - mm->min;
4432  if (d < numberof(dist_vals))
4433  /* return dist_vals[d] * 16 / (mm->min + 12); */
4434  return (int )dist_vals[d];
4435  else
4436  return 1;
4437 }
4438 
4439 static int
4440 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4441 {
4442  if (v2 <= 0) return -1;
4443  if (v1 <= 0) return 1;
4444 
4445  v1 *= distance_value(d1);
4446  v2 *= distance_value(d2);
4447 
4448  if (v2 > v1) return 1;
4449  if (v2 < v1) return -1;
4450 
4451  if (d2->min < d1->min) return 1;
4452  if (d2->min > d1->min) return -1;
4453  return 0;
4454 }
4455 
4456 static int
4457 is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4458 {
4459  return (a->min == b->min && a->max == b->max) ? 1 : 0;
4460 }
4461 
4462 
4463 static void
4464 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4465 {
4466  mml->min = min;
4467  mml->max = max;
4468 }
4469 
4470 static void
4471 clear_mml(MinMaxLen* mml)
4472 {
4473  mml->min = mml->max = 0;
4474 }
4475 
4476 static void
4477 copy_mml(MinMaxLen* to, MinMaxLen* from)
4478 {
4479  to->min = from->min;
4480  to->max = from->max;
4481 }
4482 
4483 static void
4484 add_mml(MinMaxLen* to, MinMaxLen* from)
4485 {
4486  to->min = distance_add(to->min, from->min);
4487  to->max = distance_add(to->max, from->max);
4488 }
4489 
4490 #if 0
4491 static void
4492 add_len_mml(MinMaxLen* to, OnigDistance len)
4493 {
4494  to->min = distance_add(to->min, len);
4495  to->max = distance_add(to->max, len);
4496 }
4497 #endif
4498 
4499 static void
4500 alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4501 {
4502  if (to->min > from->min) to->min = from->min;
4503  if (to->max < from->max) to->max = from->max;
4504 }
4505 
4506 static void
4507 copy_opt_env(OptEnv* to, OptEnv* from)
4508 {
4509  *to = *from;
4510 }
4511 
4512 static void
4513 clear_opt_anc_info(OptAncInfo* anc)
4514 {
4515  anc->left_anchor = 0;
4516  anc->right_anchor = 0;
4517 }
4518 
4519 static void
4520 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4521 {
4522  *to = *from;
4523 }
4524 
4525 static void
4526 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4527  OnigDistance left_len, OnigDistance right_len)
4528 {
4529  clear_opt_anc_info(to);
4530 
4531  to->left_anchor = left->left_anchor;
4532  if (left_len == 0) {
4533  to->left_anchor |= right->left_anchor;
4534  }
4535 
4536  to->right_anchor = right->right_anchor;
4537  if (right_len == 0) {
4538  to->right_anchor |= left->right_anchor;
4539  }
4540  else {
4542  }
4543 }
4544 
4545 static int
4546 is_left_anchor(int anc)
4547 {
4548  if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4549  anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4550  anc == ANCHOR_PREC_READ_NOT)
4551  return 0;
4552 
4553  return 1;
4554 }
4555 
4556 static int
4557 is_set_opt_anc_info(OptAncInfo* to, int anc)
4558 {
4559  if ((to->left_anchor & anc) != 0) return 1;
4560 
4561  return ((to->right_anchor & anc) != 0 ? 1 : 0);
4562 }
4563 
4564 static void
4565 add_opt_anc_info(OptAncInfo* to, int anc)
4566 {
4567  if (is_left_anchor(anc))
4568  to->left_anchor |= anc;
4569  else
4570  to->right_anchor |= anc;
4571 }
4572 
4573 static void
4574 remove_opt_anc_info(OptAncInfo* to, int anc)
4575 {
4576  if (is_left_anchor(anc))
4577  to->left_anchor &= ~anc;
4578  else
4579  to->right_anchor &= ~anc;
4580 }
4581 
4582 static void
4583 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4584 {
4585  to->left_anchor &= add->left_anchor;
4586  to->right_anchor &= add->right_anchor;
4587 }
4588 
4589 static int
4590 is_full_opt_exact_info(OptExactInfo* ex)
4591 {
4592  return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4593 }
4594 
4595 static void
4596 clear_opt_exact_info(OptExactInfo* ex)
4597 {
4598  clear_mml(&ex->mmd);
4599  clear_opt_anc_info(&ex->anc);
4600  ex->reach_end = 0;
4601  ex->ignore_case = -1; /* unset */
4602  ex->len = 0;
4603  ex->s[0] = '\0';
4604 }
4605 
4606 static void
4607 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4608 {
4609  *to = *from;
4610 }
4611 
4612 static void
4613 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4614 {
4615  int i, j, len;
4616  UChar *p, *end;
4617  OptAncInfo tanc;
4618 
4619  if (to->ignore_case < 0)
4620  to->ignore_case = add->ignore_case;
4621  else if (to->ignore_case != add->ignore_case)
4622  return ; /* avoid */
4623 
4624  p = add->s;
4625  end = p + add->len;
4626  for (i = to->len; p < end; ) {
4627  len = enclen(enc, p, end);
4628  if (i + len > OPT_EXACT_MAXLEN) break;
4629  for (j = 0; j < len && p < end; j++)
4630  to->s[i++] = *p++;
4631  }
4632 
4633  to->len = i;
4634  to->reach_end = (p == end ? add->reach_end : 0);
4635 
4636  concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4637  if (! to->reach_end) tanc.right_anchor = 0;
4638  copy_opt_anc_info(&to->anc, &tanc);
4639 }
4640 
4641 static void
4642 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4643  int raw ARG_UNUSED, OnigEncoding enc)
4644 {
4645  int i, j, len;
4646  UChar *p;
4647 
4648  for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4649  len = enclen(enc, p, end);
4650  if (i + len > OPT_EXACT_MAXLEN) break;
4651  for (j = 0; j < len && p < end; j++)
4652  to->s[i++] = *p++;
4653  }
4654 
4655  to->len = i;
4656 }
4657 
4658 static void
4659 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4660 {
4661  int i, j, len;
4662 
4663  if (add->len == 0 || to->len == 0) {
4664  clear_opt_exact_info(to);
4665  return ;
4666  }
4667 
4668  if (! is_equal_mml(&to->mmd, &add->mmd)) {
4669  clear_opt_exact_info(to);
4670  return ;
4671  }
4672 
4673  for (i = 0; i < to->len && i < add->len; ) {
4674  if (to->s[i] != add->s[i]) break;
4675  len = enclen(env->enc, to->s + i, to->s + to->len);
4676 
4677  for (j = 1; j < len; j++) {
4678  if (to->s[i+j] != add->s[i+j]) break;
4679  }
4680  if (j < len) break;
4681  i += len;
4682  }
4683 
4684  if (! add->reach_end || i < add->len || i < to->len) {
4685  to->reach_end = 0;
4686  }
4687  to->len = i;
4688  if (to->ignore_case < 0)
4689  to->ignore_case = add->ignore_case;
4690  else if (add->ignore_case >= 0)
4691  to->ignore_case |= add->ignore_case;
4692 
4693  alt_merge_opt_anc_info(&to->anc, &add->anc);
4694  if (! to->reach_end) to->anc.right_anchor = 0;
4695 }
4696 
4697 static void
4698 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4699 {
4700  int v1, v2;
4701 
4702  v1 = now->len;
4703  v2 = alt->len;
4704 
4705  if (v2 == 0) {
4706  return ;
4707  }
4708  else if (v1 == 0) {
4709  copy_opt_exact_info(now, alt);
4710  return ;
4711  }
4712  else if (v1 <= 2 && v2 <= 2) {
4713  /* ByteValTable[x] is big value --> low price */
4714  v2 = map_position_value(enc, now->s[0]);
4715  v1 = map_position_value(enc, alt->s[0]);
4716 
4717  if (now->len > 1) v1 += 5;
4718  if (alt->len > 1) v2 += 5;
4719  }
4720 
4721  if (now->ignore_case <= 0) v1 *= 2;
4722  if (alt->ignore_case <= 0) v2 *= 2;
4723 
4724  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4725  copy_opt_exact_info(now, alt);
4726 }
4727 
4728 static void
4729 clear_opt_map_info(OptMapInfo* map)
4730 {
4731  static const OptMapInfo clean_info = {
4732  {0, 0}, {0, 0}, 0,
4733  {
4734  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4735  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4736  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4737  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4738  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4739  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4740  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4741  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4742  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4743  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4744  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4745  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4746  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4747  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4748  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4749  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4750  }
4751  };
4752 
4753  xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4754 }
4755 
4756 static void
4757 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4758 {
4759  *to = *from;
4760 }
4761 
4762 static void
4763 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4764 {
4765  if (map->map[c] == 0) {
4766  map->map[c] = 1;
4767  map->value += map_position_value(enc, c);
4768  }
4769 }
4770 
4771 static int
4772 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4773  OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4774 {
4777  int i, n;
4778 
4779  add_char_opt_map_info(map, p[0], enc);
4780 
4781  case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4782  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4783  if (n < 0) return n;
4784 
4785  for (i = 0; i < n; i++) {
4786  ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4787  add_char_opt_map_info(map, buf[0], enc);
4788  }
4789 
4790  return 0;
4791 }
4792 
4793 static void
4794 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4795 {
4796  const int z = 1<<15; /* 32768: something big value */
4797 
4798  int v1, v2;
4799 
4800  if (alt->value == 0) return ;
4801  if (now->value == 0) {
4802  copy_opt_map_info(now, alt);
4803  return ;
4804  }
4805 
4806  v1 = z / now->value;
4807  v2 = z / alt->value;
4808  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4809  copy_opt_map_info(now, alt);
4810 }
4811 
4812 static int
4813 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4814 {
4815 #define COMP_EM_BASE 20
4816  int ve, vm;
4817 
4818  if (m->value <= 0) return -1;
4819 
4820  ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4821  vm = COMP_EM_BASE * 5 * 2 / m->value;
4822  return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4823 }
4824 
4825 static void
4826 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4827 {
4828  int i, val;
4829 
4830  /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4831  if (to->value == 0) return ;
4832  if (add->value == 0 || to->mmd.max < add->mmd.min) {
4833  clear_opt_map_info(to);
4834  return ;
4835  }
4836 
4837  alt_merge_mml(&to->mmd, &add->mmd);
4838 
4839  val = 0;
4840  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4841  if (add->map[i])
4842  to->map[i] = 1;
4843 
4844  if (to->map[i])
4845  val += map_position_value(enc, i);
4846  }
4847  to->value = val;
4848 
4849  alt_merge_opt_anc_info(&to->anc, &add->anc);
4850 }
4851 
4852 static void
4853 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4854 {
4855  copy_mml(&(opt->exb.mmd), mmd);
4856  copy_mml(&(opt->expr.mmd), mmd);
4857  copy_mml(&(opt->map.mmd), mmd);
4858 }
4859 
4860 static void
4861 clear_node_opt_info(NodeOptInfo* opt)
4862 {
4863  clear_mml(&opt->len);
4864  clear_opt_anc_info(&opt->anc);
4865  clear_opt_exact_info(&opt->exb);
4866  clear_opt_exact_info(&opt->exm);
4867  clear_opt_exact_info(&opt->expr);
4868  clear_opt_map_info(&opt->map);
4869 }
4870 
4871 static void
4872 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4873 {
4874  *to = *from;
4875 }
4876 
4877 static void
4878 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4879 {
4880  int exb_reach, exm_reach;
4881  OptAncInfo tanc;
4882 
4883  concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4884  copy_opt_anc_info(&to->anc, &tanc);
4885 
4886  if (add->exb.len > 0 && to->len.max == 0) {
4887  concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4888  to->len.max, add->len.max);
4889  copy_opt_anc_info(&add->exb.anc, &tanc);
4890  }
4891 
4892  if (add->map.value > 0 && to->len.max == 0) {
4893  if (add->map.mmd.max == 0)
4894  add->map.anc.left_anchor |= to->anc.left_anchor;
4895  }
4896 
4897  exb_reach = to->exb.reach_end;
4898  exm_reach = to->exm.reach_end;
4899 
4900  if (add->len.max != 0)
4901  to->exb.reach_end = to->exm.reach_end = 0;
4902 
4903  if (add->exb.len > 0) {
4904  if (exb_reach) {
4905  concat_opt_exact_info(&to->exb, &add->exb, enc);
4906  clear_opt_exact_info(&add->exb);
4907  }
4908  else if (exm_reach) {
4909  concat_opt_exact_info(&to->exm, &add->exb, enc);
4910  clear_opt_exact_info(&add->exb);
4911  }
4912  }
4913  select_opt_exact_info(enc, &to->exm, &add->exb);
4914  select_opt_exact_info(enc, &to->exm, &add->exm);
4915 
4916  if (to->expr.len > 0) {
4917  if (add->len.max > 0) {
4918  if (to->expr.len > (int )add->len.max)
4919  to->expr.len = (int )add->len.max;
4920 
4921  if (to->expr.mmd.max == 0)
4922  select_opt_exact_info(enc, &to->exb, &to->expr);
4923  else
4924  select_opt_exact_info(enc, &to->exm, &to->expr);
4925  }
4926  }
4927  else if (add->expr.len > 0) {
4928  copy_opt_exact_info(&to->expr, &add->expr);
4929  }
4930 
4931  select_opt_map_info(&to->map, &add->map);
4932 
4933  add_mml(&to->len, &add->len);
4934 }
4935 
4936 static void
4937 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4938 {
4939  alt_merge_opt_anc_info (&to->anc, &add->anc);
4940  alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4941  alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4942  alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4943  alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4944 
4945  alt_merge_mml(&to->len, &add->len);
4946 }
4947 
4948 
4949 #define MAX_NODE_OPT_INFO_REF_COUNT 5
4950 
4951 static int
4952 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4953 {
4954  int type;
4955  int r = 0;
4956 
4957  clear_node_opt_info(opt);
4958  set_bound_node_opt_info(opt, &env->mmd);
4959 
4960  type = NTYPE(node);
4961  switch (type) {
4962  case NT_LIST:
4963  {
4964  OptEnv nenv;
4965  NodeOptInfo nopt;
4966  Node* nd = node;
4967 
4968  copy_opt_env(&nenv, env);
4969  do {
4970  r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4971  if (r == 0) {
4972  add_mml(&nenv.mmd, &nopt.len);
4973  concat_left_node_opt_info(env->enc, opt, &nopt);
4974  }
4975  } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4976  }
4977  break;
4978 
4979  case NT_ALT:
4980  {
4981  NodeOptInfo nopt;
4982  Node* nd = node;
4983 
4984  do {
4985  r = optimize_node_left(NCAR(nd), &nopt, env);
4986  if (r == 0) {
4987  if (nd == node) copy_node_opt_info(opt, &nopt);
4988  else alt_merge_node_opt_info(opt, &nopt, env);
4989  }
4990  } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4991  }
4992  break;
4993 
4994  case NT_STR:
4995  {
4996  StrNode* sn = NSTR(node);
4997  OnigDistance slen = sn->end - sn->s;
4998  int is_raw = NSTRING_IS_RAW(node);
4999 
5000  if (! NSTRING_IS_AMBIG(node)) {
5001  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5002  is_raw, env->enc);
5003  opt->exb.ignore_case = 0;
5004  if (slen > 0) {
5005  add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
5006  }
5007  set_mml(&opt->len, slen, slen);
5008  }
5009  else {
5010  OnigDistance max;
5011 
5012  if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
5013  int n = onigenc_strlen(env->enc, sn->s, sn->end);
5014  max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
5015  }
5016  else {
5017  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5018  is_raw, env->enc);
5019  opt->exb.ignore_case = 1;
5020 
5021  if (slen > 0) {
5022  r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
5023  env->enc, env->case_fold_flag);
5024  if (r != 0) break;
5025  }
5026 
5027  max = slen;
5028  }
5029 
5030  set_mml(&opt->len, slen, max);
5031  }
5032 
5033  if ((OnigDistance )opt->exb.len == slen)
5034  opt->exb.reach_end = 1;
5035  }
5036  break;
5037 
5038  case NT_CCLASS:
5039  {
5040  int i, z;
5041  CClassNode* cc = NCCLASS(node);
5042 
5043  /* no need to check ignore case. (set in setup_tree()) */
5044 
5045  if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5046  OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5048 
5049  set_mml(&opt->len, min, max);
5050  }
5051  else {
5052  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5053  z = BITSET_AT(cc->bs, i);
5054  if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5055  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5056  }
5057  }
5058  set_mml(&opt->len, 1, 1);
5059  }
5060  }
5061  break;
5062 
5063  case NT_CTYPE:
5064  {
5065  int i, min, max;
5066  int maxcode;
5067 
5068  max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5069 
5070  if (max == 1) {
5071  min = 1;
5072 
5073  maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5074  switch (NCTYPE(node)->ctype) {
5075  case ONIGENC_CTYPE_WORD:
5076  if (NCTYPE(node)->not != 0) {
5077  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5078  if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5079  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5080  }
5081  }
5082  }
5083  else {
5084  for (i = 0; i < maxcode; i++) {
5085  if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5086  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5087  }
5088  }
5089  }
5090  break;
5091  }
5092  }
5093  else {
5094  min = ONIGENC_MBC_MINLEN(env->enc);
5095  }
5096  set_mml(&opt->len, min, max);
5097  }
5098  break;
5099 
5100  case NT_CANY:
5101  {
5102  OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5104  set_mml(&opt->len, min, max);
5105  }
5106  break;
5107 
5108  case NT_ANCHOR:
5109  switch (NANCHOR(node)->type) {
5110  case ANCHOR_BEGIN_BUF:
5111  case ANCHOR_BEGIN_POSITION:
5112  case ANCHOR_BEGIN_LINE:
5113  case ANCHOR_END_BUF:
5114  case ANCHOR_SEMI_END_BUF:
5115  case ANCHOR_END_LINE:
5116  case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5117  case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
5118  add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5119  break;
5120 
5121  case ANCHOR_PREC_READ:
5122  {
5123  NodeOptInfo nopt;
5124 
5125  r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5126  if (r == 0) {
5127  if (nopt.exb.len > 0)
5128  copy_opt_exact_info(&opt->expr, &nopt.exb);
5129  else if (nopt.exm.len > 0)
5130  copy_opt_exact_info(&opt->expr, &nopt.exm);
5131 
5132  opt->expr.reach_end = 0;
5133 
5134  if (nopt.map.value > 0)
5135  copy_opt_map_info(&opt->map, &nopt.map);
5136  }
5137  }
5138  break;
5139 
5141  break;
5142  }
5143  break;
5144 
5145  case NT_BREF:
5146  {
5147  int i;
5148  int* backs;
5149  OnigDistance min, max, tmin, tmax;
5150  Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5151  BRefNode* br = NBREF(node);
5152 
5153  if (br->state & NST_RECURSION) {
5154  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5155  break;
5156  }
5157  backs = BACKREFS_P(br);
5158  r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5159  if (r != 0) break;
5160  r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5161  if (r != 0) break;
5162  for (i = 1; i < br->back_num; i++) {
5163  r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5164  if (r != 0) break;
5165  r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5166  if (r != 0) break;
5167  if (min > tmin) min = tmin;
5168  if (max < tmax) max = tmax;
5169  }
5170  if (r == 0) set_mml(&opt->len, min, max);
5171  }
5172  break;
5173 
5174 #ifdef USE_SUBEXP_CALL
5175  case NT_CALL:
5176  if (IS_CALL_RECURSION(NCALL(node)))
5177  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5178  else {
5179  OnigOptionType save = env->options;
5180  env->options = NENCLOSE(NCALL(node)->target)->option;
5181  r = optimize_node_left(NCALL(node)->target, opt, env);
5182  env->options = save;
5183  }
5184  break;
5185 #endif
5186 
5187  case NT_QTFR:
5188  {
5189  int i;
5190  OnigDistance min, max;
5191  NodeOptInfo nopt;
5192  QtfrNode* qn = NQTFR(node);
5193 
5194  r = optimize_node_left(qn->target, &nopt, env);
5195  if (r) break;
5196 
5197  if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
5198  if (env->mmd.max == 0 &&
5199  NTYPE(qn->target) == NT_CANY && qn->greedy) {
5200  if (IS_MULTILINE(env->options))
5201  /* implicit anchor: /.*a/ ==> /\A.*a/ */
5202  add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5203  else
5204  add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5205  }
5206  }
5207  else {
5208  if (qn->lower > 0) {
5209  copy_node_opt_info(opt, &nopt);
5210  if (nopt.exb.len > 0) {
5211  if (nopt.exb.reach_end) {
5212  for (i = 2; i <= qn->lower &&
5213  ! is_full_opt_exact_info(&opt->exb); i++) {
5214  concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5215  }
5216  if (i < qn->lower) {
5217  opt->exb.reach_end = 0;
5218  }
5219  }
5220  }
5221 
5222  if (qn->lower != qn->upper) {
5223  opt->exb.reach_end = 0;
5224  opt->exm.reach_end = 0;
5225  }
5226  if (qn->lower > 1)
5227  opt->exm.reach_end = 0;
5228  }
5229  }
5230 
5231  min = distance_multiply(nopt.len.min, qn->lower);
5232  if (IS_REPEAT_INFINITE(qn->upper))
5233  max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5234  else
5235  max = distance_multiply(nopt.len.max, qn->upper);
5236 
5237  set_mml(&opt->len, min, max);
5238  }
5239  break;
5240 
5241  case NT_ENCLOSE:
5242  {
5243  EncloseNode* en = NENCLOSE(node);
5244 
5245  switch (en->type) {
5246  case ENCLOSE_OPTION:
5247  {
5248  OnigOptionType save = env->options;
5249 
5250  env->options = en->option;
5251  r = optimize_node_left(en->target, opt, env);
5252  env->options = save;
5253  }
5254  break;
5255 
5256  case ENCLOSE_MEMORY:
5257 #ifdef USE_SUBEXP_CALL
5258  en->opt_count++;
5260  OnigDistance min, max;
5261 
5262  min = 0;
5263  max = ONIG_INFINITE_DISTANCE;
5264  if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5265  if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5266  set_mml(&opt->len, min, max);
5267  }
5268  else
5269 #endif
5270  {
5271  r = optimize_node_left(en->target, opt, env);
5272 
5273  if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5274  if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5275  remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5276  }
5277  }
5278  break;
5279 
5281  case ENCLOSE_CONDITION:
5282  r = optimize_node_left(en->target, opt, env);
5283  break;
5284 
5285  case ENCLOSE_ABSENT:
5286  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5287  break;
5288  }
5289  }
5290  break;
5291 
5292  default:
5293 #ifdef ONIG_DEBUG
5294  fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5295  NTYPE(node));
5296 #endif
5297  r = ONIGERR_TYPE_BUG;
5298  break;
5299  }
5300 
5301  return r;
5302 }
5303 
5304 static int
5305 set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
5306 {
5307  int r;
5308  int allow_reverse;
5309 
5310  if (e->len == 0) return 0;
5311 
5312  reg->exact = (UChar* )xmalloc(e->len);
5314  xmemcpy(reg->exact, e->s, e->len);
5315  reg->exact_end = reg->exact + e->len;
5316 
5317  allow_reverse =
5319 
5320  if (e->ignore_case > 0) {
5321  if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5322  r = set_bm_skip(reg->exact, reg->exact_end, reg,
5323  reg->map, &(reg->int_map), 1);
5324  if (r == 0) {
5325  reg->optimize = (allow_reverse != 0
5327  }
5328  else {
5330  }
5331  }
5332  else {
5334  }
5335  }
5336  else {
5337  if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5338  r = set_bm_skip(reg->exact, reg->exact_end, reg,
5339  reg->map, &(reg->int_map), 0);
5340  if (r == 0) {
5341  reg->optimize = (allow_reverse != 0
5343  }
5344  else {
5346  }
5347  }
5348  else {
5350  }
5351  }
5352 
5353  reg->dmin = e->mmd.min;
5354  reg->dmax = e->mmd.max;
5355 
5356  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5357  reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5358  }
5359 
5360  return 0;
5361 }
5362 
5363 static void
5364 set_optimize_map_info(regex_t* reg, OptMapInfo* m)
5365 {
5366  int i;
5367 
5368  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5369  reg->map[i] = m->map[i];
5370 
5371  reg->optimize = ONIG_OPTIMIZE_MAP;
5372  reg->dmin = m->mmd.min;
5373  reg->dmax = m->mmd.max;
5374 
5375  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5376  reg->threshold_len = (int )(reg->dmin + 1);
5377  }
5378 }
5379 
5380 static void
5381 set_sub_anchor(regex_t* reg, OptAncInfo* anc)
5382 {
5383  reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
5384  reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5385 }
5386 
5387 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5388 static void print_optimize_info(FILE* f, regex_t* reg);
5389 #endif
5390 
5391 static int
5392 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5393 {
5394 
5395  int r;
5396  NodeOptInfo opt;
5397  OptEnv env;
5398 
5399  env.enc = reg->enc;
5400  env.options = reg->options;
5401  env.case_fold_flag = reg->case_fold_flag;
5402  env.scan_env = scan_env;
5403  clear_mml(&env.mmd);
5404 
5405  r = optimize_node_left(node, &opt, &env);
5406  if (r) return r;
5407 
5408  reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5411 
5413  reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5414 
5417 
5418  if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5419  reg->anchor_dmin = opt.len.min;
5420  reg->anchor_dmax = opt.len.max;
5421  }
5422 
5423  if (opt.exb.len > 0 || opt.exm.len > 0) {
5424  select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5425  if (opt.map.value > 0 &&
5426  comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5427  goto set_map;
5428  }
5429  else {
5430  r = set_optimize_exact_info(reg, &opt.exb);
5431  set_sub_anchor(reg, &opt.exb.anc);
5432  }
5433  }
5434  else if (opt.map.value > 0) {
5435  set_map:
5436  set_optimize_map_info(reg, &opt.map);
5437  set_sub_anchor(reg, &opt.map.anc);
5438  }
5439  else {
5441  if (opt.len.max == 0)
5443  }
5444 
5445 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5446  print_optimize_info(stderr, reg);
5447 #endif
5448  return r;
5449 }
5450 
5451 static void
5452 clear_optimize_info(regex_t* reg)
5453 {
5455  reg->anchor = 0;
5456  reg->anchor_dmin = 0;
5457  reg->anchor_dmax = 0;
5458  reg->sub_anchor = 0;
5459  reg->exact_end = (UChar* )NULL;
5460  reg->threshold_len = 0;
5461  if (IS_NOT_NULL(reg->exact)) {
5462  xfree(reg->exact);
5463  reg->exact = (UChar* )NULL;
5464  }
5465 }
5466 
5467 #ifdef ONIG_DEBUG
5468 
5469 static void print_enc_string(FILE* fp, OnigEncoding enc,
5470  const UChar *s, const UChar *end)
5471 {
5472  fprintf(fp, "\nPATTERN: /");
5473 
5474  if (ONIGENC_MBC_MINLEN(enc) > 1) {
5475  const UChar *p;
5476  OnigCodePoint code;
5477 
5478  p = s;
5479  while (p < end) {
5480  code = ONIGENC_MBC_TO_CODE(enc, p, end);
5481  if (code >= 0x80) {
5482  fprintf(fp, " 0x%04x ", (int )code);
5483  }
5484  else {
5485  fputc((int )code, fp);
5486  }
5487 
5488  p += enclen(enc, p, end);
5489  }
5490  }
5491  else {
5492  while (s < end) {
5493  fputc((int )*s, fp);
5494  s++;
5495  }
5496  }
5497 
5498  fprintf(fp, "/ (%s)\n", enc->name);
5499 }
5500 #endif /* ONIG_DEBUG */
5501 
5502 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5503 static void
5504 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5505 {
5506  if (a == ONIG_INFINITE_DISTANCE)
5507  fputs("inf", f);
5508  else
5509  fprintf(f, "(%"PRIuPTR")", a);
5510 
5511  fputs("-", f);
5512 
5513  if (b == ONIG_INFINITE_DISTANCE)
5514  fputs("inf", f);
5515  else
5516  fprintf(f, "(%"PRIuPTR")", b);
5517 }
5518 
5519 static void
5520 print_anchor(FILE* f, int anchor)
5521 {
5522  int q = 0;
5523 
5524  fprintf(f, "[");
5525 
5526  if (anchor & ANCHOR_BEGIN_BUF) {
5527  fprintf(f, "begin-buf");
5528  q = 1;
5529  }
5530  if (anchor & ANCHOR_BEGIN_LINE) {
5531  if (q) fprintf(f, ", ");
5532  q = 1;
5533  fprintf(f, "begin-line");
5534  }
5535  if (anchor & ANCHOR_BEGIN_POSITION) {
5536  if (q) fprintf(f, ", ");
5537  q = 1;
5538  fprintf(f, "begin-pos");
5539  }
5540  if (anchor & ANCHOR_END_BUF) {
5541  if (q) fprintf(f, ", ");
5542  q = 1;
5543  fprintf(f, "end-buf");
5544  }
5545  if (anchor & ANCHOR_SEMI_END_BUF) {
5546  if (q) fprintf(f, ", ");
5547  q = 1;
5548  fprintf(f, "semi-end-buf");
5549  }
5550  if (anchor & ANCHOR_END_LINE) {
5551  if (q) fprintf(f, ", ");
5552  q = 1;
5553  fprintf(f, "end-line");
5554  }
5555  if (anchor & ANCHOR_ANYCHAR_STAR) {
5556  if (q) fprintf(f, ", ");
5557  q = 1;
5558  fprintf(f, "anychar-star");
5559  }
5560  if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5561  if (q) fprintf(f, ", ");
5562  fprintf(f, "anychar-star-ml");
5563  }
5564 
5565  fprintf(f, "]");
5566 }
5567 
5568 static void
5569 print_optimize_info(FILE* f, regex_t* reg)
5570 {
5571  static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5572  "EXACT_IC", "MAP",
5573  "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5574 
5575  fprintf(f, "optimize: %s\n", on[reg->optimize]);
5576  fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5577  if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5578  print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5579  fprintf(f, "\n");
5580 
5581  if (reg->optimize) {
5582  fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5583  fprintf(f, "\n");
5584  }
5585  fprintf(f, "\n");
5586 
5587  if (reg->exact) {
5588  UChar *p;
5589  fprintf(f, "exact: [");
5590  for (p = reg->exact; p < reg->exact_end; p++) {
5591  fputc(*p, f);
5592  }
5593  fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
5594  }
5595  else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5596  int c, i, n = 0;
5597 
5598  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5599  if (reg->map[i]) n++;
5600 
5601  fprintf(f, "map: n=%d\n", n);
5602  if (n > 0) {
5603  c = 0;
5604  fputc('[', f);
5605  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5606  if (reg->map[i] != 0) {
5607  if (c > 0) fputs(", ", f);
5608  c++;
5609  if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5611  fputc(i, f);
5612  else
5613  fprintf(f, "%d", i);
5614  }
5615  }
5616  fprintf(f, "]\n");
5617  }
5618  }
5619 }
5620 #endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
5621 
5622 
5623 extern void
5625 {
5626  if (IS_NOT_NULL(reg)) {
5627  if (IS_NOT_NULL(reg->p)) xfree(reg->p);
5628  if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
5629  if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
5631  if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
5632  if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
5633 
5634 #ifdef USE_NAMED_GROUP
5635  onig_names_free(reg);
5636 #endif
5637  }
5638 }
5639 
5640 extern void
5642 {
5643  if (IS_NOT_NULL(reg)) {
5644  onig_free_body(reg);
5645  xfree(reg);
5646  }
5647 }
5648 
5649 #ifdef RUBY
5650 size_t
5652 {
5653  size_t size = sizeof(regex_t);
5654  if (IS_NULL(reg)) return 0;
5655  if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5656  if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5657  if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5658  if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5659  if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5660  if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5661 
5662  return size;
5663 }
5664 
5665 size_t
5667 {
5668  size_t size = sizeof(*regs);
5669  if (IS_NULL(regs)) return 0;
5670  size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5671  return size;
5672 }
5673 #endif
5674 
5675 #define REGEX_TRANSFER(to,from) do {\
5676  onig_free_body(to);\
5677  xmemcpy(to, from, sizeof(regex_t));\
5678  xfree(from);\
5679 } while (0)
5680 
5681 #if 0
5682 extern void
5683 onig_transfer(regex_t* to, regex_t* from)
5684 {
5685  REGEX_TRANSFER(to, from);
5686 }
5687 #endif
5688 
5689 #ifdef ONIG_DEBUG_COMPILE
5690 static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
5691 #endif
5692 #ifdef ONIG_DEBUG_PARSE_TREE
5693 static void print_tree(FILE* f, Node* node);
5694 #endif
5695 
5696 #ifdef RUBY
5697 extern int
5698 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5699  OnigErrorInfo* einfo)
5700 {
5701  return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
5702 }
5703 #endif
5704 
5705 #ifdef RUBY
5706 extern int
5707 onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5708  OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5709 #else
5710 extern int
5711 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5712  OnigErrorInfo* einfo)
5713 #endif
5714 {
5715 #define COMPILE_INIT_SIZE 20
5716 
5717  int r;
5718  OnigDistance init_size;
5719  Node* root;
5720  ScanEnv scan_env = {0};
5721 #ifdef USE_SUBEXP_CALL
5722  UnsetAddrList uslist;
5723 #endif
5724 
5725  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5726 
5727 #ifdef RUBY
5728  scan_env.sourcefile = sourcefile;
5729  scan_env.sourceline = sourceline;
5730 #endif
5731 
5732 #ifdef ONIG_DEBUG
5733  print_enc_string(stderr, reg->enc, pattern, pattern_end);
5734 #endif
5735 
5736  if (reg->alloc == 0) {
5737  init_size = (pattern_end - pattern) * 2;
5738  if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5739  r = BBUF_INIT(reg, init_size);
5740  if (r != 0) goto end;
5741  }
5742  else
5743  reg->used = 0;
5744 
5745  reg->num_mem = 0;
5746  reg->num_repeat = 0;
5747  reg->num_null_check = 0;
5748  reg->repeat_range_alloc = 0;
5749  reg->repeat_range = (OnigRepeatRange* )NULL;
5750 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5751  reg->num_comb_exp_check = 0;
5752 #endif
5753 
5754  r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5755  if (r != 0) goto err;
5756 
5757 #ifdef ONIG_DEBUG_PARSE_TREE
5758 # if 0
5759  fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5760  print_tree(stderr, root);
5761 # endif
5762 #endif
5763 
5764 #ifdef USE_NAMED_GROUP
5765  /* mixed use named group and no-named group */
5766  if (scan_env.num_named > 0 &&
5769  if (scan_env.num_named != scan_env.num_mem)
5770  r = disable_noname_group_capture(&root, reg, &scan_env);
5771  else
5772  r = numbered_ref_check(root);
5773 
5774  if (r != 0) goto err;
5775  }
5776 #endif
5777 
5778 #ifdef USE_SUBEXP_CALL
5779  if (scan_env.num_call > 0) {
5780  r = unset_addr_list_init(&uslist, scan_env.num_call);
5781  if (r != 0) goto err;
5782  scan_env.unset_addr_list = &uslist;
5783  r = setup_subexp_call(root, &scan_env);
5784  if (r != 0) goto err_unset;
5785  r = subexp_recursive_check_trav(root, &scan_env);
5786  if (r < 0) goto err_unset;
5787  r = subexp_inf_recursive_check_trav(root, &scan_env);
5788  if (r != 0) goto err_unset;
5789 
5790  reg->num_call = scan_env.num_call;
5791  }
5792  else
5793  reg->num_call = 0;
5794 #endif
5795 
5796  r = setup_tree(root, reg, 0, &scan_env);
5797  if (r != 0) goto err_unset;
5798 
5799 #ifdef ONIG_DEBUG_PARSE_TREE
5800  print_tree(stderr, root);
5801 #endif
5802 
5803  reg->capture_history = scan_env.capture_history;
5804  reg->bt_mem_start = scan_env.bt_mem_start;
5805  reg->bt_mem_start |= reg->capture_history;
5806  if (IS_FIND_CONDITION(reg->options))
5808  else {
5809  reg->bt_mem_end = scan_env.bt_mem_end;
5810  reg->bt_mem_end |= reg->capture_history;
5811  }
5812 
5813 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5814  if (scan_env.backrefed_mem == 0
5815 # ifdef USE_SUBEXP_CALL
5816  || scan_env.num_call == 0
5817 # endif
5818  ) {
5819  setup_comb_exp_check(root, 0, &scan_env);
5820 # ifdef USE_SUBEXP_CALL
5821  if (scan_env.has_recursion != 0) {
5822  scan_env.num_comb_exp_check = 0;
5823  }
5824  else
5825 # endif
5826  if (scan_env.comb_exp_max_regnum > 0) {
5827  int i;
5828  for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5829  if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5830  scan_env.num_comb_exp_check = 0;
5831  break;
5832  }
5833  }
5834  }
5835  }
5836 
5837  reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5838 #endif
5839 
5840  clear_optimize_info(reg);
5841 #ifndef ONIG_DONT_OPTIMIZE
5842  r = set_optimize_info_from_tree(root, reg, &scan_env);
5843  if (r != 0) goto err_unset;
5844 #endif
5845 
5846  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5847  xfree(scan_env.mem_nodes_dynamic);
5848  scan_env.mem_nodes_dynamic = (Node** )NULL;
5849  }
5850 
5851  r = compile_tree(root, reg);
5852  if (r == 0) {
5853  r = add_opcode(reg, OP_END);
5854 #ifdef USE_SUBEXP_CALL
5855  if (scan_env.num_call > 0) {
5856  r = unset_addr_list_fix(&uslist, reg);
5857  unset_addr_list_end(&uslist);
5858  if (r) goto err;
5859  }
5860 #endif
5861 
5862  if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5864  else {
5865  if (reg->bt_mem_start != 0)
5867  else
5869  }
5870  }
5871 #ifdef USE_SUBEXP_CALL
5872  else if (scan_env.num_call > 0) {
5873  unset_addr_list_end(&uslist);
5874  }
5875 #endif
5876  onig_node_free(root);
5877 
5878 #ifdef ONIG_DEBUG_COMPILE
5879 # ifdef USE_NAMED_GROUP
5880  onig_print_names(stderr, reg);
5881 # endif
5882  print_compiled_byte_code_list(stderr, reg);
5883 #endif
5884 
5885  end:
5886  return r;
5887 
5888  err_unset:
5889 #ifdef USE_SUBEXP_CALL
5890  if (scan_env.num_call > 0) {
5891  unset_addr_list_end(&uslist);
5892  }
5893 #endif
5894  err:
5895  if (IS_NOT_NULL(scan_env.error)) {
5896  if (IS_NOT_NULL(einfo)) {
5897  einfo->enc = scan_env.enc;
5898  einfo->par = scan_env.error;
5899  einfo->par_end = scan_env.error_end;
5900  }
5901  }
5902 
5903  onig_node_free(root);
5904  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5905  xfree(scan_env.mem_nodes_dynamic);
5906  return r;
5907 }
5908 
5909 static int onig_inited = 0;
5910 
5911 extern int
5913  OnigCaseFoldType case_fold_flag,
5914  OnigEncoding enc, const OnigSyntaxType* syntax)
5915 {
5916  if (! onig_inited)
5917  onig_init();
5918 
5919  if (IS_NULL(reg))
5920  return ONIGERR_INVALID_ARGUMENT;
5921 
5922  if (ONIGENC_IS_UNDEF(enc))
5924 
5928  }
5929 
5930  if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5931  option |= syntax->options;
5932  option &= ~ONIG_OPTION_SINGLELINE;
5933  }
5934  else
5935  option |= syntax->options;
5936 
5937  (reg)->enc = enc;
5938  (reg)->options = option;
5939  (reg)->syntax = syntax;
5940  (reg)->optimize = 0;
5941  (reg)->exact = (UChar* )NULL;
5942  (reg)->int_map = (int* )NULL;
5943  (reg)->int_map_backward = (int* )NULL;
5944  (reg)->chain = (regex_t* )NULL;
5945 
5946  (reg)->p = (UChar* )NULL;
5947  (reg)->alloc = 0;
5948  (reg)->used = 0;
5949  (reg)->name_table = (void* )NULL;
5950 
5951  (reg)->case_fold_flag = case_fold_flag;
5952  return 0;
5953 }
5954 
5955 extern int
5956 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5957  const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5958  const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5959 {
5960  int r;
5961 
5962  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5963  if (r) return r;
5964 
5965  r = onig_compile(reg, pattern, pattern_end, einfo);
5966  return r;
5967 }
5968 
5969 extern int
5970 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5971  OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
5972  OnigErrorInfo* einfo)
5973 {
5974  int r;
5975 
5976  *reg = (regex_t* )xmalloc(sizeof(regex_t));
5977  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5978 
5979  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5980  if (r) goto err;
5981 
5982  r = onig_compile(*reg, pattern, pattern_end, einfo);
5983  if (r) {
5984  err:
5985  onig_free(*reg);
5986  *reg = NULL;
5987  }
5988  return r;
5989 }
5990 
5991 extern int
5992 onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
5993 {
5994  return onig_init();
5995 }
5996 
5997 extern int
5999 {
6000  if (onig_inited != 0)
6001  return 0;
6002 
6003  onig_inited = 1;
6004 
6005 #if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6006  _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
6007 #endif
6008 
6009  onigenc_init();
6010  /* onigenc_set_default_caseconv_table((UChar* )0); */
6011 
6012 #ifdef ONIG_DEBUG_STATISTICS
6013  onig_statistics_init();
6014 #endif
6015 
6016  return 0;
6017 }
6018 
6019 
6020 static OnigEndCallListItemType* EndCallTop;
6021 
6022 extern void onig_add_end_call(void (*func)(void))
6023 {
6025 
6026  item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
6027  if (item == 0) return ;
6028 
6029  item->next = EndCallTop;
6030  item->func = func;
6031 
6032  EndCallTop = item;
6033 }
6034 
6035 static void
6036 exec_end_call_list(void)
6037 {
6039  void (*func)(void);
6040 
6041  while (EndCallTop != 0) {
6042  func = EndCallTop->func;
6043  (*func)();
6044 
6045  prev = EndCallTop;
6046  EndCallTop = EndCallTop->next;
6047  xfree(prev);
6048  }
6049 }
6050 
6051 extern int
6053 {
6054  exec_end_call_list();
6055 
6056 #ifdef ONIG_DEBUG_STATISTICS
6057  onig_print_statistics(stderr);
6058 #endif
6059 
6060 #if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6061  _CrtDumpMemoryLeaks();
6062 #endif
6063 
6064  onig_inited = 0;
6065 
6066  return 0;
6067 }
6068 
6069 extern int
6071 {
6072  OnigCodePoint n, *data;
6073  OnigCodePoint low, high, x;
6074 
6075  GET_CODE_POINT(n, p);
6076  data = (OnigCodePoint* )p;
6077  data++;
6078 
6079  for (low = 0, high = n; low < high; ) {
6080  x = (low + high) >> 1;
6081  if (code > data[x * 2 + 1])
6082  low = x + 1;
6083  else
6084  high = x;
6085  }
6086 
6087  return ((low < n && code >= data[low * 2]) ? 1 : 0);
6088 }
6089 
6090 extern int
6092 {
6093  int found;
6094 
6095  if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6096  if (IS_NULL(cc->mbuf)) {
6097  found = 0;
6098  }
6099  else {
6100  found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6101  }
6102  }
6103  else {
6104  found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6105  }
6106 
6107  if (IS_NCCLASS_NOT(cc))
6108  return !found;
6109  else
6110  return found;
6111 }
6112 
6113 extern int
6115 {
6116  int len;
6117 
6118  if (ONIGENC_MBC_MINLEN(enc) > 1) {
6119  len = 2;
6120  }
6121  else {
6122  len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6123  }
6124  return onig_is_code_in_cc_len(len, code, cc);
6125 }
6126 
6127 
6128 #ifdef ONIG_DEBUG
6129 
6130 /* arguments type */
6131 # define ARG_SPECIAL -1
6132 # define ARG_NON 0
6133 # define ARG_RELADDR 1
6134 # define ARG_ABSADDR 2
6135 # define ARG_LENGTH 3
6136 # define ARG_MEMNUM 4
6137 # define ARG_OPTION 5
6138 # define ARG_STATE_CHECK 6
6139 
6140 OnigOpInfoType OnigOpInfo[] = {
6141  { OP_FINISH, "finish", ARG_NON },
6142  { OP_END, "end", ARG_NON },
6143  { OP_EXACT1, "exact1", ARG_SPECIAL },
6144  { OP_EXACT2, "exact2", ARG_SPECIAL },
6145  { OP_EXACT3, "exact3", ARG_SPECIAL },
6146  { OP_EXACT4, "exact4", ARG_SPECIAL },
6147  { OP_EXACT5, "exact5", ARG_SPECIAL },
6148  { OP_EXACTN, "exactn", ARG_SPECIAL },
6149  { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
6150  { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
6151  { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
6152  { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
6153  { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
6154  { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
6155  { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
6156  { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
6157  { OP_CCLASS, "cclass", ARG_SPECIAL },
6158  { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
6159  { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
6160  { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
6161  { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
6162  { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
6163  { OP_ANYCHAR, "anychar", ARG_NON },
6164  { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
6165  { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
6166  { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
6167  { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
6168  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
6169  { OP_WORD, "word", ARG_NON },
6170  { OP_NOT_WORD, "not-word", ARG_NON },
6171  { OP_WORD_BOUND, "word-bound", ARG_NON },
6172  { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
6173  { OP_WORD_BEGIN, "word-begin", ARG_NON },
6174  { OP_WORD_END, "word-end", ARG_NON },
6175  { OP_ASCII_WORD, "ascii-word", ARG_NON },
6176  { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
6177  { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
6178  { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
6179  { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
6180  { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
6181  { OP_BEGIN_BUF, "begin-buf", ARG_NON },
6182  { OP_END_BUF, "end-buf", ARG_NON },
6183  { OP_BEGIN_LINE, "begin-line", ARG_NON },
6184  { OP_END_LINE, "end-line", ARG_NON },
6185  { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
6186  { OP_BEGIN_POSITION, "begin-position", ARG_NON },
6187  { OP_BACKREF1, "backref1", ARG_NON },
6188  { OP_BACKREF2, "backref2", ARG_NON },
6189  { OP_BACKREFN, "backrefn", ARG_MEMNUM },
6190  { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
6191  { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
6192  { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
6193  { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
6194  { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
6195  { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
6196  { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
6197  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
6198  { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
6199  { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
6200  { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
6201  { OP_SET_OPTION, "set-option", ARG_OPTION },
6202  { OP_KEEP, "keep", ARG_NON },
6203  { OP_FAIL, "fail", ARG_NON },
6204  { OP_JUMP, "jump", ARG_RELADDR },
6205  { OP_PUSH, "push", ARG_RELADDR },
6206  { OP_POP, "pop", ARG_NON },
6207  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
6208  { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
6209  { OP_REPEAT, "repeat", ARG_SPECIAL },
6210  { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
6211  { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
6212  { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
6213  { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
6214  { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
6215  { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
6216  { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
6217  { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
6218  { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
6219  { OP_PUSH_POS, "push-pos", ARG_NON },
6220  { OP_POP_POS, "pop-pos", ARG_NON },
6221  { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
6222  { OP_FAIL_POS, "fail-pos", ARG_NON },
6223  { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
6224  { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
6225  { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
6226  { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
6227  { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
6228  { OP_PUSH_ABSENT_POS, "push-absent-pos", ARG_NON },
6229  { OP_ABSENT, "absent", ARG_RELADDR },
6230  { OP_ABSENT_END, "absent-end", ARG_NON },
6231  { OP_CALL, "call", ARG_ABSADDR },
6232  { OP_RETURN, "return", ARG_NON },
6233  { OP_CONDITION, "condition", ARG_SPECIAL },
6234  { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
6235  { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
6236  { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
6237  { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
6239  "state-check-anychar-ml*", ARG_STATE_CHECK },
6240  { -1, "", ARG_NON }
6241 };
6242 
6243 static const char*
6244 op2name(int opcode)
6245 {
6246  int i;
6247 
6248  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6249  if (opcode == OnigOpInfo[i].opcode)
6250  return OnigOpInfo[i].name;
6251  }
6252  return "";
6253 }
6254 
6255 static int
6256 op2arg_type(int opcode)
6257 {
6258  int i;
6259 
6260  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6261  if (opcode == OnigOpInfo[i].opcode)
6262  return OnigOpInfo[i].arg_type;
6263  }
6264  return ARG_SPECIAL;
6265 }
6266 
6267 # ifdef ONIG_DEBUG_PARSE_TREE
6268 static void
6269 Indent(FILE* f, int indent)
6270 {
6271  int i;
6272  for (i = 0; i < indent; i++) putc(' ', f);
6273 }
6274 # endif /* ONIG_DEBUG_PARSE_TREE */
6275 
6276 static void
6277 p_string(FILE* f, ptrdiff_t len, UChar* s)
6278 {
6279  fputs(":", f);
6280  while (len-- > 0) { fputc(*s++, f); }
6281 }
6282 
6283 static void
6284 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6285 {
6286  int x = len * mb_len;
6287 
6288  fprintf(f, ":%d:", len);
6289  while (x-- > 0) { fputc(*s++, f); }
6290 }
6291 
6292 extern void
6293 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6294  OnigEncoding enc)
6295 {
6296  int i, n, arg_type;
6297  RelAddrType addr;
6298  LengthType len;
6299  MemNumType mem;
6300  StateCheckNumType scn;
6301  OnigCodePoint code;
6302  UChar *q;
6303 
6304  fprintf(f, "[%s", op2name(*bp));
6305  arg_type = op2arg_type(*bp);
6306  if (arg_type != ARG_SPECIAL) {
6307  bp++;
6308  switch (arg_type) {
6309  case ARG_NON:
6310  break;
6311  case ARG_RELADDR:
6312  GET_RELADDR_INC(addr, bp);
6313  fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6314  break;
6315  case ARG_ABSADDR:
6316  GET_ABSADDR_INC(addr, bp);
6317  fprintf(f, ":(%d)", addr);
6318  break;
6319  case ARG_LENGTH:
6320  GET_LENGTH_INC(len, bp);
6321  fprintf(f, ":%d", len);
6322  break;
6323  case ARG_MEMNUM:
6324  mem = *((MemNumType* )bp);
6325  bp += SIZE_MEMNUM;
6326  fprintf(f, ":%d", mem);
6327  break;
6328  case ARG_OPTION:
6329  {
6330  OnigOptionType option = *((OnigOptionType* )bp);
6331  bp += SIZE_OPTION;
6332  fprintf(f, ":%d", option);
6333  }
6334  break;
6335 
6336  case ARG_STATE_CHECK:
6337  scn = *((StateCheckNumType* )bp);
6338  bp += SIZE_STATE_CHECK_NUM;
6339  fprintf(f, ":%d", scn);
6340  break;
6341  }
6342  }
6343  else {
6344  switch (*bp++) {
6345  case OP_EXACT1:
6348  p_string(f, 1, bp++); break;
6349  case OP_EXACT2:
6350  p_string(f, 2, bp); bp += 2; break;
6351  case OP_EXACT3:
6352  p_string(f, 3, bp); bp += 3; break;
6353  case OP_EXACT4:
6354  p_string(f, 4, bp); bp += 4; break;
6355  case OP_EXACT5:
6356  p_string(f, 5, bp); bp += 5; break;
6357  case OP_EXACTN:
6358  GET_LENGTH_INC(len, bp);
6359  p_len_string(f, len, 1, bp);
6360  bp += len;
6361  break;
6362 
6363  case OP_EXACTMB2N1:
6364  p_string(f, 2, bp); bp += 2; break;
6365  case OP_EXACTMB2N2:
6366  p_string(f, 4, bp); bp += 4; break;
6367  case OP_EXACTMB2N3:
6368  p_string(f, 6, bp); bp += 6; break;
6369  case OP_EXACTMB2N:
6370  GET_LENGTH_INC(len, bp);
6371  p_len_string(f, len, 2, bp);
6372  bp += len * 2;
6373  break;
6374  case OP_EXACTMB3N:
6375  GET_LENGTH_INC(len, bp);
6376  p_len_string(f, len, 3, bp);
6377  bp += len * 3;
6378  break;
6379  case OP_EXACTMBN:
6380  {
6381  int mb_len;
6382 
6383  GET_LENGTH_INC(mb_len, bp);
6384  GET_LENGTH_INC(len, bp);
6385  fprintf(f, ":%d:%d:", mb_len, len);
6386  n = len * mb_len;
6387  while (n-- > 0) { fputc(*bp++, f); }
6388  }
6389  break;
6390 
6391  case OP_EXACT1_IC:
6392  len = enclen(enc, bp, bpend);
6393  p_string(f, len, bp);
6394  bp += len;
6395  break;
6396  case OP_EXACTN_IC:
6397  GET_LENGTH_INC(len, bp);
6398  p_len_string(f, len, 1, bp);
6399  bp += len;
6400  break;
6401 
6402  case OP_CCLASS:
6403  n = bitset_on_num((BitSetRef )bp);
6404  bp += SIZE_BITSET;
6405  fprintf(f, ":%d", n);
6406  break;
6407 
6408  case OP_CCLASS_NOT:
6409  n = bitset_on_num((BitSetRef )bp);
6410  bp += SIZE_BITSET;
6411  fprintf(f, ":%d", n);
6412  break;
6413 
6414  case OP_CCLASS_MB:
6415  case OP_CCLASS_MB_NOT:
6416  GET_LENGTH_INC(len, bp);
6417  q = bp;
6418 # ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6419  ALIGNMENT_RIGHT(q);
6420 # endif
6421  GET_CODE_POINT(code, q);
6422  bp += len;
6423  fprintf(f, ":%d:%d", (int )code, len);
6424  break;
6425 
6426  case OP_CCLASS_MIX:
6427  case OP_CCLASS_MIX_NOT:
6428  n = bitset_on_num((BitSetRef )bp);
6429  bp += SIZE_BITSET;
6430  GET_LENGTH_INC(len, bp);
6431  q = bp;
6432 # ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6433  ALIGNMENT_RIGHT(q);
6434 # endif
6435  GET_CODE_POINT(code, q);
6436  bp += len;
6437  fprintf(f, ":%d:%d:%d", n, (int )code, len);
6438  break;
6439 
6440  case OP_BACKREFN_IC:
6441  mem = *((MemNumType* )bp);
6442  bp += SIZE_MEMNUM;
6443  fprintf(f, ":%d", mem);
6444  break;
6445 
6446  case OP_BACKREF_MULTI_IC:
6447  case OP_BACKREF_MULTI:
6448  fputs(" ", f);
6449  GET_LENGTH_INC(len, bp);
6450  for (i = 0; i < len; i++) {
6451  GET_MEMNUM_INC(mem, bp);
6452  if (i > 0) fputs(", ", f);
6453  fprintf(f, "%d", mem);
6454  }
6455  break;
6456 
6457  case OP_BACKREF_WITH_LEVEL:
6458  {
6459  OnigOptionType option;
6460  LengthType level;
6461 
6462  GET_OPTION_INC(option, bp);
6463  fprintf(f, ":%d", option);
6464  GET_LENGTH_INC(level, bp);
6465  fprintf(f, ":%d", level);
6466 
6467  fputs(" ", f);
6468  GET_LENGTH_INC(len, bp);
6469  for (i = 0; i < len; i++) {
6470  GET_MEMNUM_INC(mem, bp);
6471  if (i > 0) fputs(", ", f);
6472  fprintf(f, "%d", mem);
6473  }
6474  }
6475  break;
6476 
6477  case OP_REPEAT:
6478  case OP_REPEAT_NG:
6479  {
6480  mem = *((MemNumType* )bp);
6481  bp += SIZE_MEMNUM;
6482  addr = *((RelAddrType* )bp);
6483  bp += SIZE_RELADDR;
6484  fprintf(f, ":%d:%d", mem, addr);
6485  }
6486  break;
6487 
6489  case OP_PUSH_IF_PEEK_NEXT:
6490  addr = *((RelAddrType* )bp);
6491  bp += SIZE_RELADDR;
6492  fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6493  p_string(f, 1, bp);
6494  bp += 1;
6495  break;
6496 
6497  case OP_LOOK_BEHIND:
6498  GET_LENGTH_INC(len, bp);
6499  fprintf(f, ":%d", len);
6500  break;
6501 
6503  GET_RELADDR_INC(addr, bp);
6504  GET_LENGTH_INC(len, bp);
6505  fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
6506  break;
6507 
6508  case OP_STATE_CHECK_PUSH:
6510  scn = *((StateCheckNumType* )bp);
6511  bp += SIZE_STATE_CHECK_NUM;
6512  addr = *((RelAddrType* )bp);
6513  bp += SIZE_RELADDR;
6514  fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
6515  break;
6516 
6517  case OP_CONDITION:
6518  GET_MEMNUM_INC(mem, bp);
6519  GET_RELADDR_INC(addr, bp);
6520  fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
6521  break;
6522 
6523  default:
6524  fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6525  bp[-1]);
6526  }
6527  }
6528  fputs("]", f);
6529  if (nextp) *nextp = bp;
6530 }
6531 
6532 # ifdef ONIG_DEBUG_COMPILE
6533 static void
6534 print_compiled_byte_code_list(FILE* f, regex_t* reg)
6535 {
6536  int ncode;
6537  UChar* bp = reg->p;
6538  UChar* end = reg->p + reg->used;
6539 
6540  fprintf(f, "code length: %d", reg->used);
6541 
6542  ncode = -1;
6543  while (bp < end) {
6544  ncode++;
6545  if (ncode % 5 == 0)
6546  fprintf(f, "\n%ld:", bp - reg->p);
6547  else
6548  fprintf(f, " %ld:", bp - reg->p);
6549  onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6550  }
6551 
6552  fprintf(f, "\n");
6553 }
6554 # endif /* ONIG_DEBUG_COMPILE */
6555 
6556 # ifdef ONIG_DEBUG_PARSE_TREE
6557 static void
6558 print_indent_tree(FILE* f, Node* node, int indent)
6559 {
6560  int i, type, container_p = 0;
6561  int add = 3;
6562  UChar* p;
6563 
6564  Indent(f, indent);
6565  if (IS_NULL(node)) {
6566  fprintf(f, "ERROR: null node!!!\n");
6567  exit (0);
6568  }
6569 
6570  type = NTYPE(node);
6571  switch (type) {
6572  case NT_LIST:
6573  case NT_ALT:
6574  if (NTYPE(node) == NT_LIST)
6575  fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
6576  else
6577  fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
6578 
6579  print_indent_tree(f, NCAR(node), indent + add);
6580  while (IS_NOT_NULL(node = NCDR(node))) {
6581  if (NTYPE(node) != type) {
6582  fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6583  exit(0);
6584  }
6585  print_indent_tree(f, NCAR(node), indent + add);
6586  }
6587  break;
6588 
6589  case NT_STR:
6590  fprintf(f, "<string%s:%"PRIxPTR">",
6591  (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
6592  for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6593  if (*p >= 0x20 && *p < 0x7f)
6594  fputc(*p, f);
6595  else {
6596  fprintf(f, " 0x%02x", *p);
6597  }
6598  }
6599  break;
6600 
6601  case NT_CCLASS:
6602  fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
6603  if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
6604  if (NCCLASS(node)->mbuf) {
6605  BBuf* bbuf = NCCLASS(node)->mbuf;
6606  OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6607  OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6608  fprintf(f, "%d", *data++);
6609  for (; data < end; data+=2) {
6610  fprintf(f, ",");
6611  fprintf(f, "%04x-%04x", data[0], data[1]);
6612  }
6613  }
6614  break;
6615 
6616  case NT_CTYPE:
6617  fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
6618  switch (NCTYPE(node)->ctype) {
6619  case ONIGENC_CTYPE_WORD:
6620  if (NCTYPE(node)->not != 0)
6621  fputs("not word", f);
6622  else
6623  fputs("word", f);
6624  break;
6625 
6626  default:
6627  fprintf(f, "ERROR: undefined ctype.\n");
6628  exit(0);
6629  }
6630  break;
6631 
6632  case NT_CANY:
6633  fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
6634  break;
6635 
6636  case NT_ANCHOR:
6637  fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
6638  switch (NANCHOR(node)->type) {
6639  case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6640  case ANCHOR_END_BUF: fputs("end buf", f); break;
6641  case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6642  case ANCHOR_END_LINE: fputs("end line", f); break;
6643  case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6644  case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6645 
6646  case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
6647  case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
6648 # ifdef USE_WORD_BEGIN_END
6649  case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6650  case ANCHOR_WORD_END: fputs("word end", f); break;
6651 # endif
6652  case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
6653  case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
6654  case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
6655  case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6656  case ANCHOR_KEEP: fputs("keep",f); break;
6657 
6658  default:
6659  fprintf(f, "ERROR: undefined anchor type.\n");
6660  break;
6661  }
6662  break;
6663 
6664  case NT_BREF:
6665  {
6666  int* p;
6667  BRefNode* br = NBREF(node);
6668  p = BACKREFS_P(br);
6669  fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
6670  for (i = 0; i < br->back_num; i++) {
6671  if (i > 0) fputs(", ", f);
6672  fprintf(f, "%d", p[i]);
6673  }
6674  }
6675  break;
6676 
6677 # ifdef USE_SUBEXP_CALL
6678  case NT_CALL:
6679  {
6680  CallNode* cn = NCALL(node);
6681  fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
6682  p_string(f, cn->name_end - cn->name, cn->name);
6683  }
6684  break;
6685 # endif
6686 
6687  case NT_QTFR:
6688  fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
6689  NQTFR(node)->lower, NQTFR(node)->upper,
6690  (NQTFR(node)->greedy ? "" : "?"));
6691  print_indent_tree(f, NQTFR(node)->target, indent + add);
6692  break;
6693 
6694  case NT_ENCLOSE:
6695  fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
6696  switch (NENCLOSE(node)->type) {
6697  case ENCLOSE_OPTION:
6698  fprintf(f, "option:%d", NENCLOSE(node)->option);
6699  break;
6700  case ENCLOSE_MEMORY:
6701  fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6702  break;
6704  fprintf(f, "stop-bt");
6705  break;
6706  case ENCLOSE_CONDITION:
6707  fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6708  break;
6709  case ENCLOSE_ABSENT:
6710  fprintf(f, "absent");
6711  break;
6712 
6713  default:
6714  break;
6715  }
6716  fprintf(f, "\n");
6717  print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6718  break;
6719 
6720  default:
6721  fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6722  break;
6723  }
6724 
6725  if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6726  type != NT_ENCLOSE)
6727  fprintf(f, "\n");
6728 
6729  if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6730 
6731  fflush(f);
6732 }
6733 
6734 static void
6735 print_tree(FILE* f, Node* node)
6736 {
6737  print_indent_tree(f, node, 0);
6738 }
6739 # endif /* ONIG_DEBUG_PARSE_TREE */
6740 #endif /* ONIG_DEBUG */
unsigned int OnigOptionType
Definition: onigmo.h:445
#define SIZE_OP_SET_OPTION_PUSH
Definition: regint.h:719
#define ANCHOR_ANYCHAR_STAR_ML
Definition: regint.h:544
#define SIZE_OP_MEMORY_END_PUSH_REC
Definition: regint.h:724
#define IS_DYNAMIC_OPTION(option)
Definition: regint.h:403
#define NSTRING_SET_AMBIG(node)
Definition: regparse.h:111
#define BIT_STATUS_AT(stats, n)
Definition: regint.h:357
#define IS_ENCLOSE_CALLED(en)
Definition: regparse.h:147
void onig_scan_env_set_error_string(ScanEnv *env, int ecode ARG_UNUSED, UChar *arg, UChar *arg_end)
Definition: regparse.c:7040
#define OPT_EXACT_MAXLEN
Definition: regint.h:90
int is_refered
Definition: regparse.h:189
ONIG_EXTERN int onigenc_init(void)
Definition: regenc.c:36
#define IS_NULL(p)
Definition: regint.h:298
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
Definition: onigmo.h:367
int onig_compile_ruby(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigErrorInfo *einfo, const char *sourcefile, int sourceline)
Definition: regcomp.c:5707
#define IN_RECCALL
Definition: regcomp.c:3859
#define ENCLOSE_MEMORY
Definition: regparse.h:94
unsigned int OnigCodePoint
Definition: onigmo.h:80
unsigned int alloc
Definition: regint.h:444
#define ONIGENC_MBC_MAXLEN(enc)
Definition: onigmo.h:362
int onig_new(regex_t **reg, const UChar *pattern, const UChar *pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType *syntax, OnigErrorInfo *einfo)
Definition: regcomp.c:5970
int * int_map_backward
Definition: onigmo.h:788
#define IS_REPEAT_INFINITE(n)
Definition: regint.h:409
UChar * end
Definition: regparse.h:173
#define ALLOWED_ANCHOR_IN_LB_NOT
int onig_node_str_cat(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1376
int regnum
Definition: regparse.h:199
Node * onig_node_list_add(Node *list, Node *x)
Definition: regparse.c:1192
#define IS_SYNTAX_BV(syn, bvm)
Definition: regparse.h:332
struct _Node * target
Definition: regparse.h:229
#define NSTRING_IS_RAW(node)
Definition: regparse.h:114
#define WORD_ALIGNMENT_SIZE
Definition: regint.h:321
#define BIT_STATUS_ON_AT(stats, n)
Definition: regint.h:360
#define ONIG_OPTIMIZE_EXACT
Definition: regint.h:343
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION
#define ANCHOR_END_BUF
Definition: regint.h:530
#define SIZE_OPCODE
Definition: regint.h:675
#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS
Definition: onigmo.h:693
#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN
Definition: onigmo.h:662
#define BBUF_ADD1(buf, byte)
Definition: regint.h:492
#define ONIGERR_INVALID_BACKREF
Definition: onigmo.h:674
#define d1
#define ANCHOR_WORD_BEGIN
Definition: regint.h:536
#define SINGLE_BYTE_SIZE
Definition: regint.h:413
#define SIZE_MEMNUM
Definition: regint.h:679
#define IS_ENCLOSE_RECURSION(en)
Definition: regparse.h:149
#define SIZE_OP_REPEAT_INC
Definition: regint.h:712
int num_call
Definition: regparse.h:307
#define NSTRING_IS_AMBIG(node)
Definition: regparse.h:115
#define BBUF_WRITE(buf, pos, bytes, n)
Definition: regint.h:477
MinMaxLen len
Definition: regcomp.c:4375
OnigUChar * par_end
Definition: onigmo.h:737
int sourceline
Definition: regparse.h:325
unsigned int bt_mem_end
Definition: onigmo.h:764
#define NST_CALLED
Definition: regparse.h:136
#define IS_ENCLOSE_MAX_FIXED(en)
Definition: regparse.h:153
#define ONIGENC_IS_MBC_ASCII_WORD(enc, s, end)
Definition: onigmo.h:324
#define SCANENV_MEM_NODES(senv)
Definition: regparse.h:286
size_t onig_memsize(const regex_t *reg)
Definition: regcomp.c:5651
#define IS_CODE_SB_WORD(enc, code)
Definition: regint.h:876
#define ONIG_IS_OPTION_ON(options, option)
Definition: onigmo.h:476
#define SIZE_OP_ABSENT_END
Definition: regint.h:739
int onig_names_free(regex_t *reg)
Definition: regparse.c:525
#define ANCHOR_BEGIN_LINE
Definition: regint.h:528
OnigPosition * end
Definition: onigmo.h:718
int onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
Definition: regcomp.c:5992
const int id
Definition: nkf.c:209
#define ONIGENC_CASE_FOLD_DEFAULT
Definition: onigmo.h:131
#define SIZE_OP_CALL
Definition: regint.h:734
int onig_is_in_code_range(const UChar *p, OnigCodePoint code)
Definition: regcomp.c:6070
#define QUANTIFIER_EXPAND_LIMIT_SIZE
Definition: regcomp.c:721
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, s, end)
Definition: onigmo.h:334
int nest_level
Definition: regparse.h:241
UChar * error
Definition: regparse.h:301
int onig_node_str_set(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1412
MinMaxLen mmd
Definition: regcomp.c:4367
OnigCaseFoldType onig_get_default_case_fold_flag(void)
Definition: regcomp.c:36
#define SIZE_OP_POP_POS
Definition: regint.h:716
#define IS_NCCLASS_NOT(nd)
Definition: regint.h:796
int reach_end
Definition: regcomp.c:4360
#define GET_CHAR_LEN_VARLEN
Definition: regcomp.c:2410
#define NENCLOSE(node)
Definition: regparse.h:81
void onig_add_end_call(void(*func)(void))
Definition: regcomp.c:6022
#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
Definition: onigmo.h:675
OptExactInfo exm
Definition: regcomp.c:4379
int target_empty_info
Definition: regparse.h:186
if(len<=MAX_WORD_LENGTH &&len >=MIN_WORD_LENGTH)
Definition: zonetab.h:883
UChar * s
Definition: regparse.h:172
#define IS_ENCLOSE_MIN_FIXED(en)
Definition: regparse.h:152
OnigDistance anchor_dmax
Definition: onigmo.h:782
#define ANCHOR_END_LINE
Definition: regint.h:532
OnigRegexType regex_t
Definition: onigmo.h:799
#define ANCHOR_PREC_READ
Definition: regint.h:538
int opt_count
Definition: regparse.h:207
UnsetAddrList * unset_addr_list
Definition: regparse.h:305
#define IS_BACKREF_NEST_LEVEL(bn)
Definition: regparse.h:164
#define NT_QTFR
Definition: regparse.h:43
#define SIZE_ABSADDR
Definition: regint.h:677
unsigned int OnigCaseFoldType
Definition: onigmo.h:95
struct _Node * next_head_exact
Definition: regparse.h:188
#define CKN_ON
Definition: regcomp.c:722
#define SIZE_OP_ABSENT
Definition: regint.h:738
#define GET_CODE_POINT(code, p)
Definition: regint.h:697
Node * onig_node_new_alt(Node *left, Node *right)
Definition: regparse.c:1210
#define NQ_TARGET_IS_EMPTY_MEM
Definition: regparse.h:124
void * PointerType
Definition: regint.h:673
unsigned char map[ONIG_CHAR_TABLE_SIZE]
Definition: onigmo.h:786
#define IS_CALL_RECURSION(cn)
Definition: regparse.h:161
#define BIT_STATUS_ON_AT_SIMPLE(stats, n)
Definition: regint.h:367
#define ALLOWED_ANCHOR_IN_LB
#define ONIG_OPTION_IGNORECASE
Definition: onigmo.h:451
#define REPEAT_RANGE_ALLOC
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL
Definition: onigmo.h:598
int onig_init(void)
Definition: regcomp.c:5998
#define ONIG_OPTIMIZE_EXACT_IC
Definition: regint.h:346
#define NST_MEM_BACKREFED
Definition: regparse.h:133
Definition: regint.h:624
#define BACKREFS_P(br)
Definition: regparse.h:119
#define GET_OPTION_INC(option, p)
Definition: regint.h:692
#define NST_RECURSION
Definition: regparse.h:135
unsigned char * p
Definition: onigmo.h:753
#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
Definition: onigmo.h:135
#define GET_LENGTH_INC(len, p)
Definition: regint.h:689
#define ANCHOR_BEGIN_POSITION
Definition: regint.h:529
UnsetAddr * us
Definition: regparse.h:220
OnigCaseFoldType case_fold_flag
Definition: regcomp.c:4347
#define RECURSION_INFINITE
Definition: regcomp.c:2886
regex_t * reg
Definition: regparse.h:303
#define ONIGENC_MBC_CASE_FOLD_MAXLEN
Definition: onigmo.h:290
#define NST_ADDR_FIXED
Definition: regparse.h:137
OptAncInfo anc
Definition: regcomp.c:4358
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
Definition: onigmo.h:594
int left_anchor
Definition: regcomp.c:4352
OnigDistance anchor_dmin
Definition: onigmo.h:781
UChar buf[NODE_STR_BUF_SIZE]
Definition: regparse.h:176
ONIG_EXTERN int onigenc_strlen(OnigEncoding enc, const OnigUChar *p, const OnigUChar *end)
#define ANCHOR_NOT_WORD_BOUND
Definition: regint.h:535
int num_named
Definition: regparse.h:310
unsigned int bt_mem_start
Definition: onigmo.h:763
MinMaxLen mmd
Definition: regcomp.c:4357
int char_len
Definition: regparse.h:206
unsigned int BitStatusType
Definition: regint.h:352
#define BIT_STATUS_ON_ALL(stats)
Definition: regint.h:356
unsigned char * exact
Definition: onigmo.h:784
OptAncInfo anc
Definition: regcomp.c:4377
#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
Definition: regint.h:706
#define SIZE_OP_FAIL
Definition: regint.h:720
#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
Definition: onigmo.h:685
#define SIZE_STATE_CHECK_NUM
Definition: regint.h:680
#define IS_ENCLOSE_CLEN_FIXED(en)
Definition: regparse.h:154
#define NBREF(node)
Definition: regparse.h:79
#define NT_ALT
Definition: regparse.h:47
#define IN_ALT
Definition: regcomp.c:3854
#define ALLOWED_ENCLOSE_IN_LB
void onig_free_body(regex_t *reg)
Definition: regcomp.c:5624
#define COMPILE_INIT_SIZE
#define BIT_STATUS_CLEAR(stats)
Definition: regint.h:355
#define SIZE_OP_MEMORY_START
Definition: regint.h:721
#define ONIGENC_CODE_TO_MBC_MAXLEN
Definition: onigmo.h:289
UnsetAddrList * unset_addr_list
Definition: regparse.h:230
#define SIZE_OPTION
Definition: regint.h:682
struct _Node * target
Definition: regparse.h:247
int group_num
Definition: regparse.h:226
#define NCAR(node)
Definition: regparse.h:86
#define COMP_EM_BASE
#define xalloca
Definition: regint.h:213
#define SIZE_OP_PUSH_STOP_BT
Definition: regint.h:727
Definition: regint.h:441
struct _Node * head_exact
Definition: regparse.h:187
#define ALLOWED_ENCLOSE_IN_LB_NOT
#define IS_IGNORECASE(option)
Definition: regint.h:383
#define SIZE_OP_CONDITION
Definition: regint.h:736
BitSet bs
Definition: regint.h:807
int state
Definition: regparse.h:237
#define ONIG_OPTION_CAPTURE_GROUP
Definition: onigmo.h:460
OnigRepeatRange * repeat_range
Definition: onigmo.h:770
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
Definition: onigmo.h:595
#define GET_MEMNUM_INC(num, p)
Definition: regint.h:690
#define STACK_POP_LEVEL_MEM_START
Definition: regint.h:338
int RelAddrType
Definition: regint.h:667
#define STACK_POP_LEVEL_ALL
Definition: regint.h:339
const char * sourcefile
Definition: regparse.h:324
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC
Definition: regint.h:349
int onig_bbuf_init(BBuf *buf, OnigDistance size)
Definition: regcomp.c:142
#define SIZE_OP_NULL_CHECK_START
Definition: regint.h:729
int capa
Definition: regparse.h:175
#define GET_CHAR_LEN_TOP_ALT_VARLEN
Definition: regcomp.c:2411
#define RECURSION_EXIST
Definition: regcomp.c:2885
OnigOptionType options
Definition: regcomp.c:4346
#define SIZE_OP_POP
Definition: regint.h:709
OnigCaseFoldType case_fold_flag
Definition: onigmo.h:775
#define MAX_NODE_OPT_INFO_REF_COUNT
Definition: regcomp.c:4949
OnigDistance min_len
Definition: regparse.h:204
int AbsAddrType
Definition: regint.h:668
#define ONIG_MAX_CAPTURE_HISTORY_GROUP
Definition: onigmo.h:700
#define NULL_NODE
Definition: regparse.h:283
#define level
#define BBUF_INIT(buf, size)
Definition: regint.h:447
#define ONIGENC_MBC_CASE_FOLD(enc, flag, pp, end, buf)
Definition: onigmo.h:332
#define SET_ENCLOSE_STATUS(node, f)
Definition: regparse.h:144
const OnigSyntaxType * syntax
Definition: regparse.h:294
#define ONIGERR_INVALID_ARGUMENT
Definition: onigmo.h:640
#define ONIGENC_IS_CODE_WORD(enc, code)
Definition: onigmo.h:400
int num_comb_exp_check
Definition: onigmo.h:760
return
Definition: zonetab.h:899
#define ARG_UNUSED
Definition: nkf.h:181
#define IS_MULTILINE(option)
Definition: regint.h:382
#define ALIGNMENT_RIGHT(addr)
Definition: regint.h:329
#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag)
Definition: regint.h:405
OptAncInfo anc
Definition: regcomp.c:4368
#define add(x, y)
Definition: date_strftime.c:23
#define SIZE_OP_PUSH_LOOK_BEHIND_NOT
Definition: regint.h:732
#define NQ_TARGET_IS_EMPTY_REC
Definition: regparse.h:125
#define BITSET_AT(bs, pos)
Definition: regint.h:435
#define ONIGERR_TYPE_BUG
Definition: onigmo.h:630
#define ONIGERR_UNDEFINED_NAME_REFERENCE
Definition: onigmo.h:682
int * back_dynamic
Definition: regparse.h:240
int lower
Definition: regparse.h:183
#define NCCLASS(node)
Definition: regparse.h:77
OnigCaseFoldType OnigDefaultCaseFoldFlag
Definition: regcomp.c:33
#define xmemcpy
Definition: regint.h:202
#define CHECK_NULL_RETURN_MEMERR(p)
Definition: regint.h:301
Node * onig_node_new_enclose(int type)
Definition: regparse.c:1347
#define NCTYPE(node)
Definition: regparse.h:78
int onig_reg_init(regex_t *reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType *syntax)
Definition: regcomp.c:5912
#define ONIG_OPTIMIZE_MAP
Definition: regint.h:347
#define ONIG_INFINITE_DISTANCE
Definition: onigmo.h:85
#define SIZE_OP_MEMORY_END_PUSH
Definition: regint.h:723
#define SIZE_OP_RETURN
Definition: regint.h:735
#define NQ_TARGET_IS_EMPTY
Definition: regparse.h:123
#define ONIGENC_MBC_MINLEN(enc)
Definition: onigmo.h:364
size_t onig_region_memsize(const OnigRegion *regs)
Definition: regcomp.c:5666
#define ONIG_OPTION_DONT_CAPTURE_GROUP
Definition: onigmo.h:459
int stack_pop_level
Definition: onigmo.h:765
int onig_compile(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigErrorInfo *einfo)
Definition: regcomp.c:5698
short int StateCheckNumType
Definition: regint.h:672
#define NT_LIST
Definition: regparse.h:46
struct re_pattern_buffer * chain
Definition: onigmo.h:793
int err
Definition: win32.c:135
#define NT_ANCHOR
Definition: regparse.h:45
#define ANCHOR_WORD_END
Definition: regint.h:537
#define IN_REPEAT
Definition: regcomp.c:3856
#define ONIG_OPTION_NEGATE_SINGLELINE
Definition: onigmo.h:458
#define FOUND_CALLED_NODE
#define SIZE_OP_ANYCHAR_STAR
Definition: regint.h:705
#define ALLOWED_TYPE_IN_LB
#define UChar
Definition: onigmo.h:76
#define ONIGERR_UNDEFINED_GROUP_REFERENCE
Definition: onigmo.h:683
#define GET_ALIGNMENT_PAD_SIZE(addr, pad_size)
Definition: regint.h:323
#define SIZE_LENGTH
Definition: regint.h:678
int upper
Definition: regparse.h:184
#define NST_CLEN_FIXED
Definition: regparse.h:130
#define numberof(array)
Definition: etc.c:618
#define EXPAND_STRING_MAX_LENGTH
int onig_new_without_alloc(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType *syntax, OnigErrorInfo *einfo)
Definition: regcomp.c:5956
#define ONIG_OPTION_SINGLELINE
Definition: onigmo.h:455
#define ONIGENC_CTYPE_WORD
Definition: onigmo.h:306
#define SIZE_OP_MEMORY_END
Definition: regint.h:725
#define ONIG_OPTIMIZE_EXACT_BM_IC
Definition: regint.h:348
#define SIZE_OP_MEMORY_START_PUSH
Definition: regint.h:722
#define NT_STR
Definition: regparse.h:38
int type
Definition: regparse.h:246
#define NQTFR(node)
Definition: regparse.h:80
#define ONIG_CHAR_TABLE_SIZE
Definition: onigmo.h:749
#define SIZE_BITSET
Definition: regint.h:425
#define TRUE
Definition: nkf.h:175
struct _Node * target
Definition: regparse.h:214
MinMaxLen mmd
Definition: regcomp.c:4344
#define IS_ENCLOSE_NAME_REF(en)
Definition: regparse.h:158
Bits * BitSetRef
Definition: regint.h:423
#define SIZE_OP_PUSH_IF_PEEK_NEXT
Definition: regint.h:711
OnigDistance max_len
Definition: regparse.h:205
unsigned int flag
Definition: regparse.h:174
#define NST_MARK2
Definition: regparse.h:132
#define NT_CANY
Definition: regparse.h:41
UChar * name_end
Definition: regparse.h:228
OnigEncoding enc
Definition: regcomp.c:4345
#define ONIGENC_MBC_TO_CODE(enc, p, end)
Definition: onigmo.h:366
BBuf * mbuf
Definition: regint.h:808
int LengthType
Definition: regint.h:669
#define ONIGENC_CASE_FOLD_MIN
Definition: onigmo.h:130
OptMapInfo map
Definition: regcomp.c:4382
#define ANCHOR_END_BUF_MASK
Definition: regparse.h:92
int right_anchor
Definition: regcomp.c:4353
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4309
#define IS_QUANTIFIER_IN_REPEAT(qn)
Definition: regparse.h:165
int onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:6114
#define SIZE_OP_JUMP
Definition: regint.h:707
#define NTYPE(node)
Definition: regparse.h:69
#define ANCHOR_LOOK_BEHIND_NOT
Definition: regint.h:541
int state
Definition: regparse.h:181
BitStatusType backrefed_mem
Definition: regparse.h:298
int ignore_case
Definition: regcomp.c:4361
const char * name
Definition: onigmo.h:162
#define NCALL(node)
Definition: regparse.h:84
#define SIZE_OP_NULL_CHECK_END
Definition: regint.h:730
size_t OnigDistance
Definition: onigmo.h:82
OnigEncoding enc
Definition: onigmo.h:735
void onig_transfer(regex_t *to, regex_t *from)
int num_mem
Definition: regparse.h:308
OnigDistance max
Definition: regcomp.c:4340
Node * onig_node_new_list(Node *left, Node *right)
Definition: regparse.c:1186
int intptr_t
Definition: win32.h:90
#define NSTRING_IS_DONT_GET_OPT_INFO(node)
Definition: regparse.h:116
#define IS_ENCLOSE_ADDR_FIXED(en)
Definition: regparse.h:148
#define IS_BACKREF_NAME_REF(bn)
Definition: regparse.h:163
#define NT_CCLASS
Definition: regparse.h:39
#define SIZE_OP_POP_STOP_BT
Definition: regint.h:728
#define ANCHOR_SEMI_END_BUF
Definition: regint.h:531
#define ONIG_OPTIMIZE_NONE
Definition: regint.h:342
unsigned int used
Definition: onigmo.h:754
Node * onig_node_new_str(const UChar *s, const UChar *end)
Definition: regparse.c:1481
OnigPosition * beg
Definition: onigmo.h:717
Definition: regint.h:551
#define IS_ENCLOSE_MARK1(en)
Definition: regparse.h:150
UChar * error_end
Definition: regparse.h:302
#define SIZE_OP_FAIL_LOOK_BEHIND_NOT
Definition: regint.h:733
Node ** mem_nodes_dynamic
Definition: regparse.h:314
UChar s[OPT_EXACT_MAXLEN]
Definition: regcomp.c:4363
BitStatusType bt_mem_end
Definition: regparse.h:297
#define ENCLOSE_STOP_BACKTRACK
Definition: regparse.h:96
#define USE_SUBEXP_CALL
Definition: regint.h:70
OnigEncoding enc
Definition: regparse.h:293
void onig_node_free(Node *node)
Definition: regparse.c:1062
register unsigned int len
Definition: zonetab.h:51
#define NANCHOR(node)
Definition: regparse.h:82
#define NST_STOP_BT_SIMPLE_REPEAT
Definition: regparse.h:134
#define ANCHOR_ANYCHAR_STAR
Definition: regint.h:543
#define NT_BREF
Definition: regparse.h:42
#define SIZE_RELADDR
Definition: regint.h:676
OnigDistance min
Definition: regcomp.c:4339
#define NST_IN_REPEAT
Definition: regparse.h:140
#define IS_NEED_STR_LEN_OP_EXACT(op)
Definition: regcomp.c:315
#define STACK_POP_LEVEL_FREE
Definition: regint.h:337
#define CHECK_NULL_RETURN(p)
Definition: regint.h:300
#define SIZE_OP_FAIL_POS
Definition: regint.h:717
unsigned char * exact_end
Definition: onigmo.h:785
OnigDistance dmax
Definition: onigmo.h:790
int size
Definition: encoding.c:57
#define f
unsigned int used
Definition: regint.h:443
#define SIZE_OP_SET_OPTION
Definition: regint.h:718
#define GET_RELADDR_INC(addr, p)
Definition: regint.h:687
unsigned int alloc
Definition: onigmo.h:755
#define NST_MARK1
Definition: regparse.h:131
#define xmalloc
Definition: defines.h:183
#define ANCHOR_PREC_READ_NOT
Definition: regint.h:539
#define ENCLOSE_CONDITION
Definition: regparse.h:97
#define IN_NOT
Definition: regcomp.c:3855
void onig_reduce_nested_quantifier(Node *pnode, Node *cnode)
Definition: regparse.c:2204
#define ENCLOSE_OPTION
Definition: regparse.h:95
#define ONIGENC_MBC_MAXLEN_DIST(enc)
Definition: onigmo.h:363
#define NST_MIN_FIXED
Definition: regparse.h:128
unsigned int capture_history
Definition: onigmo.h:762
#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en)
Definition: regparse.h:155
OptExactInfo exb
Definition: regcomp.c:4378
#define IS_FIND_CONDITION(option)
Definition: regint.h:387
#define ONIGENC_CODE_TO_MBC(enc, code, buf)
Definition: onigmo.h:368
UChar * p
Definition: regint.h:442
#define NSTR(node)
Definition: regparse.h:76
#define ENCLOSE_ABSENT
Definition: regparse.h:98
#define SIZE_OP_MEMORY_END_REC
Definition: regint.h:726
OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN]
Definition: onigmo.h:146
#define ANCHOR_ANYCHAR_STAR_MASK
Definition: regparse.h:91
OptExactInfo expr
Definition: regcomp.c:4380
#define ONIGERR_NEVER_ENDING_RECURSION
Definition: onigmo.h:686
OnigDistance dmin
Definition: onigmo.h:789
OnigOptionType option
Definition: regparse.h:291
#define CLEAR_ENCLOSE_STATUS(node, f)
Definition: regparse.h:145
struct _Node * target
Definition: regparse.h:182
#define NTYPE2BIT(type)
Definition: regparse.h:51
#define SIZE_POINTER
Definition: regint.h:684
int greedy
Definition: regparse.h:185
int value
Definition: regcomp.c:4370
#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, acs)
Definition: onigmo.h:340
int onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:6091
int onig_parse_make_tree(Node **root, const UChar *pattern, const UChar *end, regex_t *reg, ScanEnv *env)
Definition: regparse.c:7013
#define NT_CALL
Definition: regparse.h:48
#define xrealloc
Definition: defines.h:186
#define IN_CALL
Definition: regcomp.c:3858
#define SIZE_OP_PUSH_POS
Definition: regint.h:714
#define NT_CTYPE
Definition: regparse.h:40
#define NST_MAX_FIXED
Definition: regparse.h:129
#define SIZE_OP_PUSH_ABSENT_POS
Definition: regint.h:737
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV
Definition: regint.h:345
#define ANCHOR_BEGIN_BUF
Definition: regint.h:527
#define ONIGENC_IS_MBC_WORD(enc, s, end)
Definition: onigmo.h:322
#define IN_VAR_REPEAT
Definition: regcomp.c:3857
OnigEncoding enc
Definition: onigmo.h:772
#define SIZE_OP_PUSH_OR_JUMP_EXACT1
Definition: regint.h:710
#define IS_NOT_NULL(p)
Definition: regint.h:299
ONIG_EXTERN int onig_name_to_group_numbers(OnigRegex reg, const OnigUChar *name, const OnigUChar *name_end, int **nums)
#define ONIGENC_IS_UNDEF(enc)
Definition: onigmo.h:317
#define IS_NODE_TYPE_SIMPLE(type)
Definition: regparse.h:65
int onig_end(void)
Definition: regcomp.c:6052
#define ANCHOR_LOOK_BEHIND
Definition: regint.h:540
short int MemNumType
Definition: regint.h:671
#define IS_ENCLOSE_MARK2(en)
Definition: regparse.h:151
#define NSTRING_LEN(node)
Definition: regparse.h:108
#define IS_ENCLOSE_NAMED_GROUP(en)
Definition: regparse.h:157
int char_len
Definition: regparse.h:248
UChar * name
Definition: regparse.h:227
struct _Node * target
Definition: regparse.h:202
#define ONIGERR_MEMORY
Definition: onigmo.h:629
int num_null_check
Definition: onigmo.h:759
UChar map[ONIG_CHAR_TABLE_SIZE]
Definition: regcomp.c:4371
void void xfree(void *)
BitStatusType bt_mem_start
Definition: regparse.h:296
#define NSTRING_SET_DONT_GET_OPT_INFO(node)
Definition: regparse.h:112
#define SIZE_OP_PUSH_POS_NOT
Definition: regint.h:715
ScanEnv * scan_env
Definition: regcomp.c:4348
#define ONIGENC_IS_CODE_PRINT(enc, code)
Definition: onigmo.h:378
#define ANCHOR_WORD_BOUND
Definition: regint.h:534
int ascii_range
Definition: regparse.h:249
OnigOptionType option
Definition: regparse.h:200
int allocated
Definition: onigmo.h:715
#define ANCHOR_KEEP
Definition: regint.h:546
AbsAddrType call_addr
Definition: regparse.h:201
#define ONIG_OPTIMIZE_EXACT_BM
Definition: regint.h:344
OnigOptionType options
Definition: onigmo.h:768
#define env
void(* func)(void)
Definition: regint.h:881
int onig_renumber_name_table(regex_t *reg, GroupNumRemap *map)
Definition: regparse.c:611
#define PRIdPTR
Definition: regint.h:275
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SET
Definition: onigmo.h:637
#define NULL
Definition: _sdbm.c:102
#define SIZE_OP_PUSH
Definition: regint.h:708
void onig_free(regex_t *reg)
Definition: regcomp.c:5641
int back_num
Definition: regparse.h:238
#define NCDR(node)
Definition: regparse.h:87
struct OnigEndCallListItem * next
Definition: regint.h:880
OnigOptionType options
Definition: onigmo.h:483
#define GET_ABSADDR_INC(addr, p)
Definition: regint.h:688
int repeat_range_alloc
Definition: onigmo.h:766
#define SET_CALL_RECURSION(node)
Definition: regparse.h:160
#define bp()
Definition: vm_debug.h:25
#define BBUF_GET_OFFSET_POS(buf)
Definition: regint.h:494
#define SET_NTYPE(node, ntype)
Definition: regparse.h:70
#define ONIGERR_PARSER_BUG
Definition: onigmo.h:631
#define REGEX_TRANSFER(to, from)
Definition: regcomp.c:5675
#define PRIuPTR
Definition: regint.h:276
int offset
Definition: regparse.h:213
#define NT_ENCLOSE
Definition: regparse.h:44
#define ONIGERR_INVALID_CONDITION_PATTERN
Definition: onigmo.h:664
int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
Definition: regcomp.c:42
#define PRIxPTR
Definition: regint.h:277
OnigUChar * par
Definition: onigmo.h:736
#define enclen(enc, p, e)
Definition: regenc.h:93
#define BBUF_GET_ADD_ADDRESS(buf)
Definition: regint.h:493
#define SIZE_OP_LOOK_BEHIND
Definition: regint.h:731
#define BBUF_ADD(buf, bytes, n)
Definition: regint.h:491
BitStatusType capture_history
Definition: regparse.h:295
#define BITSET_SIZE
Definition: regint.h:415
int back_static[NODE_BACKREFS_SIZE]
Definition: regparse.h:239
Node * onig_node_new_anchor(int type)
Definition: regparse.c:1222