Ruby 2.5.0dev (2017-10-22, revision 60238)
regparse.c
Go to the documentation of this file.
1 /**********************************************************************
2  regparse.c - Onigmo (Oniguruma-mod) (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6  * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include "regparse.h"
32 #include <stdarg.h>
33 
34 #define WARN_BUFSIZE 256
35 
36 #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
37 
38 
55 #ifndef RUBY
57 #endif
63  , ( SYN_GNU_REGEX_BV |
74  ,
75  {
76  (OnigCodePoint )'\\' /* esc */
77  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
78  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
79  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
80  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
81  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
82  }
83 };
84 
86 
87 extern void onig_null_warn(const char* s ARG_UNUSED) { }
88 
/* Current warning callback. Starts as DEFAULT_WARN_FUNCTION when the build
 * defines it, otherwise as the no-op onig_null_warn. */
89 #ifdef DEFAULT_WARN_FUNCTION
90 static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
91 #else
92 static OnigWarnFunc onig_warn = onig_null_warn;
93 #endif
94 
/* Current "verbose" warning callback; same selection rule using
 * DEFAULT_VERB_WARN_FUNCTION. */
95 #ifdef DEFAULT_VERB_WARN_FUNCTION
96 static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
97 #else
98 static OnigWarnFunc onig_verb_warn = onig_null_warn;
99 #endif
100 
102 {
103  onig_warn = f;
104 }
105 
107 {
108  onig_verb_warn = f;
109 }
110 
111 static void CC_DUP_WARN(ScanEnv *env);
112 
113 
114 static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
115 
116 extern unsigned int
118 {
119  return ParseDepthLimit;
120 }
121 
122 extern int
123 onig_set_parse_depth_limit(unsigned int depth)
124 {
125  if (depth == 0)
126  ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
127  else
128  ParseDepthLimit = depth;
129  return 0;
130 }
131 
132 
133 static void
134 bbuf_free(BBuf* bbuf)
135 {
136  if (IS_NOT_NULL(bbuf)) {
137  if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
138  xfree(bbuf);
139  }
140 }
141 
142 static int
143 bbuf_clone(BBuf** rto, BBuf* from)
144 {
145  int r;
146  BBuf *to;
147 
148  *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
150  r = BBUF_INIT(to, from->alloc);
151  if (r != 0) return r;
152  to->used = from->used;
153  xmemcpy(to->p, from->p, from->used);
154  return 0;
155 }
156 
/* Evaluates to (env)->num_mem + 1 + rel_no.
 * NOTE(review): presumably rel_no is a negative relative group number
 * counted back from the next group to be opened -- confirm at call sites. */
157 #define BACKREF_REL_TO_ABS(rel_no, env) \
158  ((env)->num_mem + 1 + (rel_no))
159 
/* Clear the bits `f` in `v` when `negative` is non-zero, otherwise set them.
 * The whole expansion is parenthesized so the macro keeps its meaning when
 * used inside a larger expression. */
#define ONOFF(v,f,negative) ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
161 
/* First code point of the "multibyte" range: 0 when the encoding's minimum
 * character length is greater than one byte, otherwise 0x80. */
162 #define MBCODE_START_POS(enc) \
163  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
164 
/* Adds the range [MBCODE_START_POS(enc), ONIG_LAST_CODE_POINT] to pbuf.
 * Uses a variable named `env` that must exist in the caller's scope. */
165 #define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
166  add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ONIG_LAST_CODE_POINT)
167 
/* For non-single-byte encodings, applies SET_ALL_MULTI_BYTE_RANGE to &mbuf.
 * Assigns to a caller-scope variable `r` and RETURNS from the enclosing
 * function with that value when it is non-zero. */
168 #define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
169  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
170  r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
171  if (r) return r;\
172  }\
173 } while (0)
174 
175 
/* Sets bit `pos` in `bs`; if the bit was already set, CC_DUP_WARN(env) is
 * called first.  Requires a variable named `env` in the caller's scope. */
176 #define BITSET_SET_BIT_CHKDUP(bs, pos) do { \
177  if (BITSET_AT(bs, pos)) CC_DUP_WARN(env); \
178  BS_ROOM(bs, pos) |= BS_BIT(pos); \
179 } while (0)
180 
/* Stores 1 in `empty` when every word of `bs` is zero, otherwise 0.
 * Declares its own loop counter `i` inside the do-block. */
181 #define BITSET_IS_EMPTY(bs,empty) do {\
182  int i;\
183  empty = 1;\
184  for (i = 0; i < BITSET_SIZE; i++) {\
185  if ((bs)[i] != 0) {\
186  empty = 0; break;\
187  }\
188  }\
189 } while (0)
190 
191 static void
192 bitset_set_range(ScanEnv *env, BitSetRef bs, int from, int to)
193 {
194  int i;
195  for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
196  BITSET_SET_BIT_CHKDUP(bs, i);
197  }
198 }
199 
#if 0
/* Turn on every bit of the bit set (currently compiled out). */
static void
bitset_set_all(BitSetRef bs)
{
  int word = 0;

  while (word < BITSET_SIZE) {
    bs[word] = ~((Bits )0);
    word++;
  }
}
#endif
208 
209 static void
210 bitset_invert(BitSetRef bs)
211 {
212  int i;
213  for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
214 }
215 
216 static void
217 bitset_invert_to(BitSetRef from, BitSetRef to)
218 {
219  int i;
220  for (i = 0; i < BITSET_SIZE; i++) { to[i] = ~(from[i]); }
221 }
222 
223 static void
224 bitset_and(BitSetRef dest, BitSetRef bs)
225 {
226  int i;
227  for (i = 0; i < BITSET_SIZE; i++) { dest[i] &= bs[i]; }
228 }
229 
230 static void
231 bitset_or(BitSetRef dest, BitSetRef bs)
232 {
233  int i;
234  for (i = 0; i < BITSET_SIZE; i++) { dest[i] |= bs[i]; }
235 }
236 
237 static void
238 bitset_copy(BitSetRef dest, BitSetRef bs)
239 {
240  int i;
241  for (i = 0; i < BITSET_SIZE; i++) { dest[i] = bs[i]; }
242 }
243 
#if defined(USE_NAMED_GROUP) && !defined(USE_ST_LIBRARY)
/*
 * Compare the first n bytes of s1 and s2.
 * Returns 0 when they are equal; otherwise the difference s2[k] - s1[k] at
 * the first mismatching index (note the operand order).
 */
extern int
onig_strncmp(const UChar* s1, const UChar* s2, int n)
{
  int k;

  for (k = 0; k < n; k++) {
    int diff = s2[k] - s1[k];
    if (diff != 0) return diff;
  }
  return 0;
}
#endif
257 
258 extern void
259 onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
260 {
261  ptrdiff_t len = end - src;
262  if (len > 0) {
263  xmemcpy(dest, src, len);
264  dest[len] = (UChar )0;
265  }
266 }
267 
#ifdef USE_NAMED_GROUP
/*
 * Duplicate the bytes [s, end) into a freshly allocated buffer and append
 * ONIGENC_MBC_MINLEN(enc) zero bytes as terminator.
 *
 * Returns the new buffer (caller frees with xfree), or NULL when the
 * allocation fails.
 */
static UChar*
strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
{
  ptrdiff_t slen;
  int term_len, i;
  UChar *r;

  slen = end - s;
  term_len = ONIGENC_MBC_MINLEN(enc);

  r = (UChar* )xmalloc(slen + term_len);
  if (IS_NULL(r)) return NULL;  /* never write through a failed allocation */
  xmemcpy(r, s, slen);

  for (i = 0; i < term_len; i++)
    r[slen + i] = (UChar )0;

  return r;
}
#endif
289 
290 /* scan pattern methods */
/* All macros below operate on caller-scope locals named `p` (cursor),
 * `end` (end of input) and `enc` (encoding); PFETCH_READY must appear in
 * the function to declare `pfetch_prev`. */
/* Value produced by PPEEK when the cursor is at the end of input. */
291 #define PEND_VALUE 0
292 
293 #ifdef __GNUC__
294 /* get rid of Wunused-but-set-variable and Wuninitialized */
295 # define PFETCH_READY UChar* pfetch_prev = NULL; (void)pfetch_prev
296 #else
297 # define PFETCH_READY UChar* pfetch_prev
298 #endif
/* 1 when the cursor has reached `end`, otherwise 0. */
299 #define PEND (p < end ? 0 : 1)
/* Move the cursor back to the position saved by the last PINC/PFETCH. */
300 #define PUNFETCH p = pfetch_prev
/* Save the cursor in pfetch_prev, then advance by one character. */
301 #define PINC do { \
302  pfetch_prev = p; \
303  p += enclen(enc, p, end); \
304 } while (0)
/* Read the character at the cursor into c (a plain byte read when
 * enc->max_enc_len is 1), save the cursor, then advance past it. */
305 #define PFETCH(c) do { \
306  c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
307  pfetch_prev = p; \
308  p += enclen(enc, p, end); \
309 } while (0)
310 
/* Same as PINC / PFETCH but without recording pfetch_prev, so PUNFETCH
 * cannot undo them. */
311 #define PINC_S do { \
312  p += enclen(enc, p, end); \
313 } while (0)
314 #define PFETCH_S(c) do { \
315  c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
316  p += enclen(enc, p, end); \
317 } while (0)
318 
/* Character at the cursor without advancing; PEND_VALUE at end of input. */
319 #define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
320 #define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c)
321 
322 static UChar*
323 strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
324  size_t capa)
325 {
326  UChar* r;
327 
328  if (dest)
329  r = (UChar* )xrealloc(dest, capa + 1);
330  else
331  r = (UChar* )xmalloc(capa + 1);
332 
334  onig_strcpy(r + (dest_end - dest), src, src_end);
335  return r;
336 }
337 
338 /* dest on static area */
339 static UChar*
340 strcat_capa_from_static(UChar* dest, UChar* dest_end,
341  const UChar* src, const UChar* src_end, size_t capa)
342 {
343  UChar* r;
344 
345  r = (UChar* )xmalloc(capa + 1);
347  onig_strcpy(r, dest, dest_end);
348  onig_strcpy(r + (dest_end - dest), src, src_end);
349  return r;
350 }
351 
352 
353 #ifdef USE_ST_LIBRARY
354 
355 # ifdef RUBY
356 # include "ruby/st.h"
357 # else
358 # include "st.h"
359 # endif
360 
361 typedef struct {
362  const UChar* s;
363  const UChar* end;
365 
366 static int
367 str_end_cmp(st_data_t xp, st_data_t yp)
368 {
369  const st_str_end_key *x, *y;
370  const UChar *p, *q;
371  int c;
372 
373  x = (const st_str_end_key *)xp;
374  y = (const st_str_end_key *)yp;
375  if ((x->end - x->s) != (y->end - y->s))
376  return 1;
377 
378  p = x->s;
379  q = y->s;
380  while (p < x->end) {
381  c = (int )*p - (int )*q;
382  if (c != 0) return c;
383 
384  p++; q++;
385  }
386 
387  return 0;
388 }
389 
390 static st_index_t
391 str_end_hash(st_data_t xp)
392 {
393  const st_str_end_key *x = (const st_str_end_key *)xp;
394  const UChar *p;
395  st_index_t val = 0;
396 
397  p = x->s;
398  while (p < x->end) {
399  val = val * 997 + (int )*p++;
400  }
401 
402  return val + (val >> 5);
403 }
404 
405 extern hash_table_type*
407 {
408  static const struct st_hash_type hashType = {
409  str_end_cmp,
410  str_end_hash,
411  };
412 
413  return (hash_table_type* )
414  onig_st_init_table_with_size(&hashType, size);
415 }
416 
417 extern int
419  const UChar* end_key, hash_data_type *value)
420 {
422 
423  key.s = (UChar* )str_key;
424  key.end = (UChar* )end_key;
425 
426  return onig_st_lookup(table, (st_data_t )(&key), value);
427 }
428 
429 extern int
431  const UChar* end_key, hash_data_type value)
432 {
434  int result;
435 
436  key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
437  key->s = (UChar* )str_key;
438  key->end = (UChar* )end_key;
439  result = onig_st_insert(table, (st_data_t )key, value);
440  if (result) {
441  xfree(key);
442  }
443  return result;
444 }
445 
446 #endif /* USE_ST_LIBRARY */
447 
448 
449 #ifdef USE_NAMED_GROUP
450 
451 # define INIT_NAME_BACKREFS_ALLOC_NUM 8
452 
453 typedef struct {
455  size_t name_len; /* byte length */
456  int back_num; /* number of backrefs */
459  int* back_refs;
460 } NameEntry;
461 
462 # ifdef USE_ST_LIBRARY
463 
465 typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
466 
467 # ifdef ONIG_DEBUG
468 static int
469 i_print_name_entry(UChar* key, NameEntry* e, void* arg)
470 {
471  int i;
472  FILE* fp = (FILE* )arg;
473 
474  fprintf(fp, "%s: ", e->name);
475  if (e->back_num == 0)
476  fputs("-", fp);
477  else if (e->back_num == 1)
478  fprintf(fp, "%d", e->back_ref1);
479  else {
480  for (i = 0; i < e->back_num; i++) {
481  if (i > 0) fprintf(fp, ", ");
482  fprintf(fp, "%d", e->back_refs[i]);
483  }
484  }
485  fputs("\n", fp);
486  return ST_CONTINUE;
487 }
488 
489 extern int
490 onig_print_names(FILE* fp, regex_t* reg)
491 {
492  NameTable* t = (NameTable* )reg->name_table;
493 
494  if (IS_NOT_NULL(t)) {
495  fprintf(fp, "name table\n");
496  onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
497  fputs("\n", fp);
498  }
499  return 0;
500 }
501 # endif /* ONIG_DEBUG */
502 
503 static int
504 i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
505 {
506  xfree(e->name);
507  if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
508  xfree(key);
509  xfree(e);
510  return ST_DELETE;
511 }
512 
513 static int
514 names_clear(regex_t* reg)
515 {
516  NameTable* t = (NameTable* )reg->name_table;
517 
518  if (IS_NOT_NULL(t)) {
519  onig_st_foreach(t, i_free_name_entry, 0);
520  }
521  return 0;
522 }
523 
524 extern int
526 {
527  int r;
528  NameTable* t;
529 
530  r = names_clear(reg);
531  if (r) return r;
532 
533  t = (NameTable* )reg->name_table;
534  if (IS_NOT_NULL(t)) onig_st_free_table(t);
535  reg->name_table = (void* )NULL;
536  return 0;
537 }
538 
539 static NameEntry*
540 name_find(regex_t* reg, const UChar* name, const UChar* name_end)
541 {
542  NameEntry* e;
543  NameTable* t = (NameTable* )reg->name_table;
544 
545  e = (NameEntry* )NULL;
546  if (IS_NOT_NULL(t)) {
547  onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
548  }
549  return e;
550 }
551 
552 typedef struct {
553  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
554  regex_t* reg;
555  void* arg;
556  int ret;
558 } INamesArg;
559 
560 static int
561 i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
562 {
563  int r = (*(arg->func))(e->name,
564  e->name + e->name_len,
565  e->back_num,
566  (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
567  arg->reg, arg->arg);
568  if (r != 0) {
569  arg->ret = r;
570  return ST_STOP;
571  }
572  return ST_CONTINUE;
573 }
574 
575 extern int
577  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
578 {
579  INamesArg narg;
580  NameTable* t = (NameTable* )reg->name_table;
581 
582  narg.ret = 0;
583  if (IS_NOT_NULL(t)) {
584  narg.func = func;
585  narg.reg = reg;
586  narg.arg = arg;
587  narg.enc = reg->enc; /* should be pattern encoding. */
588  onig_st_foreach(t, i_names, (HashDataType )&narg);
589  }
590  return narg.ret;
591 }
592 
593 static int
594 i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
595 {
596  int i;
597 
598  if (e->back_num > 1) {
599  for (i = 0; i < e->back_num; i++) {
600  e->back_refs[i] = map[e->back_refs[i]].new_val;
601  }
602  }
603  else if (e->back_num == 1) {
604  e->back_ref1 = map[e->back_ref1].new_val;
605  }
606 
607  return ST_CONTINUE;
608 }
609 
610 extern int
612 {
613  NameTable* t = (NameTable* )reg->name_table;
614 
615  if (IS_NOT_NULL(t)) {
616  onig_st_foreach(t, i_renumber_name, (HashDataType )map);
617  }
618  return 0;
619 }
620 
621 
622 extern int
624 {
625  NameTable* t = (NameTable* )reg->name_table;
626 
627  if (IS_NOT_NULL(t))
628  return (int )t->num_entries;
629  else
630  return 0;
631 }
632 
633 # else /* USE_ST_LIBRARY */
634 
635 # define INIT_NAMES_ALLOC_NUM 8
636 
/* Array-backed name table used when the st hash library is not available. */
637 typedef struct {
638  NameEntry* e; /* entry array */
639  int num; /* number of entries in use */
640  int alloc; /* number of entries allocated in e */
641 } NameTable;
642 
643 # ifdef ONIG_DEBUG
644 extern int
645 onig_print_names(FILE* fp, regex_t* reg)
646 {
647  int i, j;
648  NameEntry* e;
649  NameTable* t = (NameTable* )reg->name_table;
650 
651  if (IS_NOT_NULL(t) && t->num > 0) {
652  fprintf(fp, "name table\n");
653  for (i = 0; i < t->num; i++) {
654  e = &(t->e[i]);
655  fprintf(fp, "%s: ", e->name);
656  if (e->back_num == 0) {
657  fputs("-", fp);
658  }
659  else if (e->back_num == 1) {
660  fprintf(fp, "%d", e->back_ref1);
661  }
662  else {
663  for (j = 0; j < e->back_num; j++) {
664  if (j > 0) fprintf(fp, ", ");
665  fprintf(fp, "%d", e->back_refs[j]);
666  }
667  }
668  fputs("\n", fp);
669  }
670  fputs("\n", fp);
671  }
672  return 0;
673 }
674 # endif
675 
676 static int
677 names_clear(regex_t* reg)
678 {
679  int i;
680  NameEntry* e;
681  NameTable* t = (NameTable* )reg->name_table;
682 
683  if (IS_NOT_NULL(t)) {
684  for (i = 0; i < t->num; i++) {
685  e = &(t->e[i]);
686  if (IS_NOT_NULL(e->name)) {
687  xfree(e->name);
688  e->name = NULL;
689  e->name_len = 0;
690  e->back_num = 0;
691  e->back_alloc = 0;
692  if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
693  e->back_refs = (int* )NULL;
694  }
695  }
696  if (IS_NOT_NULL(t->e)) {
697  xfree(t->e);
698  t->e = NULL;
699  }
700  t->num = 0;
701  }
702  return 0;
703 }
704 
705 extern int
707 {
708  int r;
709  NameTable* t;
710 
711  r = names_clear(reg);
712  if (r) return r;
713 
714  t = (NameTable* )reg->name_table;
715  if (IS_NOT_NULL(t)) xfree(t);
716  reg->name_table = NULL;
717  return 0;
718 }
719 
720 static NameEntry*
721 name_find(regex_t* reg, const UChar* name, const UChar* name_end)
722 {
723  int i, len;
724  NameEntry* e;
725  NameTable* t = (NameTable* )reg->name_table;
726 
727  if (IS_NOT_NULL(t)) {
728  len = name_end - name;
729  for (i = 0; i < t->num; i++) {
730  e = &(t->e[i]);
731  if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
732  return e;
733  }
734  }
735  return (NameEntry* )NULL;
736 }
737 
738 extern int
740  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
741 {
742  int i, r;
743  NameEntry* e;
744  NameTable* t = (NameTable* )reg->name_table;
745 
746  if (IS_NOT_NULL(t)) {
747  for (i = 0; i < t->num; i++) {
748  e = &(t->e[i]);
749  r = (*func)(e->name, e->name + e->name_len, e->back_num,
750  (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
751  reg, arg);
752  if (r != 0) return r;
753  }
754  }
755  return 0;
756 }
757 
758 extern int
759 onig_number_of_names(const regex_t* reg)
760 {
761  NameTable* t = (NameTable* )reg->name_table;
762 
763  if (IS_NOT_NULL(t))
764  return t->num;
765  else
766  return 0;
767 }
768 
769 # endif /* else USE_ST_LIBRARY */
770 
771 static int
772 name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
773 {
774  int alloc;
775  NameEntry* e;
776  NameTable* t = (NameTable* )reg->name_table;
777 
778  if (name_end - name <= 0)
780 
781  e = name_find(reg, name, name_end);
782  if (IS_NULL(e)) {
783 # ifdef USE_ST_LIBRARY
784  if (IS_NULL(t)) {
786  reg->name_table = (void* )t;
787  }
788  e = (NameEntry* )xmalloc(sizeof(NameEntry));
790 
791  e->name = strdup_with_null(reg->enc, name, name_end);
792  if (IS_NULL(e->name)) {
793  xfree(e);
794  return ONIGERR_MEMORY;
795  }
796  onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
797  (HashDataType )e);
798 
799  e->name_len = name_end - name;
800  e->back_num = 0;
801  e->back_alloc = 0;
802  e->back_refs = (int* )NULL;
803 
804 # else
805 
806  if (IS_NULL(t)) {
807  alloc = INIT_NAMES_ALLOC_NUM;
808  t = (NameTable* )xmalloc(sizeof(NameTable));
810  t->e = NULL;
811  t->alloc = 0;
812  t->num = 0;
813 
814  t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
815  if (IS_NULL(t->e)) {
816  xfree(t);
817  return ONIGERR_MEMORY;
818  }
819  t->alloc = alloc;
820  reg->name_table = t;
821  goto clear;
822  }
823  else if (t->num == t->alloc) {
824  int i;
825  NameEntry* p;
826 
827  alloc = t->alloc * 2;
828  p = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
830  t->e = p;
831  t->alloc = alloc;
832 
833  clear:
834  for (i = t->num; i < t->alloc; i++) {
835  t->e[i].name = NULL;
836  t->e[i].name_len = 0;
837  t->e[i].back_num = 0;
838  t->e[i].back_alloc = 0;
839  t->e[i].back_refs = (int* )NULL;
840  }
841  }
842  e = &(t->e[t->num]);
843  t->num++;
844  e->name = strdup_with_null(reg->enc, name, name_end);
845  if (IS_NULL(e->name)) return ONIGERR_MEMORY;
846  e->name_len = name_end - name;
847 # endif
848  }
849 
850  if (e->back_num >= 1 &&
853  name, name_end);
855  }
856 
857  e->back_num++;
858  if (e->back_num == 1) {
859  e->back_ref1 = backref;
860  }
861  else {
862  if (e->back_num == 2) {
864  e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
866  e->back_alloc = alloc;
867  e->back_refs[0] = e->back_ref1;
868  e->back_refs[1] = backref;
869  }
870  else {
871  if (e->back_num > e->back_alloc) {
872  int* p;
873  alloc = e->back_alloc * 2;
874  p = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
876  e->back_refs = p;
877  e->back_alloc = alloc;
878  }
879  e->back_refs[e->back_num - 1] = backref;
880  }
881  }
882 
883  return 0;
884 }
885 
886 extern int
888  const UChar* name_end, int** nums)
889 {
890  NameEntry* e = name_find(reg, name, name_end);
891 
893 
894  switch (e->back_num) {
895  case 0:
896  *nums = 0;
897  break;
898  case 1:
899  *nums = &(e->back_ref1);
900  break;
901  default:
902  *nums = e->back_refs;
903  break;
904  }
905  return e->back_num;
906 }
907 
908 extern int
910  const UChar* name_end, const OnigRegion *region)
911 {
912  int i, n, *nums;
913 
914  n = onig_name_to_group_numbers(reg, name, name_end, &nums);
915  if (n < 0)
916  return n;
917  else if (n == 0)
918  return ONIGERR_PARSER_BUG;
919  else if (n == 1)
920  return nums[0];
921  else {
922  if (IS_NOT_NULL(region)) {
923  for (i = n - 1; i >= 0; i--) {
924  if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
925  return nums[i];
926  }
927  }
928  return nums[n - 1];
929  }
930 }
931 
932 #else /* USE_NAMED_GROUP */
933 
934 extern int
935 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
936  const UChar* name_end, int** nums)
937 {
938  return ONIG_NO_SUPPORT_CONFIG;
939 }
940 
941 extern int
942 onig_name_to_backref_number(regex_t* reg, const UChar* name,
943  const UChar* name_end, const OnigRegion* region)
944 {
945  return ONIG_NO_SUPPORT_CONFIG;
946 }
947 
948 extern int
950  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
951 {
952  return ONIG_NO_SUPPORT_CONFIG;
953 }
954 
955 extern int
956 onig_number_of_names(const regex_t* reg)
957 {
958  return 0;
959 }
960 #endif /* else USE_NAMED_GROUP */
961 
962 extern int
964 {
966  return 0;
967 
968 #ifdef USE_NAMED_GROUP
969  if (onig_number_of_names(reg) > 0 &&
972  return 0;
973  }
974 #endif
975 
976  return 1;
977 }
978 
979 
980 #define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16
981 
982 static void
983 scan_env_clear(ScanEnv* env)
984 {
985  int i;
986 
991  env->error = (UChar* )NULL;
992  env->error_end = (UChar* )NULL;
993  env->num_call = 0;
994  env->num_mem = 0;
995 #ifdef USE_NAMED_GROUP
996  env->num_named = 0;
997 #endif
998  env->mem_alloc = 0;
999  env->mem_nodes_dynamic = (Node** )NULL;
1000 
1001  for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
1002  env->mem_nodes_static[i] = NULL_NODE;
1003 
1004 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1005  env->num_comb_exp_check = 0;
1006  env->comb_exp_max_regnum = 0;
1007  env->curr_max_regnum = 0;
1008  env->has_recursion = 0;
1009 #endif
1010  env->parse_depth = 0;
1011  env->warnings_flag = 0;
1012 }
1013 
1014 static int
1015 scan_env_add_mem_entry(ScanEnv* env)
1016 {
1017  int i, need, alloc;
1018  Node** p;
1019 
1020  need = env->num_mem + 1;
1021  if (need > ONIG_MAX_CAPTURE_GROUP_NUM)
1023  if (need >= SCANENV_MEMNODES_SIZE) {
1024  if (env->mem_alloc <= need) {
1025  if (IS_NULL(env->mem_nodes_dynamic)) {
1027  p = (Node** )xmalloc(sizeof(Node*) * alloc);
1029  xmemcpy(p, env->mem_nodes_static,
1030  sizeof(Node*) * SCANENV_MEMNODES_SIZE);
1031  }
1032  else {
1033  alloc = env->mem_alloc * 2;
1034  p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);
1036  }
1037 
1038  for (i = env->num_mem + 1; i < alloc; i++)
1039  p[i] = NULL_NODE;
1040 
1041  env->mem_nodes_dynamic = p;
1042  env->mem_alloc = alloc;
1043  }
1044  }
1045 
1046  env->num_mem++;
1047  return env->num_mem;
1048 }
1049 
1050 static int
1051 scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
1052 {
1053  if (env->num_mem >= num)
1054  SCANENV_MEM_NODES(env)[num] = node;
1055  else
1056  return ONIGERR_PARSER_BUG;
1057  return 0;
1058 }
1059 
1060 
1061 extern void
1063 {
1064  start:
1065  if (IS_NULL(node)) return ;
1066 
1067  switch (NTYPE(node)) {
1068  case NT_STR:
1069  if (NSTR(node)->capa != 0 &&
1070  IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
1071  xfree(NSTR(node)->s);
1072  }
1073  break;
1074 
1075  case NT_LIST:
1076  case NT_ALT:
1077  onig_node_free(NCAR(node));
1078  {
1079  Node* next_node = NCDR(node);
1080 
1081  xfree(node);
1082  node = next_node;
1083  goto start;
1084  }
1085  break;
1086 
1087  case NT_CCLASS:
1088  {
1089  CClassNode* cc = NCCLASS(node);
1090 
1091  if (cc->mbuf)
1092  bbuf_free(cc->mbuf);
1093  }
1094  break;
1095 
1096  case NT_QTFR:
1097  if (NQTFR(node)->target)
1098  onig_node_free(NQTFR(node)->target);
1099  break;
1100 
1101  case NT_ENCLOSE:
1102  if (NENCLOSE(node)->target)
1103  onig_node_free(NENCLOSE(node)->target);
1104  break;
1105 
1106  case NT_BREF:
1107  if (IS_NOT_NULL(NBREF(node)->back_dynamic))
1108  xfree(NBREF(node)->back_dynamic);
1109  break;
1110 
1111  case NT_ANCHOR:
1112  if (NANCHOR(node)->target)
1113  onig_node_free(NANCHOR(node)->target);
1114  break;
1115  }
1116 
1117  xfree(node);
1118 }
1119 
1120 static Node*
1121 node_new(void)
1122 {
1123  Node* node;
1124 
1125  node = (Node* )xmalloc(sizeof(Node));
1126  /* xmemset(node, 0, sizeof(Node)); */
1127  return node;
1128 }
1129 
1130 static void
1131 initialize_cclass(CClassNode* cc)
1132 {
1133  BITSET_CLEAR(cc->bs);
1134  /* cc->base.flags = 0; */
1135  cc->flags = 0;
1136  cc->mbuf = NULL;
1137 }
1138 
1139 static Node*
1140 node_new_cclass(void)
1141 {
1142  Node* node = node_new();
1143  CHECK_NULL_RETURN(node);
1144 
1145  SET_NTYPE(node, NT_CCLASS);
1146  initialize_cclass(NCCLASS(node));
1147  return node;
1148 }
1149 
1150 static Node*
1151 node_new_ctype(int type, int not, int ascii_range)
1152 {
1153  Node* node = node_new();
1154  CHECK_NULL_RETURN(node);
1155 
1156  SET_NTYPE(node, NT_CTYPE);
1157  NCTYPE(node)->ctype = type;
1158  NCTYPE(node)->not = not;
1159  NCTYPE(node)->ascii_range = ascii_range;
1160  return node;
1161 }
1162 
1163 static Node*
1164 node_new_anychar(void)
1165 {
1166  Node* node = node_new();
1167  CHECK_NULL_RETURN(node);
1168 
1169  SET_NTYPE(node, NT_CANY);
1170  return node;
1171 }
1172 
1173 static Node*
1174 node_new_list(Node* left, Node* right)
1175 {
1176  Node* node = node_new();
1177  CHECK_NULL_RETURN(node);
1178 
1179  SET_NTYPE(node, NT_LIST);
1180  NCAR(node) = left;
1181  NCDR(node) = right;
1182  return node;
1183 }
1184 
1185 extern Node*
1187 {
1188  return node_new_list(left, right);
1189 }
1190 
1191 extern Node*
1193 {
1194  Node *n;
1195 
1196  n = onig_node_new_list(x, NULL);
1197  if (IS_NULL(n)) return NULL_NODE;
1198 
1199  if (IS_NOT_NULL(list)) {
1200  while (IS_NOT_NULL(NCDR(list)))
1201  list = NCDR(list);
1202 
1203  NCDR(list) = n;
1204  }
1205 
1206  return n;
1207 }
1208 
1209 extern Node*
1211 {
1212  Node* node = node_new();
1213  CHECK_NULL_RETURN(node);
1214 
1215  SET_NTYPE(node, NT_ALT);
1216  NCAR(node) = left;
1217  NCDR(node) = right;
1218  return node;
1219 }
1220 
1221 extern Node*
1223 {
1224  Node* node = node_new();
1225  CHECK_NULL_RETURN(node);
1226 
1227  SET_NTYPE(node, NT_ANCHOR);
1228  NANCHOR(node)->type = type;
1229  NANCHOR(node)->target = NULL;
1230  NANCHOR(node)->char_len = -1;
1231  NANCHOR(node)->ascii_range = 0;
1232  return node;
1233 }
1234 
1235 static Node*
1236 node_new_backref(int back_num, int* backrefs, int by_name,
1238  int exist_level, int nest_level,
1239 #endif
1240  ScanEnv* env)
1241 {
1242  int i;
1243  Node* node = node_new();
1244 
1245  CHECK_NULL_RETURN(node);
1246 
1247  SET_NTYPE(node, NT_BREF);
1248  NBREF(node)->state = 0;
1249  NBREF(node)->back_num = back_num;
1250  NBREF(node)->back_dynamic = (int* )NULL;
1251  if (by_name != 0)
1252  NBREF(node)->state |= NST_NAME_REF;
1253 
1254 #ifdef USE_BACKREF_WITH_LEVEL
1255  if (exist_level != 0) {
1256  NBREF(node)->state |= NST_NEST_LEVEL;
1257  NBREF(node)->nest_level = nest_level;
1258  }
1259 #endif
1260 
1261  for (i = 0; i < back_num; i++) {
1262  if (backrefs[i] <= env->num_mem &&
1263  IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
1264  NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */
1265  break;
1266  }
1267  }
1268 
1269  if (back_num <= NODE_BACKREFS_SIZE) {
1270  for (i = 0; i < back_num; i++)
1271  NBREF(node)->back_static[i] = backrefs[i];
1272  }
1273  else {
1274  int* p = (int* )xmalloc(sizeof(int) * back_num);
1275  if (IS_NULL(p)) {
1276  onig_node_free(node);
1277  return NULL;
1278  }
1279  NBREF(node)->back_dynamic = p;
1280  for (i = 0; i < back_num; i++)
1281  p[i] = backrefs[i];
1282  }
1283  return node;
1284 }
1285 
#ifdef USE_SUBEXP_CALL
/*
 * Create an NT_CALL node.  The name range is stored by pointer (not
 * copied); the target is left unresolved (NULL_NODE).
 */
static Node*
node_new_call(UChar* name, UChar* name_end, int gnum)
{
  Node* call_node = node_new();
  CHECK_NULL_RETURN(call_node);

  SET_NTYPE(call_node, NT_CALL);
  NCALL(call_node)->name      = name;
  NCALL(call_node)->name_end  = name_end;
  NCALL(call_node)->group_num = gnum;  /* call by number if gnum != 0 */
  NCALL(call_node)->target    = NULL_NODE;
  NCALL(call_node)->state     = 0;
  return call_node;
}
#endif
1302 
1303 static Node*
1304 node_new_quantifier(int lower, int upper, int by_number)
1305 {
1306  Node* node = node_new();
1307  CHECK_NULL_RETURN(node);
1308 
1309  SET_NTYPE(node, NT_QTFR);
1310  NQTFR(node)->state = 0;
1311  NQTFR(node)->target = NULL;
1312  NQTFR(node)->lower = lower;
1313  NQTFR(node)->upper = upper;
1314  NQTFR(node)->greedy = 1;
1315  NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY;
1316  NQTFR(node)->head_exact = NULL_NODE;
1317  NQTFR(node)->next_head_exact = NULL_NODE;
1318  NQTFR(node)->is_refered = 0;
1319  if (by_number != 0)
1320  NQTFR(node)->state |= NST_BY_NUMBER;
1321 
1322 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1323  NQTFR(node)->comb_exp_check_num = 0;
1324 #endif
1325 
1326  return node;
1327 }
1328 
1329 static Node*
1330 node_new_enclose(int type)
1331 {
1332  Node* node = node_new();
1333  CHECK_NULL_RETURN(node);
1334 
1335  SET_NTYPE(node, NT_ENCLOSE);
1336  NENCLOSE(node)->type = type;
1337  NENCLOSE(node)->state = 0;
1338  NENCLOSE(node)->regnum = 0;
1339  NENCLOSE(node)->option = 0;
1340  NENCLOSE(node)->target = NULL;
1341  NENCLOSE(node)->call_addr = -1;
1342  NENCLOSE(node)->opt_count = 0;
1343  return node;
1344 }
1345 
1346 extern Node*
1348 {
1349  return node_new_enclose(type);
1350 }
1351 
1352 static Node*
1353 node_new_enclose_memory(OnigOptionType option, int is_named)
1354 {
1355  Node* node = node_new_enclose(ENCLOSE_MEMORY);
1356  CHECK_NULL_RETURN(node);
1357  if (is_named != 0)
1359 
1360 #ifdef USE_SUBEXP_CALL
1361  NENCLOSE(node)->option = option;
1362 #endif
1363  return node;
1364 }
1365 
1366 static Node*
1367 node_new_option(OnigOptionType option)
1368 {
1369  Node* node = node_new_enclose(ENCLOSE_OPTION);
1370  CHECK_NULL_RETURN(node);
1371  NENCLOSE(node)->option = option;
1372  return node;
1373 }
1374 
1375 extern int
1376 onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
1377 {
1378  ptrdiff_t addlen = end - s;
1379 
1380  if (addlen > 0) {
1381  ptrdiff_t len = NSTR(node)->end - NSTR(node)->s;
1382 
1383  if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
1384  UChar* p;
1385  ptrdiff_t capa = len + addlen + NODE_STR_MARGIN;
1386 
1387  if (capa <= NSTR(node)->capa) {
1388  onig_strcpy(NSTR(node)->s + len, s, end);
1389  }
1390  else {
1391  if (NSTR(node)->s == NSTR(node)->buf)
1392  p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end,
1393  s, end, capa);
1394  else
1395  p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa);
1396 
1398  NSTR(node)->s = p;
1399  NSTR(node)->capa = (int )capa;
1400  }
1401  }
1402  else {
1403  onig_strcpy(NSTR(node)->s + len, s, end);
1404  }
1405  NSTR(node)->end = NSTR(node)->s + len + addlen;
1406  }
1407 
1408  return 0;
1409 }
1410 
1411 extern int
1412 onig_node_str_set(Node* node, const UChar* s, const UChar* end)
1413 {
1414  onig_node_str_clear(node);
1415  return onig_node_str_cat(node, s, end);
1416 }
1417 
1418 static int
1419 node_str_cat_char(Node* node, UChar c)
1420 {
1421  UChar s[1];
1422 
1423  s[0] = c;
1424  return onig_node_str_cat(node, s, s + 1);
1425 }
1426 
1427 static int
1428 node_str_cat_codepoint(Node* node, OnigEncoding enc, OnigCodePoint c)
1429 {
1431  int num = ONIGENC_CODE_TO_MBC(enc, c, buf);
1432  if (num < 0) return num;
1433  return onig_node_str_cat(node, buf, buf + num);
1434 }
1435 
#if 0
/* Re-tag `node` as an empty NT_STR node using its embedded buffer and the
 * given flag (currently compiled out). */
extern void
onig_node_conv_to_str_node(Node* node, int flag)
{
  SET_NTYPE(node, NT_STR);
  NSTR(node)->s    = NSTR(node)->buf;
  NSTR(node)->end  = NSTR(node)->buf;
  NSTR(node)->capa = 0;
  NSTR(node)->flag = flag;
}
#endif
1447 
1448 extern void
1450 {
1451  if (NSTR(node)->capa != 0 &&
1452  IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
1453  xfree(NSTR(node)->s);
1454  }
1455 
1456  NSTR(node)->capa = 0;
1457  NSTR(node)->flag = 0;
1458  NSTR(node)->s = NSTR(node)->buf;
1459  NSTR(node)->end = NSTR(node)->buf;
1460 }
1461 
1462 static Node*
1463 node_new_str(const UChar* s, const UChar* end)
1464 {
1465  Node* node = node_new();
1466  CHECK_NULL_RETURN(node);
1467 
1468  SET_NTYPE(node, NT_STR);
1469  NSTR(node)->capa = 0;
1470  NSTR(node)->flag = 0;
1471  NSTR(node)->s = NSTR(node)->buf;
1472  NSTR(node)->end = NSTR(node)->buf;
1473  if (onig_node_str_cat(node, s, end)) {
1474  onig_node_free(node);
1475  return NULL;
1476  }
1477  return node;
1478 }
1479 
1480 extern Node*
1481 onig_node_new_str(const UChar* s, const UChar* end)
1482 {
1483  return node_new_str(s, end);
1484 }
1485 
1486 static Node*
1487 node_new_str_raw(UChar* s, UChar* end)
1488 {
1489  Node* node = node_new_str(s, end);
1490  if (IS_NOT_NULL(node))
1491  NSTRING_SET_RAW(node);
1492  return node;
1493 }
1494 
1495 static Node*
1496 node_new_empty(void)
1497 {
1498  return node_new_str(NULL, NULL);
1499 }
1500 
1501 static Node*
1502 node_new_str_raw_char(UChar c)
1503 {
1504  UChar p[1];
1505 
1506  p[0] = c;
1507  return node_new_str_raw(p, p + 1);
1508 }
1509 
1510 static Node*
1511 str_node_split_last_char(StrNode* sn, OnigEncoding enc)
1512 {
1513  const UChar *p;
1514  Node* n = NULL_NODE;
1515 
1516  if (sn->end > sn->s) {
1517  p = onigenc_get_prev_char_head(enc, sn->s, sn->end, sn->end);
1518  if (p && p > sn->s) { /* can be split. */
1519  n = node_new_str(p, sn->end);
1520  if (IS_NOT_NULL(n) && (sn->flag & NSTR_RAW) != 0)
1521  NSTRING_SET_RAW(n);
1522  sn->end = (UChar* )p;
1523  }
1524  }
1525  return n;
1526 }
1527 
1528 static int
1529 str_node_can_be_split(StrNode* sn, OnigEncoding enc)
1530 {
1531  if (sn->end > sn->s) {
1532  return ((enclen(enc, sn->s, sn->end) < sn->end - sn->s) ? 1 : 0);
1533  }
1534  return 0;
1535 }
1536 
#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
/*
 * Shift the string in `sn` right by `num` bytes and fill the vacated head
 * with `val`.  The current contents are staged through a local buffer of
 * NODE_STR_BUF_SIZE bytes.
 * NOTE(review): no capacity check is made here; the caller presumably
 * guarantees that the string plus `num` fits -- confirm.
 *
 * Always returns 0.
 */
static int
node_str_head_pad(StrNode* sn, int num, UChar val)
{
  UChar buf[NODE_STR_BUF_SIZE];
  int i, len;

  len = (int )(sn->end - sn->s);
  onig_strcpy(buf, sn->s, sn->end);
  onig_strcpy(&(sn->s[num]), buf, buf + len);
  sn->end += num;

  for (i = 0; i < num; i++) {
    sn->s[i] = val;
  }
  return 0;  /* function is declared int: give callers a defined value */
}
#endif
1554 
1555 extern int
1557 {
1558  unsigned int num, val;
1559  OnigCodePoint c;
1560  UChar* p = *src;
1561  PFETCH_READY;
1562 
1563  num = 0;
1564  while (!PEND) {
1565  PFETCH(c);
1566  if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
1567  val = (unsigned int )DIGITVAL(c);
1568  if ((INT_MAX_LIMIT - val) / 10UL < num)
1569  return -1; /* overflow */
1570 
1571  num = num * 10 + val;
1572  }
1573  else {
1574  PUNFETCH;
1575  break;
1576  }
1577  }
1578  *src = p;
1579  return num;
1580 }
1581 
1582 static int
1583 scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,
1584  int maxlen, OnigEncoding enc)
1585 {
1586  OnigCodePoint c;
1587  unsigned int num, val;
1588  int restlen;
1589  UChar* p = *src;
1590  PFETCH_READY;
1591 
1592  restlen = maxlen - minlen;
1593  num = 0;
1594  while (!PEND && maxlen-- != 0) {
1595  PFETCH(c);
1596  if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {
1597  val = (unsigned int )XDIGITVAL(enc,c);
1598  if ((INT_MAX_LIMIT - val) / 16UL < num)
1599  return -1; /* overflow */
1600 
1601  num = (num << 4) + XDIGITVAL(enc,c);
1602  }
1603  else {
1604  PUNFETCH;
1605  maxlen++;
1606  break;
1607  }
1608  }
1609  if (maxlen > restlen)
1610  return -2; /* not enough digits */
1611  *src = p;
1612  return num;
1613 }
1614 
1615 static int
1616 scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
1617  OnigEncoding enc)
1618 {
1619  OnigCodePoint c;
1620  unsigned int num, val;
1621  UChar* p = *src;
1622  PFETCH_READY;
1623 
1624  num = 0;
1625  while (!PEND && maxlen-- != 0) {
1626  PFETCH(c);
1627  if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {
1628  val = ODIGITVAL(c);
1629  if ((INT_MAX_LIMIT - val) / 8UL < num)
1630  return -1; /* overflow */
1631 
1632  num = (num << 3) + val;
1633  }
1634  else {
1635  PUNFETCH;
1636  break;
1637  }
1638  }
1639  *src = p;
1640  return num;
1641 }
1642 
1643 
1644 #define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \
1645  BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
1646 
1647 /* data format:
1648  [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
1649  (all data size is OnigCodePoint)
1650  */
1651 static int
1652 new_code_range(BBuf** pbuf)
1653 {
1654 #define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)
1655  int r;
1656  OnigCodePoint n;
1657  BBuf* bbuf;
1658 
1659  bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
1660  CHECK_NULL_RETURN_MEMERR(*pbuf);
1662  if (r) return r;
1663 
1664  n = 0;
1665  BBUF_WRITE_CODE_POINT(bbuf, 0, n);
1666  return 0;
1667 }
1668 
1669 static int
1670 add_code_range_to_buf0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to,
1671  int checkdup)
1672 {
1673  int r, inc_n, pos;
1674  OnigCodePoint low, high, bound, x;
1675  OnigCodePoint n, *data;
1676  BBuf* bbuf;
1677 
1678  if (from > to) {
1679  n = from; from = to; to = n;
1680  }
1681 
1682  if (IS_NULL(*pbuf)) {
1683  r = new_code_range(pbuf);
1684  if (r) return r;
1685  bbuf = *pbuf;
1686  n = 0;
1687  }
1688  else {
1689  bbuf = *pbuf;
1690  GET_CODE_POINT(n, bbuf->p);
1691  }
1692  data = (OnigCodePoint* )(bbuf->p);
1693  data++;
1694 
1695  bound = (from == 0) ? 0 : n;
1696  for (low = 0; low < bound; ) {
1697  x = (low + bound) >> 1;
1698  if (from - 1 > data[x*2 + 1])
1699  low = x + 1;
1700  else
1701  bound = x;
1702  }
1703 
1704  high = (to == ONIG_LAST_CODE_POINT) ? n : low;
1705  for (bound = n; high < bound; ) {
1706  x = (high + bound) >> 1;
1707  if (to + 1 >= data[x*2])
1708  high = x + 1;
1709  else
1710  bound = x;
1711  }
1712  /* data[(low-1)*2+1] << from <= data[low*2]
1713  * data[(high-1)*2+1] <= to << data[high*2]
1714  */
1715 
1716  inc_n = low + 1 - high;
1717  if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
1719 
1720  if (inc_n != 1) {
1721  if (checkdup && from <= data[low*2+1]
1722  && (data[low*2] <= from || data[low*2+1] <= to))
1723  CC_DUP_WARN(env);
1724  if (from > data[low*2])
1725  from = data[low*2];
1726  if (to < data[(high - 1)*2 + 1])
1727  to = data[(high - 1)*2 + 1];
1728  }
1729 
1730  if (inc_n != 0) {
1731  int from_pos = SIZE_CODE_POINT * (1 + high * 2);
1732  int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);
1733 
1734  if (inc_n > 0) {
1735  if (high < n) {
1736  int size = (n - high) * 2 * SIZE_CODE_POINT;
1737  BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
1738  }
1739  }
1740  else {
1741  BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
1742  }
1743  }
1744 
1745  pos = SIZE_CODE_POINT * (1 + low * 2);
1746  BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
1747  BBUF_WRITE_CODE_POINT(bbuf, pos, from);
1748  BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
1749  n += inc_n;
1750  BBUF_WRITE_CODE_POINT(bbuf, 0, n);
1751 
1752  return 0;
1753 }
1754 
1755 static int
1756 add_code_range_to_buf(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
1757 {
1758  return add_code_range_to_buf0(pbuf, env, from, to, 1);
1759 }
1760 
1761 static int
1762 add_code_range0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to, int checkdup)
1763 {
1764  if (from > to) {
1766  return 0;
1767  else
1769  }
1770 
1771  return add_code_range_to_buf0(pbuf, env, from, to, checkdup);
1772 }
1773 
1774 static int
1775 add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
1776 {
1777  return add_code_range0(pbuf, env, from, to, 1);
1778 }
1779 
1780 static int
1781 not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf, ScanEnv* env)
1782 {
1783  int r, i, n;
1784  OnigCodePoint pre, from, *data, to = 0;
1785 
1786  *pbuf = (BBuf* )NULL;
1787  if (IS_NULL(bbuf)) {
1788  set_all:
1789  return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1790  }
1791 
1792  data = (OnigCodePoint* )(bbuf->p);
1793  GET_CODE_POINT(n, data);
1794  data++;
1795  if (n <= 0) goto set_all;
1796 
1797  r = 0;
1798  pre = MBCODE_START_POS(enc);
1799  for (i = 0; i < n; i++) {
1800  from = data[i*2];
1801  to = data[i*2+1];
1802  if (pre <= from - 1) {
1803  r = add_code_range_to_buf(pbuf, env, pre, from - 1);
1804  if (r != 0) return r;
1805  }
1806  if (to == ONIG_LAST_CODE_POINT) break;
1807  pre = to + 1;
1808  }
1809  if (to < ONIG_LAST_CODE_POINT) {
1810  r = add_code_range_to_buf(pbuf, env, to + 1, ONIG_LAST_CODE_POINT);
1811  }
1812  return r;
1813 }
1814 
1815 #define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
1816  BBuf *tbuf; \
1817  int tnot; \
1818  tnot = not1; not1 = not2; not2 = tnot; \
1819  tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
1820 } while (0)
1821 
1822 static int
1823 or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
1824  BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
1825 {
1826  int r;
1827  OnigCodePoint i, n1, *data1;
1828  OnigCodePoint from, to;
1829 
1830  *pbuf = (BBuf* )NULL;
1831  if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
1832  if (not1 != 0 || not2 != 0)
1833  return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1834  return 0;
1835  }
1836 
1837  r = 0;
1838  if (IS_NULL(bbuf2))
1839  SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
1840 
1841  if (IS_NULL(bbuf1)) {
1842  if (not1 != 0) {
1843  return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1844  }
1845  else {
1846  if (not2 == 0) {
1847  return bbuf_clone(pbuf, bbuf2);
1848  }
1849  else {
1850  return not_code_range_buf(enc, bbuf2, pbuf, env);
1851  }
1852  }
1853  }
1854 
1855  if (not1 != 0)
1856  SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
1857 
1858  data1 = (OnigCodePoint* )(bbuf1->p);
1859  GET_CODE_POINT(n1, data1);
1860  data1++;
1861 
1862  if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
1863  r = bbuf_clone(pbuf, bbuf2);
1864  }
1865  else if (not1 == 0) { /* 1 OR (not 2) */
1866  r = not_code_range_buf(enc, bbuf2, pbuf, env);
1867  }
1868  if (r != 0) return r;
1869 
1870  for (i = 0; i < n1; i++) {
1871  from = data1[i*2];
1872  to = data1[i*2+1];
1873  r = add_code_range_to_buf(pbuf, env, from, to);
1874  if (r != 0) return r;
1875  }
1876  return 0;
1877 }
1878 
1879 static int
1880 and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to1,
1881  OnigCodePoint* data, int n)
1882 {
1883  int i, r;
1884  OnigCodePoint from2, to2;
1885 
1886  for (i = 0; i < n; i++) {
1887  from2 = data[i*2];
1888  to2 = data[i*2+1];
1889  if (from2 < from1) {
1890  if (to2 < from1) continue;
1891  else {
1892  from1 = to2 + 1;
1893  }
1894  }
1895  else if (from2 <= to1) {
1896  if (to2 < to1) {
1897  if (from1 <= from2 - 1) {
1898  r = add_code_range_to_buf(pbuf, env, from1, from2-1);
1899  if (r != 0) return r;
1900  }
1901  from1 = to2 + 1;
1902  }
1903  else {
1904  to1 = from2 - 1;
1905  }
1906  }
1907  else {
1908  from1 = from2;
1909  }
1910  if (from1 > to1) break;
1911  }
1912  if (from1 <= to1) {
1913  r = add_code_range_to_buf(pbuf, env, from1, to1);
1914  if (r != 0) return r;
1915  }
1916  return 0;
1917 }
1918 
1919 static int
1920 and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
1921 {
1922  int r;
1923  OnigCodePoint i, j, n1, n2, *data1, *data2;
1924  OnigCodePoint from, to, from1, to1, from2, to2;
1925 
1926  *pbuf = (BBuf* )NULL;
1927  if (IS_NULL(bbuf1)) {
1928  if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
1929  return bbuf_clone(pbuf, bbuf2);
1930  return 0;
1931  }
1932  else if (IS_NULL(bbuf2)) {
1933  if (not2 != 0)
1934  return bbuf_clone(pbuf, bbuf1);
1935  return 0;
1936  }
1937 
1938  if (not1 != 0)
1939  SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
1940 
1941  data1 = (OnigCodePoint* )(bbuf1->p);
1942  data2 = (OnigCodePoint* )(bbuf2->p);
1943  GET_CODE_POINT(n1, data1);
1944  GET_CODE_POINT(n2, data2);
1945  data1++;
1946  data2++;
1947 
1948  if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
1949  for (i = 0; i < n1; i++) {
1950  from1 = data1[i*2];
1951  to1 = data1[i*2+1];
1952  for (j = 0; j < n2; j++) {
1953  from2 = data2[j*2];
1954  to2 = data2[j*2+1];
1955  if (from2 > to1) break;
1956  if (to2 < from1) continue;
1957  from = MAX(from1, from2);
1958  to = MIN(to1, to2);
1959  r = add_code_range_to_buf(pbuf, env, from, to);
1960  if (r != 0) return r;
1961  }
1962  }
1963  }
1964  else if (not1 == 0) { /* 1 AND (not 2) */
1965  for (i = 0; i < n1; i++) {
1966  from1 = data1[i*2];
1967  to1 = data1[i*2+1];
1968  r = and_code_range1(pbuf, env, from1, to1, data2, n2);
1969  if (r != 0) return r;
1970  }
1971  }
1972 
1973  return 0;
1974 }
1975 
1976 static int
1977 and_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
1978 {
1979  OnigEncoding enc = env->enc;
1980  int r, not1, not2;
1981  BBuf *buf1, *buf2, *pbuf = 0;
1982  BitSetRef bsr1, bsr2;
1983  BitSet bs1, bs2;
1984 
1985  not1 = IS_NCCLASS_NOT(dest);
1986  bsr1 = dest->bs;
1987  buf1 = dest->mbuf;
1988  not2 = IS_NCCLASS_NOT(cc);
1989  bsr2 = cc->bs;
1990  buf2 = cc->mbuf;
1991 
1992  if (not1 != 0) {
1993  bitset_invert_to(bsr1, bs1);
1994  bsr1 = bs1;
1995  }
1996  if (not2 != 0) {
1997  bitset_invert_to(bsr2, bs2);
1998  bsr2 = bs2;
1999  }
2000  bitset_and(bsr1, bsr2);
2001  if (bsr1 != dest->bs) {
2002  bitset_copy(dest->bs, bsr1);
2003  bsr1 = dest->bs;
2004  }
2005  if (not1 != 0) {
2006  bitset_invert(dest->bs);
2007  }
2008 
2009  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
2010  if (not1 != 0 && not2 != 0) {
2011  r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf, env);
2012  }
2013  else {
2014  r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf, env);
2015  if (r == 0 && not1 != 0) {
2016  BBuf *tbuf = 0;
2017  r = not_code_range_buf(enc, pbuf, &tbuf, env);
2018  bbuf_free(pbuf);
2019  pbuf = tbuf;
2020  }
2021  }
2022  if (r != 0) {
2023  bbuf_free(pbuf);
2024  return r;
2025  }
2026 
2027  dest->mbuf = pbuf;
2028  bbuf_free(buf1);
2029  return r;
2030  }
2031  return 0;
2032 }
2033 
2034 static int
2035 or_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
2036 {
2037  OnigEncoding enc = env->enc;
2038  int r, not1, not2;
2039  BBuf *buf1, *buf2, *pbuf = 0;
2040  BitSetRef bsr1, bsr2;
2041  BitSet bs1, bs2;
2042 
2043  not1 = IS_NCCLASS_NOT(dest);
2044  bsr1 = dest->bs;
2045  buf1 = dest->mbuf;
2046  not2 = IS_NCCLASS_NOT(cc);
2047  bsr2 = cc->bs;
2048  buf2 = cc->mbuf;
2049 
2050  if (not1 != 0) {
2051  bitset_invert_to(bsr1, bs1);
2052  bsr1 = bs1;
2053  }
2054  if (not2 != 0) {
2055  bitset_invert_to(bsr2, bs2);
2056  bsr2 = bs2;
2057  }
2058  bitset_or(bsr1, bsr2);
2059  if (bsr1 != dest->bs) {
2060  bitset_copy(dest->bs, bsr1);
2061  bsr1 = dest->bs;
2062  }
2063  if (not1 != 0) {
2064  bitset_invert(dest->bs);
2065  }
2066 
2067  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
2068  if (not1 != 0 && not2 != 0) {
2069  r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf, env);
2070  }
2071  else {
2072  r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf, env);
2073  if (r == 0 && not1 != 0) {
2074  BBuf *tbuf = 0;
2075  r = not_code_range_buf(enc, pbuf, &tbuf, env);
2076  bbuf_free(pbuf);
2077  pbuf = tbuf;
2078  }
2079  }
2080  if (r != 0) {
2081  bbuf_free(pbuf);
2082  return r;
2083  }
2084 
2085  dest->mbuf = pbuf;
2086  bbuf_free(buf1);
2087  return r;
2088  }
2089  else
2090  return 0;
2091 }
2092 
2093 static void UNKNOWN_ESC_WARN(ScanEnv *env, int c);
2094 
2095 static OnigCodePoint
2096 conv_backslash_value(OnigCodePoint c, ScanEnv* env)
2097 {
2099  switch (c) {
2100  case 'n': return '\n';
2101  case 't': return '\t';
2102  case 'r': return '\r';
2103  case 'f': return '\f';
2104  case 'a': return '\007';
2105  case 'b': return '\010';
2106  case 'e': return '\033';
2107  case 'v':
2109  return '\v';
2110  break;
2111 
2112  default:
2113  if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
2114  UNKNOWN_ESC_WARN(env, c);
2115  break;
2116  }
2117  }
2118  return c;
2119 }
2120 
2121 #ifdef USE_NO_INVALID_QUANTIFIER
2122 # define is_invalid_quantifier_target(node) 0
2123 #else
2124 static int
2126 {
2127  switch (NTYPE(node)) {
2128  case NT_ANCHOR:
2129  return 1;
2130  break;
2131 
2132  case NT_ENCLOSE:
2133  /* allow enclosed elements */
2134  /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */
2135  break;
2136 
2137  case NT_LIST:
2138  do {
2139  if (! is_invalid_quantifier_target(NCAR(node))) return 0;
2140  } while (IS_NOT_NULL(node = NCDR(node)));
2141  return 0;
2142  break;
2143 
2144  case NT_ALT:
2145  do {
2146  if (is_invalid_quantifier_target(NCAR(node))) return 1;
2147  } while (IS_NOT_NULL(node = NCDR(node)));
2148  break;
2149 
2150  default:
2151  break;
2152  }
2153  return 0;
2154 }
2155 #endif
2156 
2157 /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
2158 static int
2159 popular_quantifier_num(QtfrNode* q)
2160 {
2161  if (q->greedy) {
2162  if (q->lower == 0) {
2163  if (q->upper == 1) return 0;
2164  else if (IS_REPEAT_INFINITE(q->upper)) return 1;
2165  }
2166  else if (q->lower == 1) {
2167  if (IS_REPEAT_INFINITE(q->upper)) return 2;
2168  }
2169  }
2170  else {
2171  if (q->lower == 0) {
2172  if (q->upper == 1) return 3;
2173  else if (IS_REPEAT_INFINITE(q->upper)) return 4;
2174  }
2175  else if (q->lower == 1) {
2176  if (IS_REPEAT_INFINITE(q->upper)) return 5;
2177  }
2178  }
2179  return -1;
2180 }
2181 
2182 
/* How a directly nested pair of common quantifiers is simplified. */
enum ReduceType {
  RQ_ASIS = 0, /* as is */
  RQ_DEL  = 1, /* delete parent */
  RQ_A,        /* to '*'    */
  RQ_AQ,       /* to '*?'   */
  RQ_QQ,       /* to '??'   */
  RQ_P_QQ,     /* to '+)??' */
  RQ_PQ_Q      /* to '+?)?' */
};

/* Indexed by the popular_quantifier_num() values of the two quantifiers. */
static enum ReduceType const ReduceTypeTable[6][6] = {
/* '?',     '*',     '+',    '??',    '*?',    '+?'      p / c   */
  {RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS}, /* '?'  */
  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL},  /* '*'  */
  {RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL},  /* '+'  */
  {RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ},   /* '??' */
  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL},  /* '*?' */
  {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ,   RQ_AQ,   RQ_DEL}   /* '+?' */
};
2202 
2203 extern void
2205 {
2206  int pnum, cnum;
2207  QtfrNode *p, *c;
2208 
2209  p = NQTFR(pnode);
2210  c = NQTFR(cnode);
2211  pnum = popular_quantifier_num(p);
2212  cnum = popular_quantifier_num(c);
2213  if (pnum < 0 || cnum < 0) return ;
2214 
2215  switch (ReduceTypeTable[cnum][pnum]) {
2216  case RQ_DEL:
2217  *pnode = *cnode;
2218  break;
2219  case RQ_A:
2220  p->target = c->target;
2221  p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;
2222  break;
2223  case RQ_AQ:
2224  p->target = c->target;
2225  p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;
2226  break;
2227  case RQ_QQ:
2228  p->target = c->target;
2229  p->lower = 0; p->upper = 1; p->greedy = 0;
2230  break;
2231  case RQ_P_QQ:
2232  p->target = cnode;
2233  p->lower = 0; p->upper = 1; p->greedy = 0;
2234  c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;
2235  return ;
2236  break;
2237  case RQ_PQ_Q:
2238  p->target = cnode;
2239  p->lower = 0; p->upper = 1; p->greedy = 1;
2240  c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;
2241  return ;
2242  break;
2243  case RQ_ASIS:
2244  p->target = cnode;
2245  return ;
2246  break;
2247  }
2248 
2249  c->target = NULL_NODE;
2250  onig_node_free(cnode);
2251 }
2252 
2253 
/* Token kinds produced by the pattern tokenizer. */
enum TokenSyms {
  TK_EOT      = 0,   /* end of token */
  TK_RAW_BYTE = 1,
  TK_CHAR,
  TK_STRING,
  TK_CODE_POINT,
  TK_ANYCHAR,
  TK_CHAR_TYPE,
  TK_BACKREF,
  TK_CALL,
  TK_ANCHOR,
  TK_OP_REPEAT,
  TK_INTERVAL,
  TK_ANYCHAR_ANYTIME,  /* SQL '%' == .* */
  TK_ALT,
  TK_SUBEXP_OPEN,
  TK_SUBEXP_CLOSE,
  TK_CC_OPEN,
  TK_QUOTE_OPEN,
  TK_CHAR_PROPERTY,    /* \p{...}, \P{...} */
  TK_LINEBREAK,
  TK_EXTENDED_GRAPHEME_CLUSTER,
  TK_KEEP,
  /* in cc */
  TK_CC_CLOSE,
  TK_CC_RANGE,
  TK_POSIX_BRACKET_OPEN,
  TK_CC_AND,             /* && */
  TK_CC_CC_OPEN          /* [ */
};
2284 
2285 typedef struct {
2286  enum TokenSyms type;
2287  int escaped;
2288  int base; /* is number: 8, 16 (used in [....]) */
2290  union {
2292  int c;
2294  struct {
2295  int subtype;
2297  } anchor;
2298  struct {
2299  int lower;
2300  int upper;
2301  int greedy;
2303  } repeat;
2304  struct {
2305  int num;
2306  int ref1;
2307  int* refs;
2308  int by_name;
2309 #ifdef USE_BACKREF_WITH_LEVEL
2311  int level; /* \k<name+n> */
2312 #endif
2313  } backref;
2314  struct {
2317  int gnum;
2318  int rel;
2319  } call;
2320  struct {
2321  int ctype;
2322  int not;
2323  } prop;
2324  } u;
2325 } OnigToken;
2326 
2327 
2328 static int
2329 fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
2330 {
2331  int low, up, syn_allow, non_low = 0;
2332  int r = 0;
2333  OnigCodePoint c;
2334  OnigEncoding enc = env->enc;
2335  UChar* p = *src;
2336  PFETCH_READY;
2337 
2339 
2340  if (PEND) {
2341  if (syn_allow)
2342  return 1; /* "....{" : OK! */
2343  else
2344  return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */
2345  }
2346 
2347  if (! syn_allow) {
2348  c = PPEEK;
2349  if (c == ')' || c == '(' || c == '|') {
2351  }
2352  }
2353 
2354  low = onig_scan_unsigned_number(&p, end, env->enc);
2355  if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
2356  if (low > ONIG_MAX_REPEAT_NUM)
2358 
2359  if (p == *src) { /* can't read low */
2361  /* allow {,n} as {0,n} */
2362  low = 0;
2363  non_low = 1;
2364  }
2365  else
2366  goto invalid;
2367  }
2368 
2369  if (PEND) goto invalid;
2370  PFETCH(c);
2371  if (c == ',') {
2372  UChar* prev = p;
2373  up = onig_scan_unsigned_number(&p, end, env->enc);
2374  if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
2375  if (up > ONIG_MAX_REPEAT_NUM)
2377 
2378  if (p == prev) {
2379  if (non_low != 0)
2380  goto invalid;
2381  up = REPEAT_INFINITE; /* {n,} : {n,infinite} */
2382  }
2383  }
2384  else {
2385  if (non_low != 0)
2386  goto invalid;
2387 
2388  PUNFETCH;
2389  up = low; /* {n} : exact n times */
2390  r = 2; /* fixed */
2391  }
2392 
2393  if (PEND) goto invalid;
2394  PFETCH(c);
2396  if (c != MC_ESC(env->syntax)) goto invalid;
2397  if (PEND) goto invalid;
2398  PFETCH(c);
2399  }
2400  if (c != '}') goto invalid;
2401 
2402  if (!IS_REPEAT_INFINITE(up) && low > up) {
2404  }
2405 
2406  tok->type = TK_INTERVAL;
2407  tok->u.repeat.lower = low;
2408  tok->u.repeat.upper = up;
2409  *src = p;
2410  return r; /* 0: normal {n,m}, 2: fixed {n} */
2411 
2412  invalid:
2413  if (syn_allow)
2414  return 1; /* OK */
2415  else
2417 }
2418 
2419 /* \M-, \C-, \c, or \... */
2420 static int
2421 fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)
2422 {
2423  int v;
2424  OnigCodePoint c;
2425  OnigEncoding enc = env->enc;
2426  UChar* p = *src;
2427 
2428  if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
2429 
2430  PFETCH_S(c);
2431  switch (c) {
2432  case 'M':
2434  if (PEND) return ONIGERR_END_PATTERN_AT_META;
2435  PFETCH_S(c);
2436  if (c != '-') return ONIGERR_META_CODE_SYNTAX;
2437  if (PEND) return ONIGERR_END_PATTERN_AT_META;
2438  PFETCH_S(c);
2439  if (c == MC_ESC(env->syntax)) {
2440  v = fetch_escaped_value(&p, end, env, &c);
2441  if (v < 0) return v;
2442  }
2443  c = ((c & 0xff) | 0x80);
2444  }
2445  else
2446  goto backslash;
2447  break;
2448 
2449  case 'C':
2452  PFETCH_S(c);
2453  if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
2454  goto control;
2455  }
2456  else
2457  goto backslash;
2458 
2459  case 'c':
2461  control:
2463  PFETCH_S(c);
2464  if (c == '?') {
2465  c = 0177;
2466  }
2467  else {
2468  if (c == MC_ESC(env->syntax)) {
2469  v = fetch_escaped_value(&p, end, env, &c);
2470  if (v < 0) return v;
2471  }
2472  c &= 0x9f;
2473  }
2474  break;
2475  }
2476  /* fall through */
2477 
2478  default:
2479  {
2480  backslash:
2481  c = conv_backslash_value(c, env);
2482  }
2483  break;
2484  }
2485 
2486  *src = p;
2487  *val = c;
2488  return 0;
2489 }
2490 
2491 static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
2492 
2493 static OnigCodePoint
2494 get_name_end_code_point(OnigCodePoint start)
2495 {
2496  switch (start) {
2497  case '<': return (OnigCodePoint )'>'; break;
2498  case '\'': return (OnigCodePoint )'\''; break;
2499  case '(': return (OnigCodePoint )')'; break;
2500  case '{': return (OnigCodePoint )'}'; break;
2501  default:
2502  break;
2503  }
2504 
2505  return (OnigCodePoint )0;
2506 }
2507 
2508 #ifdef USE_NAMED_GROUP
2509 # ifdef RUBY
2510 # define ONIGENC_IS_CODE_NAME(enc, c) TRUE
2511 # else
2512 # define ONIGENC_IS_CODE_NAME(enc, c) ONIGENC_IS_CODE_WORD(enc, c)
2513 # endif
2514 
2515 # ifdef USE_BACKREF_WITH_LEVEL
2516 /*
2517  \k<name+n>, \k<name-n>
2518  \k<num+n>, \k<num-n>
2519  \k<-num+n>, \k<-num-n>
2520 */
2521 static int
2522 fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
2523  UChar** rname_end, ScanEnv* env,
2524  int* rback_num, int* rlevel)
2525 {
2526  int r, sign, is_num, exist_level;
2527  OnigCodePoint end_code;
2528  OnigCodePoint c = 0;
2529  OnigEncoding enc = env->enc;
2530  UChar *name_end;
2531  UChar *pnum_head;
2532  UChar *p = *src;
2533  PFETCH_READY;
2534 
2535  *rback_num = 0;
2536  is_num = exist_level = 0;
2537  sign = 1;
2538  pnum_head = *src;
2539 
2540  end_code = get_name_end_code_point(start_code);
2541 
2542  name_end = end;
2543  r = 0;
2544  if (PEND) {
2545  return ONIGERR_EMPTY_GROUP_NAME;
2546  }
2547  else {
2548  PFETCH(c);
2549  if (c == end_code)
2550  return ONIGERR_EMPTY_GROUP_NAME;
2551 
2552  if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2553  is_num = 1;
2554  }
2555  else if (c == '-') {
2556  is_num = 2;
2557  sign = -1;
2558  pnum_head = p;
2559  }
2560  else if (!ONIGENC_IS_CODE_NAME(enc, c)) {
2562  }
2563  }
2564 
2565  while (!PEND) {
2566  name_end = p;
2567  PFETCH(c);
2568  if (c == end_code || c == ')' || c == '+' || c == '-') {
2569  if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
2570  break;
2571  }
2572 
2573  if (is_num != 0) {
2574  if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2575  is_num = 1;
2576  }
2577  else {
2579  is_num = 0;
2580  }
2581  }
2582  else if (!ONIGENC_IS_CODE_NAME(enc, c)) {
2584  }
2585  }
2586 
2587  if (r == 0 && c != end_code) {
2588  if (c == '+' || c == '-') {
2589  int level;
2590  int flag = (c == '-' ? -1 : 1);
2591 
2592  if (PEND) {
2594  goto end;
2595  }
2596  PFETCH(c);
2597  if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
2598  PUNFETCH;
2599  level = onig_scan_unsigned_number(&p, end, enc);
2600  if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
2601  *rlevel = (level * flag);
2602  exist_level = 1;
2603 
2604  if (!PEND) {
2605  PFETCH(c);
2606  if (c == end_code)
2607  goto end;
2608  }
2609  }
2610 
2611  err:
2613  name_end = end;
2614  }
2615 
2616  end:
2617  if (r == 0) {
2618  if (is_num != 0) {
2619  *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2620  if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2621  else if (*rback_num == 0) goto err;
2622 
2623  *rback_num *= sign;
2624  }
2625 
2626  *rname_end = name_end;
2627  *src = p;
2628  return (exist_level ? 1 : 0);
2629  }
2630  else {
2631  onig_scan_env_set_error_string(env, r, *src, name_end);
2632  return r;
2633  }
2634 }
2635 # endif /* USE_BACKREF_WITH_LEVEL */
2636 
2637 /*
2638  ref: 0 -> define name (don't allow number name)
2639  1 -> reference name (allow number name)
2640 */
2641 static int
2642 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
2643  UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
2644 {
2645  int r, is_num, sign;
2646  OnigCodePoint end_code;
2647  OnigCodePoint c = 0;
2648  OnigEncoding enc = env->enc;
2649  UChar *name_end;
2650  UChar *pnum_head;
2651  UChar *p = *src;
2652 
2653  *rback_num = 0;
2654 
2655  end_code = get_name_end_code_point(start_code);
2656 
2657  name_end = end;
2658  pnum_head = *src;
2659  r = 0;
2660  is_num = 0;
2661  sign = 1;
2662  if (PEND) {
2663  return ONIGERR_EMPTY_GROUP_NAME;
2664  }
2665  else {
2666  PFETCH_S(c);
2667  if (c == end_code)
2668  return ONIGERR_EMPTY_GROUP_NAME;
2669 
2670  if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2671  if (ref == 1)
2672  is_num = 1;
2673  else {
2675  is_num = 0;
2676  }
2677  }
2678  else if (c == '-') {
2679  if (ref == 1) {
2680  is_num = 2;
2681  sign = -1;
2682  pnum_head = p;
2683  }
2684  else {
2686  is_num = 0;
2687  }
2688  }
2689  else if (!ONIGENC_IS_CODE_NAME(enc, c)) {
2691  }
2692  }
2693 
2694  if (r == 0) {
2695  while (!PEND) {
2696  name_end = p;
2697  PFETCH_S(c);
2698  if (c == end_code || c == ')') {
2699  if (is_num == 2) {
2701  goto teardown;
2702  }
2703  break;
2704  }
2705 
2706  if (is_num != 0) {
2707  if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2708  is_num = 1;
2709  }
2710  else {
2711  if (!ONIGENC_IS_CODE_WORD(enc, c))
2713  else
2715  goto teardown;
2716  }
2717  }
2718  else {
2719  if (!ONIGENC_IS_CODE_NAME(enc, c)) {
2721  goto teardown;
2722  }
2723  }
2724  }
2725 
2726  if (c != end_code) {
2728  name_end = end;
2729  goto err;
2730  }
2731 
2732  if (is_num != 0) {
2733  *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2734  if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2735  else if (*rback_num == 0) {
2737  goto err;
2738  }
2739 
2740  *rback_num *= sign;
2741  }
2742 
2743  *rname_end = name_end;
2744  *src = p;
2745  return 0;
2746  }
2747  else {
2748 teardown:
2749  while (!PEND) {
2750  name_end = p;
2751  PFETCH_S(c);
2752  if (c == end_code || c == ')')
2753  break;
2754  }
2755  if (PEND)
2756  name_end = end;
2757 
2758  err:
2759  onig_scan_env_set_error_string(env, r, *src, name_end);
2760  return r;
2761  }
2762 }
2763 #else
2764 static int
2765 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
2766  UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
2767 {
2768  int r, is_num, sign;
2769  OnigCodePoint end_code;
2770  OnigCodePoint c = 0;
2771  UChar *name_end;
2772  OnigEncoding enc = env->enc;
2773  UChar *pnum_head;
2774  UChar *p = *src;
2775  PFETCH_READY;
2776 
2777  *rback_num = 0;
2778 
2779  end_code = get_name_end_code_point(start_code);
2780 
2781  *rname_end = name_end = end;
2782  r = 0;
2783  pnum_head = *src;
2784  is_num = 0;
2785  sign = 1;
2786 
2787  if (PEND) {
2788  return ONIGERR_EMPTY_GROUP_NAME;
2789  }
2790  else {
2791  PFETCH(c);
2792  if (c == end_code)
2793  return ONIGERR_EMPTY_GROUP_NAME;
2794 
2795  if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2796  is_num = 1;
2797  }
2798  else if (c == '-') {
2799  is_num = 2;
2800  sign = -1;
2801  pnum_head = p;
2802  }
2803  else {
2805  }
2806  }
2807 
2808  while (!PEND) {
2809  name_end = p;
2810 
2811  PFETCH(c);
2812  if (c == end_code || c == ')') break;
2813  if (! ONIGENC_IS_CODE_DIGIT(enc, c))
2815  }
2816  if (r == 0 && c != end_code) {
2818  name_end = end;
2819  }
2820 
2821  if (r == 0) {
2822  *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2823  if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2824  else if (*rback_num == 0) {
2826  goto err;
2827  }
2828  *rback_num *= sign;
2829 
2830  *rname_end = name_end;
2831  *src = p;
2832  return 0;
2833  }
2834  else {
2835  err:
2836  onig_scan_env_set_error_string(env, r, *src, name_end);
2837  return r;
2838  }
2839 }
2840 #endif /* USE_NAMED_GROUP */
2841 
2842 
2843 static void
2844 onig_syntax_warn(ScanEnv *env, const char *fmt, ...)
2845 {
2846  va_list args;
2848  va_start(args, fmt);
2850  env->pattern, env->pattern_end,
2851  (const UChar *)fmt, args);
2852  va_end(args);
2853 #ifdef RUBY
2854  if (env->sourcefile == NULL)
2855  rb_warn("%s", (char *)buf);
2856  else
2857  rb_compile_warn(env->sourcefile, env->sourceline, "%s", (char *)buf);
2858 #else
2859  (*onig_warn)((char* )buf);
2860 #endif
2861 }
2862 
2863 static void
2864 CC_ESC_WARN(ScanEnv *env, UChar *c)
2865 {
2866  if (onig_warn == onig_null_warn) return ;
2867 
2870  onig_syntax_warn(env, "character class has '%s' without escape", c);
2871  }
2872 }
2873 
2874 static void
2875 CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)
2876 {
2877  if (onig_warn == onig_null_warn) return ;
2878 
2880  onig_syntax_warn(env, "regular expression has '%s' without escape", c);
2881  }
2882 }
2883 
2884 #ifndef RTEST
2885 # define RTEST(v) 1
2886 #endif
2887 
/*
 * Warn about a duplicated range in a character class.  Skipped when the
 * warn callback is the null handler or RTEST(ruby_verbose) is false.
 * The visible part of the guard also requires that ONIG_SYN_WARN_CC_DUP is
 * not yet set in env->warnings_flag.
 *
 * NOTE(review): listing lines 2893 and 2895 are missing (first half of the
 * condition and one statement before the warning call) -- confirm.
 */
2888  static void
2889 CC_DUP_WARN(ScanEnv *env)
2890 {
2891  if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ;
2892 
2894  !(env->warnings_flag & ONIG_SYN_WARN_CC_DUP)) {
2896  onig_syntax_warn(env, "character class has duplicated range");
2897  }
2898 }
2899 
2900 static void
2901 UNKNOWN_ESC_WARN(ScanEnv *env, int c)
2902 {
2903  if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ;
2904  onig_syntax_warn(env, "Unknown escape \\%c is ignored", c);
2905 }
2906 
2907 static UChar*
2908 find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
2909  UChar **next, OnigEncoding enc)
2910 {
2911  int i;
2912  OnigCodePoint x;
2913  UChar *q;
2914  UChar *p = from;
2915 
2916  while (p < to) {
2917  x = ONIGENC_MBC_TO_CODE(enc, p, to);
2918  q = p + enclen(enc, p, to);
2919  if (x == s[0]) {
2920  for (i = 1; i < n && q < to; i++) {
2921  x = ONIGENC_MBC_TO_CODE(enc, q, to);
2922  if (x != s[i]) break;
2923  q += enclen(enc, q, to);
2924  }
2925  if (i >= n) {
2926  if (IS_NOT_NULL(next))
2927  *next = q;
2928  return p;
2929  }
2930  }
2931  p = q;
2932  }
2933  return NULL_UCHARP;
2934 }
2935 
2936 static int
2937 str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
2938  OnigCodePoint bad, OnigEncoding enc, const OnigSyntaxType* syn)
2939 {
2940  int i, in_esc;
2941  OnigCodePoint x;
2942  UChar *q;
2943  UChar *p = from;
2944 
2945  in_esc = 0;
2946  while (p < to) {
2947  if (in_esc) {
2948  in_esc = 0;
2949  p += enclen(enc, p, to);
2950  }
2951  else {
2952  x = ONIGENC_MBC_TO_CODE(enc, p, to);
2953  q = p + enclen(enc, p, to);
2954  if (x == s[0]) {
2955  for (i = 1; i < n && q < to; i++) {
2956  x = ONIGENC_MBC_TO_CODE(enc, q, to);
2957  if (x != s[i]) break;
2958  q += enclen(enc, q, to);
2959  }
2960  if (i >= n) return 1;
2961  p += enclen(enc, p, to);
2962  }
2963  else {
2964  x = ONIGENC_MBC_TO_CODE(enc, p, to);
2965  if (x == bad) return 0;
2966  else if (x == MC_ESC(syn)) in_esc = 1;
2967  p = q;
2968  }
2969  }
2970  }
2971  return 0;
2972 }
2973 
/*
 * Read the next token while inside a character class.
 *
 * tok : receives the token (type, base, escaped flag and the u.* payload)
 * src : in/out cursor; updated to the position after the token on success
 * end : end of the pattern text
 * env : scan environment supplying the syntax and encoding
 *
 * Returns tok->type on success or an ONIGERR_* code for a malformed escape.
 * At end of input TK_EOT is returned and *src is left unchanged.
 *
 * NOTE(review): this text comes from a numbered listing with gaps (for
 * example 3003, 3061, 3066, 3084, 3091, 3121, 3138, 3145, 3163, 3204, 3213),
 * so several conditions guarding the visible statements are not shown here.
 */
2974  static int
2975 fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
2976 {
2977  int num;
2978  OnigCodePoint c, c2;
2979  const OnigSyntaxType* syn = env->syntax;
2980  OnigEncoding enc = env->enc;
2981  UChar* prev;
2982  UChar* p = *src;
2983  PFETCH_READY;
2984 
2985  if (PEND) {
2986  tok->type = TK_EOT;
2987  return tok->type;
2988  }
2989 
/* Default classification: an unescaped plain character. */
2990  PFETCH(c);
2991  tok->type = TK_CHAR;
2992  tok->base = 0;
2993  tok->u.c = c;
2994  tok->escaped = 0;
2995 
2996  if (c == ']') {
2997  tok->type = TK_CC_CLOSE;
2998  }
2999  else if (c == '-') {
3000  tok->type = TK_CC_RANGE;
3001  }
3002  else if (c == MC_ESC(syn)) {
3004  goto end;
3005 
3006  if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
3007 
/* The token is decided by the character that follows the escape. */
3008  PFETCH(c);
3009  tok->escaped = 1;
3010  tok->u.c = c;
3011  switch (c) {
3012  case 'w':
3013  tok->type = TK_CHAR_TYPE;
3014  tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3015  tok->u.prop.not = 0;
3016  break;
3017  case 'W':
3018  tok->type = TK_CHAR_TYPE;
3019  tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3020  tok->u.prop.not = 1;
3021  break;
3022  case 'd':
3023  tok->type = TK_CHAR_TYPE;
3024  tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3025  tok->u.prop.not = 0;
3026  break;
3027  case 'D':
3028  tok->type = TK_CHAR_TYPE;
3029  tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3030  tok->u.prop.not = 1;
3031  break;
3032  case 's':
3033  tok->type = TK_CHAR_TYPE;
3034  tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3035  tok->u.prop.not = 0;
3036  break;
3037  case 'S':
3038  tok->type = TK_CHAR_TYPE;
3039  tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3040  tok->u.prop.not = 1;
3041  break;
3042  case 'h':
3043  if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3044  tok->type = TK_CHAR_TYPE;
3045  tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3046  tok->u.prop.not = 0;
3047  break;
3048  case 'H':
3049  if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3050  tok->type = TK_CHAR_TYPE;
3051  tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3052  tok->u.prop.not = 1;
3053  break;
3054 
/* Character property: 'P' starts negated, and a leading '^' flips it. */
3055  case 'p':
3056  case 'P':
3057  if (PEND) break;
3058 
3059  c2 = PPEEK;
3060  if (c2 == '{' &&
3062  PINC;
3063  tok->type = TK_CHAR_PROPERTY;
3064  tok->u.prop.not = (c == 'P' ? 1 : 0);
3065 
3067  PFETCH(c2);
3068  if (c2 == '^') {
3069  tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
3070  }
3071  else
3072  PUNFETCH;
3073  }
3074  }
3075  else {
3076  onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
3077  }
3078  break;
3079 
/* Hex escape: a '}'-terminated code point (up to 8 hex digits), or up to
   two hex digits taken as a raw byte.  On a malformed braced form p is
   rewound to prev. */
3080  case 'x':
3081  if (PEND) break;
3082 
3083  prev = p;
3085  PINC;
3086  num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
3087  if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3088  if (!PEND) {
3089  c2 = PPEEK;
3090  if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
3092  }
3093 
3094  if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
3095  PINC;
3096  tok->type = TK_CODE_POINT;
3097  tok->base = 16;
3098  tok->u.code = (OnigCodePoint )num;
3099  }
3100  else {
3101  /* can't read nothing or invalid format */
3102  p = prev;
3103  }
3104  }
3105  else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
3106  num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
3107  if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3108  if (p == prev) { /* can't read nothing. */
3109  num = 0; /* but, it's not error */
3110  }
3111  tok->type = TK_RAW_BYTE;
3112  tok->base = 16;
3113  tok->u.c = num;
3114  }
3115  break;
3116 
/* Exactly four hex digits forming a code point. */
3117  case 'u':
3118  if (PEND) break;
3119 
3120  prev = p;
3122  num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
3123  if (num < -1) return ONIGERR_TOO_SHORT_DIGITS;
3124  else if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3125  if (p == prev) { /* can't read nothing. */
3126  num = 0; /* but, it's not error */
3127  }
3128  tok->type = TK_CODE_POINT;
3129  tok->base = 16;
3130  tok->u.code = (OnigCodePoint )num;
3131  }
3132  break;
3133 
/* '}'-terminated octal code point (up to 11 octal digits). */
3134  case 'o':
3135  if (PEND) break;
3136 
3137  prev = p;
3139  PINC;
3140  num = scan_unsigned_octal_number(&p, end, 11, enc);
3141  if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3142  if (!PEND) {
3143  c2 = PPEEK;
3144  if (ONIGENC_IS_CODE_DIGIT(enc, c2) && c2 < '8')
3146  }
3147 
3148  if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
3149  PINC;
3150  tok->type = TK_CODE_POINT;
3151  tok->base = 8;
3152  tok->u.code = (OnigCodePoint )num;
3153  }
3154  else {
3155  /* can't read nothing or invalid format */
3156  p = prev;
3157  }
3158  }
3159  break;
3160 
/* Up to three octal digits as a raw byte; values above 0xff are rejected. */
3161  case '0':
3162  case '1': case '2': case '3': case '4': case '5': case '6': case '7':
3164  PUNFETCH;
3165  prev = p;
3166  num = scan_unsigned_octal_number(&p, end, 3, enc);
3167  if (num < 0 || 0xff < num) return ONIGERR_TOO_BIG_NUMBER;
3168  if (p == prev) { /* can't read nothing. */
3169  num = 0; /* but, it's not error */
3170  }
3171  tok->type = TK_RAW_BYTE;
3172  tok->base = 8;
3173  tok->u.c = num;
3174  }
3175  break;
3176 
/* Any other escape: let fetch_escaped_value decode it; the token becomes a
   code point only when the decoded value differs from the escaped char. */
3177  default:
3178  PUNFETCH;
3179  num = fetch_escaped_value(&p, end, env, &c2);
3180  if (num < 0) return num;
3181  if ((OnigCodePoint )tok->u.c != c2) {
3182  tok->u.code = (OnigCodePoint )c2;
3183  tok->type = TK_CODE_POINT;
3184  }
3185  break;
3186  }
3187  }
3188  else if (c == '[') {
/* "[:" opens a POSIX bracket only if ":]" follows before an unescaped ']';
   otherwise the '[' is handled by the cc_in_cc path. */
3189  if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
3190  OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
3191  tok->backp = p; /* point at '[' is read */
3192  PINC;
3193  if (str_exist_check_with_esc(send, 2, p, end,
3194  (OnigCodePoint )']', enc, syn)) {
3195  tok->type = TK_POSIX_BRACKET_OPEN;
3196  }
3197  else {
3198  PUNFETCH;
3199  goto cc_in_cc;
3200  }
3201  }
3202  else {
3203  cc_in_cc:
3205  tok->type = TK_CC_CC_OPEN;
3206  }
3207  else {
3208  CC_ESC_WARN(env, (UChar* )"[");
3209  }
3210  }
3211  }
3212  else if (c == '&') {
3214  !PEND && (PPEEK_IS('&'))) {
3215  PINC;
3216  tok->type = TK_CC_AND;
3217  }
3218  }
3219 
/* Publish the consumed position to the caller. */
3220  end:
3221  *src = p;
3222  return tok->type;
3223 }
3224 
3225 #ifdef USE_NAMED_GROUP
/*
 * Parse a back reference written with a name or a number and fill `tok`
 * as a TK_BACKREF token.
 *
 * c   : opening delimiter already consumed by the caller
 * tok : token to fill
 * src : in/out cursor; advanced past the reference on success
 * end : end of the pattern text
 * env : scan environment
 *
 * Returns 0 on success or an ONIGERR_* code.
 *
 * NOTE(review): listing lines 3258, 3271, 3273 and 3275 are missing, so the
 * conditions around the two validity checks and the error return for an
 * undefined name are not fully visible here.
 */
3226  static int
3227 fetch_named_backref_token(OnigCodePoint c, OnigToken* tok, UChar** src,
3228  UChar* end, ScanEnv* env)
3229 {
3230  int r, num;
3231  const OnigSyntaxType* syn = env->syntax;
3232  UChar* prev;
3233  UChar* p = *src;
3234  UChar* name_end;
3235  int* backs;
3236  int back_num;
3237 
3238  prev = p;
3239 
3240 # ifdef USE_BACKREF_WITH_LEVEL
3241  name_end = NULL_UCHARP; /* no need. escape gcc warning. */
3242  r = fetch_name_with_level(c, &p, end, &name_end,
3243  env, &back_num, &tok->u.backref.level);
3244  if (r == 1) tok->u.backref.exist_level = 1;
3245  else tok->u.backref.exist_level = 0;
3246 # else
3247  r = fetch_name(&p, end, &name_end, env, &back_num, 1);
3248 # endif
3249  if (r < 0) return r;
3250 
/* back_num != 0: the reference was numeric.  A negative number is converted
   through BACKREF_REL_TO_ABS and must end up positive. */
3251  if (back_num != 0) {
3252  if (back_num < 0) {
3253  back_num = BACKREF_REL_TO_ABS(back_num, env);
3254  if (back_num <= 0)
3255  return ONIGERR_INVALID_BACKREF;
3256  }
3257 
3259  if (back_num > env->num_mem ||
3260  IS_NULL(SCANENV_MEM_NODES(env)[back_num]))
3261  return ONIGERR_INVALID_BACKREF;
3262  }
3263  tok->type = TK_BACKREF;
3264  tok->u.backref.by_name = 0;
3265  tok->u.backref.num = 1;
3266  tok->u.backref.ref1 = back_num;
3267  }
3268  else {
/* Reference by name: look up every group number registered for it. */
3269  num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
3270  if (num <= 0) {
3272  ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
3274  }
3276  int i;
3277  for (i = 0; i < num; i++) {
3278  if (backs[i] > env->num_mem ||
3279  IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
3280  return ONIGERR_INVALID_BACKREF;
3281  }
3282  }
3283 
3284  tok->type = TK_BACKREF;
3285  tok->u.backref.by_name = 1;
/* A single group, or the left-most-group syntax option, yields one ref;
   otherwise the whole list of group numbers is attached. */
3286  if (num == 1 || IS_SYNTAX_BV(syn, ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP)) {
3287  tok->u.backref.num = 1;
3288  tok->u.backref.ref1 = backs[0];
3289  }
3290  else {
3291  tok->u.backref.num = num;
3292  tok->u.backref.refs = backs;
3293  }
3294  }
3295  *src = p;
3296  return 0;
3297 }
3298 #endif
3299 
/*
 * Read the next token of the pattern outside a character class.
 *
 * tok : receives the token (type, base, backp, escaped flag, u.* payload)
 * src : in/out cursor; updated to the position after the token on success
 * end : end of the pattern text
 * env : scan environment supplying the syntax, encoding and options
 *
 * Returns tok->type on success or an ONIGERR_* code.  At end of input
 * TK_EOT is returned and *src is left unchanged.  "(?#...)" comments and,
 * when IS_EXTEND(env->option) holds, white space and '#' comments are
 * skipped by jumping back to `start`.
 *
 * NOTE(review): this text comes from a numbered listing with many gaps
 * (for example 3332, 3354, 3362-3364, 3383, 3438, 3445, 3515, 3521, 3526,
 * 3534, 3617-3619, 3644, 3662, 3679, 3690, 3732, 3740, 3760, 3766-3767,
 * 3772, 3802, 3881, 3901, 3933, 3960, 3967, 4004, 4011), so a number of
 * syntax-option checks guarding the visible statements are not shown here.
 */
3300  static int
3301 fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
3302 {
3303  int r, num;
3304  OnigCodePoint c;
3305  OnigEncoding enc = env->enc;
3306  const OnigSyntaxType* syn = env->syntax;
3307  UChar* prev;
3308  UChar* p = *src;
3309  PFETCH_READY;
3310 
/* Re-entry point after skipped comments / extended-mode white space. */
3311  start:
3312  if (PEND) {
3313  tok->type = TK_EOT;
3314  return tok->type;
3315  }
3316 
3317  tok->type = TK_STRING;
3318  tok->base = 0;
3319  tok->backp = p;
3320 
3321  PFETCH(c);
/* Escape sequence: the token is decided by the character after the escape. */
3322  if (IS_MC_ESC_CODE(c, syn)) {
3323  if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
3324 
3325  tok->backp = p;
3326  PFETCH(c);
3327 
3328  tok->u.c = c;
3329  tok->escaped = 1;
3330  switch (c) {
3331  case '*':
3333  tok->type = TK_OP_REPEAT;
3334  tok->u.repeat.lower = 0;
3335  tok->u.repeat.upper = REPEAT_INFINITE;
3336  goto greedy_check;
3337  break;
3338 
3339  case '+':
3340  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
3341  tok->type = TK_OP_REPEAT;
3342  tok->u.repeat.lower = 1;
3343  tok->u.repeat.upper = REPEAT_INFINITE;
3344  goto greedy_check;
3345  break;
3346 
3347  case '?':
3348  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
3349  tok->type = TK_OP_REPEAT;
3350  tok->u.repeat.lower = 0;
3351  tok->u.repeat.upper = 1;
/* Shared tail for all quantifiers: a following '?' clears the greedy flag;
   otherwise a following '+' may set the possessive flag. */
3352  greedy_check:
3353  if (!PEND && PPEEK_IS('?') &&
3355  PFETCH(c);
3356  tok->u.repeat.greedy = 0;
3357  tok->u.repeat.possessive = 0;
3358  }
3359  else {
3360  possessive_check:
3361  if (!PEND && PPEEK_IS('+') &&
3363  tok->type != TK_INTERVAL) ||
3365  tok->type == TK_INTERVAL))) {
3366  PFETCH(c);
3367  tok->u.repeat.greedy = 1;
3368  tok->u.repeat.possessive = 1;
3369  }
3370  else {
3371  tok->u.repeat.greedy = 1;
3372  tok->u.repeat.possessive = 0;
3373  }
3374  }
3375  break;
3376 
3377  case '{':
3378  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
3379  r = fetch_range_quantifier(&p, end, tok, env);
3380  if (r < 0) return r; /* error */
3381  if (r == 0) goto greedy_check;
3382  else if (r == 2) { /* {n} */
3384  goto possessive_check;
3385 
3386  goto greedy_check;
3387  }
3388  /* r == 1 : normal char */
3389  break;
3390 
3391  case '|':
3392  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
3393  tok->type = TK_ALT;
3394  break;
3395 
3396  case '(':
3397  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
3398  tok->type = TK_SUBEXP_OPEN;
3399  break;
3400 
3401  case ')':
3402  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
3403  tok->type = TK_SUBEXP_CLOSE;
3404  break;
3405 
3406  case 'w':
3407  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
3408  tok->type = TK_CHAR_TYPE;
3409  tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3410  tok->u.prop.not = 0;
3411  break;
3412 
3413  case 'W':
3414  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
3415  tok->type = TK_CHAR_TYPE;
3416  tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3417  tok->u.prop.not = 1;
3418  break;
3419 
3420  case 'b':
3421  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
3422  tok->type = TK_ANCHOR;
3423  tok->u.anchor.subtype = ANCHOR_WORD_BOUND;
3424  tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option)
3425  && ! IS_WORD_BOUND_ALL_RANGE(env->option);
3426  break;
3427 
3428  case 'B':
3429  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
3430  tok->type = TK_ANCHOR;
3431  tok->u.anchor.subtype = ANCHOR_NOT_WORD_BOUND;
3432  tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option)
3433  && ! IS_WORD_BOUND_ALL_RANGE(env->option);
3434  break;
3435 
3436 #ifdef USE_WORD_BEGIN_END
3437  case '<':
3439  tok->type = TK_ANCHOR;
3440  tok->u.anchor.subtype = ANCHOR_WORD_BEGIN;
3441  tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option);
3442  break;
3443 
3444  case '>':
3446  tok->type = TK_ANCHOR;
3447  tok->u.anchor.subtype = ANCHOR_WORD_END;
3448  tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option);
3449  break;
3450 #endif
3451 
3452  case 's':
3453  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
3454  tok->type = TK_CHAR_TYPE;
3455  tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3456  tok->u.prop.not = 0;
3457  break;
3458 
3459  case 'S':
3460  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
3461  tok->type = TK_CHAR_TYPE;
3462  tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3463  tok->u.prop.not = 1;
3464  break;
3465 
3466  case 'd':
3467  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
3468  tok->type = TK_CHAR_TYPE;
3469  tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3470  tok->u.prop.not = 0;
3471  break;
3472 
3473  case 'D':
3474  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
3475  tok->type = TK_CHAR_TYPE;
3476  tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3477  tok->u.prop.not = 1;
3478  break;
3479 
3480  case 'h':
3481  if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3482  tok->type = TK_CHAR_TYPE;
3483  tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3484  tok->u.prop.not = 0;
3485  break;
3486 
3487  case 'H':
3488  if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3489  tok->type = TK_CHAR_TYPE;
3490  tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3491  tok->u.prop.not = 1;
3492  break;
3493 
3494  case 'A':
3495  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
3496  begin_buf:
3497  tok->type = TK_ANCHOR;
3498  tok->u.anchor.subtype = ANCHOR_BEGIN_BUF;
3499  break;
3500 
3501  case 'Z':
3502  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
3503  tok->type = TK_ANCHOR;
3504  tok->u.anchor.subtype = ANCHOR_SEMI_END_BUF;
3505  break;
3506 
3507  case 'z':
3508  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
3509  end_buf:
3510  tok->type = TK_ANCHOR;
3511  tok->u.anchor.subtype = ANCHOR_END_BUF;
3512  break;
3513 
3514  case 'G':
3516  tok->type = TK_ANCHOR;
3517  tok->u.anchor.subtype = ANCHOR_BEGIN_POSITION;
3518  break;
3519 
3520  case '`':
3522  goto begin_buf;
3523  break;
3524 
3525  case '\'':
3527  goto end_buf;
3528  break;
3529 
/* Hex escape: a '}'-terminated code point (up to 8 hex digits), or up to
   two hex digits taken as a raw byte.  On a malformed braced form p is
   rewound to prev. */
3530  case 'x':
3531  if (PEND) break;
3532 
3533  prev = p;
3535  PINC;
3536  num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
3537  if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3538  if (!PEND) {
3539  if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
3541  }
3542 
3543  if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
3544  PINC;
3545  tok->type = TK_CODE_POINT;
3546  tok->u.code = (OnigCodePoint )num;
3547  }
3548  else {
3549  /* can't read nothing or invalid format */
3550  p = prev;
3551  }
3552  }
3553  else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
3554  num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
3555  if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3556  if (p == prev) { /* can't read nothing. */
3557  num = 0; /* but, it's not error */
3558  }
3559  tok->type = TK_RAW_BYTE;
3560  tok->base = 16;
3561  tok->u.c = num;
3562  }
3563  break;
3564 
3565  case 'u':
3566  if (PEND) break;
3567 
3568  prev = p;
3570  num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
3571  if (num < -1) return ONIGERR_TOO_SHORT_DIGITS;
3572  else if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3573  if (p == prev) { /* can't read nothing. */
3574  num = 0; /* but, it's not error */
3575  }
3576  tok->type = TK_CODE_POINT;
3577  tok->base = 16;
3578  tok->u.code = (OnigCodePoint )num;
3579  }
3580  break;
3581 
3582  case 'o':
3583  if (PEND) break;
3584 
3585  prev = p;
3587  PINC;
3588  num = scan_unsigned_octal_number(&p, end, 11, enc);
3589  if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3590  if (!PEND) {
3591  OnigCodePoint c = PPEEK;
3592  if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8')
3594  }
3595 
3596  if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
3597  PINC;
3598  tok->type = TK_CODE_POINT;
3599  tok->u.code = (OnigCodePoint )num;
3600  }
3601  else {
3602  /* can't read nothing or invalid format */
3603  p = prev;
3604  }
3605  }
3606  break;
3607 
/* Decimal digits: try a numbered back reference first; a number that is
   negative or above ONIG_MAX_BACKREF_NUM goes to skip_backref. */
3608  case '1': case '2': case '3': case '4':
3609  case '5': case '6': case '7': case '8': case '9':
3610  PUNFETCH;
3611  prev = p;
3612  num = onig_scan_unsigned_number(&p, end, enc);
3613  if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
3614  goto skip_backref;
3615  }
3616 
3618  (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
3620  if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))
3621  return ONIGERR_INVALID_BACKREF;
3622  }
3623 
3624  tok->type = TK_BACKREF;
3625  tok->u.backref.num = 1;
3626  tok->u.backref.ref1 = num;
3627  tok->u.backref.by_name = 0;
3628 #ifdef USE_BACKREF_WITH_LEVEL
3629  tok->u.backref.exist_level = 0;
3630 #endif
3631  break;
3632  }
3633 
/* Not taken as a back reference: '8' and '9' stay ordinary characters;
   other digits are rewound and fall into the octal handling of case '0'. */
3634  skip_backref:
3635  if (c == '8' || c == '9') {
3636  /* normal char */
3637  p = prev; PINC;
3638  break;
3639  }
3640 
3641  p = prev;
3642  /* fall through */
3643  case '0':
3645  prev = p;
3646  num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
3647  if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3648  if (p == prev) { /* can't read nothing. */
3649  num = 0; /* but, it's not error */
3650  }
3651  tok->type = TK_RAW_BYTE;
3652  tok->base = 8;
3653  tok->u.c = num;
3654  }
3655  else if (c != '0') {
3656  PINC;
3657  }
3658  break;
3659 
3660 #ifdef USE_NAMED_GROUP
3661  case 'k':
3663  PFETCH(c);
3664  if (c == '<' || c == '\'') {
3665  r = fetch_named_backref_token(c, tok, &p, end, env);
3666  if (r < 0) return r;
3667  }
3668  else {
3669  PUNFETCH;
3670  onig_syntax_warn(env, "invalid back reference");
3671  }
3672  }
3673  break;
3674 #endif
3675 
3676 #if defined(USE_SUBEXP_CALL) || defined(USE_NAMED_GROUP)
3677  case 'g':
3678 # ifdef USE_NAMED_GROUP
3680  PFETCH(c);
3681  if (c == '{') {
3682  r = fetch_named_backref_token(c, tok, &p, end, env);
3683  if (r < 0) return r;
3684  }
3685  else
3686  PUNFETCH;
3687  }
3688 # endif
3689 # ifdef USE_SUBEXP_CALL
3691  PFETCH(c);
3692  if (c == '<' || c == '\'') {
3693  int gnum = -1, rel = 0;
3694  UChar* name_end;
3695  OnigCodePoint cnext;
3696 
3697  cnext = PPEEK;
3698  if (cnext == '0') {
3699  PINC;
3700  if (PPEEK_IS(get_name_end_code_point(c))) { /* \g<0>, \g'0' */
3701  PINC;
3702  name_end = p;
3703  gnum = 0;
3704  }
3705  }
3706  else if (cnext == '+') {
3707  PINC;
3708  rel = 1;
3709  }
3710  prev = p;
3711  if (gnum < 0) {
3712  r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);
3713  if (r < 0) return r;
3714  }
3715 
3716  tok->type = TK_CALL;
3717  tok->u.call.name = prev;
3718  tok->u.call.name_end = name_end;
3719  tok->u.call.gnum = gnum;
3720  tok->u.call.rel = rel;
3721  }
3722  else {
3723  onig_syntax_warn(env, "invalid subexp call");
3724  PUNFETCH;
3725  }
3726  }
3727 # endif
3728  break;
3729 #endif
3730 
3731  case 'Q':
3733  tok->type = TK_QUOTE_OPEN;
3734  }
3735  break;
3736 
3737  case 'p':
3738  case 'P':
3739  if (PPEEK_IS('{') &&
3741  PINC;
3742  tok->type = TK_CHAR_PROPERTY;
3743  tok->u.prop.not = (c == 'P' ? 1 : 0);
3744 
3746  PFETCH(c);
3747  if (c == '^') {
3748  tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
3749  }
3750  else
3751  PUNFETCH;
3752  }
3753  }
3754  else {
3755  onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
3756  }
3757  break;
3758 
3759  case 'R':
3761  tok->type = TK_LINEBREAK;
3762  }
3763  break;
3764 
3765  case 'X':
3768  }
3769  break;
3770 
3771  case 'K':
3773  tok->type = TK_KEEP;
3774  }
3775  break;
3776 
/* Any other escape: decode it with fetch_escaped_value.  If the decoded
   value equals the escaped character, it is treated as a plain string
   character and p is set just past that character. */
3777  default:
3778  {
3779  OnigCodePoint c2;
3780 
3781  PUNFETCH;
3782  num = fetch_escaped_value(&p, end, env, &c2);
3783  if (num < 0) return num;
3784  /* set_raw: */
3785  if ((OnigCodePoint )tok->u.c != c2) {
3786  tok->type = TK_CODE_POINT;
3787  tok->u.code = (OnigCodePoint )c2;
3788  }
3789  else { /* string */
3790  p = tok->backp + enclen(enc, tok->backp, end);
3791  }
3792  }
3793  break;
3794  }
3795  }
/* Unescaped character. */
3796  else {
3797  tok->u.c = c;
3798  tok->escaped = 0;
3799 
3800 #ifdef USE_VARIABLE_META_CHARS
3801  if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
3803  if (c == MC_ANYCHAR(syn))
3804  goto any_char;
3805  else if (c == MC_ANYTIME(syn))
3806  goto anytime;
3807  else if (c == MC_ZERO_OR_ONE_TIME(syn))
3808  goto zero_or_one_time;
3809  else if (c == MC_ONE_OR_MORE_TIME(syn))
3810  goto one_or_more_time;
3811  else if (c == MC_ANYCHAR_ANYTIME(syn)) {
3812  tok->type = TK_ANYCHAR_ANYTIME;
3813  goto out;
3814  }
3815  }
3816 #endif
3817 
3818  switch (c) {
3819  case '.':
3820  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
3821 #ifdef USE_VARIABLE_META_CHARS
3822  any_char:
3823 #endif
3824  tok->type = TK_ANYCHAR;
3825  break;
3826 
3827  case '*':
3828  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
3829 #ifdef USE_VARIABLE_META_CHARS
3830  anytime:
3831 #endif
3832  tok->type = TK_OP_REPEAT;
3833  tok->u.repeat.lower = 0;
3834  tok->u.repeat.upper = REPEAT_INFINITE;
3835  goto greedy_check;
3836  break;
3837 
3838  case '+':
3839  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
3840 #ifdef USE_VARIABLE_META_CHARS
3841  one_or_more_time:
3842 #endif
3843  tok->type = TK_OP_REPEAT;
3844  tok->u.repeat.lower = 1;
3845  tok->u.repeat.upper = REPEAT_INFINITE;
3846  goto greedy_check;
3847  break;
3848 
3849  case '?':
3850  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
3851 #ifdef USE_VARIABLE_META_CHARS
3852  zero_or_one_time:
3853 #endif
3854  tok->type = TK_OP_REPEAT;
3855  tok->u.repeat.lower = 0;
3856  tok->u.repeat.upper = 1;
3857  goto greedy_check;
3858  break;
3859 
3860  case '{':
3861  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
3862  r = fetch_range_quantifier(&p, end, tok, env);
3863  if (r < 0) return r; /* error */
3864  if (r == 0) goto greedy_check;
3865  else if (r == 2) { /* {n} */
3867  goto possessive_check;
3868 
3869  goto greedy_check;
3870  }
3871  /* r == 1 : normal char */
3872  break;
3873 
3874  case '|':
3875  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
3876  tok->type = TK_ALT;
3877  break;
3878 
3879  case '(':
3880  if (PPEEK_IS('?') &&
3882  PINC;
/* "(?#": skip everything up to the closing ')' (the character after an
   escape is consumed with it), then restart tokenizing. */
3883  if (PPEEK_IS('#')) {
3884  PFETCH(c);
3885  while (1) {
3886  if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
3887  PFETCH(c);
3888  if (c == MC_ESC(syn)) {
3889  if (!PEND) PFETCH(c);
3890  }
3891  else {
3892  if (c == ')') break;
3893  }
3894  }
3895  goto start;
3896  }
3897 #ifdef USE_PERL_SUBEXP_CALL
3898  /* (?&name), (?n), (?R), (?0), (?+n), (?-n) */
3899  c = PPEEK;
3900  if ((c == '&' || c == 'R' || ONIGENC_IS_CODE_DIGIT(enc, c)) &&
3902  /* (?&name), (?n), (?R), (?0) */
3903  int gnum;
3904  UChar *name;
3905  UChar *name_end;
3906 
3907  if (c == 'R' || c == '0') {
3908  PINC; /* skip 'R' / '0' */
3909  if (!PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME;
3910  PINC; /* skip ')' */
3911  name_end = name = p;
3912  gnum = 0;
3913  }
3914  else {
3915  int numref = 1;
3916  if (c == '&') { /* (?&name) */
3917  PINC;
3918  numref = 0; /* don't allow number name */
3919  }
3920  name = p;
3921  r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, numref);
3922  if (r < 0) return r;
3923  }
3924 
3925  tok->type = TK_CALL;
3926  tok->u.call.name = name;
3927  tok->u.call.name_end = name_end;
3928  tok->u.call.gnum = gnum;
3929  tok->u.call.rel = 0;
3930  break;
3931  }
3932  else if ((c == '-' || c == '+') &&
3934  /* (?+n), (?-n) */
3935  int gnum;
3936  UChar *name;
3937  UChar *name_end;
3938  OnigCodePoint cnext;
3939  PFETCH_READY;
3940 
3941  PINC; /* skip '-' / '+' */
3942  cnext = PPEEK;
3943  if (ONIGENC_IS_CODE_DIGIT(enc, cnext)) {
3944  if (c == '-') PUNFETCH;
3945  name = p;
3946  r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, 1);
3947  if (r < 0) return r;
3948 
3949  tok->type = TK_CALL;
3950  tok->u.call.name = name;
3951  tok->u.call.name_end = name_end;
3952  tok->u.call.gnum = gnum;
3953  tok->u.call.rel = 1;
3954  break;
3955  }
3956  }
3957 #endif /* USE_PERL_SUBEXP_CALL */
3958 #ifdef USE_CAPITAL_P_NAMED_GROUP
3959  if (PPEEK_IS('P') &&
3961  int gnum;
3962  UChar *name;
3963  UChar *name_end;
3964  PFETCH_READY;
3965 
3966  PINC; /* skip 'P' */
3968  PFETCH(c);
3969  if (c == '=') { /* (?P=name): backref */
3970  r = fetch_named_backref_token((OnigCodePoint )'(', tok, &p, end, env);
3971  if (r < 0) return r;
3972  break;
3973  }
3974  else if (c == '>') { /* (?P>name): subexp call */
3975  name = p;
3976  r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, 0);
3977  if (r < 0) return r;
3978 
3979  tok->type = TK_CALL;
3980  tok->u.call.name = name;
3981  tok->u.call.name_end = name_end;
3982  tok->u.call.gnum = gnum;
3983  tok->u.call.rel = 0;
3984  break;
3985  }
3986  }
3987 #endif /* USE_CAPITAL_P_NAMED_GROUP */
3988  PUNFETCH;
3989  }
3990 
3991  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
3992  tok->type = TK_SUBEXP_OPEN;
3993  break;
3994 
3995  case ')':
3996  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
3997  tok->type = TK_SUBEXP_CLOSE;
3998  break;
3999 
4000  case '^':
4001  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
4002  tok->type = TK_ANCHOR;
4003  tok->u.anchor.subtype = (IS_SINGLELINE(env->option)
4005  break;
4006 
4007  case '$':
4008  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
4009  tok->type = TK_ANCHOR;
4010  tok->u.anchor.subtype = (IS_SINGLELINE(env->option)
4012  break;
4013 
4014  case '[':
4015  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
4016  tok->type = TK_CC_OPEN;
4017  break;
4018 
4019  case ']':
4020  if (*src > env->pattern) /* /].../ is allowed. */
4021  CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
4022  break;
4023 
/* Extended mode: '#' starts a comment that runs to the next newline. */
4024  case '#':
4025  if (IS_EXTEND(env->option)) {
4026  while (!PEND) {
4027  PFETCH(c);
4028  if (ONIGENC_IS_CODE_NEWLINE(enc, c))
4029  break;
4030  }
4031  goto start;
4032  break;
4033  }
4034  break;
4035 
/* Extended mode: white space is skipped. */
4036  case ' ': case '\t': case '\n': case '\r': case '\f':
4037  if (IS_EXTEND(env->option))
4038  goto start;
4039  break;
4040 
4041  default:
4042  /* string */
4043  break;
4044  }
4045  }
4046 
4047 #ifdef USE_VARIABLE_META_CHARS
4048  out:
4049 #endif
/* Publish the consumed position to the caller. */
4050  *src = p;
4051  return tok->type;
4052 }
4053 
4054 static int
4055 add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
4056  ScanEnv* env,
4057  OnigCodePoint sb_out, const OnigCodePoint mbr[])
4058 {
4059  int i, r;
4060  OnigCodePoint j;
4061 
4062  int n = ONIGENC_CODE_RANGE_NUM(mbr);
4063 
4064  if (not == 0) {
4065  for (i = 0; i < n; i++) {
4066  for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
4067  j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
4068  if (j >= sb_out) {
4069  if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
4070  r = add_code_range_to_buf(&(cc->mbuf), env, j,
4071  ONIGENC_CODE_RANGE_TO(mbr, i));
4072  if (r != 0) return r;
4073  i++;
4074  }
4075 
4076  goto sb_end;
4077  }
4078  BITSET_SET_BIT_CHKDUP(cc->bs, j);
4079  }
4080  }
4081 
4082  sb_end:
4083  for ( ; i < n; i++) {
4084  r = add_code_range_to_buf(&(cc->mbuf), env,
4085  ONIGENC_CODE_RANGE_FROM(mbr, i),
4086  ONIGENC_CODE_RANGE_TO(mbr, i));
4087  if (r != 0) return r;
4088  }
4089  }
4090  else {
4091  OnigCodePoint prev = 0;
4092 
4093  for (i = 0; i < n; i++) {
4094  for (j = prev;
4095  j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
4096  if (j >= sb_out) {
4097  goto sb_end2;
4098  }
4099  BITSET_SET_BIT_CHKDUP(cc->bs, j);
4100  }
4101  prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
4102  }
4103  for (j = prev; j < sb_out; j++) {
4104  BITSET_SET_BIT_CHKDUP(cc->bs, j);
4105  }
4106 
4107  sb_end2:
4108  prev = sb_out;
4109 
4110  for (i = 0; i < n; i++) {
4111  if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
4112  r = add_code_range_to_buf(&(cc->mbuf), env, prev,
4113  ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
4114  if (r != 0) return r;
4115  }
4116  prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
4117  }
4118  if (prev < 0x7fffffff) {
4119  r = add_code_range_to_buf(&(cc->mbuf), env, prev, 0x7fffffff);
4120  if (r != 0) return r;
4121  }
4122  }
4123 
4124  return 0;
4125 }
4126 
4127 static int
4128 add_ctype_to_cc(CClassNode* cc, int ctype, int not, int ascii_range, ScanEnv* env)
4129 {
4130  int maxcode;
4131  int c, r;
4132  const OnigCodePoint *ranges;
4133  OnigCodePoint sb_out;
4134  OnigEncoding enc = env->enc;
4135 
4136  r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
4137  if (r == 0) {
4138  if (ascii_range) {
4139  CClassNode ccwork;
4140  initialize_cclass(&ccwork);
4141  r = add_ctype_to_cc_by_range(&ccwork, ctype, not, env, sb_out,
4142  ranges);
4143  if (r == 0) {
4144  if (not) {
4145  r = add_code_range_to_buf0(&(ccwork.mbuf), env, 0x80, ONIG_LAST_CODE_POINT, FALSE);
4146  }
4147  else {
4148  CClassNode ccascii;
4149  initialize_cclass(&ccascii);
4150  if (ONIGENC_MBC_MINLEN(env->enc) > 1) {
4151  r = add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F);
4152  }
4153  else {
4154  bitset_set_range(env, ccascii.bs, 0x00, 0x7F);
4155  r = 0;
4156  }
4157  if (r == 0) {
4158  r = and_cclass(&ccwork, &ccascii, env);
4159  }
4160  if (IS_NOT_NULL(ccascii.mbuf)) bbuf_free(ccascii.mbuf);
4161  }
4162  if (r == 0) {
4163  r = or_cclass(cc, &ccwork, env);
4164  }
4165  if (IS_NOT_NULL(ccwork.mbuf)) bbuf_free(ccwork.mbuf);
4166  }
4167  }
4168  else {
4169  r = add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges);
4170  }
4171  return r;
4172  }
4173  else if (r != ONIG_NO_SUPPORT_CONFIG) {
4174  return r;
4175  }
4176 
4177  maxcode = ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
4178  r = 0;
4179  switch (ctype) {
4180  case ONIGENC_CTYPE_ALPHA:
4181  case ONIGENC_CTYPE_BLANK:
4182  case ONIGENC_CTYPE_CNTRL:
4183  case ONIGENC_CTYPE_DIGIT:
4184  case ONIGENC_CTYPE_LOWER:
4185  case ONIGENC_CTYPE_PUNCT:
4186  case ONIGENC_CTYPE_SPACE:
4187  case ONIGENC_CTYPE_UPPER:
4188  case ONIGENC_CTYPE_XDIGIT:
4189  case ONIGENC_CTYPE_ASCII:
4190  case ONIGENC_CTYPE_ALNUM:
4191  if (not != 0) {
4192  for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
4193  if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
4194  BITSET_SET_BIT_CHKDUP(cc->bs, c);
4195  }
4196  ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
4197  }
4198  else {
4199  for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
4200  if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
4201  BITSET_SET_BIT_CHKDUP(cc->bs, c);
4202  }
4203  }
4204  break;
4205 
4206  case ONIGENC_CTYPE_GRAPH:
4207  case ONIGENC_CTYPE_PRINT:
4208  if (not != 0) {
4209  for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
4210  if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)
4211  || c >= maxcode)
4212  BITSET_SET_BIT_CHKDUP(cc->bs, c);
4213  }
4214  if (ascii_range)
4215  ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
4216  }
4217  else {
4218  for (c = 0; c < maxcode; c++) {
4219  if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
4220  BITSET_SET_BIT_CHKDUP(cc->bs, c);
4221  }
4222  if (! ascii_range)
4223  ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
4224  }
4225  break;
4226 
4227  case ONIGENC_CTYPE_WORD:
4228  if (not == 0) {
4229  for (c = 0; c < maxcode; c++) {
4230  if (ONIGENC_IS_CODE_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c);
4231  }
4232  if (! ascii_range)
4233  ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
4234  }
4235  else {
4236  for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
4237  if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */
4238  && (! ONIGENC_IS_CODE_WORD(enc, c) || c >= maxcode))
4239  BITSET_SET_BIT_CHKDUP(cc->bs, c);
4240  }
4241  if (ascii_range)
4242  ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
4243  }
4244  break;
4245 
4246  default:
4247  return ONIGERR_PARSER_BUG;
4248  break;
4249  }
4250 
4251  return r;
4252 }
4253 
/* Try to read a POSIX bracket body ("name:]", optionally preceded by '^') at
 * *src and add the matching ctype to cc and, under the condition shown below,
 * to asc_cc.
 * Returns 0 after a successful match (with *src updated), 1 when the text is
 * not a POSIX bracket (not an error), or the non-zero result of
 * add_ctype_to_cc().
 * NOTE(review): this listing has gaps (the PBS entries and a few statements
 * are absent), so only the visible statements are described. */
4254 static int
4255 parse_posix_bracket(CClassNode* cc, CClassNode* asc_cc,
4256  UChar** src, UChar* end, ScanEnv* env)
4257 {
4258 #define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
4259 #define POSIX_BRACKET_NAME_MIN_LEN 4
4260 
      /* Table of bracket names searched below; each entry is read through its
       * name, len and ctype members.  The entries are not in this listing. */
4261  static const PosixBracketEntryType PBS[] = {
4276  };
4277 
4278  const PosixBracketEntryType *pb;
4279  int not, i, r;
4280  int ascii_range;
4281  OnigCodePoint c;
4282  OnigEncoding enc = env->enc;
4283  UChar *p = *src;
4284 
      /* A leading '^' sets the `not` flag handed to add_ctype_to_cc(). */
4285  if (PPEEK_IS('^')) {
4286  PINC_S;
4287  not = 1;
4288  }
4289  else
4290  not = 0;
4291 
      /* Fewer characters remain than the minimum name length plus 3. */
4292  if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
4293  goto not_posix_bracket;
4294 
      /* NOTE(review): the second operand of this && is missing from the listing. */
4295  ascii_range = IS_ASCII_RANGE(env->option) &&
4297  for (pb = PBS; pb < PBS + numberof(PBS); pb++) {
4298  if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
4299  p = (UChar* )onigenc_step(enc, p, end, pb->len);
      /* NOTE(review): the statement guarded by this ":]" test is missing. */
4300  if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
4302 
4303  r = add_ctype_to_cc(cc, pb->ctype, not, ascii_range, env);
4304  if (r != 0) return r;
4305 
      /* asc_cc receives the ctype only when it is neither WORD nor ASCII and
       * ascii_range is off. */
4306  if (IS_NOT_NULL(asc_cc)) {
4307  if (pb->ctype != ONIGENC_CTYPE_WORD &&
4308  pb->ctype != ONIGENC_CTYPE_ASCII &&
4309  !ascii_range)
4310  r = add_ctype_to_cc(asc_cc, pb->ctype, not, ascii_range, env);
4311  if (r != 0) return r;
4312  }
4313 
4314  PINC_S; PINC_S;
4315  *src = p;
4316  return 0;
4317  }
4318  }
4319 
4320  not_posix_bracket:
      /* Scan forward for ':' or ']', giving up once more than
       * POSIX_BRACKET_CHECK_LIMIT_LENGTH characters have been stepped over. */
4321  c = 0;
4322  i = 0;
4323  while (!PEND && ((c = PPEEK) != ':') && c != ']') {
4324  PINC_S;
4325  if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
4326  }
4327  if (c == ':' && ! PEND) {
4328  PINC_S;
4329  if (! PEND) {
4330  PFETCH_S(c);
      /* NOTE(review): the statement executed when ":]" is found is missing. */
4331  if (c == ']')
4333  }
4334  }
4335 
4336  return 1; /* 1: is not POSIX bracket, but no error. */
4337 }
4338 
4339 static int
4340 fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
4341 {
4342  int r;
4343  OnigCodePoint c;
4344  OnigEncoding enc = env->enc;
4345  UChar *prev, *start, *p = *src;
4346 
4347  r = 0;
4348  start = prev = p;
4349 
4350  while (!PEND) {
4351  prev = p;
4352  PFETCH_S(c);
4353  if (c == '}') {
4354  r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
4355  if (r < 0) break;
4356 
4357  *src = p;
4358  return r;
4359  }
4360  else if (c == '(' || c == ')' || c == '{' || c == '|') {
4362  break;
4363  }
4364  }
4365 
4366  onig_scan_env_set_error_string(env, r, *src, prev);
4367  return r;
4368 }
4369 
4370 static int cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env);
4371 
4372 static int
4373 parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
4374  ScanEnv* env)
4375 {
4376  int r, ctype;
4377  CClassNode* cc;
4378 
4379  ctype = fetch_char_property_to_ctype(src, end, env);
4380  if (ctype < 0) return ctype;
4381 
4382  *np = node_new_cclass();
4384  cc = NCCLASS(*np);
4385  r = add_ctype_to_cc(cc, ctype, 0, 0, env);
4386  if (r != 0) return r;
4387  if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
4388 
4389  if (IS_IGNORECASE(env->option)) {
4390  if (ctype != ONIGENC_CTYPE_ASCII)
4391  r = cclass_case_fold(np, cc, cc, env);
4392  }
4393  return r;
4394 }
4395 
4396 
/* State of the character-class parser (see next_state_val(),
 * next_state_class() and parse_char_class()). */
enum CCSTATE {
  CCS_VALUE,     /* a value is pending in vs and not yet committed */
  CCS_RANGE,     /* a range operator was read; the end value is awaited */
  CCS_COMPLETE,  /* a range has just been committed */
  CCS_START      /* nothing read yet (start of class or after "&&") */
};

/* Kind of the value currently pending in the character-class parser. */
enum CCVALTYPE {
  CCV_SB,          /* single-byte value, stored in the bitset */
  CCV_CODE_POINT,  /* code point, stored in the multi-byte range buffer */
  CCV_CLASS        /* a ctype/property/POSIX class, already added */
};
4409 
4410 static int
4411 next_state_class(CClassNode* cc, CClassNode* asc_cc,
4412  OnigCodePoint* vs, enum CCVALTYPE* type,
4413  enum CCSTATE* state, ScanEnv* env)
4414 {
4415  int r;
4416 
4417  if (*state == CCS_RANGE)
4419 
4420  if (*state == CCS_VALUE && *type != CCV_CLASS) {
4421  if (*type == CCV_SB) {
4422  BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
4423  if (IS_NOT_NULL(asc_cc))
4424  BITSET_SET_BIT(asc_cc->bs, (int )(*vs));
4425  }
4426  else if (*type == CCV_CODE_POINT) {
4427  r = add_code_range(&(cc->mbuf), env, *vs, *vs);
4428  if (r < 0) return r;
4429  if (IS_NOT_NULL(asc_cc)) {
4430  r = add_code_range0(&(asc_cc->mbuf), env, *vs, *vs, 0);
4431  if (r < 0) return r;
4432  }
4433  }
4434  }
4435 
4436  *state = CCS_VALUE;
4437  *type = CCV_CLASS;
4438  return 0;
4439 }
4440 
/* Feed one value v (of kind intype) into the character-class state machine.
 *  - CCS_VALUE: the pending value *vs is committed to cc (and to asc_cc when
 *    it is non-NULL).
 *  - CCS_RANGE: the range *vs..v is added, then the state becomes
 *    CCS_COMPLETE.
 *  - CCS_COMPLETE / CCS_START: the state becomes CCS_VALUE.
 * In every case v then becomes the pending value (*vs, *vs_israw, *type).
 * Returns 0, or a negative code from add_code_range()/add_code_range0().
 * NOTE(review): this listing has gaps (one parameter line and several
 * statements inside the CCS_RANGE case are absent). */
4441 static int
4442 next_state_val(CClassNode* cc, CClassNode* asc_cc,
4444  int* vs_israw, int v_israw,
4445  enum CCVALTYPE intype, enum CCVALTYPE* type,
4446  enum CCSTATE* state, ScanEnv* env)
4447 {
4448  int r;
4449 
4450  switch (*state) {
4451  case CCS_VALUE:
      /* Commit the previously pending single value. */
4452  if (*type == CCV_SB) {
4453  BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
4454  if (IS_NOT_NULL(asc_cc))
4455  BITSET_SET_BIT(asc_cc->bs, (int )(*vs));
4456  }
4457  else if (*type == CCV_CODE_POINT) {
4458  r = add_code_range(&(cc->mbuf), env, *vs, *vs);
4459  if (r < 0) return r;
4460  if (IS_NOT_NULL(asc_cc)) {
4461  r = add_code_range0(&(asc_cc->mbuf), env, *vs, *vs, 0);
4462  if (r < 0) return r;
4463  }
4464  }
4465  break;
4466 
4467  case CCS_RANGE:
      /* Both ends of the range have the same kind. */
4468  if (intype == *type) {
4469  if (intype == CCV_SB) {
      /* NOTE(review): the statement guarded by this bounds test is missing. */
4470  if (*vs > 0xff || v > 0xff)
4472 
      /* NOTE(review): the condition and error statement of this reversed-range
       * branch are missing from the listing. */
4473  if (*vs > v) {
4475  goto ccs_range_end;
4476  else
4478  }
4479  bitset_set_range(env, cc->bs, (int )*vs, (int )v);
4480  if (IS_NOT_NULL(asc_cc))
4481  bitset_set_range(env, asc_cc->bs, (int )*vs, (int )v);
4482  }
4483  else {
4484  r = add_code_range(&(cc->mbuf), env, *vs, v);
4485  if (r < 0) return r;
4486  if (IS_NOT_NULL(asc_cc)) {
4487  r = add_code_range0(&(asc_cc->mbuf), env, *vs, v, 0);
4488  if (r < 0) return r;
4489  }
4490  }
4491  }
4492  else {
      /* The two ends differ in kind: the range is written both to the bitset
       * (end clamped to 0xff) and to the multi-byte range buffer. */
4493 #if 0
4494  if (intype == CCV_CODE_POINT && *type == CCV_SB) {
4495 #endif
4496  if (*vs > v) {
4498  goto ccs_range_end;
4499  else
4501  }
4502  bitset_set_range(env, cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
4503  r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
4504  if (r < 0) return r;
4505  if (IS_NOT_NULL(asc_cc)) {
4506  bitset_set_range(env, asc_cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
4507  r = add_code_range0(&(asc_cc->mbuf), env, (OnigCodePoint )*vs, v, 0);
4508  if (r < 0) return r;
4509  }
4510 #if 0
4511  }
4512  else
4514 #endif
4515  }
4516  ccs_range_end:
4517  *state = CCS_COMPLETE;
4518  break;
4519 
4520  case CCS_COMPLETE:
4521  case CCS_START:
4522  *state = CCS_VALUE;
4523  break;
4524 
4525  default:
4526  break;
4527  }
4528 
      /* v is now the pending value. */
4529  *vs_israw = v_israw;
4530  *vs = v;
4531  *type = intype;
4532  return 0;
4533 }
4534 
4535 static int
4536 code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
4537  ScanEnv* env)
4538 {
4539  int in_esc;
4540  OnigCodePoint code;
4541  OnigEncoding enc = env->enc;
4542  UChar* p = from;
4543 
4544  in_esc = 0;
4545  while (! PEND) {
4546  if (ignore_escaped && in_esc) {
4547  in_esc = 0;
4548  }
4549  else {
4550  PFETCH_S(code);
4551  if (code == c) return 1;
4552  if (code == MC_ESC(env->syntax)) in_esc = 1;
4553  }
4554  }
4555  return 0;
4556 }
4557 
/* Parse a bracketed character class; the token stream is read with
 * fetch_token_in_cc() starting at *src.
 * A class node is created in *np.  When IS_IGNORECASE(env->option) is set a
 * second class node is created in *asc_np and filled in parallel; otherwise
 * *asc_np stays NULL_NODE.
 * Returns 0 on success with *src advanced to the position reached, or a
 * non-zero error code.
 * NOTE(review): env->parse_depth is incremented on entry but, in the code
 * visible here, decremented only on the success path.
 * NOTE(review): this listing has gaps (several statements are absent), so
 * only the visible statements are described. */
4558 static int
4559 parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* end,
4560  ScanEnv* env)
4561 {
4562  int r, neg, len, fetched, and_start;
4563  OnigCodePoint v, vs;
4564  UChar *p;
4565  Node* node;
4566  Node* asc_node;
4567  CClassNode *cc, *prev_cc;
4568  CClassNode *asc_cc, *asc_prev_cc;
4569  CClassNode work_cc, asc_work_cc;
4570 
4571  enum CCSTATE state;
4572  enum CCVALTYPE val_type, in_type;
4573  int val_israw, in_israw;
4574 
4575  *np = *asc_np = NULL_NODE;
4576  env->parse_depth++;
      /* NOTE(review): the statement guarded by this depth check is missing. */
4577  if (env->parse_depth > ParseDepthLimit)
4579  prev_cc = asc_prev_cc = (CClassNode* )NULL;
4580  r = fetch_token_in_cc(tok, src, end, env);
      /* An unescaped leading '^' negates the class. */
4581  if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
4582  neg = 1;
4583  r = fetch_token_in_cc(tok, src, end, env);
4584  }
4585  else {
4586  neg = 0;
4587  }
4588 
4589  if (r < 0) return r;
      /* A ']' met first is taken as a literal when another ']' exists further
       * on in the pattern; otherwise the class is reported as empty. */
4590  if (r == TK_CC_CLOSE) {
4591  if (! code_exist_check((OnigCodePoint )']',
4592  *src, env->pattern_end, 1, env))
4593  return ONIGERR_EMPTY_CHAR_CLASS;
4594 
4595  CC_ESC_WARN(env, (UChar* )"]");
4596  r = tok->type = TK_CHAR; /* allow []...] */
4597  }
4598 
4599  *np = node = node_new_cclass();
4601  cc = NCCLASS(node);
4602 
4603  if (IS_IGNORECASE(env->option)) {
4604  *asc_np = asc_node = node_new_cclass();
4605  CHECK_NULL_RETURN_MEMERR(asc_node);
4606  asc_cc = NCCLASS(asc_node);
4607  }
4608  else {
4609  asc_node = NULL_NODE;
4610  asc_cc = NULL;
4611  }
4612 
4613  and_start = 0;
4614  state = CCS_START;
4615  p = *src;
      /* Main loop: one token per iteration until the closing ']'.  `fetched`
       * is set when a case has already read the next token itself. */
4616  while (r != TK_CC_CLOSE) {
4617  fetched = 0;
4618  switch (r) {
4619  case TK_CHAR:
4620  if ((tok->u.code >= SINGLE_BYTE_SIZE) ||
4621  (len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c)) > 1) {
4622  in_type = CCV_CODE_POINT;
4623  }
4624  else if (len < 0) {
4625  r = len;
4626  goto err;
4627  }
4628  else {
4629  sb_char:
4630  in_type = CCV_SB;
4631  }
4632  v = (OnigCodePoint )tok->u.c;
4633  in_israw = 0;
4634  goto val_entry2;
4635  break;
4636 
4637  case TK_RAW_BYTE:
4638  /* tok->base != 0 : octal or hexadec. */
4639  if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
      /* Collect consecutive raw-byte tokens of the same base into buf.
       * NOTE(review): the declaration of buf is missing from the listing. */
4641  UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
4642  UChar* psave = p;
4643  int i, base = tok->base;
4644 
4645  buf[0] = (UChar )tok->u.c;
4646  for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
4647  r = fetch_token_in_cc(tok, &p, end, env);
4648  if (r < 0) goto err;
4649  if (r != TK_RAW_BYTE || tok->base != base) {
4650  fetched = 1;
4651  break;
4652  }
4653  buf[i] = (UChar )tok->u.c;
4654  }
4655 
4656  if (i < ONIGENC_MBC_MINLEN(env->enc)) {
4658  goto err;
4659  }
4660 
4661  len = enclen(env->enc, buf, buf + i);
4662  if (i < len) {
4664  goto err;
4665  }
4666  else if (i > len) { /* fetch back */
4667  p = psave;
4668  for (i = 1; i < len; i++) {
4669  (void)fetch_token_in_cc(tok, &p, end, env);
4670  /* no need to check the return value (already checked above) */
4671  }
4672  fetched = 0;
4673  }
4674 
4675  if (i == 1) {
4676  v = (OnigCodePoint )buf[0];
4677  goto raw_single;
4678  }
4679  else {
4680  v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
4681  in_type = CCV_CODE_POINT;
4682  }
4683  }
4684  else {
4685  v = (OnigCodePoint )tok->u.c;
4686  raw_single:
4687  in_type = CCV_SB;
4688  }
4689  in_israw = 1;
4690  goto val_entry2;
4691  break;
4692 
4693  case TK_CODE_POINT:
4694  v = tok->u.code;
4695  in_israw = 1;
      /* val_entry: classify v by its encoded length, then feed it to the
       * state machine at val_entry2. */
4696  val_entry:
4697  len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
4698  if (len < 0) {
4699  r = len;
4700  goto err;
4701  }
4702  in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
4703  val_entry2:
4704  r = next_state_val(cc, asc_cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
4705  &state, env);
4706  if (r != 0) goto err;
4707  break;
4708 
4709  case TK_POSIX_BRACKET_OPEN:
4710  r = parse_posix_bracket(cc, asc_cc, &p, end, env);
4711  if (r < 0) goto err;
4712  if (r == 1) { /* is not POSIX bracket */
4713  CC_ESC_WARN(env, (UChar* )"[");
4714  p = tok->backp;
4715  v = (OnigCodePoint )tok->u.c;
4716  in_israw = 0;
4717  goto val_entry;
4718  }
4719  goto next_class;
4720  break;
4721 
4722  case TK_CHAR_TYPE:
      /* NOTE(review): the failures in this case and in TK_CHAR_PROPERTY
       * return directly instead of going through err:, so the cleanup at
       * err: is not run on these paths -- confirm intended. */
4723  r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not,
4724  IS_ASCII_RANGE(env->option), env);
4725  if (r != 0) return r;
4726  if (IS_NOT_NULL(asc_cc)) {
4727  if (tok->u.prop.ctype != ONIGENC_CTYPE_WORD)
4728  r = add_ctype_to_cc(asc_cc, tok->u.prop.ctype, tok->u.prop.not,
4729  IS_ASCII_RANGE(env->option), env);
4730  if (r != 0) return r;
4731  }
4732 
4733  next_class:
4734  r = next_state_class(cc, asc_cc, &vs, &val_type, &state, env);
4735  if (r != 0) goto err;
4736  break;
4737 
4738  case TK_CHAR_PROPERTY:
4739  {
4740  int ctype;
4741 
4742  ctype = fetch_char_property_to_ctype(&p, end, env);
4743  if (ctype < 0) return ctype;
4744  r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, 0, env);
4745  if (r != 0) return r;
4746  if (IS_NOT_NULL(asc_cc)) {
4747  if (ctype != ONIGENC_CTYPE_ASCII)
4748  r = add_ctype_to_cc(asc_cc, ctype, tok->u.prop.not, 0, env);
4749  if (r != 0) return r;
4750  }
4751  goto next_class;
4752  }
4753  break;
4754 
4755  case TK_CC_RANGE:
      /* What a '-' means depends on the current state. */
4756  if (state == CCS_VALUE) {
4757  r = fetch_token_in_cc(tok, &p, end, env);
4758  if (r < 0) goto err;
4759  fetched = 1;
4760  if (r == TK_CC_CLOSE) { /* allow [x-] */
4761  range_end_val:
4762  v = (OnigCodePoint )'-';
4763  in_israw = 0;
4764  goto val_entry;
4765  }
4766  else if (r == TK_CC_AND) {
4767  CC_ESC_WARN(env, (UChar* )"-");
4768  goto range_end_val;
4769  }
4770  state = CCS_RANGE;
4771  }
4772  else if (state == CCS_START) {
4773  /* [-xa] is allowed */
4774  v = (OnigCodePoint )tok->u.c;
4775  in_israw = 0;
4776 
4777  r = fetch_token_in_cc(tok, &p, end, env);
4778  if (r < 0) goto err;
4779  fetched = 1;
4780  /* [--x] or [a&&-x] is warned. */
4781  if (r == TK_CC_RANGE || and_start != 0)
4782  CC_ESC_WARN(env, (UChar* )"-");
4783 
4784  goto val_entry;
4785  }
4786  else if (state == CCS_RANGE) {
4787  CC_ESC_WARN(env, (UChar* )"-");
4788  goto sb_char; /* [!--x] is allowed */
4789  }
4790  else { /* CCS_COMPLETE */
4791  r = fetch_token_in_cc(tok, &p, end, env);
4792  if (r < 0) goto err;
4793  fetched = 1;
4794  if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
4795  else if (r == TK_CC_AND) {
4796  CC_ESC_WARN(env, (UChar* )"-");
4797  goto range_end_val;
4798  }
4799 
      /* NOTE(review): the condition opening this block and the statement
       * before `goto err` are missing from the listing. */
4801  CC_ESC_WARN(env, (UChar* )"-");
4802  goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */
4803  }
4805  goto err;
4806  }
4807  break;
4808 
4809  case TK_CC_CC_OPEN: /* [ */
4810  {
      /* Nested class: parse it recursively and OR the result into cc
       * (and its asc counterpart into asc_cc). */
4811  Node *anode, *aasc_node;
4812  CClassNode* acc;
4813 
4814  r = parse_char_class(&anode, &aasc_node, tok, &p, end, env);
4815  if (r == 0) {
4816  acc = NCCLASS(anode);
4817  r = or_cclass(cc, acc, env);
4818  }
4819  if (r == 0 && IS_NOT_NULL(aasc_node)) {
4820  acc = NCCLASS(aasc_node);
4821  r = or_cclass(asc_cc, acc, env);
4822  }
4823  onig_node_free(anode);
4824  onig_node_free(aasc_node);
4825  if (r != 0) goto err;
4826  }
4827  break;
4828 
4829  case TK_CC_AND: /* && */
4830  {
      /* Commit a pending value before the operand ends. */
4831  if (state == CCS_VALUE) {
4832  r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type,
4833  &val_type, &state, env);
4834  if (r != 0) goto err;
4835  }
4836  /* initialize local variables */
4837  and_start = 1;
4838  state = CCS_START;
4839 
      /* First "&&": keep the class built so far as prev_cc and continue in
       * the local work class.  Later "&&": intersect the work class into
       * prev_cc and start the work class afresh. */
4840  if (IS_NOT_NULL(prev_cc)) {
4841  r = and_cclass(prev_cc, cc, env);
4842  if (r != 0) goto err;
4843  bbuf_free(cc->mbuf);
4844  if (IS_NOT_NULL(asc_cc)) {
4845  r = and_cclass(asc_prev_cc, asc_cc, env);
4846  if (r != 0) goto err;
4847  bbuf_free(asc_cc->mbuf);
4848  }
4849  }
4850  else {
4851  prev_cc = cc;
4852  cc = &work_cc;
4853  if (IS_NOT_NULL(asc_cc)) {
4854  asc_prev_cc = asc_cc;
4855  asc_cc = &asc_work_cc;
4856  }
4857  }
4858  initialize_cclass(cc);
4859  if (IS_NOT_NULL(asc_cc))
4860  initialize_cclass(asc_cc);
4861  }
4862  break;
4863 
4864  case TK_EOT:
4866  goto err;
4867  break;
4868  default:
4869  r = ONIGERR_PARSER_BUG;
4870  goto err;
4871  break;
4872  }
4873 
4874  if (fetched)
4875  r = tok->type;
4876  else {
4877  r = fetch_token_in_cc(tok, &p, end, env);
4878  if (r < 0) goto err;
4879  }
4880  }
4881 
      /* Closing ']' reached: commit a pending value. */
4882  if (state == CCS_VALUE) {
4883  r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type,
4884  &val_type, &state, env);
4885  if (r != 0) goto err;
4886  }
4887 
      /* Fold the last "&&" operand into prev_cc and make it current again. */
4888  if (IS_NOT_NULL(prev_cc)) {
4889  r = and_cclass(prev_cc, cc, env);
4890  if (r != 0) goto err;
4891  bbuf_free(cc->mbuf);
4892  cc = prev_cc;
4893  if (IS_NOT_NULL(asc_cc)) {
4894  r = and_cclass(asc_prev_cc, asc_cc, env);
4895  if (r != 0) goto err;
4896  bbuf_free(asc_cc->mbuf);
4897  asc_cc = asc_prev_cc;
4898  }
4899  }
4900 
4901  if (neg != 0) {
4902  NCCLASS_SET_NOT(cc);
4903  if (IS_NOT_NULL(asc_cc))
4904  NCCLASS_SET_NOT(asc_cc);
4905  }
4906  else {
4907  NCCLASS_CLEAR_NOT(cc);
4908  if (IS_NOT_NULL(asc_cc))
4909  NCCLASS_CLEAR_NOT(asc_cc);
4910  }
      /* Newline handling for a negated, non-empty class.
       * NOTE(review): the rest of this condition and two statements inside
       * the block are missing from the listing. */
4911  if (IS_NCCLASS_NOT(cc) &&
4913  int is_empty;
4914 
4915  is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
4916  if (is_empty != 0)
4917  BITSET_IS_EMPTY(cc->bs, is_empty);
4918 
4919  if (is_empty == 0) {
4920 #define NEWLINE_CODE 0x0a
4921 
4923  if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
4925  else {
4926  r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
4927  if (r < 0) goto err;
4928  }
4929  }
4930  }
4931  }
4932  *src = p;
4933  env->parse_depth--;
4934  return 0;
4935 
4936  err:
      /* Free the range buffer of a work class, i.e. when cc / asc_cc is not
       * the class stored inside the node. */
4937  if (cc != NCCLASS(*np))
4938  bbuf_free(cc->mbuf);
4939  if (IS_NOT_NULL(asc_cc) && (asc_cc != NCCLASS(*asc_np)))
4940  bbuf_free(asc_cc->mbuf);
4941  return r;
4942 }
4943 
4944 static int parse_subexp(Node** top, OnigToken* tok, int term,
4945  UChar** src, UChar* end, ScanEnv* env);
4946 
/* Parse a group construct; *src points just after the opening '('.
 * When the next character is '?' (subject to a further condition that is
 * missing here), the extended forms are dispatched on the following
 * character; otherwise a memory (capture) node is created.
 * Return values visible in this code:
 *   1  - plain group: *np was produced by parse_subexp()
 *   2  - option-only group: *np is an option node with no target
 *   0  - *np is an enclose/anchor/option node with its target attached
 *   <0 - error code
 * *src is advanced on the non-error returns.
 * NOTE(review): this listing has many gaps (statements and conditions are
 * absent), so only the visible statements are described. */
4947 static int
4948 parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
4949  ScanEnv* env)
4950 {
4951  int r = 0, num;
4952  Node *target, *work1 = NULL, *work2 = NULL;
4953  OnigOptionType option;
4954  OnigCodePoint c;
4955  OnigEncoding enc = env->enc;
4956 
4957 #ifdef USE_NAMED_GROUP
4958  int list_capture;
4959 #endif
4960 
4961  UChar* p = *src;
4962  PFETCH_READY;
4963 
4964  *np = NULL;
4966 
4967  option = env->option;
4968  if (PPEEK_IS('?') &&
4970  PINC;
4971  if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
4972 
4973  PFETCH(c);
4974  switch (c) {
4975  case ':': /* (?:...) grouping only */
4976  group:
4977  r = fetch_token(tok, &p, end, env);
4978  if (r < 0) return r;
4979  r = parse_subexp(np, tok, term, &p, end, env);
4980  if (r < 0) return r;
4981  *src = p;
4982  return 1; /* group */
4983  break;
4984 
4985  case '=':
4987  break;
4988  case '!': /* preceding read */
4990  break;
4991  case '>': /* (?>...) stop backtrack */
4992  *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
4993  break;
4994  case '~': /* (?~...) absent operator */
4996  *np = node_new_enclose(ENCLOSE_ABSENT);
4997  }
4998  else {
5000  }
5001  break;
5002 
5003 #ifdef USE_NAMED_GROUP
5004  case '\'':
5006  goto named_group1;
5007  }
5008  else
5010  break;
5011 
5012 # ifdef USE_CAPITAL_P_NAMED_GROUP
5013  case 'P': /* (?P<name>...) */
5014  if (!PEND &&
5016  PFETCH(c);
5017  if (c == '<') goto named_group1;
5018  }
5020  break;
5021 # endif
5022 #endif
5023 
5024  case '<': /* look behind (?<=...), (?<!...) */
5026  PFETCH(c);
5027  if (c == '=')
5029  else if (c == '!')
5031 #ifdef USE_NAMED_GROUP
5032  else { /* (?<name>...) */
5034  UChar *name;
5035  UChar *name_end;
5036 
5037  PUNFETCH;
5038  c = '<';
5039 
5040  named_group1:
5041  list_capture = 0;
5042 
5043 # ifdef USE_CAPTURE_HISTORY
5044  named_group2:
5045 # endif
      /* Read the group name, allocate a memory entry for it, register the
       * name with name_add() and create the memory node. */
5046  name = p;
5047  r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
5048  if (r < 0) return r;
5049 
5050  num = scan_env_add_mem_entry(env);
5051  if (num < 0) return num;
5052  if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)
5054 
5055  r = name_add(env->reg, name, name_end, num, env);
5056  if (r != 0) return r;
5057  *np = node_new_enclose_memory(env->option, 1);
5059  NENCLOSE(*np)->regnum = num;
5060  if (list_capture != 0)
5062  env->num_named++;
5063  }
5064  else {
5066  }
5067  }
5068 #else
5069  else {
5071  }
5072 #endif
5073  break;
5074 
5075 #ifdef USE_CAPTURE_HISTORY
5076  case '@':
5078 # ifdef USE_NAMED_GROUP
5079  if (!PEND &&
5081  PFETCH(c);
5082  if (c == '<' || c == '\'') {
5083  list_capture = 1;
5084  goto named_group2; /* (?@<name>...) */
5085  }
5086  PUNFETCH;
5087  }
5088 # endif
5089  *np = node_new_enclose_memory(env->option, 0);
5091  num = scan_env_add_mem_entry(env);
5092  if (num < 0) return num;
5093  if (num >= (int )BIT_STATUS_BITS_NUM)
5095 
5096  NENCLOSE(*np)->regnum = num;
5098  }
5099  else {
5101  }
5102  break;
5103 #endif /* USE_CAPTURE_HISTORY */
5104 
5105  case '(': /* conditional expression: (?(cond)yes), (?(cond)yes|no) */
5106  if (!PEND &&
5108  UChar *name = NULL;
5109  UChar *name_end;
5110  PFETCH(c);
5111  if (ONIGENC_IS_CODE_DIGIT(enc, c)) { /* (n) */
5112  PUNFETCH;
5113  r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &num, 1);
5114  if (r < 0) return r;
5115 #if 0
5116  /* Relative number is not currently supported. (same as Perl) */
5117  if (num < 0) {
5118  num = BACKREF_REL_TO_ABS(num, env);
5119  if (num <= 0)
5120  return ONIGERR_INVALID_BACKREF;
5121  }
5122 #endif
      /* The referenced group number must exist and have a node recorded. */
5124  if (num > env->num_mem ||
5125  IS_NULL(SCANENV_MEM_NODES(env)[num]))
5126  return ONIGERR_INVALID_BACKREF;
5127  }
5128  }
5129 #ifdef USE_NAMED_GROUP
5130  else if (c == '<' || c == '\'') { /* (<name>), ('name') */
5131  name = p;
5132  r = fetch_named_backref_token(c, tok, &p, end, env);
5133  if (r < 0) return r;
5134  if (!PPEEK_IS(')')) return ONIGERR_UNDEFINED_GROUP_OPTION;
5135  PINC;
5136 
5138  num = tok->u.backref.ref1;
5139  }
5140  else {
5141  /* FIXME:
5142  * Use left most named group for now. This is the same as Perl.
5143  * However this should use the same strategy as normal back-
5144  * references on Ruby syntax; search right to left. */
5145  int len = tok->u.backref.num;
5146  num = len > 1 ? tok->u.backref.refs[0] : tok->u.backref.ref1;
5147  }
5148  }
5149 #endif
5150  else
5152  *np = node_new_enclose(ENCLOSE_CONDITION);
5154  NENCLOSE(*np)->regnum = num;
      /* Mark the condition as a name reference when it was given by name. */
5155  if (IS_NOT_NULL(name)) NENCLOSE(*np)->state |= NST_NAME_REF;
5156  }
5157  else
5159  break;
5160 
5161 #if 0
5162  case '|': /* branch reset: (?|...) */
5164  /* TODO */
5165  }
5166  else
5168  break;
5169 #endif
5170 
5171  case '^': /* loads default options */
5173  /* d-imsx */
5174  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
5175  ONOFF(option, ONIG_OPTION_IGNORECASE, 1);
5176  ONOFF(option, ONIG_OPTION_SINGLELINE, 0);
5177  ONOFF(option, ONIG_OPTION_MULTILINE, 1);
5178  ONOFF(option, ONIG_OPTION_EXTEND, 1);
5179  PFETCH(c);
5180  }
5181 #if 0
5182  else if (!PEND && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
5183  /* d-imx */
5184  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
5187  ONOFF(option, ONIG_OPTION_IGNORECASE, 1);
5188  ONOFF(option, ONIG_OPTION_MULTILINE, 1);
5189  ONOFF(option, ONIG_OPTION_EXTEND, 1);
5190  PFETCH(c);
5191  }
5192 #endif
5193  else {
5195  }
5196  /* fall through */
5197 #ifdef USE_POSIXLINE_OPTION
5198  case 'p':
5199 #endif
5200  case '-': case 'i': case 'm': case 's': case 'x':
5201  case 'a': case 'd': case 'l': case 'u':
5202  {
      /* Option letters: each letter updates the local `option` copy through
       * ONOFF; a preceding '-' sets neg.  The loop ends at ')' (option-only
       * group) or ':' (options scoped to a sub-expression). */
5203  int neg = 0;
5204 
5205  while (1) {
5206  switch (c) {
5207  case ':':
5208  case ')':
5209  break;
5210 
5211  case '-': neg = 1; break;
5212  case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break;
5213  case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
5214  case 's':
5216  ONOFF(option, ONIG_OPTION_MULTILINE, neg);
5217  }
5218  else
5220  break;
5221 
5222  case 'm':
5224  ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
5225  }
5226  else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
5227  ONOFF(option, ONIG_OPTION_MULTILINE, neg);
5228  }
5229  else
5231  break;
5232 #ifdef USE_POSIXLINE_OPTION
5233  case 'p':
5235  break;
5236 #endif
5237 
5238  case 'a': /* limits \d, \s, \w and POSIX brackets to ASCII range */
5241  (neg == 0)) {
5242  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
5245  }
5246  else
5248  break;
5249 
5250  case 'u':
5253  (neg == 0)) {
5254  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
5257  }
5258  else
5260  break;
5261 
5262  case 'd':
5264  (neg == 0)) {
5265  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
5266  }
5267  else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY) &&
5268  (neg == 0)) {
5269  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
5272  }
5273  else
5275  break;
5276 
5277  case 'l':
5278  if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) && (neg == 0)) {
5279  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
5280  }
5281  else
5283  break;
5284 
5285  default:
5287  }
5288 
5289  if (c == ')') {
5290  *np = node_new_option(option);
5292  *src = p;
5293  return 2; /* option only */
5294  }
5295  else if (c == ':') {
      /* Parse the sub-expression with the new options in effect, restoring
       * env->option afterwards on both the success and the error path. */
5296  OnigOptionType prev = env->option;
5297 
5298  env->option = option;
5299  r = fetch_token(tok, &p, end, env);
5300  if (r < 0) {
5301  env->option = prev;
5302  return r;
5303  }
5304  r = parse_subexp(&target, tok, term, &p, end, env);
5305  env->option = prev;
5306  if (r < 0) return r;
5307  *np = node_new_option(option);
5309  NENCLOSE(*np)->target = target;
5310  *src = p;
5311  return 0;
5312  }
5313 
5314  if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
5315  PFETCH(c);
5316  }
5317  }
5318  break;
5319 
5320  default:
5322  }
5323  }
5324  else {
      /* No '?': an ordinary group.  NOTE(review): the condition that selects
       * `goto group` is missing from the listing. */
5326  goto group;
5327 
5328  *np = node_new_enclose_memory(env->option, 0);
5330  num = scan_env_add_mem_entry(env);
5331  if (num < 0) return num;
5332  NENCLOSE(*np)->regnum = num;
5333  }
5334 
      /* Common tail: parse the body and attach it as the node's target. */
5336  r = fetch_token(tok, &p, end, env);
5337  if (r < 0) return r;
5338  r = parse_subexp(&target, tok, term, &p, end, env);
5339  if (r < 0) {
5340  onig_node_free(target);
5341  return r;
5342  }
5343 
5344  if (NTYPE(*np) == NT_ANCHOR)
5345  NANCHOR(*np)->target = target;
5346  else {
5347  NENCLOSE(*np)->target = target;
5348  if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) {
5349  /* Don't move this to previous of parse_subexp() */
5350  r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);
5351  if (r != 0) return r;
5352  }
5353  else if (NENCLOSE(*np)->type == ENCLOSE_CONDITION) {
5354  if (NTYPE(target) != NT_ALT) {
5355  /* convert (?(cond)yes) to (?(cond)yes|empty) */
5356  work1 = node_new_empty();
5357  if (IS_NULL(work1)) goto err;
5358  work2 = onig_node_new_alt(work1, NULL_NODE);
5359  if (IS_NULL(work2)) goto err;
5360  work1 = onig_node_new_alt(target, work2);
5361  if (IS_NULL(work1)) goto err;
5362  NENCLOSE(*np)->target = work1;
5363  }
5364  }
5365  }
5366 
5367  *src = p;
5368  return 0;
5369 
5370  err:
      /* Allocation failure while building the (cond)yes|empty alternation. */
5371  onig_node_free(work1);
5372  onig_node_free(work2);
5373  onig_node_free(*np);
5374  *np = NULL;
5375  return ONIGERR_MEMORY;
5376 }
5377 
/* Spellings of the common quantifiers, used when building the nested-repeat
 * warning messages in set_quantifier(); indexed by the value returned from
 * popular_quantifier_num(). */
static const char* const PopularQStr[] = {
  "?",
  "*",
  "+",
  "??",
  "*?",
  "+?"
};
5381 
/* Replacement text quoted in the "was replaced with" warning of
 * set_quantifier(); indexed by an entry of ReduceTypeTable. */
static const char* const ReduceQStr[] = {
  "",
  "",
  "*",
  "*?",
  "??",
  "+ and ??",
  "+? and ?"
};
5385 
/* Attach `target` to the quantifier node `qnode`.
 * Returns:
 *   1 - the quantifier is exactly {1,1}; nothing is attached
 *   2 - target is a string node (and `group` is 0) that could be split: the
 *       node returned by str_node_split_last_char() becomes the target
 *   0 - otherwise; target is attached, or, for a quantifier nested directly
 *       on another "popular" quantifier, the pair is merged by
 *       onig_reduce_nested_quantifier()
 * NOTE(review): one line of the warning condition is missing from this
 * listing. */
5386 static int
5387 set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
5388 {
5389  QtfrNode* qn;
5390 
5391  qn = NQTFR(qnode);
5392  if (qn->lower == 1 && qn->upper == 1) {
5393  return 1;
5394  }
5395 
5396  switch (NTYPE(target)) {
5397  case NT_STR:
5398  if (! group) {
5399  StrNode* sn = NSTR(target);
5400  if (str_node_can_be_split(sn, env->enc)) {
5401  Node* n = str_node_split_last_char(sn, env->enc);
5402  if (IS_NOT_NULL(n)) {
5403  qn->target = n;
5404  return 2;
5405  }
5406  }
5407  }
5408  break;
5409 
5410  case NT_QTFR:
5411  { /* check redundant double repeat. */
5412  /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
5413  QtfrNode* qnt = NQTFR(target);
5414  int nestq_num = popular_quantifier_num(qn);
5415  int targetq_num = popular_quantifier_num(qnt);
5416 
5417 #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
5418  if (nestq_num >= 0 && targetq_num >= 0 &&
5420  switch (ReduceTypeTable[targetq_num][nestq_num]) {
5421  case RQ_ASIS:
5422  break;
5423 
5424  case RQ_DEL:
5425  if (onig_warn != onig_null_warn) {
5426  onig_syntax_warn(env, "regular expression has redundant nested repeat operator '%s'",
5427  PopularQStr[targetq_num]);
5428  }
5429  goto warn_exit;
5430  break;
5431 
5432  default:
5433  if (onig_warn != onig_null_warn) {
5434  onig_syntax_warn(env, "nested repeat operator '%s' and '%s' was replaced with '%s' in regular expression",
5435  PopularQStr[targetq_num], PopularQStr[nestq_num],
5436  ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
5437  }
5438  goto warn_exit;
5439  break;
5440  }
5441  }
5442 
5443  warn_exit:
5444 #endif
      /* Both quantifiers are popular forms: merge them and leave through
       * q_exit without assigning qn->target here. */
5445  if (targetq_num >= 0) {
5446  if (nestq_num >= 0) {
5447  onig_reduce_nested_quantifier(qnode, target);
5448  goto q_exit;
5449  }
5450  else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
5451  /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
5452  if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
5453  qn->upper = (qn->lower == 0 ? 1 : qn->lower);
5454  }
5455  }
5456  }
5457  }
5458  break;
5459 
5460  default:
5461  break;
5462  }
5463 
5464  qn->target = target;
5465  q_exit:
5466  return 0;
5467 }
5468 
5469 
5470 #ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
5471 static int
5472 clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
5473 {
5474  BBuf *tbuf;
5475  int r;
5476 
5477  if (IS_NCCLASS_NOT(cc)) {
5478  bitset_invert(cc->bs);
5479 
5480  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
5481  r = not_code_range_buf(enc, cc->mbuf, &tbuf);
5482  if (r != 0) return r;
5483 
5484  bbuf_free(cc->mbuf);
5485  cc->mbuf = tbuf;
5486  }
5487 
5488  NCCLASS_CLEAR_NOT(cc);
5489  }
5490 
5491  return 0;
5492 }
5493 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
5494 
5495 typedef struct {
5502 
/* Callback invoked with one case-fold mapping: `from` folds to the sequence
 * to[0..to_len-1]; `arg` is an IApplyCaseFoldArg*.
 *  - to_len == 1: when `from` is effectively a member of cc (taking the NOT
 *    flag into account) and add_flag allows it, *to is added to cc.
 *  - to_len > 1: when `from` is in cc, a string node holding the folded
 *    sequence is appended to the alternation list through iarg->ptail.
 * Returns 0 on success or a negative error code.
 * NOTE(review): this listing has gaps (the declaration of `buf` and a
 * preprocessor line are absent). */
5503 static int
5504 i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
5505  int to_len, void* arg)
5506 {
5507  IApplyCaseFoldArg* iarg;
5508  ScanEnv* env;
5509  CClassNode* cc;
5510  CClassNode* asc_cc;
5511  BitSetRef bs;
5512  int add_flag, r;
5513 
5514  iarg = (IApplyCaseFoldArg* )arg;
5515  env = iarg->env;
5516  cc = iarg->cc;
5517  asc_cc = iarg->asc_cc;
5518  bs = cc->bs;
5519 
      /* add_flag decides whether the folded code point may be added:
       * never without asc_cc; always when `from` and *to are both ASCII or
       * both non-ASCII; otherwise only when `from` is effectively in asc_cc. */
5520  if (IS_NULL(asc_cc)) {
5521  add_flag = 0;
5522  }
5523  else if (ONIGENC_IS_ASCII_CODE(from) == ONIGENC_IS_ASCII_CODE(*to)) {
5524  add_flag = 1;
5525  }
5526  else {
5527  add_flag = onig_is_code_in_cc(env->enc, from, asc_cc);
5528  if (IS_NCCLASS_NOT(asc_cc))
5529  add_flag = !add_flag;
5530  }
5531 
5532  if (to_len == 1) {
5533  int is_in = onig_is_code_in_cc(env->enc, from, cc);
5534 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
5535  if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
5536  (is_in == 0 && IS_NCCLASS_NOT(cc))) {
5537  if (add_flag) {
      /* Code points that do not fit the bitset go to the range buffer. */
5538  if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
5539  r = add_code_range0(&(cc->mbuf), env, *to, *to, 0);
5540  if (r < 0) return r;
5541  }
5542  else {
5543  BITSET_SET_BIT(bs, *to);
5544  }
5545  }
5546  }
5547 #else
5548  if (is_in != 0) {
5549  if (add_flag) {
5550  if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
5551  if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
5552  r = add_code_range0(&(cc->mbuf), env, *to, *to, 0);
5553  if (r < 0) return r;
5554  }
5555  else {
5556  if (IS_NCCLASS_NOT(cc)) {
5557  BITSET_CLEAR_BIT(bs, *to);
5558  }
5559  else {
5560  BITSET_SET_BIT(bs, *to);
5561  }
5562  }
5563  }
5564  }
5565 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
5566  }
5567  else {
      /* NOTE(review): this `r` shadows the function-level `r`. */
5568  int r, i, len;
5570  Node *snode = NULL_NODE;
5571 
5572  if (onig_is_code_in_cc(env->enc, from, cc)
5574  && !IS_NCCLASS_NOT(cc)
5575 #endif
5576  ) {
      /* Encode each folded code point and concatenate them into one string
       * node. */
5577  for (i = 0; i < to_len; i++) {
5578  len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
5579  if (i == 0) {
5580  snode = onig_node_new_str(buf, buf + len);
5581  CHECK_NULL_RETURN_MEMERR(snode);
5582 
5583  /* char-class expanded multi-char only
5584  compare with string folded at match time. */
5585  NSTRING_SET_AMBIG(snode);
5586  }
5587  else {
5588  r = onig_node_str_cat(snode, buf, buf + len);
5589  if (r < 0) {
5590  onig_node_free(snode);
5591  return r;
5592  }
5593  }
5594  }
5595 
      /* NOTE(review): if onig_node_new_alt() fails, snode is not freed on
       * the return taken by CHECK_NULL_RETURN_MEMERR -- confirm. */
5596  *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
5597  CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));
5598  iarg->ptail = &(NCDR((*(iarg->ptail))));
5599  }
5600  }
5601 
5602  return 0;
5603 }
5604 
5605 static int
5606 cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env)
5607 {
5608  int r;
5609  IApplyCaseFoldArg iarg;
5610 
5611  iarg.env = env;
5612  iarg.cc = cc;
5613  iarg.asc_cc = asc_cc;
5614  iarg.alt_root = NULL_NODE;
5615  iarg.ptail = &(iarg.alt_root);
5616 
5618  i_apply_case_fold, &iarg);
5619  if (r != 0) {
5620  onig_node_free(iarg.alt_root);
5621  return r;
5622  }
5623  if (IS_NOT_NULL(iarg.alt_root)) {
5624  Node* work = onig_node_new_alt(*np, iarg.alt_root);
5625  if (IS_NULL(work)) {
5626  onig_node_free(iarg.alt_root);
5627  return ONIGERR_MEMORY;
5628  }
5629  *np = work;
5630  }
5631  return r;
5632 }
5633 
5634 static int
5635 node_linebreak(Node** np, ScanEnv* env)
5636 {
5637  /* same as (?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}]) */
5638  Node* left = NULL;
5639  Node* right = NULL;
5640  Node* target1 = NULL;
5641  Node* target2 = NULL;
5642  CClassNode* cc;
5643  int num1, num2, r;
5645 
5646  /* \x0D\x0A */
5647  num1 = ONIGENC_CODE_TO_MBC(env->enc, 0x0D, buf);
5648  if (num1 < 0) return num1;
5649  num2 = ONIGENC_CODE_TO_MBC(env->enc, 0x0A, buf + num1);
5650  if (num2 < 0) return num2;
5651  left = node_new_str_raw(buf, buf + num1 + num2);
5652  if (IS_NULL(left)) goto err;
5653 
5654  /* [\x0A-\x0D] or [\x0A-\x0D\x{85}\x{2028}\x{2029}] */
5655  right = node_new_cclass();
5656  if (IS_NULL(right)) goto err;
5657  cc = NCCLASS(right);
5658  if (ONIGENC_MBC_MINLEN(env->enc) > 1) {
5659  r = add_code_range(&(cc->mbuf), env, 0x0A, 0x0D);
5660  if (r != 0) goto err;
5661  }
5662  else {
5663  bitset_set_range(env, cc->bs, 0x0A, 0x0D);
5664  }
5665 
5666  /* TODO: move this block to enc/unicode.c */
5667  if (ONIGENC_IS_UNICODE(env->enc)) {
5668  /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */
5669  r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);
5670  if (r != 0) goto err;
5671  r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);
5672  if (r != 0) goto err;
5673  }
5674 
5675  /* ...|... */
5676  target1 = onig_node_new_alt(right, NULL_NODE);
5677  if (IS_NULL(target1)) goto err;
5678  right = NULL;
5679  target2 = onig_node_new_alt(left, target1);
5680  if (IS_NULL(target2)) goto err;
5681  left = NULL;
5682  target1 = NULL;
5683 
5684  /* (?>...) */
5685  *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
5686  if (IS_NULL(*np)) goto err;
5687  NENCLOSE(*np)->target = target2;
5688  return ONIG_NORMAL;
5689 
5690  err:
5691  onig_node_free(left);
5692  onig_node_free(right);
5693  onig_node_free(target1);
5694  onig_node_free(target2);
5695  return ONIGERR_MEMORY;
5696 }
5697 
5698 static int
5699 propname2ctype(ScanEnv* env, const char* propname)
5700 {
5701  UChar* name = (UChar* )propname;
5703  name, name + strlen(propname));
5704  return ctype;
5705 }
5706 
5707 static int
5708 node_extended_grapheme_cluster(Node** np, ScanEnv* env)
5709 {
5710  Node* tmp = NULL;
5711  Node* np1 = NULL;
5712  Node* list = NULL;
5713  Node* list2 = NULL;
5714  Node* alt = NULL;
5715  Node* alt2 = NULL;
5716  BBuf *pbuf1 = NULL;
5717  int r = 0;
5718  int num1;
5720  OnigOptionType option;
5721 
5722 #ifdef USE_UNICODE_PROPERTIES
5723  if (ONIGENC_IS_UNICODE(env->enc)) {
5724  /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */
5725  CClassNode* cc;
5726  OnigCodePoint sb_out = (ONIGENC_MBC_MINLEN(env->enc) > 1) ? 0x00 : 0x80;
5727  int extend = propname2ctype(env, "Grapheme_Cluster_Break=Extend");
5728 
5729  /* Prepend*
5730  * ( RI-sequence | Hangul-Syllable | !Control )
5731  * ( Grapheme_Extend | SpacingMark )* */
5732 
5733  /* ( Grapheme_Extend | SpacingMark )* */
5734  np1 = node_new_cclass();
5735  if (IS_NULL(np1)) goto err;
5736  cc = NCCLASS(np1);
5737  r = add_ctype_to_cc(cc, extend, 0, 0, env);
5738  if (r != 0) goto err;
5739  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=SpacingMark"), 0, 0, env);
5740  if (r != 0) goto err;
5741  r = add_code_range(&(cc->mbuf), env, 0x200D, 0x200D);
5742  if (r != 0) goto err;
5743 
5744  tmp = node_new_quantifier(0, REPEAT_INFINITE, 0);
5745  if (IS_NULL(tmp)) goto err;
5746  NQTFR(tmp)->target = np1;
5747  np1 = tmp;
5748 
5749  tmp = node_new_list(np1, NULL_NODE);
5750  if (IS_NULL(tmp)) goto err;
5751  list = tmp;
5752  np1 = NULL;
5753 
5754  /* ( RI-sequence | Hangul-Syllable | !Control ) */
5755  /* !Control */
5756  np1 = node_new_cclass();
5757  if (IS_NULL(np1)) goto err;
5758  cc = NCCLASS(np1);
5759  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=Control"), 1, 0, env);
5760  if (r != 0) goto err;
5761  if (ONIGENC_MBC_MINLEN(env->enc) > 1) {
5762  BBuf *pbuf2 = NULL;
5763  r = add_code_range(&pbuf1, env, 0x0a, 0x0a);
5764  if (r != 0) goto err;
5765  r = add_code_range(&pbuf1, env, 0x0d, 0x0d);
5766  if (r != 0) goto err;
5767  r = and_code_range_buf(cc->mbuf, 0, pbuf1, 1, &pbuf2, env);
5768  if (r != 0) {
5769  bbuf_free(pbuf2);
5770  goto err;
5771  }
5772  bbuf_free(pbuf1);
5773  pbuf1 = NULL;
5774  bbuf_free(cc->mbuf);
5775  cc->mbuf = pbuf2;
5776  }
5777  else {
5778  BITSET_CLEAR_BIT(cc->bs, 0x0a);
5779  BITSET_CLEAR_BIT(cc->bs, 0x0d);
5780  }
5781 
5782  tmp = onig_node_new_alt(np1, NULL_NODE);
5783  if (IS_NULL(tmp)) goto err;
5784  alt = tmp;
5785  np1 = NULL;
5786 
5787  /* Hangul-Syllable
5788  * := L* V+ T*
5789  * | L* LV V* T*
5790  * | L* LVT T*
5791  * | L+
5792  * | T+ */
5793 
5794  /* T+ */
5795  np1 = node_new_cclass();
5796  if (IS_NULL(np1)) goto err;
5797  cc = NCCLASS(np1);
5798  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=T"), 0, 0, env);
5799  if (r != 0) goto err;
5800 
5801  tmp = node_new_quantifier(1, REPEAT_INFINITE, 0);
5802  if (IS_NULL(tmp)) goto err;
5803  NQTFR(tmp)->target = np1;
5804  np1 = tmp;
5805 
5806  tmp = onig_node_new_alt(np1, alt);
5807  if (IS_NULL(tmp)) goto err;
5808  alt = tmp;
5809  np1 = NULL;
5810 
5811  /* L+ */
5812  np1 = node_new_cclass();
5813  if (IS_NULL(np1)) goto err;
5814  cc = NCCLASS(np1);
5815  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=L"), 0, 0, env);
5816  if (r != 0) goto err;
5817 
5818  tmp = node_new_quantifier(1, REPEAT_INFINITE, 0);
5819  if (IS_NULL(tmp)) goto err;
5820  NQTFR(tmp)->target = np1;
5821  np1 = tmp;
5822 
5823  tmp = onig_node_new_alt(np1, alt);
5824  if (IS_NULL(tmp)) goto err;
5825  alt = tmp;
5826  np1 = NULL;
5827 
5828  /* L* LVT T* */
5829  np1 = node_new_cclass();
5830  if (IS_NULL(np1)) goto err;
5831  cc = NCCLASS(np1);
5832  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=T"), 0, 0, env);
5833  if (r != 0) goto err;
5834 
5835  tmp = node_new_quantifier(0, REPEAT_INFINITE, 0);
5836  if (IS_NULL(tmp)) goto err;
5837  NQTFR(tmp)->target = np1;
5838  np1 = tmp;
5839 
5840  tmp = node_new_list(np1, NULL_NODE);
5841  if (IS_NULL(tmp)) goto err;
5842  list2 = tmp;
5843  np1 = NULL;
5844 
5845  np1 = node_new_cclass();
5846  if (IS_NULL(np1)) goto err;
5847  cc = NCCLASS(np1);
5848  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=LVT"), 0, 0, env);
5849  if (r != 0) goto err;
5850 
5851  tmp = node_new_list(np1, list2);
5852  if (IS_NULL(tmp)) goto err;
5853  list2 = tmp;
5854  np1 = NULL;
5855 
5856  np1 = node_new_cclass();
5857  if (IS_NULL(np1)) goto err;
5858  cc = NCCLASS(np1);
5859  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=L"), 0, 0, env);
5860  if (r != 0) goto err;
5861 
5862  tmp = node_new_quantifier(0, REPEAT_INFINITE, 0);
5863  if (IS_NULL(tmp)) goto err;
5864  NQTFR(tmp)->target = np1;
5865  np1 = tmp;
5866 
5867  tmp = node_new_list(np1, list2);
5868  if (IS_NULL(tmp)) goto err;
5869  list2 = tmp;
5870  np1 = NULL;
5871 
5872  tmp = onig_node_new_alt(list2, alt);
5873  if (IS_NULL(tmp)) goto err;
5874  alt = tmp;
5875  list2 = NULL;
5876 
5877  /* L* LV V* T* */
5878  np1 = node_new_cclass();
5879  if (IS_NULL(np1)) goto err;
5880  cc = NCCLASS(np1);
5881  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=T"), 0, 0, env);
5882  if (r != 0) goto err;
5883 
5884  tmp = node_new_quantifier(0, REPEAT_INFINITE, 0);
5885  if (IS_NULL(tmp)) goto err;
5886  NQTFR(tmp)->target = np1;
5887  np1 = tmp;
5888 
5889  tmp = node_new_list(np1, NULL_NODE);
5890  if (IS_NULL(tmp)) goto err;
5891  list2 = tmp;
5892  np1 = NULL;
5893 
5894  np1 = node_new_cclass();
5895  if (IS_NULL(np1)) goto err;
5896  cc = NCCLASS(np1);
5897  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=V"), 0, 0, env);
5898  if (r != 0) goto err;
5899 
5900  tmp = node_new_quantifier(0, REPEAT_INFINITE, 0);
5901  if (IS_NULL(tmp)) goto err;
5902  NQTFR(tmp)->target = np1;
5903  np1 = tmp;
5904 
5905  tmp = node_new_list(np1, list2);
5906  if (IS_NULL(tmp)) goto err;
5907  list2 = tmp;
5908  np1 = NULL;
5909 
5910  np1 = node_new_cclass();
5911  if (IS_NULL(np1)) goto err;
5912  cc = NCCLASS(np1);
5913  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=LV"), 0, 0, env);
5914  if (r != 0) goto err;
5915 
5916  tmp = node_new_list(np1, list2);
5917  if (IS_NULL(tmp)) goto err;
5918  list2 = tmp;
5919  np1 = NULL;
5920 
5921  np1 = node_new_cclass();
5922  if (IS_NULL(np1)) goto err;
5923  cc = NCCLASS(np1);
5924  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=L"), 0, 0, env);
5925  if (r != 0) goto err;
5926 
5927  tmp = node_new_quantifier(0, REPEAT_INFINITE, 0);
5928  if (IS_NULL(tmp)) goto err;
5929  NQTFR(tmp)->target = np1;
5930  np1 = tmp;
5931 
5932  tmp = node_new_list(np1, list2);
5933  if (IS_NULL(tmp)) goto err;
5934  list2 = tmp;
5935  np1 = NULL;
5936 
5937  tmp = onig_node_new_alt(list2, alt);
5938  if (IS_NULL(tmp)) goto err;
5939  alt = tmp;
5940  list2 = NULL;
5941 
5942  /* L* V+ T* */
5943  np1 = node_new_cclass();
5944  if (IS_NULL(np1)) goto err;
5945  cc = NCCLASS(np1);
5946  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=T"), 0, 0, env);
5947  if (r != 0) goto err;
5948 
5949  tmp = node_new_quantifier(0, REPEAT_INFINITE, 0);
5950  if (IS_NULL(tmp)) goto err;
5951  NQTFR(tmp)->target = np1;
5952  np1 = tmp;
5953 
5954  tmp = node_new_list(np1, NULL_NODE);
5955  if (IS_NULL(tmp)) goto err;
5956  list2 = tmp;
5957  np1 = NULL;
5958 
5959  np1 = node_new_cclass();
5960  if (IS_NULL(np1)) goto err;
5961  cc = NCCLASS(np1);
5962  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=V"), 0, 0, env);
5963  if (r != 0) goto err;
5964 
5965  tmp = node_new_quantifier(1, REPEAT_INFINITE, 0);
5966  if (IS_NULL(tmp)) goto err;
5967  NQTFR(tmp)->target = np1;
5968  np1 = tmp;
5969 
5970  tmp = node_new_list(np1, list2);
5971  if (IS_NULL(tmp)) goto err;
5972  list2 = tmp;
5973  np1 = NULL;
5974 
5975  np1 = node_new_cclass();
5976  if (IS_NULL(np1)) goto err;
5977  cc = NCCLASS(np1);
5978  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=L"), 0, 0, env);
5979  if (r != 0) goto err;
5980 
5981  tmp = node_new_quantifier(0, REPEAT_INFINITE, 0);
5982  if (IS_NULL(tmp)) goto err;
5983  NQTFR(tmp)->target = np1;
5984  np1 = tmp;
5985 
5986  tmp = node_new_list(np1, list2);
5987  if (IS_NULL(tmp)) goto err;
5988  list2 = tmp;
5989  np1 = NULL;
5990 
5991  tmp = onig_node_new_alt(list2, alt);
5992  if (IS_NULL(tmp)) goto err;
5993  alt = tmp;
5994  list2 = NULL;
5995 
5996  /* Emoji sequence := (E_Base | EBG) Extend* E_Modifier?
5997  * (ZWJ (Glue_After_Zwj | EBG Extend* E_Modifier?) )* */
5998 
5999  /* ZWJ (Glue_After_Zwj | E_Base_GAZ Extend* E_Modifier?) */
6000  np1 = node_new_cclass();
6001  if (IS_NULL(np1)) goto err;
6002  cc = NCCLASS(np1);
6003  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Modifier"), 0, 0, env);
6004  if (r != 0) goto err;
6005 
6006  tmp = node_new_quantifier(0, 1, 0);
6007  if (IS_NULL(tmp)) goto err;
6008  NQTFR(tmp)->target = np1;
6009  np1 = tmp;
6010 
6011  tmp = node_new_list(np1, NULL_NODE);
6012  if (IS_NULL(tmp)) goto err;
6013  list2 = tmp;
6014  np1 = NULL;
6015 
6016  np1 = node_new_cclass();
6017  if (IS_NULL(np1)) goto err;
6018  cc = NCCLASS(np1);
6019  r = add_ctype_to_cc(cc, extend, 0, 0, env);
6020  if (r != 0) goto err;
6021 
6022  tmp = node_new_quantifier(0, REPEAT_INFINITE, 0);
6023  if (IS_NULL(tmp)) goto err;
6024  NQTFR(tmp)->target = np1;
6025  np1 = tmp;
6026 
6027  tmp = node_new_list(np1, list2);
6028  if (IS_NULL(tmp)) goto err;
6029  list2 = tmp;
6030  np1 = NULL;
6031 
6032  np1 = node_new_cclass();
6033  if (IS_NULL(np1)) goto err;
6034  cc = NCCLASS(np1);
6035  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Base_GAZ"), 0, 0, env);
6036  if (r != 0) goto err;
6037 
6038  tmp = node_new_list(np1, list2);
6039  if (IS_NULL(tmp)) goto err;
6040  list2 = tmp;
6041  np1 = NULL;
6042 
6043  tmp = onig_node_new_alt(list2, NULL_NODE);
6044  if (IS_NULL(tmp)) goto err;
6045  alt2 = tmp;
6046  list2 = NULL;
6047 
6048  /* Glue_After_Zwj */
6049  np1 = node_new_cclass();
6050  if (IS_NULL(np1)) goto err;
6051  cc = NCCLASS(np1);
6052  r = add_ctype_to_cc(cc, extend, 0, 0, env);
6053  if (r != 0) goto err;
6054 
6055  tmp = node_new_quantifier(0, REPEAT_INFINITE, 0);
6056  if (IS_NULL(tmp)) goto err;
6057  NQTFR(tmp)->target = np1;
6058  np1 = tmp;
6059 
6060  tmp = node_new_list(np1, NULL_NODE);
6061  if (IS_NULL(tmp)) goto err;
6062  list2 = tmp;
6063  np1 = NULL;
6064 
6065  np1 = node_new_cclass();
6066  if (IS_NULL(np1)) goto err;
6067  cc = NCCLASS(np1);
6068  {
6069  static const OnigCodePoint ranges[] = {
6070  13,
6071  0x1F308, 0x1F308,
6072  0x1F33E, 0x1F33E,
6073  0x1F373, 0x1F373,
6074  0x1F393, 0x1F393,
6075  0x1F3A4, 0x1F3A4,
6076  0x1F3A8, 0x1F3A8,
6077  0x1F3EB, 0x1F3EB,
6078  0x1F3ED, 0x1F3ED,
6079  0x1F4BB, 0x1F4BC,
6080  0x1F527, 0x1F527,
6081  0x1F52C, 0x1F52C,
6082  0x1F680, 0x1F680,
6083  0x1F692, 0x1F692,
6084  };
6085  r = add_ctype_to_cc_by_range(cc, -1, 0, env, sb_out, ranges);
6086  if (r != 0) goto err;
6087  }
6088  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=Glue_After_Zwj"), 0, 0, env);
6089  if (r != 0) goto err;
6090 
6091  tmp = node_new_list(np1, list2);
6092  if (IS_NULL(tmp)) goto err;
6093  list2 = tmp;
6094  np1 = NULL;
6095 
6096  tmp = onig_node_new_alt(list2, alt2);
6097  if (IS_NULL(tmp)) goto err;
6098  alt2 = tmp;
6099  list2 = NULL;
6100 
6101  /* Emoji variation sequence
6102  * http://unicode.org/Public/emoji/4.0/emoji-zwj-sequences.txt
6103  */
6104  r = ONIGENC_CODE_TO_MBC(env->enc, 0xfe0f, buf);
6105  if (r < 0) goto err;
6106  np1 = node_new_str_raw(buf, buf + r);
6107  if (IS_NULL(np1)) goto err;
6108 
6109  tmp = node_new_quantifier(0, 1, 0);
6110  if (IS_NULL(tmp)) goto err;
6111  NQTFR(tmp)->target = np1;
6112  np1 = tmp;
6113 
6114  tmp = node_new_list(np1, NULL_NODE);
6115  if (IS_NULL(tmp)) goto err;
6116  list2 = tmp;
6117  np1 = NULL;
6118 
6119  np1 = node_new_cclass();
6120  if (IS_NULL(np1)) goto err;
6121  cc = NCCLASS(np1);
6122  {
6123  static const OnigCodePoint ranges[] = {
6124  4,
6125  0x2640, 0x2640,
6126  0x2642, 0x2642,
6127  0x2695, 0x2696,
6128  0x2708, 0x2708,
6129  };
6130  r = add_ctype_to_cc_by_range(cc, -1, 0, env, sb_out, ranges);
6131  if (r != 0) goto err;
6132  }
6133 
6134  tmp = node_new_list(np1, list2);
6135  if (IS_NULL(tmp)) goto err;
6136  list2 = tmp;
6137  np1 = NULL;
6138 
6139  tmp = onig_node_new_alt(list2, alt2);
6140  if (IS_NULL(tmp)) goto err;
6141  alt2 = tmp;
6142  list2 = NULL;
6143 
6144  tmp = node_new_list(alt2, NULL_NODE);
6145  if (IS_NULL(tmp)) goto err;
6146  list2 = tmp;
6147  alt2 = NULL;
6148 
6149  /* ZWJ */
6150  r = ONIGENC_CODE_TO_MBC(env->enc, 0x200D, buf);
6151  if (r < 0) goto err;
6152  np1 = node_new_str_raw(buf, buf + r);
6153  if (IS_NULL(np1)) goto err;
6154 
6155  tmp = node_new_list(np1, list2);
6156  if (IS_NULL(tmp)) goto err;
6157  list2 = tmp;
6158  np1 = NULL;
6159 
6160  tmp = node_new_quantifier(0, REPEAT_INFINITE, 0);
6161  if (IS_NULL(tmp)) goto err;
6162  NQTFR(tmp)->target = list2;
6163  np1 = tmp;
6164  list2 = NULL;
6165 
6166  tmp = node_new_list(np1, NULL_NODE);
6167  if (IS_NULL(tmp)) goto err;
6168  list2 = tmp;
6169  np1 = NULL;
6170 
6171  /* E_Modifier? */
6172  np1 = node_new_cclass();
6173  if (IS_NULL(np1)) goto err;
6174  cc = NCCLASS(np1);
6175  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Modifier"), 0, 0, env);
6176  if (r != 0) goto err;
6177 
6178  tmp = node_new_quantifier(0, 1, 0);
6179  if (IS_NULL(tmp)) goto err;
6180  NQTFR(tmp)->target = np1;
6181  np1 = tmp;
6182 
6183  tmp = node_new_list(np1, list2);
6184  if (IS_NULL(tmp)) goto err;
6185  list2 = tmp;
6186  np1 = NULL;
6187 
6188  /* Extend* */
6189  np1 = node_new_cclass();
6190  if (IS_NULL(np1)) goto err;
6191  cc = NCCLASS(np1);
6192  r = add_ctype_to_cc(cc, extend, 0, 0, env);
6193  if (r != 0) goto err;
6194 
6195  tmp = node_new_quantifier(0, REPEAT_INFINITE, 0);
6196  if (IS_NULL(tmp)) goto err;
6197  NQTFR(tmp)->target = np1;
6198  np1 = tmp;
6199 
6200  tmp = node_new_list(np1, list2);
6201  if (IS_NULL(tmp)) goto err;
6202  list2 = tmp;
6203  np1 = NULL;
6204 
6205  /* (E_Base | EBG) */
6206  np1 = node_new_cclass();
6207  if (IS_NULL(np1)) goto err;
6208  cc = NCCLASS(np1);
6209  {
6210  static const OnigCodePoint ranges[] = {
6211  8,
6212  0x1F3C2, 0x1F3C2,
6213  0x1F3C7, 0x1F3C7,
6214  0x1F3CC, 0x1F3CC,
6215  0x1F3F3, 0x1F3F3,
6216  0x1F441, 0x1F441,
6217  0x1F46F, 0x1F46F,
6218  0x1F574, 0x1F574,
6219  0x1F6CC, 0x1F6CC,
6220  };
6221  r = add_ctype_to_cc_by_range(cc, -1, 0, env, sb_out, ranges);
6222  if (r != 0) goto err;
6223  }
6224  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Base"), 0, 0, env);
6225  if (r != 0) goto err;
6226  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Base_GAZ"), 0, 0, env);
6227  if (r != 0) goto err;
6228 
6229  tmp = node_new_list(np1, list2);
6230  if (IS_NULL(tmp)) goto err;
6231  list2 = tmp;
6232  np1 = NULL;
6233 
6234  tmp = onig_node_new_alt(list2, alt);
6235  if (IS_NULL(tmp)) goto err;
6236  alt = tmp;
6237  list2 = NULL;
6238 
6239  /* ZWJ (E_Base_GAZ | Glue_After_Zwj) E_Modifier? */
6240  /* a sequence starting with ZWJ seems artificial, but GraphemeBreakTest
6241  * has such examples.
6242  * http://www.unicode.org/Public/9.0.0/ucd/auxiliary/GraphemeBreakTest.html
6243  */
6244  np1 = node_new_cclass();
6245  if (IS_NULL(np1)) goto err;
6246  cc = NCCLASS(np1);
6247  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Modifier"), 0, 0, env);
6248  if (r != 0) goto err;
6249 
6250  tmp = node_new_quantifier(0, 1, 0);
6251  if (IS_NULL(tmp)) goto err;
6252  NQTFR(tmp)->target = np1;
6253  np1 = tmp;
6254 
6255  tmp = node_new_list(np1, NULL_NODE);
6256  if (IS_NULL(tmp)) goto err;
6257  list2 = tmp;
6258  np1 = NULL;
6259 
6260  np1 = node_new_cclass();
6261  if (IS_NULL(np1)) goto err;
6262  cc = NCCLASS(np1);
6263  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=Glue_After_Zwj"), 0, 0, env);
6264  if (r != 0) goto err;
6265  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Base_GAZ"), 0, 0, env);
6266  if (r != 0) goto err;
6267 
6268  tmp = node_new_list(np1, list2);
6269  if (IS_NULL(tmp)) goto err;
6270  list2 = tmp;
6271  np1 = NULL;
6272 
6273  r = ONIGENC_CODE_TO_MBC(env->enc, 0x200D, buf);
6274  if (r < 0) goto err;
6275  np1 = node_new_str_raw(buf, buf + r);
6276  if (IS_NULL(np1)) goto err;
6277 
6278  tmp = node_new_list(np1, list2);
6279  if (IS_NULL(tmp)) goto err;
6280  list2 = tmp;
6281  np1 = NULL;
6282 
6283  tmp = onig_node_new_alt(list2, alt);
6284  if (IS_NULL(tmp)) goto err;
6285  alt = tmp;
6286  list2 = NULL;
6287 
6288  /* RI-Sequence := Regional_Indicator{2} */
6289  np1 = node_new_cclass();
6290  if (IS_NULL(np1)) goto err;
6291  cc = NCCLASS(np1);
6292  r = add_code_range(&(cc->mbuf), env, 0x1F1E6, 0x1F1FF);
6293  if (r != 0) goto err;
6294 
6295  tmp = node_new_quantifier(2, 2, 0);
6296  if (IS_NULL(tmp)) goto err;
6297  NQTFR(tmp)->target = np1;
6298  np1 = tmp;
6299 
6300  tmp = node_new_list(np1, list2);
6301  if (IS_NULL(tmp)) goto err;
6302  list2 = tmp;
6303  np1 = NULL;
6304 
6305  tmp = onig_node_new_alt(list2, alt);
6306  if (IS_NULL(tmp)) goto err;
6307  alt = tmp;
6308  list2 = NULL;
6309 
6310  tmp = node_new_list(alt, list);
6311  if (IS_NULL(tmp)) goto err;
6312  list = tmp;
6313  alt = NULL;
6314 
6315  /* Prepend* */
6316  np1 = node_new_cclass();
6317  if (IS_NULL(np1)) goto err;
6318  cc = NCCLASS(np1);
6319  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=Prepend"), 0, 0, env);
6320  if (r != 0) goto err;
6321 
6322  tmp = node_new_quantifier(0, REPEAT_INFINITE, 0);
6323  if (IS_NULL(tmp)) goto err;
6324  NQTFR(tmp)->target = np1;
6325  np1 = tmp;
6326 
6327  tmp = node_new_list(np1, list);
6328  if (IS_NULL(tmp)) goto err;
6329  list = tmp;
6330  np1 = NULL;
6331 
6332  /* PerlSyntax: (?s:.), RubySyntax: (?m:.) */
6333  np1 = node_new_anychar();
6334  if (IS_NULL(np1)) goto err;
6335 
6336  option = env->option;
6337  ONOFF(option, ONIG_OPTION_MULTILINE, 0);
6338  tmp = node_new_option(option);
6339  if (IS_NULL(tmp)) goto err;
6340  NENCLOSE(tmp)->target = np1;
6341  np1 = tmp;
6342 
6343  tmp = onig_node_new_alt(np1, NULL_NODE);
6344  if (IS_NULL(tmp)) goto err;
6345  alt = tmp;
6346  np1 = NULL;
6347 
6348  /* Prepend+ */
6349  r = ONIGENC_CODE_TO_MBC(env->enc, 0x200D, buf);
6350  if (r < 0) goto err;
6351  np1 = node_new_str_raw(buf, buf + r);
6352  if (IS_NULL(np1)) goto err;
6353 
6354  tmp = node_new_quantifier(0, 1, 0);
6355  if (IS_NULL(tmp)) goto err;
6356  NQTFR(tmp)->target = np1;
6357  np1 = tmp;
6358 
6359  tmp = node_new_list(np1, NULL_NODE);
6360  if (IS_NULL(tmp)) goto err;
6361  list2 = tmp;
6362  np1 = NULL;
6363 
6364  np1 = node_new_cclass();
6365  if (IS_NULL(np1)) goto err;
6366  cc = NCCLASS(np1);
6367  r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=Prepend"), 0, 0, env);
6368  if (r != 0) goto err;
6369 
6370  tmp = node_new_quantifier(1, REPEAT_INFINITE, 0);
6371  if (IS_NULL(tmp)) goto err;
6372  NQTFR(tmp)->target = np1;
6373  np1 = tmp;
6374 
6375  tmp = node_new_list(np1, list2);
6376  if (IS_NULL(tmp)) goto err;
6377  list2 = tmp;
6378  np1 = NULL;
6379 
6380  tmp = onig_node_new_alt(list2, alt);
6381  if (IS_NULL(tmp)) goto err;
6382  alt = tmp;
6383  list2 = NULL;
6384 
6385  tmp = onig_node_new_alt(list, alt);
6386  if (IS_NULL(tmp)) goto err;
6387  alt = tmp;
6388  list = NULL;
6389  }
6390  else
6391 #endif /* USE_UNICODE_PROPERTIES */
6392  {
6393  /* PerlSyntax: (?s:.), RubySyntax: (?m:.) */
6394  np1 = node_new_anychar();
6395  if (IS_NULL(np1)) goto err;
6396 
6397  option = env->option;
6398  ONOFF(option, ONIG_OPTION_MULTILINE, 0);
6399  tmp = node_new_option(option);
6400  if (IS_NULL(tmp)) goto err;
6401  NENCLOSE(tmp)->target = np1;
6402  np1 = tmp;
6403 
6404  alt = onig_node_new_alt(np1, NULL_NODE);
6405  if (IS_NULL(alt)) goto err;
6406  np1 = NULL;
6407  }
6408 
6409  /* \x0D\x0A */
6410  r = ONIGENC_CODE_TO_MBC(env->enc, 0x0D, buf);
6411  if (r < 0) goto err;
6412  num1 = r;
6413  r = ONIGENC_CODE_TO_MBC(env->enc, 0x0A, buf + num1);
6414  if (r < 0) goto err;
6415  np1 = node_new_str_raw(buf, buf + num1 + r);
6416  if (IS_NULL(np1)) goto err;
6417 
6418  tmp = onig_node_new_alt(np1, alt);
6419  if (IS_NULL(tmp)) goto err;
6420  alt = tmp;
6421  np1 = NULL;
6422 
6423  /* (?>\x0D\x0A|...) */
6424  tmp = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
6425  if (IS_NULL(tmp)) goto err;
6426  NENCLOSE(tmp)->target = alt;
6427  np1 = tmp;
6428 
6429 #ifdef USE_UNICODE_PROPERTIES
6430  if (ONIGENC_IS_UNICODE(env->enc)) {
6431  /* Don't ignore case. */
6432  option = env->option;
6433  ONOFF(option, ONIG_OPTION_IGNORECASE, 1);
6434  *np = node_new_option(option);
6435  if (IS_NULL(*np)) goto err;
6436  NENCLOSE(*np)->target = np1;
6437  }
6438  else
6439 #endif
6440  {
6441  *np = np1;
6442  }
6443  return ONIG_NORMAL;
6444 
6445  err:
6446  onig_node_free(np1);
6447  onig_node_free(list);
6448  onig_node_free(list2);
6449  onig_node_free(alt);
6450  onig_node_free(alt2);
6451  bbuf_free(pbuf1);
6452  return (r == 0) ? ONIGERR_MEMORY : r;
6453 }
6454 
/*
 * Return the number of set bits in `bits`.
 *
 * Adjacent bit fields are summed pairwise, doubling the field width on
 * each round (1, 2, 4, 8 and finally 16 bits).
 */
static int
countbits(unsigned int bits)
{
  static const unsigned int field_mask[] = {
    0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff, 0x0000ffff
  };
  int round;

  for (round = 0; round < 5; round++) {
    const unsigned int m = field_mask[round];
    const int width = 1 << round;   /* 1, 2, 4, 8, 16 */

    bits = (bits & m) + ((bits >> width) & m);
  }
  return bits;
}
6464 
6465 static int
6466 is_onechar_cclass(CClassNode* cc, OnigCodePoint* code)
6467 {
6468  const OnigCodePoint not_found = ONIG_LAST_CODE_POINT;
6469  OnigCodePoint c = not_found;
6470  int i;
6471  BBuf *bbuf = cc->mbuf;
6472 
6473  if (IS_NCCLASS_NOT(cc)) return 0;
6474 
6475  /* check bbuf */
6476  if (IS_NOT_NULL(bbuf)) {
6477  OnigCodePoint n, *data;
6478  GET_CODE_POINT(n, bbuf->p);
6479  data = (OnigCodePoint* )(bbuf->p) + 1;
6480  if ((n == 1) && (data[0] == data[1])) {
6481  /* only one char found in the bbuf, save the code point. */
6482  c = data[0];
6483  if (((c < SINGLE_BYTE_SIZE) && BITSET_AT(cc->bs, c))) {
6484  /* skip if c is included in the bitset */
6485  c = not_found;
6486  }
6487  }
6488  else {
6489  return 0; /* the bbuf contains multiple chars */
6490  }
6491  }
6492 
6493  /* check bitset */
6494  for (i = 0; i < BITSET_SIZE; i++) {
6495  Bits b1 = cc->bs[i];
6496  if (b1 != 0) {
6497  if (((b1 & (b1 - 1)) == 0) && (c == not_found)) {
6498  c = BITS_IN_ROOM * i + countbits(b1 - 1);
6499  } else {
6500  return 0; /* the character class contains multiple chars */
6501  }
6502  }
6503  }
6504 
6505  if (c != not_found) {
6506  *code = c;
6507  return 1;
6508  }
6509 
6510  /* the character class contains no char. */
6511  return 0;
6512 }
6513 
6514 
6515 static int
6516 parse_exp(Node** np, OnigToken* tok, int term,
6517  UChar** src, UChar* end, ScanEnv* env)
6518 {
6519  int r, len, group = 0;
6520  Node* qn;
6521  Node** targetp;
6522 
6523  *np = NULL;
6524  if (tok->type == (enum TokenSyms )term)
6525  goto end_of_token;
6526 
6527  switch (tok->type) {
6528  case TK_ALT:
6529  case TK_EOT:
6530  end_of_token:
6531  *np = node_new_empty();
6532  return tok->type;
6533  break;
6534 
6535  case TK_SUBEXP_OPEN:
6536  r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);
6537  if (r < 0) return r;
6538  if (r == 1) group = 1;
6539  else if (r == 2) { /* option only */
6540  Node* target;
6541  OnigOptionType prev = env->option;
6542 
6543  env->option = NENCLOSE(*np)->option;
6544  r = fetch_token(tok, src, end, env);
6545  if (r < 0) {
6546  env->option = prev;
6547  return r;
6548  }
6549  r = parse_subexp(&target, tok, term, src, end, env);
6550  env->option = prev;
6551  if (r < 0) {
6552  onig_node_free(target);
6553  return r;
6554  }
6555  NENCLOSE(*np)->target = target;
6556  return tok->type;
6557  }
6558  break;
6559 
6560  case TK_SUBEXP_CLOSE:
6563 
6564  if (tok->escaped) goto tk_raw_byte;
6565  else goto tk_byte;
6566  break;
6567 
6568  case TK_LINEBREAK:
6569  r = node_linebreak(np, env);
6570  if (r < 0) return r;
6571  break;
6572 
6574  r = node_extended_grapheme_cluster(np, env);
6575  if (r < 0) return r;
6576  break;
6577 
6578  case TK_KEEP:
6581  break;
6582 
6583  case TK_STRING:
6584  tk_byte:
6585  {
6586  *np = node_new_str(tok->backp, *src);
6588 
6589  string_loop:
6590  while (1) {
6591  r = fetch_token(tok, src, end, env);
6592  if (r < 0) return r;
6593  if (r == TK_STRING) {
6594  r = onig_node_str_cat(*np, tok->backp, *src);
6595  }
6596 #ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
6597  else if (r == TK_CODE_POINT) {
6598  r = node_str_cat_codepoint(*np, env->enc, tok->u.code);
6599  }
6600 #endif
6601  else {
6602  break;
6603  }
6604  if (r < 0) return r;
6605  }
6606 
6607  string_end:
6608  targetp = np;
6609  goto repeat;
6610  }
6611  break;
6612 
6613  case TK_RAW_BYTE:
6614  tk_raw_byte:
6615  {
6616  *np = node_new_str_raw_char((UChar )tok->u.c);
6618  len = 1;
6619  while (1) {
6620  if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
6621  if (len == enclen(env->enc, NSTR(*np)->s, NSTR(*np)->end)) {
6622  r = fetch_token(tok, src, end, env);
6623  NSTRING_CLEAR_RAW(*np);
6624  goto string_end;
6625  }
6626  }
6627 
6628  r = fetch_token(tok, src, end, env);
6629  if (r < 0) return r;
6630  if (r != TK_RAW_BYTE) {
6631  /* Don't use this, it is wrong for little endian encodings. */
6632 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
6633  int rem;
6634  if (len < ONIGENC_MBC_MINLEN(env->enc)) {
6635  rem = ONIGENC_MBC_MINLEN(env->enc) - len;
6636  (void )node_str_head_pad(NSTR(*np), rem, (UChar )0);
6637  if (len + rem == enclen(env->enc, NSTR(*np)->s)) {
6638  NSTRING_CLEAR_RAW(*np);
6639  goto string_end;
6640  }
6641  }
6642 #endif
6644  }
6645 
6646  r = node_str_cat_char(*np, (UChar )tok->u.c);
6647  if (r < 0) return r;
6648 
6649  len++;
6650  }
6651  }
6652  break;
6653 
6654  case TK_CODE_POINT:
6655  {
6656  *np = node_new_empty();
6658  r = node_str_cat_codepoint(*np, env->enc, tok->u.code);
6659  if (r != 0) return r;
6660 #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
6661  NSTRING_SET_RAW(*np);
6662 #else
6663  goto string_loop;
6664 #endif
6665  }
6666  break;
6667 
6668  case TK_QUOTE_OPEN:
6669  {
6670  OnigCodePoint end_op[2];
6671  UChar *qstart, *qend, *nextp;
6672 
6673  end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
6674  end_op[1] = (OnigCodePoint )'E';
6675  qstart = *src;
6676  qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
6677  if (IS_NULL(qend)) {
6678  nextp = qend = end;
6679  }
6680  *np = node_new_str(qstart, qend);
6682  *src = nextp;
6683  }
6684  break;
6685 
6686  case TK_CHAR_TYPE:
6687  {
6688  switch (tok->u.prop.ctype) {
6689  case ONIGENC_CTYPE_WORD:
6690  *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not,
6691  IS_ASCII_RANGE(env->option));
6693  break;
6694 
6695  case ONIGENC_CTYPE_SPACE:
6696  case ONIGENC_CTYPE_DIGIT:
6697  case ONIGENC_CTYPE_XDIGIT:
6698  {
6699  CClassNode* cc;
6700 
6701  *np = node_new_cclass();
6703  cc = NCCLASS(*np);
6704  r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0,
6705  IS_ASCII_RANGE(env->option), env);
6706  if (r != 0) return r;
6707  if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
6708  }
6709  break;
6710 
6711  default:
6712  return ONIGERR_PARSER_BUG;
6713  break;
6714  }
6715  }
6716  break;
6717 
6718  case TK_CHAR_PROPERTY:
6719  r = parse_char_property(np, tok, src, end, env);
6720  if (r != 0) return r;
6721  break;
6722 
6723  case TK_CC_OPEN:
6724  {
6725  Node *asc_node;
6726  CClassNode* cc;
6727  OnigCodePoint code;
6728 
6729  r = parse_char_class(np, &asc_node, tok, src, end, env);
6730  if (r != 0) {
6731  onig_node_free(asc_node);
6732  return r;
6733  }
6734 
6735  cc = NCCLASS(*np);
6736  if (is_onechar_cclass(cc, &code)) {
6737  onig_node_free(*np);
6738  onig_node_free(asc_node);
6739  *np = node_new_empty();
6741  r = node_str_cat_codepoint(*np, env->enc, code);
6742  if (r != 0) return r;
6743  goto string_loop;
6744  }
6745  if (IS_IGNORECASE(env->option)) {
6746  r = cclass_case_fold(np, cc, NCCLASS(asc_node), env);
6747  if (r != 0) {
6748  onig_node_free(asc_node);
6749  return r;
6750  }
6751  }
6752  onig_node_free(asc_node);
6753  }
6754  break;
6755 
6756  case TK_ANYCHAR:
6757  *np = node_new_anychar();
6759  break;
6760 
6761  case TK_ANYCHAR_ANYTIME:
6762  *np = node_new_anychar();
6764  qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
6766  NQTFR(qn)->target = *np;
6767  *np = qn;
6768  break;
6769 
6770  case TK_BACKREF:
6771  len = tok->u.backref.num;
6772  *np = node_new_backref(len,
6773  (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
6774  tok->u.backref.by_name,
6776  tok->u.backref.exist_level,
6777  tok->u.backref.level,
6778 #endif
6779  env);
6781  break;
6782 
6783 #ifdef USE_SUBEXP_CALL
6784  case TK_CALL:
6785  {
6786  int gnum = tok->u.call.gnum;
6787 
6788  if (gnum < 0 || tok->u.call.rel != 0) {
6789  if (gnum > 0) gnum--;
6790  gnum = BACKREF_REL_TO_ABS(gnum, env);
6791  if (gnum <= 0)
6792  return ONIGERR_INVALID_BACKREF;
6793  }
6794  *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);
6796  env->num_call++;
6797  }
6798  break;
6799 #endif
6800 
6801  case TK_ANCHOR:
6802  *np = onig_node_new_anchor(tok->u.anchor.subtype);
6804  NANCHOR(*np)->ascii_range = tok->u.anchor.ascii_range;
6805  break;
6806 
6807  case TK_OP_REPEAT:
6808  case TK_INTERVAL:
6812  else
6813  *np = node_new_empty();
6814  }
6815  else {
6816  goto tk_byte;
6817  }
6818  break;
6819 
6820  default:
6821  return ONIGERR_PARSER_BUG;
6822  break;
6823  }
6824 
6825  {
6826  targetp = np;
6827 
6828  re_entry:
6829  r = fetch_token(tok, src, end, env);
6830  if (r < 0) return r;
6831 
6832  repeat:
6833  if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
6834  if (is_invalid_quantifier_target(*targetp))
6836 
6837  qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
6838  (r == TK_INTERVAL ? 1 : 0));
6840  NQTFR(qn)->greedy = tok->u.repeat.greedy;
6841  r = set_quantifier(qn, *targetp, group, env);
6842  if (r < 0) {
6843  onig_node_free(qn);
6844  return r;
6845  }
6846 
6847  if (tok->u.repeat.possessive != 0) {
6848  Node* en;
6849  en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
6850  if (IS_NULL(en)) {
6851  onig_node_free(qn);
6852  return ONIGERR_MEMORY;
6853  }
6854  NENCLOSE(en)->target = qn;
6855  qn = en;
6856  }
6857 
6858  if (r == 0) {
6859  *targetp = qn;
6860  }
6861  else if (r == 1) {
6862  onig_node_free(qn);
6863  }
6864  else if (r == 2) { /* split case: /abc+/ */
6865  Node *tmp;
6866 
6867  *targetp = node_new_list(*targetp, NULL);
6868  if (IS_NULL(*targetp)) {
6869  onig_node_free(qn);
6870  return ONIGERR_MEMORY;
6871  }
6872  tmp = NCDR(*targetp) = node_new_list(qn, NULL);
6873  if (IS_NULL(tmp)) {
6874  onig_node_free(qn);
6875  return ONIGERR_MEMORY;
6876  }
6877  targetp = &(NCAR(tmp));
6878  }
6879  goto re_entry;
6880  }
6881  }
6882 
6883  return r;
6884 }
6885 
6886 static int
6887 parse_branch(Node** top, OnigToken* tok, int term,
6888  UChar** src, UChar* end, ScanEnv* env)
6889 {
6890  int r;
6891  Node *node, **headp;
6892 
6893  *top = NULL;
6894  r = parse_exp(&node, tok, term, src, end, env);
6895  if (r < 0) {
6896  onig_node_free(node);
6897  return r;
6898  }
6899 
6900  if (r == TK_EOT || r == term || r == TK_ALT) {
6901  *top = node;
6902  }
6903  else {
6904  *top = node_new_list(node, NULL);
6905  headp = &(NCDR(*top));
6906  while (r != TK_EOT && r != term && r != TK_ALT) {
6907  r = parse_exp(&node, tok, term, src, end, env);
6908  if (r < 0) {
6909  onig_node_free(node);
6910  return r;
6911  }
6912 
6913  if (NTYPE(node) == NT_LIST) {
6914  *headp = node;
6915  while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);
6916  headp = &(NCDR(node));
6917  }
6918  else {
6919  *headp = node_new_list(node, NULL);
6920  headp = &(NCDR(*headp));
6921  }
6922  }
6923  }
6924 
6925  return r;
6926 }
6927 
6928 /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
6929 static int
6930 parse_subexp(Node** top, OnigToken* tok, int term,
6931  UChar** src, UChar* end, ScanEnv* env)
6932 {
6933  int r;
6934  Node *node, **headp;
6935 
6936  *top = NULL;
6937  env->parse_depth++;
6938  if (env->parse_depth > ParseDepthLimit)
6940  r = parse_branch(&node, tok, term, src, end, env);
6941  if (r < 0) {
6942  onig_node_free(node);
6943  return r;
6944  }
6945 
6946  if (r == term) {
6947  *top = node;
6948  }
6949  else if (r == TK_ALT) {
6950  *top = onig_node_new_alt(node, NULL);
6951  headp = &(NCDR(*top));
6952  while (r == TK_ALT) {
6953  r = fetch_token(tok, src, end, env);
6954  if (r < 0) return r;
6955  r = parse_branch(&node, tok, term, src, end, env);
6956  if (r < 0) {
6957  onig_node_free(node);
6958  return r;
6959  }
6960 
6961  *headp = onig_node_new_alt(node, NULL);
6962  headp = &(NCDR(*headp));
6963  }
6964 
6965  if (tok->type != (enum TokenSyms )term)
6966  goto err;
6967  }
6968  else {
6969  onig_node_free(node);
6970  err:
6971  if (term == TK_SUBEXP_CLOSE)
6973  else
6974  return ONIGERR_PARSER_BUG;
6975  }
6976 
6977  env->parse_depth--;
6978  return r;
6979 }
6980 
6981 static int
6982 parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
6983 {
6984  int r;
6985  OnigToken tok;
6986 
6987  r = fetch_token(&tok, src, end, env);
6988  if (r < 0) return r;
6989  r = parse_subexp(top, &tok, TK_EOT, src, end, env);
6990  if (r < 0) return r;
6991 
6992 #ifdef USE_SUBEXP_CALL
6993  if (env->num_call > 0) {
6994  /* Capture the pattern itself. It is used for (?R), (?0) and \g<0>. */
6995  const int num = 0;
6996  Node* np;
6997  np = node_new_enclose_memory(env->option, 0);
6999  NENCLOSE(np)->regnum = num;
7000  NENCLOSE(np)->target = *top;
7001  r = scan_env_set_mem_node(env, num, np);
7002  if (r != 0) {
7003  onig_node_free(np);
7004  return r;
7005  }
7006  *top = np;
7007  }
7008 #endif
7009  return 0;
7010 }
7011 
7012 extern int
7013 onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
7014  regex_t* reg, ScanEnv* env)
7015 {
7016  int r;
7017  UChar* p;
7018 
7019 #ifdef USE_NAMED_GROUP
7020  names_clear(reg);
7021 #endif
7022 
7023  scan_env_clear(env);
7024  env->option = reg->options;
7025  env->case_fold_flag = reg->case_fold_flag;
7026  env->enc = reg->enc;
7027  env->syntax = reg->syntax;
7028  env->pattern = (UChar* )pattern;
7029  env->pattern_end = (UChar* )end;
7030  env->reg = reg;
7031 
7032  *root = NULL;
7033  p = (UChar* )pattern;
7034  r = parse_regexp(root, &p, (UChar* )end, env);
7035  reg->num_mem = env->num_mem;
7036  return r;
7037 }
7038 
7039 extern void
7040 onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,
7041  UChar* arg, UChar* arg_end)
7042 {
7043  env->error = arg;
7044  env->error_end = arg_end;
7045 }
unsigned int OnigOptionType
Definition: onigmo.h:445
#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS
Definition: onigmo.h:588
UChar * pattern
Definition: regparse.h:299
Definition: regparse.c:453
#define NSTRING_SET_AMBIG(node)
Definition: regparse.h:111
void onig_scan_env_set_error_string(ScanEnv *env, int ecode ARG_UNUSED, UChar *arg, UChar *arg_end)
Definition: regparse.c:7040
#define ONIGENC_APPLY_ALL_CASE_FOLD(enc, case_fold_flag, f, arg)
Definition: onigmo.h:338
#define NCCLASS_SET_NOT(nd)
Definition: regint.h:794
#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY
Definition: onigmo.h:568
#define ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP
Definition: onigmo.h:582
void onig_set_warn_func(OnigWarnFunc f)
Definition: regparse.c:101
Definition: st.h:99
int gnum
Definition: regparse.c:2317
#define IS_NULL(p)
Definition: regint.h:298
#define NODE_STR_BUF_SIZE
Definition: regparse.h:101
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
Definition: onigmo.h:367
#define ONIG_SYNTAX_RUBY
Definition: onigmo.h:511
#define ENCLOSE_MEMORY
Definition: regparse.h:94
unsigned int OnigCodePoint
Definition: onigmo.h:80
#define ONIG_REGION_NOTPOS
Definition: onigmo.h:730
unsigned int alloc
Definition: regint.h:444
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf)
Definition: regparse.c:168
#define ONIGENC_MBC_MAXLEN(enc)
Definition: onigmo.h:362
void rb_warn(const char *fmt,...)
Definition: error.c:246
UChar * pattern_end
Definition: regparse.h:300
int onig_foreach_name(regex_t *reg, int(*func)(const UChar *, const UChar *, int, int *, regex_t *, void *), void *arg)
Definition: regparse.c:576
#define IS_REPEAT_INFINITE(n)
Definition: regint.h:409
UChar * end
Definition: regparse.h:173
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf)
Definition: regparse.c:165
#define FALSE
Definition: nkf.h:174
int onig_node_str_cat(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1376
Node * onig_node_list_add(Node *list, Node *x)
Definition: regparse.c:1192
#define IS_SYNTAX_BV(syn, bvm)
Definition: regparse.h:332
int onig_st_lookup_strend(hash_table_type *table, const UChar *str_key, const UChar *end_key, hash_data_type *value)
Definition: regparse.c:418
int * back_refs
Definition: regparse.c:459
size_t strlen(const char *)
#define INT_MAX_LIMIT
Definition: regint.h:373
Definition: st.h:79
#define ONIGERR_TOO_MANY_CAPTURE_GROUPS
Definition: onigmo.h:676
#define NST_NEST_LEVEL
Definition: regparse.h:141
#define ONIG_SYN_OP_ESC_W_WORD
Definition: onigmo.h:537
#define ONIGENC_IS_UNICODE(enc)
Definition: onigmo.h:327
void onig_null_warn(const char *s ARG_UNUSED)
Definition: regparse.c:87
#define ANCHOR_END_BUF
Definition: regint.h:530
Definition: st.h:99
#define PINC
Definition: regparse.c:301
#define BBUF_WRITE_CODE_POINT(bbuf, pos, code)
Definition: regparse.c:1644
unsigned int flags
Definition: regint.h:806
#define ONIGERR_META_CODE_SYNTAX
Definition: onigmo.h:649
#define ONIG_SYN_OP_ESC_D_DIGIT
Definition: onigmo.h:541
#define ONIGERR_INVALID_BACKREF
Definition: onigmo.h:674
#define ONIG_MAX_REPEAT_NUM
Definition: onigmo.h:440
#define ANCHOR_WORD_BEGIN
Definition: regint.h:536
CClassNode * asc_cc
Definition: regparse.c:5498
#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS
Definition: onigmo.h:519
#define SINGLE_BYTE_SIZE
Definition: regint.h:413
int num_call
Definition: regparse.h:307
#define ONIG_SYN_OP_QMARK_ZERO_ONE
Definition: onigmo.h:525
#define BITSET_SET_BIT(bs, pos)
Definition: regint.h:436
#define NCCLASS_CLEAR_NOT(nd)
Definition: regint.h:795
#define ONIG_SYN_OP_ESC_X_BRACE_HEX8
Definition: onigmo.h:549
OnigCodePoint code
Definition: regparse.c:2293
int sourceline
Definition: regparse.h:325
#define NULL_UCHARP
Definition: regint.h:302
#define WARN_BUFSIZE
Definition: regparse.c:34
#define SCANENV_MEM_NODES(senv)
Definition: regparse.h:286
#define ONIGENC_CTYPE_ASCII
Definition: onigmo.h:308
#define ONIGERR_UNDEFINED_GROUP_OPTION
Definition: onigmo.h:660
regex_t * reg
Definition: regparse.c:554
#define ONIG_IS_OPTION_ON(options, option)
Definition: onigmo.h:476
#define ONIGERR_INVALID_CODE_POINT_VALUE
Definition: onigmo.h:689
int onig_names_free(regex_t *reg)
Definition: regparse.c:525
#define PINC_S
Definition: regparse.c:311
#define ANCHOR_BEGIN_LINE
Definition: regint.h:528
#define IS_SYNTAX_OP(syn, opm)
Definition: regparse.h:330
#define MC_ANYTIME(syn)
Definition: regint.h:750
#define NEWLINE_CODE
Definition: st.h:99
UChar * error
Definition: regparse.h:301
int onig_node_str_set(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1412
int * refs
Definition: regparse.c:2307
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME
Definition: onigmo.h:596
#define IS_NCCLASS_NOT(nd)
Definition: regint.h:796
#define NENCLOSE(node)
Definition: regparse.h:81
#define ONIG_ENCODING_ASCII
Definition: onigmo.h:225
#define ONIGERR_TOO_BIG_NUMBER
Definition: onigmo.h:666
if(len<=MAX_WORD_LENGTH &&len >=MIN_WORD_LENGTH)
Definition: zonetab.h:883
UChar * s
Definition: regparse.h:172
#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL
Definition: onigmo.h:561
unsigned char Bits
Definition: regint.h:420
#define ANCHOR_END_LINE
Definition: regint.h:532
hash_table_type * onig_st_init_strend_table_with_size(st_index_t size)
Definition: regparse.c:406
#define ONIGERR_INVALID_GROUP_NAME
Definition: onigmo.h:680
#define ANCHOR_PREC_READ
Definition: regint.h:538
#define ONIGENC_CTYPE_ALPHA
Definition: onigmo.h:295
#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL
Definition: onigmo.h:563
#define NT_QTFR
Definition: regparse.h:43
CCVALTYPE
Definition: regparse.c:4404
#define tok()
Definition: ripper.c:11734
#define IS_SINGLELINE(option)
Definition: regint.h:381
#define GET_CODE_POINT(code, p)
Definition: regint.h:697
#define ONIG_SYN_OP_ASTERISK_ZERO_INF
Definition: onigmo.h:521
union OnigToken::@103 u
#define MC_ANYCHAR_ANYTIME(syn)
Definition: regint.h:753
Node * onig_node_new_alt(Node *left, Node *right)
Definition: regparse.c:1210
#define ONIG_SYN_OP_POSIX_BRACKET
Definition: onigmo.h:543
int onig_st_insert_strend(hash_table_type *table, const UChar *str_key, const UChar *end_key, hash_data_type value)
Definition: regparse.c:430
#define BIT_STATUS_ON_AT_SIMPLE(stats, n)
Definition: regint.h:367
#define SCANENV_MEMNODES_SIZE
Definition: regparse.h:285
int back_alloc
Definition: regparse.c:457
int ascii_range
Definition: regparse.c:2296
TokenSyms
Definition: regparse.c:2254
#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS
Definition: onigmo.h:589
#define ONIG_OPTION_IGNORECASE
Definition: onigmo.h:451
#define ONIG_SYN_OP2_ESC_V_VTAB
Definition: onigmo.h:565
#define ONIGENC_CTYPE_ALNUM
Definition: onigmo.h:307
#define ONIG_OPTION_POSIX_BRACKET_ALL_RANGE
Definition: onigmo.h:468
#define BBUF_MOVE_RIGHT(buf, from, to, n)
Definition: regint.h:497
st_data_t st_index_t
Definition: st.h:50
#define ONIGERR_MULTIPLEX_DEFINED_NAME
Definition: onigmo.h:684
const UChar name[6]
Definition: regenc.h:120
#define NST_RECURSION
Definition: regparse.h:135
#define PFETCH(c)
Definition: regparse.c:305
#define ANCHOR_BEGIN_POSITION
Definition: regint.h:529
OnigEncoding enc
Definition: regparse.c:557
#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, p, end)
Definition: onigmo.h:369
regex_t * reg
Definition: regparse.h:303
#define ONIGERR_END_PATTERN_AT_ESCAPE
Definition: onigmo.h:646
#define ONIGERR_CONTROL_CODE_SYNTAX
Definition: onigmo.h:650
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
Definition: onigmo.h:594
int greedy
Definition: regparse.c:2301
#define ONIG_SYN_OP_PLUS_ONE_INF
Definition: onigmo.h:523
ONIG_EXTERN int onigenc_strlen(OnigEncoding enc, const OnigUChar *p, const OnigUChar *end)
ReduceType
Definition: regparse.c:2183
st_table NameTable
Definition: regparse.c:464
#define ANCHOR_NOT_WORD_BOUND
Definition: regint.h:535
int num_named
Definition: regparse.h:310
#define MC_ONE_OR_MORE_TIME(syn)
Definition: regint.h:752
#define SYN_GNU_REGEX_OP
Definition: regint.h:767
#define ONIG_SYN_WARN_CC_DUP
Definition: onigmo.h:609
#define PFETCH_READY
Definition: regparse.c:297
#define ONIGENC_CTYPE_PRINT
Definition: onigmo.h:301
#define ONIG_LAST_CODE_POINT
Definition: regint.h:304
#define ONIG_SYN_OP_ESC_PLUS_ONE_INF
Definition: onigmo.h:524
#define NBREF(node)
Definition: regparse.h:79
#define NT_ALT
Definition: regparse.h:47
const char term
Definition: id.c:37
#define BIT_STATUS_CLEAR(stats)
Definition: regint.h:355
#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF
Definition: onigmo.h:522
#define ONIG_SYN_OP2_QMARK_TILDE_ABSENT
Definition: onigmo.h:583
#define ONIGENC_CODE_TO_MBC_MAXLEN
Definition: onigmo.h:289
#define ONIG_SYN_OP_ESC_C_CONTROL
Definition: onigmo.h:546
RUBY_SYMBOL_EXPORT_BEGIN typedef unsigned long st_data_t
Definition: st.h:22
#define ONIGENC_IS_CODE_NEWLINE(enc, code)
Definition: onigmo.h:374
#define NCAR(node)
Definition: regparse.h:86
#define ONIGENC_IS_CODE_CTYPE(enc, code, ctype)
Definition: onigmo.h:372
#define ONIG_INEFFECTIVE_META_CHAR
Definition: onigmo.h:619
#define ONIG_SYN_OP_BRACKET_CC
Definition: onigmo.h:536
Definition: regint.h:441
#define ONIG_SYN_OP2_QMARK_LPAREN_CONDITION
Definition: onigmo.h:581
#define IS_IGNORECASE(option)
Definition: regint.h:383
BitSet bs
Definition: regint.h:807
struct OnigToken::@103::@106 backref
#define ONIG_OPTION_CAPTURE_GROUP
Definition: onigmo.h:460
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
Definition: onigmo.h:595
#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY
Definition: onigmo.h:597
#define neg(x)
Definition: time.c:131
int onig_set_parse_depth_limit(unsigned int depth)
Definition: regparse.c:123
Node * mem_nodes_static[SCANENV_MEMNODES_SIZE]
Definition: regparse.h:313
#define POSIX_BRACKET_ENTRY_INIT(name, ctype)
Definition: regenc.h:124
#define ONIG_SYN_OP_VBAR_ALT
Definition: onigmo.h:529
const char * sourcefile
Definition: regparse.h:324
Bits BitSet[BITSET_SIZE]
Definition: regint.h:422
#define PUNFETCH
Definition: regparse.c:300
#define ONIG_SYN_OP2_ESC_H_XDIGIT
Definition: onigmo.h:571
#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE
Definition: onigmo.h:670
int onig_number_of_names(const regex_t *reg)
Definition: regparse.c:623
#define NST_NAME_REF
Definition: regparse.h:139
OnigCaseFoldType case_fold_flag
Definition: onigmo.h:775
#define ONIGERR_END_PATTERN_AT_META
Definition: onigmo.h:647
void * arg
Definition: regparse.c:555
#define NULL_NODE
Definition: regparse.h:283
#define ONIG_MAX_CAPTURE_GROUP_NUM
Definition: onigmo.h:438
#define PEND
Definition: regparse.c:299
#define INIT_MULTI_BYTE_RANGE_SIZE
#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS
Definition: onigmo.h:669
int onig_strncmp(const UChar *s1, const UChar *s2, int n)
#define SIZE_CODE_POINT
Definition: regint.h:683
#define val
#define BBUF_INIT(buf, size)
Definition: regint.h:447
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT
Definition: onigmo.h:608
#define SET_ENCLOSE_STATUS(node, f)
Definition: regparse.h:144
const OnigSyntaxType * syntax
Definition: regparse.h:294
#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME
Definition: onigmo.h:681
#define BACKREF_REL_TO_ABS(rel_no, env)
Definition: regparse.c:157
#define ONIGENC_IS_CODE_WORD(enc, code)
Definition: onigmo.h:400
return
Definition: zonetab.h:899
#define ARG_UNUSED
Definition: nkf.h:181
#define ONIGERR_END_PATTERN_AT_CONTROL
Definition: onigmo.h:648
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT
Definition: onigmo.h:556
UChar * name_end
Definition: regparse.c:2316
#define POSIX_BRACKET_CHECK_LIMIT_LENGTH
#define ONIG_SYN_OP2_CCLASS_SET_OP
Definition: onigmo.h:558
#define INIT_NAME_BACKREFS_ALLOC_NUM
Definition: regparse.c:451
#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE
Definition: onigmo.h:651
int ref1
Definition: regparse.c:2306
void(* OnigWarnFunc)(const char *s)
Definition: onigmo.h:745
#define MC_ESC(syn)
Definition: regint.h:748
#define ONIGERR_END_PATTERN_IN_GROUP
Definition: onigmo.h:659
#define BITSET_AT(bs, pos)
Definition: regint.h:435
struct OnigToken::@103::@108 prop
#define BITS_IN_ROOM
Definition: regint.h:414
#define ONIGERR_UNDEFINED_NAME_REFERENCE
Definition: onigmo.h:682
#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE
Definition: onigmo.h:526
int lower
Definition: regparse.h:183
#define NCCLASS(node)
Definition: regparse.h:77
#define ONIG_OPTION_WORD_BOUND_ALL_RANGE
Definition: onigmo.h:469
#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT
Definition: onigmo.h:569
#define xmemcpy
Definition: regint.h:202
#define IS_EXTEND(option)
Definition: regint.h:384
void rb_compile_warn(const char *file, int line, const char *fmt,...)
Definition: error.c:200
#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS
Definition: onigmo.h:658
#define CHECK_NULL_RETURN_MEMERR(p)
Definition: regint.h:301
Node * onig_node_new_enclose(int type)
Definition: regparse.c:1347
#define NCTYPE(node)
Definition: regparse.h:78
#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE
Definition: onigmo.h:668
void onig_set_verb_warn_func(OnigWarnFunc f)
Definition: regparse.c:106
#define ONIG_SYN_OP_DECIMAL_BACKREF
Definition: onigmo.h:535
UChar * name
Definition: regparse.c:454
int mem_alloc
Definition: regparse.h:312
#define ONIGENC_CTYPE_SPACE
Definition: onigmo.h:303
#define ONIGENC_MBC_MINLEN(enc)
Definition: onigmo.h:364
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED
Definition: onigmo.h:654
#define ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP
Definition: onigmo.h:599
#define ONIG_OPTION_DONT_CAPTURE_GROUP
Definition: onigmo.h:459
#define PPEEK_IS(c)
Definition: regparse.c:320
int ret
Definition: regparse.c:556
#define is_invalid_quantifier_target(node)
Definition: regparse.c:2122
#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV
Definition: onigmo.h:592
#define NT_LIST
Definition: regparse.h:46
#define ONIG_SYN_OP_BRACE_INTERVAL
Definition: onigmo.h:527
st_data_t HashDataType
Definition: regparse.c:465
UChar * s
Definition: regparse.c:2291
#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR
Definition: onigmo.h:533
int err
Definition: win32.c:135
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED
Definition: onigmo.h:607
#define BITSET_IS_EMPTY(bs, empty)
Definition: regparse.c:181
#define NT_ANCHOR
Definition: regparse.h:45
#define ANCHOR_WORD_END
Definition: regint.h:537
unsigned int onig_get_parse_depth_limit(void)
Definition: regparse.c:117
#define NSTRING_CLEAR_RAW(node)
Definition: regparse.h:110
#define XDIGITVAL(enc, code)
Definition: regint.h:377
#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META
Definition: onigmo.h:564
#define UChar
Definition: onigmo.h:76
int upper
Definition: regparse.h:184
#define ONIG_OPTION_EXTEND
Definition: onigmo.h:452
#define numberof(array)
Definition: etc.c:618
const OnigSyntaxType OnigSyntaxRuby
Definition: regparse.c:39
#define ONIG_OPTION_SINGLELINE
Definition: onigmo.h:455
#define ONIG_SYN_STRICT_CHECK_BACKREF
Definition: onigmo.h:593
#define ONIGENC_CTYPE_WORD
Definition: onigmo.h:306
#define USE_BACKREF_WITH_LEVEL
Definition: regint.h:73
#define NT_STR
Definition: regparse.h:38
#define NQTFR(node)
Definition: regparse.h:80
#define ONIGENC_CTYPE_XDIGIT
Definition: onigmo.h:305
#define ONIG_NORMAL
Definition: onigmo.h:624
#define BITSET_SET_BIT_CHKDUP(bs, pos)
Definition: regparse.c:176
#define BBUF_MOVE_LEFT_REDUCE(buf, from, to)
Definition: regint.h:509
Bits * BitSetRef
Definition: regint.h:423
#define IS_WORD_BOUND_ALL_RANGE(option)
Definition: regint.h:395
#define DEFAULT_PARSE_DEPTH_LIMIT
Definition: regint.h:88
#define ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK
Definition: onigmo.h:573
#define ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER
Definition: onigmo.h:574
void * name_table
Definition: onigmo.h:774
unsigned int flag
Definition: regparse.h:174
#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS
Definition: onigmo.h:653
#define NT_CANY
Definition: regparse.h:41
#define NST_NAMED_GROUP
Definition: regparse.h:138
int upper
Definition: regparse.c:2300
UChar * name
Definition: regparse.c:2315
ONIG_EXTERN OnigUChar * onigenc_get_prev_char_head(OnigEncoding enc, const OnigUChar *start, const OnigUChar *s, const OnigUChar *end)
int onig_name_to_group_numbers(regex_t *reg, const UChar *name, const UChar *name_end, int **nums)
Definition: regparse.c:887
int(* property_name_to_ctype)(const struct OnigEncodingTypeST *enc, const OnigUChar *p, const OnigUChar *end)
Definition: onigmo.h:172
#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
Definition: onigmo.h:552
#define ONIGENC_MBC_TO_CODE(enc, p, end)
Definition: onigmo.h:366
BBuf * mbuf
Definition: regint.h:808
CClassNode * cc
Definition: regparse.c:5497
int ctype
Definition: regenc.h:121
#define ODIGITVAL(code)
Definition: regint.h:376
#define ONIG_SYN_OP_ESC_CONTROL_CHARS
Definition: onigmo.h:545
#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2)
Definition: regparse.c:1815
void onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, UChar *pat, UChar *pat_end, const UChar *fmt, va_list args)
Definition: regerror.c:314
#define ONIG_SYN_OP_QMARK_NON_GREEDY
Definition: onigmo.h:544
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC
Definition: onigmo.h:602
#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING
Definition: onigmo.h:672
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4309
#define SYN_GNU_REGEX_BV
Definition: regint.h:780
enum TokenSyms type
Definition: regparse.c:2286
int onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:6114
#define ONIGENC_CODE_RANGE_NUM(range)
Definition: onigmo.h:139
#define ONIGENC_IS_SINGLEBYTE(enc)
Definition: onigmo.h:318
#define NTYPE(node)
Definition: regparse.h:69
#define ANCHOR_LOOK_BEHIND_NOT
Definition: regint.h:541
BitStatusType backrefed_mem
Definition: regparse.h:298
#define NCALL(node)
Definition: regparse.h:84
#define NQ_TARGET_ISNOT_EMPTY
Definition: regparse.h:122
int escaped
Definition: regparse.c:2287
int num_mem
Definition: regparse.h:308
#define ONIG_SYN_OP2_ESC_G_BRACE_BACKREF
Definition: onigmo.h:578
st_data_t hash_data_type
Definition: regint.h:925
Node * onig_node_new_list(Node *left, Node *right)
Definition: regparse.c:1186
#define ONIGENC_CTYPE_LOWER
Definition: onigmo.h:300
#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF
Definition: onigmo.h:560
#define ONIGENC_IS_ASCII_CODE(code)
Definition: regenc.h:216
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME
Definition: onigmo.h:688
#define bad(x)
Definition: _sdbm.c:124
const UChar * s
Definition: regparse.c:362
#define NT_CCLASS
Definition: regparse.h:39
#define NODE_BACKREFS_SIZE
Definition: regparse.h:102
#define ANCHOR_SEMI_END_BUF
Definition: regint.h:531
#define ONIGERR_EMPTY_GROUP_NAME
Definition: onigmo.h:679
#define ONIG_SYN_OP2_OPTION_PERL
Definition: onigmo.h:554
Node * onig_node_new_str(const UChar *s, const UChar *end)
Definition: regparse.c:1481
#define ONIG_SYN_OP_ESC_O_BRACE_OCTAL
Definition: onigmo.h:550
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, sbout, ranges)
Definition: onigmo.h:403
OnigPosition * beg
Definition: onigmo.h:717
#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
Definition: onigmo.h:538
int level
Definition: regparse.c:2311
#define ONIG_SYN_OP_ESC_BRACE_INTERVAL
Definition: onigmo.h:528
UChar * error_end
Definition: regparse.h:302
#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC
Definition: onigmo.h:603
void onig_node_str_clear(Node *node)
Definition: regparse.c:1449
Node ** mem_nodes_dynamic
Definition: regparse.h:314
BitStatusType bt_mem_end
Definition: regparse.h:297
void onig_node_conv_to_str_node(Node *node, int raw)
#define ENCLOSE_STOP_BACKTRACK
Definition: regparse.h:96
#define ONIG_SYN_OP_ESC_OCTAL3
Definition: onigmo.h:547
#define MIN(a, b)
Definition: regint.h:295
UChar * onigenc_step(OnigEncoding enc, const UChar *p, const UChar *end, int n)
Definition: regenc.c:113
OnigEncoding enc
Definition: regparse.h:293
int subtype
Definition: regparse.c:2295
void onig_node_free(Node *node)
Definition: regparse.c:1062
register unsigned int len
Definition: zonetab.h:51
#define NANCHOR(node)
Definition: regparse.h:82
#define NT_BREF
Definition: regparse.h:42
#define ONIGENC_CODE_RANGE_FROM(range, i)
Definition: onigmo.h:140
#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
Definition: onigmo.h:567
#define ONIGERR_END_PATTERN_AT_LEFT_BRACE
Definition: onigmo.h:642
#define MC_ZERO_OR_ONE_TIME(syn)
Definition: regint.h:751
unsigned int top
Definition: nkf.c:4310
#define ONIG_OPTION_MULTILINE
Definition: onigmo.h:453
#define ONIGENC_CTYPE_PUNCT
Definition: onigmo.h:302
#define CHECK_NULL_RETURN(p)
Definition: regint.h:300
struct OnigToken::@103::@105 repeat
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID
Definition: onigmo.h:655
#define ONIG_SYN_OP2_QMARK_SUBEXP_CALL
Definition: onigmo.h:579
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY
Definition: onigmo.h:687
#define MBCODE_START_POS(enc)
Definition: regparse.c:162
int size
Definition: encoding.c:57
#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
Definition: regparse.c:36
size_t name_len
Definition: regparse.c:455
#define f
#define BBUF_ENSURE_SIZE(buf, size)
Definition: regint.h:465
int base
Definition: regparse.c:2288
#define ONIG_SYN_OP_ESC_B_WORD_BOUND
Definition: onigmo.h:539
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS
Definition: onigmo.h:645
#define ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP
Definition: onigmo.h:577
#define ONIG_SYN_OP_ESC_S_WHITE_SPACE
Definition: onigmo.h:540
#define PPEEK
Definition: regparse.c:319
unsigned int used
Definition: regint.h:443
#define ONIGERR_PARSE_DEPTH_LIMIT_OVER
Definition: onigmo.h:636
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
Definition: onigmo.h:604
#define PFETCH_S(c)
Definition: regparse.c:314
#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE
Definition: onigmo.h:678
OnigCaseFoldType case_fold_flag
Definition: regparse.h:292
#define xmalloc
Definition: defines.h:183
int by_name
Definition: regparse.c:2308
#define ONIGENC_CTYPE_BLANK
Definition: onigmo.h:296
#define ONIGENC_IS_CODE_XDIGIT(enc, code)
Definition: onigmo.h:398
#define ANCHOR_PREC_READ_NOT
Definition: regint.h:539
#define ENCLOSE_CONDITION
Definition: regparse.h:97
void onig_reduce_nested_quantifier(Node *pnode, Node *cnode)
Definition: regparse.c:2204
#define ENCLOSE_OPTION
Definition: regparse.h:95
#define ONIG_OPTION_ASCII_RANGE
Definition: onigmo.h:467
#define ONIGENC_CTYPE_GRAPH
Definition: onigmo.h:299
#define BITSET_CLEAR_BIT(bs, pos)
Definition: regint.h:437
#define ONIG_MAX_MULTI_BYTE_RANGES_NUM
Definition: onigmo.h:441
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE
Definition: onigmo.h:691
#define ONIGENC_CODE_TO_MBC(enc, code, buf)
Definition: onigmo.h:368
#define ONIG_SYN_OP_DOT_ANYCHAR
Definition: onigmo.h:520
const OnigSyntaxType * syntax
Definition: onigmo.h:773
UChar * p
Definition: regint.h:442
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL
Definition: onigmo.h:557
int onig_name_to_backref_number(regex_t *reg, const UChar *name, const UChar *name_end, const OnigRegion *region)
Definition: regparse.c:909
#define RTEST(v)
Definition: ruby.h:450
void hash_table_type
Definition: regint.h:919
UChar * backp
Definition: regparse.c:2289
#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP
Definition: onigmo.h:559
#define NSTR(node)
Definition: regparse.h:76
struct rb_encoding_entry * list
Definition: encoding.c:55
unsigned int parse_depth
Definition: regparse.h:321
#define IS_POSIX_BRACKET_ALL_RANGE(option)
Definition: regint.h:394
#define ENCLOSE_ABSENT
Definition: regparse.h:98
#define REPEAT_INFINITE
Definition: regint.h:408
int lower
Definition: regparse.c:2299
#define IS_SYNTAX_OP2(syn, opm)
Definition: regparse.h:331
int back_num
Definition: regparse.c:456
#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN
Definition: onigmo.h:663
int ctype
Definition: regparse.c:2321
OnigOptionType option
Definition: regparse.h:291
#define ONIGERR_INVALID_POSIX_BRACKET_TYPE
Definition: onigmo.h:661
#define NODE_STR_MARGIN
Definition: regparse.h:100
CCSTATE
Definition: regparse.c:4397
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC
Definition: onigmo.h:605
Definition: regenc.h:118
#define ONOFF(v, f, negative)
Definition: regparse.c:160
struct _Node * target
Definition: regparse.h:182
#define ONIG_SYN_OP2_ESC_U_HEX4
Definition: onigmo.h:566
struct OnigToken::@103::@107 call
#define ONIGENC_CODE_RANGE_TO(range, i)
Definition: onigmo.h:141
int greedy
Definition: regparse.h:185
#define INIT_SCANENV_MEMNODES_ALLOC_SIZE
Definition: regparse.c:980
#define NST_BY_NUMBER
Definition: regparse.h:142
#define ONIGENC_CTYPE_CNTRL
Definition: onigmo.h:297
struct OnigToken::@103::@104 anchor
int onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar *p, const UChar *end, const UChar *sascii, int n)
Definition: regenc.c:860
#define ONIG_SYN_OP2_OPTION_RUBY
Definition: onigmo.h:555
int onig_parse_make_tree(Node **root, const UChar *pattern, const UChar *end, regex_t *reg, ScanEnv *env)
Definition: regparse.c:7013
const char * name
Definition: nkf.c:208
#define NT_CALL
Definition: regparse.h:48
#define xrealloc
Definition: defines.h:186
#define DIGITVAL(code)
Definition: regint.h:375
#define ONIG_SYN_OP_LPAREN_SUBEXP
Definition: onigmo.h:531
int onig_scan_unsigned_number(UChar **src, const UChar *end, OnigEncoding enc)
Definition: regparse.c:1556
#define NSTRING_SET_RAW(node)
Definition: regparse.h:109
#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP
Definition: onigmo.h:590
#define NT_CTYPE
Definition: regparse.h:40
int onig_noname_group_capture_is_active(const regex_t *reg)
Definition: regparse.c:963
#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP
Definition: onigmo.h:532
ScanEnv * env
Definition: regparse.c:5496
#define ANCHOR_BEGIN_BUF
Definition: regint.h:527
int back_ref1
Definition: regparse.c:458
#define BIT_STATUS_BITS_NUM
Definition: regint.h:354
OnigEncoding enc
Definition: onigmo.h:772
#define IS_NOT_NULL(p)
Definition: regint.h:299
const UChar * end
Definition: regparse.c:363
short int len
Definition: regenc.h:119
#define ANCHOR_LOOK_BEHIND
Definition: regint.h:540
int(* func)(const UChar *, const UChar *, int, int *, regex_t *, void *)
Definition: regparse.c:553
int possessive
Definition: regparse.c:2302
#define ONIGERR_MEMORY
Definition: onigmo.h:629
#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY
Definition: onigmo.h:562
void onig_strcpy(UChar *dest, const UChar *src, const UChar *end)
Definition: regparse.c:259
void void xfree(void *)
#define POSIX_BRACKET_NAME_MIN_LEN
BitStatusType bt_mem_start
Definition: regparse.h:296
#define ONIGENC_IS_CODE_DIGIT(enc, code)
Definition: onigmo.h:396
#define IS_MC_ESC_CODE(code, syn)
Definition: regint.h:755
#define ANCHOR_WORD_BOUND
Definition: regint.h:534
#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR
Definition: onigmo.h:534
#define ANCHOR_KEEP
Definition: regint.h:546
OnigOptionType options
Definition: onigmo.h:768
#define env
#define ONIGENC_CTYPE_DIGIT
Definition: onigmo.h:298
#define MAX(a, b)
Definition: regint.h:296
#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT
Definition: onigmo.h:553
int onig_renumber_name_table(regex_t *reg, GroupNumRemap *map)
Definition: regparse.c:611
#define NULL
Definition: _sdbm.c:102
#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE
Definition: onigmo.h:667
#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS
Definition: onigmo.h:657
#define NCDR(node)
Definition: regparse.h:87
st_index_t num_entries
Definition: st.h:86
#define MC_ANYCHAR(syn)
Definition: regint.h:749
#define ONIG_MAX_BACKREF_NUM
Definition: onigmo.h:439
#define ONIGERR_EMPTY_CHAR_CLASS
Definition: onigmo.h:644
int warnings_flag
Definition: regparse.h:322
#define ONIG_SYN_OP_ESC_VBAR_ALT
Definition: onigmo.h:530
#define ruby_verbose
Definition: ruby.h:1813
const OnigSyntaxType * OnigDefaultSyntax
Definition: regparse.c:85
#define ONIG_SYN_OP_LINE_ANCHOR
Definition: onigmo.h:542
#define ONIGERR_TOO_SHORT_DIGITS
Definition: onigmo.h:677
#define IS_ASCII_RANGE(option)
Definition: regint.h:393
#define ONIG_NO_SUPPORT_CONFIG
Definition: onigmo.h:626
#define SET_NTYPE(node, ntype)
Definition: regparse.h:70
#define ONIGERR_PARSER_BUG
Definition: onigmo.h:631
#define NT_ENCLOSE
Definition: regparse.h:44
#define ONIGERR_INVALID_CONDITION_PATTERN
Definition: onigmo.h:664
int exist_level
Definition: regparse.c:2310
#define ONIG_SYN_ALLOW_INVALID_INTERVAL
Definition: onigmo.h:591
#define ONIG_SYN_OP2_QMARK_VBAR_BRANCH_RESET
Definition: onigmo.h:580
#define NSTR_RAW
Definition: regparse.h:104
#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES
Definition: onigmo.h:671
#define ONIGENC_CTYPE_UPPER
Definition: onigmo.h:304
#define ONIGENC_IS_CODE_NAME(enc, c)
Definition: regparse.c:2510
#define enclen(enc, p, e)
Definition: regenc.h:93
#define BITSET_CLEAR(bs)
Definition: regint.h:427
BitStatusType capture_history
Definition: regparse.h:295
#define BITSET_SIZE
Definition: regint.h:415
#define ONIG_SYN_OP_ESC_X_HEX2
Definition: onigmo.h:548
Node * onig_node_new_anchor(int type)
Definition: regparse.c:1222