Ruby 2.5.0dev (2017-10-22 revision 60238)
regsyntax.c
Go to the documentation of this file.
1 /**********************************************************************
2  regsyntax.c - Onigmo (Oniguruma-mod) (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6  * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include "regint.h"
32 
34  0
36  , 0
38  ,
39  {
40  (OnigCodePoint )'\\' /* esc */
41  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
42  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
43  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
44  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
45  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
46  }
47 };
48 
52  , 0
53  , 0
55  ,
56  {
57  (OnigCodePoint )'\\' /* esc */
58  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
59  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
60  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
61  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
62  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
63  }
64 };
65 
70  , 0
76  ,
77  {
78  (OnigCodePoint )'\\' /* esc */
79  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
80  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
81  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
82  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
83  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
84  }
85 };
86 
97  ,
98  {
99  (OnigCodePoint )'\\' /* esc */
100  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
101  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
102  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
103  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
104  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
105  }
106 };
107 
116  , 0
119  ,
120  {
121  (OnigCodePoint )'\\' /* esc */
122  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
123  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
124  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
125  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
126  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
127  }
128 };
129 
132  , 0
135  ,
136  {
137  (OnigCodePoint )'\\' /* esc */
138  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
139  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
140  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
141  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
142  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
143  }
144 };
145 
159  ,
160  {
161  (OnigCodePoint )'\\' /* esc */
162  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
163  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
164  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
165  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
166  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
167  }
168 };
169 
170 /* Perl 5.8 */
185  ,
186  {
187  (OnigCodePoint )'\\' /* esc */
188  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
189  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
190  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
191  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
192  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
193  }
194 };
195 
196 /* Perl 5.8 + named group */
212  , ( SYN_GNU_REGEX_BV |
216  ,
217  {
218  (OnigCodePoint )'\\' /* esc */
219  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
220  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
221  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
222  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
223  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
224  }
225 };
226 
227 /* Perl 5.10+ */
249  , ( SYN_GNU_REGEX_BV |
254  ,
255  {
256  (OnigCodePoint )'\\' /* esc */
257  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
258  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
259  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
260  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
261  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
262  }
263 };
264 
279  , ( SYN_GNU_REGEX_BV |
282  ,
283  {
284  (OnigCodePoint )'\\' /* esc */
285  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
286  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
287  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
288  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
289  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
290  }
291 };
292 
293 
294 
295 extern int
297 {
298  if (IS_NULL(syntax))
299  syntax = ONIG_SYNTAX_RUBY;
300 
301  OnigDefaultSyntax = syntax;
302  return 0;
303 }
304 
305 extern void
307 {
308  *to = *from;
309 }
310 
311 extern void
312 onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
313 {
314  syntax->op = op;
315 }
316 
317 extern void
318 onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
319 {
320  syntax->op2 = op2;
321 }
322 
323 extern void
324 onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
325 {
326  syntax->behavior = behavior;
327 }
328 
329 extern void
331 {
332  syntax->options = options;
333 }
334 
335 extern unsigned int
337 {
338  return syntax->op;
339 }
340 
341 extern unsigned int
343 {
344  return syntax->op2;
345 }
346 
347 extern unsigned int
349 {
350  return syntax->behavior;
351 }
352 
353 extern OnigOptionType
355 {
356  return syntax->options;
357 }
358 
359 #ifdef USE_VARIABLE_META_CHARS
361  unsigned int what, OnigCodePoint code)
362 {
363  switch (what) {
365  enc->meta_char_table.esc = code;
366  break;
368  enc->meta_char_table.anychar = code;
369  break;
371  enc->meta_char_table.anytime = code;
372  break;
374  enc->meta_char_table.zero_or_one_time = code;
375  break;
377  enc->meta_char_table.one_or_more_time = code;
378  break;
380  enc->meta_char_table.anychar_anytime = code;
381  break;
382  default:
384  break;
385  }
386  return 0;
387 }
388 #endif /* USE_VARIABLE_META_CHARS */
unsigned int OnigOptionType
Definition: onigmo.h:445
#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS
Definition: onigmo.h:588
#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY
Definition: onigmo.h:568
#define ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP
Definition: onigmo.h:582
#define IS_NULL(p)
Definition: regint.h:298
#define ONIG_SYNTAX_RUBY
Definition: onigmo.h:511
unsigned int OnigCodePoint
Definition: onigmo.h:80
OnigCodePoint anychar
Definition: onigmo.h:151
#define ONIG_SYN_OP_ESC_W_WORD
Definition: onigmo.h:537
#define ONIG_META_CHAR_ANYCHAR
Definition: onigmo.h:613
#define ONIG_SYN_OP_QMARK_ZERO_ONE
Definition: onigmo.h:525
#define ONIG_SYN_OP_ESC_X_BRACE_HEX8
Definition: onigmo.h:549
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME
Definition: onigmo.h:596
unsigned int onig_get_syntax_op(const OnigSyntaxType *syntax)
Definition: regsyntax.c:336
const OnigSyntaxType OnigSyntaxPerl58_NG
Definition: regsyntax.c:197
#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL
Definition: onigmo.h:561
#define ONIG_SYN_OP_ASTERISK_ZERO_INF
Definition: onigmo.h:521
int onig_set_default_syntax(const OnigSyntaxType *syntax)
Definition: regsyntax.c:296
#define ONIG_SYN_OP_POSIX_BRACKET
Definition: onigmo.h:543
OnigCodePoint anytime
Definition: onigmo.h:152
#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS
Definition: onigmo.h:589
#define ONIG_SYN_OP2_ESC_V_VTAB
Definition: onigmo.h:565
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL
Definition: onigmo.h:598
#define SYN_POSIX_COMMON_OP
Definition: regint.h:760
void onig_set_syntax_op2(OnigSyntaxType *syntax, unsigned int op2)
Definition: regsyntax.c:318
OnigCodePoint one_or_more_time
Definition: onigmo.h:154
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
Definition: onigmo.h:594
#define ONIG_SYN_OP_PLUS_ONE_INF
Definition: onigmo.h:523
const OnigSyntaxType OnigSyntaxJava
Definition: regsyntax.c:146
#define SYN_GNU_REGEX_OP
Definition: regint.h:767
unsigned int behavior
Definition: onigmo.h:482
#define ONIG_SYN_OP_ESC_PLUS_ONE_INF
Definition: onigmo.h:524
#define ONIG_SYN_OP_ESC_C_CONTROL
Definition: onigmo.h:546
#define ONIG_INEFFECTIVE_META_CHAR
Definition: onigmo.h:619
#define ONIG_SYN_OP_BRACKET_CC
Definition: onigmo.h:536
#define ONIG_SYN_OP2_QMARK_LPAREN_CONDITION
Definition: onigmo.h:581
unsigned int onig_get_syntax_op2(const OnigSyntaxType *syntax)
Definition: regsyntax.c:342
#define ONIG_OPTION_CAPTURE_GROUP
Definition: onigmo.h:460
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
Definition: onigmo.h:595
const OnigSyntaxType OnigSyntaxEmacs
Definition: regsyntax.c:87
#define ONIG_SYN_OP_VBAR_ALT
Definition: onigmo.h:529
const OnigSyntaxType OnigSyntaxPerl58
Definition: regsyntax.c:171
#define ONIGERR_INVALID_ARGUMENT
Definition: onigmo.h:640
#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE
Definition: onigmo.h:572
OnigOptionType onig_get_syntax_options(const OnigSyntaxType *syntax)
Definition: regsyntax.c:354
unsigned int op2
Definition: onigmo.h:481
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT
Definition: onigmo.h:556
#define ONIG_META_CHAR_ANYTIME
Definition: onigmo.h:614
#define ONIG_SYN_OP2_CCLASS_SET_OP
Definition: onigmo.h:558
OnigCodePoint esc
Definition: onigmo.h:150
#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE
Definition: onigmo.h:526
OnigCodePoint zero_or_one_time
Definition: onigmo.h:153
#define ONIG_OPTION_WORD_BOUND_ALL_RANGE
Definition: onigmo.h:469
#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT
Definition: onigmo.h:569
#define ONIG_SYN_OP_DECIMAL_BACKREF
Definition: onigmo.h:535
#define ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP
Definition: onigmo.h:599
#define ONIG_META_CHAR_ESCAPE
Definition: onigmo.h:612
#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV
Definition: onigmo.h:592
#define ONIG_SYN_OP_BRACE_INTERVAL
Definition: onigmo.h:527
#define ONIG_OPTION_SINGLELINE
Definition: onigmo.h:455
const OnigSyntaxType OnigSyntaxPosixExtended
Definition: regsyntax.c:66
#define ONIG_META_CHAR_ANYCHAR_ANYTIME
Definition: onigmo.h:617
#define ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK
Definition: onigmo.h:573
#define ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER
Definition: onigmo.h:574
#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
Definition: onigmo.h:552
#define ONIG_SYN_OP_ESC_CONTROL_CHARS
Definition: onigmo.h:545
const OnigSyntaxType OnigSyntaxPython
Definition: regsyntax.c:265
#define ONIG_SYN_OP_QMARK_NON_GREEDY
Definition: onigmo.h:544
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC
Definition: onigmo.h:602
#define SYN_GNU_REGEX_BV
Definition: regint.h:780
void onig_set_syntax_op(OnigSyntaxType *syntax, unsigned int op)
Definition: regsyntax.c:312
const OnigSyntaxType OnigSyntaxPosixBasic
Definition: regsyntax.c:49
const OnigSyntaxType OnigSyntaxASIS
Definition: regsyntax.c:33
#define ONIG_SYN_OP2_ESC_G_BRACE_BACKREF
Definition: onigmo.h:578
#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF
Definition: onigmo.h:560
#define ONIG_SYN_OP2_OPTION_PERL
Definition: onigmo.h:554
#define ONIG_SYN_OP_ESC_O_BRACE_OCTAL
Definition: onigmo.h:550
#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
Definition: onigmo.h:538
#define ONIG_SYN_OP_ESC_BRACE_INTERVAL
Definition: onigmo.h:528
#define ONIG_SYN_OP_ESC_OCTAL3
Definition: onigmo.h:547
void onig_set_syntax_behavior(OnigSyntaxType *syntax, unsigned int behavior)
Definition: regsyntax.c:324
#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
Definition: onigmo.h:567
#define ONIG_OPTION_MULTILINE
Definition: onigmo.h:453
#define ONIG_SYN_OP2_QMARK_SUBEXP_CALL
Definition: onigmo.h:579
const OnigSyntaxType OnigSyntaxPerl
Definition: regsyntax.c:228
#define ONIG_SYN_OP_ESC_B_WORD_BOUND
Definition: onigmo.h:539
#define ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP
Definition: onigmo.h:577
void onig_set_syntax_options(OnigSyntaxType *syntax, OnigOptionType options)
Definition: regsyntax.c:330
#define ONIG_OPTION_NONE
Definition: onigmo.h:450
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
Definition: onigmo.h:604
const OnigSyntaxType OnigSyntaxGrep
Definition: regsyntax.c:108
#define ONIG_OPTION_ASCII_RANGE
Definition: onigmo.h:467
unsigned int onig_get_syntax_behavior(const OnigSyntaxType *syntax)
Definition: regsyntax.c:348
#define ONIG_SYN_OP_DOT_ANYCHAR
Definition: onigmo.h:520
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL
Definition: onigmo.h:557
#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP
Definition: onigmo.h:559
OnigMetaCharTableType meta_char_table
Definition: onigmo.h:484
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC
Definition: onigmo.h:605
#define ONIG_SYN_OP2_ESC_U_HEX4
Definition: onigmo.h:566
ONIG_EXTERN const OnigSyntaxType * OnigDefaultSyntax
Definition: onigmo.h:515
#define ONIG_SYN_OP_LPAREN_SUBEXP
Definition: onigmo.h:531
#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP
Definition: onigmo.h:590
void onig_copy_syntax(OnigSyntaxType *to, const OnigSyntaxType *from)
Definition: regsyntax.c:306
#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP
Definition: onigmo.h:532
const OnigSyntaxType OnigSyntaxGnuRegex
Definition: regsyntax.c:130
int onig_set_meta_char(OnigSyntaxType *enc, unsigned int what, OnigCodePoint code)
Definition: regsyntax.c:360
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS
Definition: onigmo.h:587
#define ONIG_META_CHAR_ZERO_OR_ONE_TIME
Definition: onigmo.h:615
#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT
Definition: onigmo.h:553
unsigned int op
Definition: onigmo.h:480
#define ONIG_SYN_OP_ESC_VBAR_ALT
Definition: onigmo.h:530
#define ONIG_SYN_OP_LINE_ANCHOR
Definition: onigmo.h:542
OnigOptionType options
Definition: onigmo.h:483
#define ONIG_META_CHAR_ONE_OR_MORE_TIME
Definition: onigmo.h:616
OnigCodePoint anychar_anytime
Definition: onigmo.h:155
#define ONIG_SYN_OP_ESC_X_HEX2
Definition: onigmo.h:548