Ruby 2.7.6p219 (2022-04-12 revision c9c2245c0a25176072e02db9254f0e0c84c805cd)
regparse.c
Go to the documentation of this file.
1/**********************************************************************
2 regparse.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regparse.h"
32#include <stdarg.h>
33
34#define WARN_BUFSIZE 256
35
36#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
37
38
55#ifndef RUBY
57#endif
74 ,
75 {
76 (OnigCodePoint )'\\' /* esc */
77 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
78 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
79 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
80 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
81 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
82 }
83};
84
86
87extern void onig_null_warn(const char* s ARG_UNUSED) { }
88
89#ifdef DEFAULT_WARN_FUNCTION
90static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
91#else
92static OnigWarnFunc onig_warn = onig_null_warn;
93#endif
94
95#ifdef DEFAULT_VERB_WARN_FUNCTION
96static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
97#else
98static OnigWarnFunc onig_verb_warn = onig_null_warn;
99#endif
100
102{
103 onig_warn = f;
104}
105
107{
108 onig_verb_warn = f;
109}
110
111static void CC_DUP_WARN(ScanEnv *env, OnigCodePoint from, OnigCodePoint to);
112
113
114static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
115
116extern unsigned int
118{
119 return ParseDepthLimit;
120}
121
122extern int
123onig_set_parse_depth_limit(unsigned int depth)
124{
125 if (depth == 0)
126 ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
127 else
128 ParseDepthLimit = depth;
129 return 0;
130}
131
132
133static void
134bbuf_free(BBuf* bbuf)
135{
136 if (IS_NOT_NULL(bbuf)) {
137 if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
138 xfree(bbuf);
139 }
140}
141
142static int
143bbuf_clone(BBuf** rto, BBuf* from)
144{
145 int r;
146 BBuf *to;
147
148 *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
150 r = BBUF_INIT(to, from->alloc);
151 if (r != 0) return r;
152 to->used = from->used;
153 xmemcpy(to->p, from->p, from->used);
154 return 0;
155}
156
157#define BACKREF_REL_TO_ABS(rel_no, env) \
158 ((env)->num_mem + 1 + (rel_no))
159
/*
 * ONOFF(v, f, negative): when `negative` is non-zero clear the bits `f`
 * in `v`, otherwise set them.  The result is the updated value of `v`.
 *
 * The whole expansion is wrapped in parentheses so that the conditional
 * expression cannot be re-associated by operators surrounding the macro
 * invocation (e.g. `1 + ONOFF(...)`).
 */
#define ONOFF(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
161
162#define MBCODE_START_POS(enc) \
163 (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
164
165#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
166 add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ONIG_LAST_CODE_POINT)
167
168#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
169 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
170 r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
171 if (r) return r;\
172 }\
173} while (0)
174
175
176#define BITSET_SET_BIT_CHKDUP(bs, pos) do { \
177 if (BITSET_AT(bs, pos)) CC_DUP_WARN(env, pos, pos); \
178 BS_ROOM(bs, pos) |= BS_BIT(pos); \
179} while (0)
180
181#define BITSET_IS_EMPTY(bs,empty) do {\
182 int i;\
183 empty = 1;\
184 for (i = 0; i < BITSET_SIZE; i++) {\
185 if ((bs)[i] != 0) {\
186 empty = 0; break;\
187 }\
188 }\
189} while (0)
190
191static void
192bitset_set_range(ScanEnv *env, BitSetRef bs, int from, int to)
193{
194 int i;
195 for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
197 }
198}
199
200#if 0
201static void
202bitset_set_all(BitSetRef bs)
203{
204 int i;
205 for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }
206}
207#endif
208
209static void
210bitset_invert(BitSetRef bs)
211{
212 int i;
213 for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
214}
215
216static void
217bitset_invert_to(BitSetRef from, BitSetRef to)
218{
219 int i;
220 for (i = 0; i < BITSET_SIZE; i++) { to[i] = ~(from[i]); }
221}
222
223static void
224bitset_and(BitSetRef dest, BitSetRef bs)
225{
226 int i;
227 for (i = 0; i < BITSET_SIZE; i++) { dest[i] &= bs[i]; }
228}
229
230static void
231bitset_or(BitSetRef dest, BitSetRef bs)
232{
233 int i;
234 for (i = 0; i < BITSET_SIZE; i++) { dest[i] |= bs[i]; }
235}
236
237static void
238bitset_copy(BitSetRef dest, BitSetRef bs)
239{
240 int i;
241 for (i = 0; i < BITSET_SIZE; i++) { dest[i] = bs[i]; }
242}
243
244#if defined(USE_NAMED_GROUP) && !defined(USE_ST_LIBRARY)
245extern int
246onig_strncmp(const UChar* s1, const UChar* s2, int n)
247{
248 int x;
249
250 while (n-- > 0) {
251 x = *s2++ - *s1++;
252 if (x) return x;
253 }
254 return 0;
255}
256#endif
257
258extern void
259onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
260{
261 ptrdiff_t len = end - src;
262 if (len > 0) {
263 xmemcpy(dest, src, len);
264 dest[len] = (UChar )0;
265 }
266}
267
268#ifdef USE_NAMED_GROUP
269static UChar*
270strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
271{
272 ptrdiff_t slen;
273 int term_len, i;
274 UChar *r;
275
276 slen = end - s;
277 term_len = ONIGENC_MBC_MINLEN(enc);
278
279 r = (UChar* )xmalloc(slen + term_len);
281 xmemcpy(r, s, slen);
282
283 for (i = 0; i < term_len; i++)
284 r[slen + i] = (UChar )0;
285
286 return r;
287}
288#endif
289
290/* scan pattern methods */
291#define PEND_VALUE 0
292
293#ifdef __GNUC__
294/* get rid of Wunused-but-set-variable and Wuninitialized */
295# define PFETCH_READY UChar* pfetch_prev = NULL; (void)pfetch_prev
296#else
297# define PFETCH_READY UChar* pfetch_prev
298#endif
299#define PEND (p < end ? 0 : 1)
300#define PUNFETCH p = pfetch_prev
301#define PINC do { \
302 pfetch_prev = p; \
303 p += enclen(enc, p, end); \
304} while (0)
305#define PFETCH(c) do { \
306 c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
307 pfetch_prev = p; \
308 p += enclen(enc, p, end); \
309} while (0)
310
311#define PINC_S do { \
312 p += enclen(enc, p, end); \
313} while (0)
314#define PFETCH_S(c) do { \
315 c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
316 p += enclen(enc, p, end); \
317} while (0)
318
/* Code point at p without advancing p; PEND_VALUE when p has reached end. */
#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
/*
 * Non-zero when the peeked code point equals c.  The argument is
 * parenthesized so the cast applies to the whole argument expression,
 * not just to its first operand.
 */
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )(c))
321
322static UChar*
323strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
324 size_t capa)
325{
326 UChar* r;
327
328 if (dest)
329 r = (UChar* )xrealloc(dest, capa + 1);
330 else
331 r = (UChar* )xmalloc(capa + 1);
332
334 onig_strcpy(r + (dest_end - dest), src, src_end);
335 return r;
336}
337
338/* dest on static area */
339static UChar*
340strcat_capa_from_static(UChar* dest, UChar* dest_end,
341 const UChar* src, const UChar* src_end, size_t capa)
342{
343 UChar* r;
344
345 r = (UChar* )xmalloc(capa + 1);
347 onig_strcpy(r, dest, dest_end);
348 onig_strcpy(r + (dest_end - dest), src, src_end);
349 return r;
350}
351
352
353#ifdef USE_ST_LIBRARY
354
355# ifdef RUBY
356# include "ruby/st.h"
357# else
358# include "st.h"
359# endif
360
361typedef struct {
362 const UChar* s;
363 const UChar* end;
365
366static int
367str_end_cmp(st_data_t xp, st_data_t yp)
368{
369 const st_str_end_key *x, *y;
370 const UChar *p, *q;
371 int c;
372
373 x = (const st_str_end_key *)xp;
374 y = (const st_str_end_key *)yp;
375 if ((x->end - x->s) != (y->end - y->s))
376 return 1;
377
378 p = x->s;
379 q = y->s;
380 while (p < x->end) {
381 c = (int )*p - (int )*q;
382 if (c != 0) return c;
383
384 p++; q++;
385 }
386
387 return 0;
388}
389
390static st_index_t
391str_end_hash(st_data_t xp)
392{
393 const st_str_end_key *x = (const st_str_end_key *)xp;
394 const UChar *p;
395 st_index_t val = 0;
396
397 p = x->s;
398 while (p < x->end) {
399 val = val * 997 + (int )*p++;
400 }
401
402 return val + (val >> 5);
403}
404
405extern hash_table_type*
407{
408 static const struct st_hash_type hashType = {
409 str_end_cmp,
410 str_end_hash,
411 };
412
413 return (hash_table_type* )
414 onig_st_init_table_with_size(&hashType, size);
415}
416
417extern int
419 const UChar* end_key, hash_data_type *value)
420{
422
423 key.s = (UChar* )str_key;
424 key.end = (UChar* )end_key;
425
426 return onig_st_lookup(table, (st_data_t )(&key), value);
427}
428
429extern int
431 const UChar* end_key, hash_data_type value)
432{
434 int result;
435
437 key->s = (UChar* )str_key;
438 key->end = (UChar* )end_key;
439 result = onig_st_insert(table, (st_data_t )key, value);
440 if (result) {
441 xfree(key);
442 }
443 return result;
444}
445
446#endif /* USE_ST_LIBRARY */
447
448
449#ifdef USE_NAMED_GROUP
450
451# define INIT_NAME_BACKREFS_ALLOC_NUM 8
452
453typedef struct {
455 size_t name_len; /* byte length */
456 int back_num; /* number of backrefs */
460} NameEntry;
461
462# ifdef USE_ST_LIBRARY
463
465typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
466
467# ifdef ONIG_DEBUG
468static int
469i_print_name_entry(UChar* key, NameEntry* e, void* arg)
470{
471 int i;
472 FILE* fp = (FILE* )arg;
473
474 fprintf(fp, "%s: ", e->name);
475 if (e->back_num == 0)
476 fputs("-", fp);
477 else if (e->back_num == 1)
478 fprintf(fp, "%d", e->back_ref1);
479 else {
480 for (i = 0; i < e->back_num; i++) {
481 if (i > 0) fprintf(fp, ", ");
482 fprintf(fp, "%d", e->back_refs[i]);
483 }
484 }
485 fputs("\n", fp);
486 return ST_CONTINUE;
487}
488
489extern int
490onig_print_names(FILE* fp, regex_t* reg)
491{
492 NameTable* t = (NameTable* )reg->name_table;
493
494 if (IS_NOT_NULL(t)) {
495 fprintf(fp, "name table\n");
496 onig_st_foreach(t, (st_foreach_callback_func *)i_print_name_entry, (HashDataType )fp);
497 fputs("\n", fp);
498 }
499 return 0;
500}
501# endif /* ONIG_DEBUG */
502
503static int
504i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
505{
506 xfree(e->name);
508 xfree(key);
509 xfree(e);
510 return ST_DELETE;
511}
512
513static int
514names_clear(regex_t* reg)
515{
516 NameTable* t = (NameTable* )reg->name_table;
517
518 if (IS_NOT_NULL(t)) {
519 onig_st_foreach(t, (st_foreach_callback_func *)i_free_name_entry, 0);
520 }
521 return 0;
522}
523
524extern int
526{
527 int r;
528 NameTable* t;
529
530 r = names_clear(reg);
531 if (r) return r;
532
533 t = (NameTable* )reg->name_table;
534 if (IS_NOT_NULL(t)) onig_st_free_table(t);
535 reg->name_table = (void* )NULL;
536 return 0;
537}
538
539static NameEntry*
540name_find(regex_t* reg, const UChar* name, const UChar* name_end)
541{
542 NameEntry* e;
543 NameTable* t = (NameTable* )reg->name_table;
544
545 e = (NameEntry* )NULL;
546 if (IS_NOT_NULL(t)) {
547 onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
548 }
549 return e;
550}
551
552typedef struct {
553 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
555 void* arg;
556 int ret;
558} INamesArg;
559
560static int
562{
563 int r = (*(arg->func))(e->name,
564 e->name + e->name_len,
565 e->back_num,
566 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
567 arg->reg, arg->arg);
568 if (r != 0) {
569 arg->ret = r;
570 return ST_STOP;
571 }
572 return ST_CONTINUE;
573}
574
575extern int
577 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
578{
579 INamesArg narg;
580 NameTable* t = (NameTable* )reg->name_table;
581
582 narg.ret = 0;
583 if (IS_NOT_NULL(t)) {
584 narg.func = func;
585 narg.reg = reg;
586 narg.arg = arg;
587 narg.enc = reg->enc; /* should be pattern encoding. */
588 onig_st_foreach(t, (st_foreach_callback_func *)i_names, (HashDataType )&narg);
589 }
590 return narg.ret;
591}
592
593static int
594i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
595{
596 int i;
597
598 if (e->back_num > 1) {
599 for (i = 0; i < e->back_num; i++) {
600 e->back_refs[i] = map[e->back_refs[i]].new_val;
601 }
602 }
603 else if (e->back_num == 1) {
604 e->back_ref1 = map[e->back_ref1].new_val;
605 }
606
607 return ST_CONTINUE;
608}
609
610extern int
612{
613 NameTable* t = (NameTable* )reg->name_table;
614
615 if (IS_NOT_NULL(t)) {
616 onig_st_foreach(t, (st_foreach_callback_func *)i_renumber_name, (HashDataType )map);
617 }
618 return 0;
619}
620
621
622extern int
624{
625 NameTable* t = (NameTable* )reg->name_table;
626
627 if (IS_NOT_NULL(t))
628 return (int )t->num_entries;
629 else
630 return 0;
631}
632
633# else /* USE_ST_LIBRARY */
634
635# define INIT_NAMES_ALLOC_NUM 8
636
637typedef struct {
638 NameEntry* e;
639 int num;
640 int alloc;
641} NameTable;
642
643# ifdef ONIG_DEBUG
644extern int
645onig_print_names(FILE* fp, regex_t* reg)
646{
647 int i, j;
648 NameEntry* e;
649 NameTable* t = (NameTable* )reg->name_table;
650
651 if (IS_NOT_NULL(t) && t->num > 0) {
652 fprintf(fp, "name table\n");
653 for (i = 0; i < t->num; i++) {
654 e = &(t->e[i]);
655 fprintf(fp, "%s: ", e->name);
656 if (e->back_num == 0) {
657 fputs("-", fp);
658 }
659 else if (e->back_num == 1) {
660 fprintf(fp, "%d", e->back_ref1);
661 }
662 else {
663 for (j = 0; j < e->back_num; j++) {
664 if (j > 0) fprintf(fp, ", ");
665 fprintf(fp, "%d", e->back_refs[j]);
666 }
667 }
668 fputs("\n", fp);
669 }
670 fputs("\n", fp);
671 }
672 return 0;
673}
674# endif
675
676static int
677names_clear(regex_t* reg)
678{
679 int i;
680 NameEntry* e;
681 NameTable* t = (NameTable* )reg->name_table;
682
683 if (IS_NOT_NULL(t)) {
684 for (i = 0; i < t->num; i++) {
685 e = &(t->e[i]);
686 if (IS_NOT_NULL(e->name)) {
687 xfree(e->name);
688 e->name = NULL;
689 e->name_len = 0;
690 e->back_num = 0;
691 e->back_alloc = 0;
693 e->back_refs = (int* )NULL;
694 }
695 }
696 if (IS_NOT_NULL(t->e)) {
697 xfree(t->e);
698 t->e = NULL;
699 }
700 t->num = 0;
701 }
702 return 0;
703}
704
705extern int
707{
708 int r;
709 NameTable* t;
710
711 r = names_clear(reg);
712 if (r) return r;
713
714 t = (NameTable* )reg->name_table;
715 if (IS_NOT_NULL(t)) xfree(t);
716 reg->name_table = NULL;
717 return 0;
718}
719
720static NameEntry*
721name_find(regex_t* reg, const UChar* name, const UChar* name_end)
722{
723 int i, len;
724 NameEntry* e;
725 NameTable* t = (NameTable* )reg->name_table;
726
727 if (IS_NOT_NULL(t)) {
728 len = name_end - name;
729 for (i = 0; i < t->num; i++) {
730 e = &(t->e[i]);
731 if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
732 return e;
733 }
734 }
735 return (NameEntry* )NULL;
736}
737
738extern int
740 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
741{
742 int i, r;
743 NameEntry* e;
744 NameTable* t = (NameTable* )reg->name_table;
745
746 if (IS_NOT_NULL(t)) {
747 for (i = 0; i < t->num; i++) {
748 e = &(t->e[i]);
749 r = (*func)(e->name, e->name + e->name_len, e->back_num,
750 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
751 reg, arg);
752 if (r != 0) return r;
753 }
754 }
755 return 0;
756}
757
758extern int
760{
761 NameTable* t = (NameTable* )reg->name_table;
762
763 if (IS_NOT_NULL(t))
764 return t->num;
765 else
766 return 0;
767}
768
769# endif /* else USE_ST_LIBRARY */
770
771static int
772name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
773{
774 int alloc;
775 NameEntry* e;
776 NameTable* t = (NameTable* )reg->name_table;
777
778 if (name_end - name <= 0)
780
781 e = name_find(reg, name, name_end);
782 if (IS_NULL(e)) {
783# ifdef USE_ST_LIBRARY
784 if (IS_NULL(t)) {
786 reg->name_table = (void* )t;
787 }
788 e = (NameEntry* )xmalloc(sizeof(NameEntry));
790
791 e->name = strdup_with_null(reg->enc, name, name_end);
792 if (IS_NULL(e->name)) {
793 xfree(e);
794 return ONIGERR_MEMORY;
795 }
796 onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
797 (HashDataType )e);
798
799 e->name_len = name_end - name;
800 e->back_num = 0;
801 e->back_alloc = 0;
802 e->back_refs = (int* )NULL;
803
804# else
805
806 if (IS_NULL(t)) {
807 alloc = INIT_NAMES_ALLOC_NUM;
808 t = (NameTable* )xmalloc(sizeof(NameTable));
810 t->e = NULL;
811 t->alloc = 0;
812 t->num = 0;
813
814 t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
815 if (IS_NULL(t->e)) {
816 xfree(t);
817 return ONIGERR_MEMORY;
818 }
819 t->alloc = alloc;
820 reg->name_table = t;
821 goto clear;
822 }
823 else if (t->num == t->alloc) {
824 int i;
825 NameEntry* p;
826
827 alloc = t->alloc * 2;
828 p = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
830 t->e = p;
831 t->alloc = alloc;
832
833 clear:
834 for (i = t->num; i < t->alloc; i++) {
835 t->e[i].name = NULL;
836 t->e[i].name_len = 0;
837 t->e[i].back_num = 0;
838 t->e[i].back_alloc = 0;
839 t->e[i].back_refs = (int* )NULL;
840 }
841 }
842 e = &(t->e[t->num]);
843 t->num++;
844 e->name = strdup_with_null(reg->enc, name, name_end);
845 if (IS_NULL(e->name)) return ONIGERR_MEMORY;
846 e->name_len = name_end - name;
847# endif
848 }
849
850 if (e->back_num >= 1 &&
853 name, name_end);
855 }
856
857 e->back_num++;
858 if (e->back_num == 1) {
859 e->back_ref1 = backref;
860 }
861 else {
862 if (e->back_num == 2) {
864 e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
866 e->back_alloc = alloc;
867 e->back_refs[0] = e->back_ref1;
868 e->back_refs[1] = backref;
869 }
870 else {
871 if (e->back_num > e->back_alloc) {
872 int* p;
873 alloc = e->back_alloc * 2;
874 p = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
876 e->back_refs = p;
877 e->back_alloc = alloc;
878 }
879 e->back_refs[e->back_num - 1] = backref;
880 }
881 }
882
883 return 0;
884}
885
886extern int
888 const UChar* name_end, int** nums)
889{
890 NameEntry* e = name_find(reg, name, name_end);
891
893
894 switch (e->back_num) {
895 case 0:
896 *nums = 0;
897 break;
898 case 1:
899 *nums = &(e->back_ref1);
900 break;
901 default:
902 *nums = e->back_refs;
903 break;
904 }
905 return e->back_num;
906}
907
908extern int
910 const UChar* name_end, const OnigRegion *region)
911{
912 int i, n, *nums;
913
914 n = onig_name_to_group_numbers(reg, name, name_end, &nums);
915 if (n < 0)
916 return n;
917 else if (n == 0)
918 return ONIGERR_PARSER_BUG;
919 else if (n == 1)
920 return nums[0];
921 else {
922 if (IS_NOT_NULL(region)) {
923 for (i = n - 1; i >= 0; i--) {
924 if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
925 return nums[i];
926 }
927 }
928 return nums[n - 1];
929 }
930}
931
932#else /* USE_NAMED_GROUP */
933
934extern int
936 const UChar* name_end, int** nums)
937{
939}
940
941extern int
943 const UChar* name_end, const OnigRegion* region)
944{
946}
947
948extern int
950 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
951{
953}
954
955extern int
957{
958 return 0;
959}
960#endif /* else USE_NAMED_GROUP */
961
962extern int
964{
966 return 0;
967
968#ifdef USE_NAMED_GROUP
969 if (onig_number_of_names(reg) > 0 &&
972 return 0;
973 }
974#endif
975
976 return 1;
977}
978
979
980#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16
981
982static void
983scan_env_clear(ScanEnv* env)
984{
985 int i;
986
987 BIT_STATUS_CLEAR(env->capture_history);
988 BIT_STATUS_CLEAR(env->bt_mem_start);
989 BIT_STATUS_CLEAR(env->bt_mem_end);
990 BIT_STATUS_CLEAR(env->backrefed_mem);
991 env->error = (UChar* )NULL;
992 env->error_end = (UChar* )NULL;
993 env->num_call = 0;
994 env->num_mem = 0;
995#ifdef USE_NAMED_GROUP
996 env->num_named = 0;
997#endif
998 env->mem_alloc = 0;
999 env->mem_nodes_dynamic = (Node** )NULL;
1000
1001 for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
1002 env->mem_nodes_static[i] = NULL_NODE;
1003
1004#ifdef USE_COMBINATION_EXPLOSION_CHECK
1005 env->num_comb_exp_check = 0;
1006 env->comb_exp_max_regnum = 0;
1007 env->curr_max_regnum = 0;
1008 env->has_recursion = 0;
1009#endif
1010 env->parse_depth = 0;
1011 env->warnings_flag = 0;
1012}
1013
1014static int
1015scan_env_add_mem_entry(ScanEnv* env)
1016{
1017 int i, need, alloc;
1018 Node** p;
1019
1020 need = env->num_mem + 1;
1021 if (need > ONIG_MAX_CAPTURE_GROUP_NUM)
1023 if (need >= SCANENV_MEMNODES_SIZE) {
1024 if (env->mem_alloc <= need) {
1025 if (IS_NULL(env->mem_nodes_dynamic)) {
1027 p = (Node** )xmalloc(sizeof(Node*) * alloc);
1029 xmemcpy(p, env->mem_nodes_static,
1030 sizeof(Node*) * SCANENV_MEMNODES_SIZE);
1031 }
1032 else {
1033 alloc = env->mem_alloc * 2;
1034 p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);
1036 }
1037
1038 for (i = env->num_mem + 1; i < alloc; i++)
1039 p[i] = NULL_NODE;
1040
1041 env->mem_nodes_dynamic = p;
1042 env->mem_alloc = alloc;
1043 }
1044 }
1045
1046 env->num_mem++;
1047 return env->num_mem;
1048}
1049
1050static int
1051scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
1052{
1053 if (env->num_mem >= num)
1054 SCANENV_MEM_NODES(env)[num] = node;
1055 else
1056 return ONIGERR_PARSER_BUG;
1057 return 0;
1058}
1059
1060
1061extern void
1063{
1064 start:
1065 if (IS_NULL(node)) return ;
1066
1067 switch (NTYPE(node)) {
1068 case NT_STR:
1069 if (NSTR(node)->capa != 0 &&
1070 IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
1071 xfree(NSTR(node)->s);
1072 }
1073 break;
1074
1075 case NT_LIST:
1076 case NT_ALT:
1077 onig_node_free(NCAR(node));
1078 {
1079 Node* next_node = NCDR(node);
1080
1081 xfree(node);
1082 node = next_node;
1083 goto start;
1084 }
1085 break;
1086
1087 case NT_CCLASS:
1088 {
1089 CClassNode* cc = NCCLASS(node);
1090
1091 if (cc->mbuf)
1092 bbuf_free(cc->mbuf);
1093 }
1094 break;
1095
1096 case NT_QTFR:
1097 if (NQTFR(node)->target)
1098 onig_node_free(NQTFR(node)->target);
1099 break;
1100
1101 case NT_ENCLOSE:
1102 if (NENCLOSE(node)->target)
1103 onig_node_free(NENCLOSE(node)->target);
1104 break;
1105
1106 case NT_BREF:
1107 if (IS_NOT_NULL(NBREF(node)->back_dynamic))
1108 xfree(NBREF(node)->back_dynamic);
1109 break;
1110
1111 case NT_ANCHOR:
1112 if (NANCHOR(node)->target)
1113 onig_node_free(NANCHOR(node)->target);
1114 break;
1115 }
1116
1117 xfree(node);
1118}
1119
1120static Node*
1121node_new(void)
1122{
1123 Node* node;
1124
1125 node = (Node* )xmalloc(sizeof(Node));
1126 /* xmemset(node, 0, sizeof(Node)); */
1127 return node;
1128}
1129
1130static void
1131initialize_cclass(CClassNode* cc)
1132{
1133 BITSET_CLEAR(cc->bs);
1134 /* cc->base.flags = 0; */
1135 cc->flags = 0;
1136 cc->mbuf = NULL;
1137}
1138
1139static Node*
1140node_new_cclass(void)
1141{
1142 Node* node = node_new();
1143 CHECK_NULL_RETURN(node);
1144
1145 SET_NTYPE(node, NT_CCLASS);
1146 initialize_cclass(NCCLASS(node));
1147 return node;
1148}
1149
1150static Node*
1151node_new_ctype(int type, int not, int ascii_range)
1152{
1153 Node* node = node_new();
1154 CHECK_NULL_RETURN(node);
1155
1156 SET_NTYPE(node, NT_CTYPE);
1157 NCTYPE(node)->ctype = type;
1158 NCTYPE(node)->not = not;
1159 NCTYPE(node)->ascii_range = ascii_range;
1160 return node;
1161}
1162
1163static Node*
1164node_new_anychar(void)
1165{
1166 Node* node = node_new();
1167 CHECK_NULL_RETURN(node);
1168
1169 SET_NTYPE(node, NT_CANY);
1170 return node;
1171}
1172
1173static Node*
1174node_new_list(Node* left, Node* right)
1175{
1176 Node* node = node_new();
1177 CHECK_NULL_RETURN(node);
1178
1179 SET_NTYPE(node, NT_LIST);
1180 NCAR(node) = left;
1181 NCDR(node) = right;
1182 return node;
1183}
1184
1185extern Node*
1187{
1188 return node_new_list(left, right);
1189}
1190
1191extern Node*
1193{
1194 Node *n;
1195
1197 if (IS_NULL(n)) return NULL_NODE;
1198
1199 if (IS_NOT_NULL(list)) {
1200 while (IS_NOT_NULL(NCDR(list)))
1201 list = NCDR(list);
1202
1203 NCDR(list) = n;
1204 }
1205
1206 return n;
1207}
1208
1209extern Node*
1211{
1212 Node* node = node_new();
1213 CHECK_NULL_RETURN(node);
1214
1215 SET_NTYPE(node, NT_ALT);
1216 NCAR(node) = left;
1217 NCDR(node) = right;
1218 return node;
1219}
1220
1221extern Node*
1223{
1224 Node* node = node_new();
1225 CHECK_NULL_RETURN(node);
1226
1227 SET_NTYPE(node, NT_ANCHOR);
1228 NANCHOR(node)->type = type;
1229 NANCHOR(node)->target = NULL;
1230 NANCHOR(node)->char_len = -1;
1231 NANCHOR(node)->ascii_range = 0;
1232 return node;
1233}
1234
1235static Node*
1236node_new_backref(int back_num, int* backrefs, int by_name,
1238 int exist_level, int nest_level,
1239#endif
1240 ScanEnv* env)
1241{
1242 int i;
1243 Node* node = node_new();
1244
1245 CHECK_NULL_RETURN(node);
1246
1247 SET_NTYPE(node, NT_BREF);
1248 NBREF(node)->state = 0;
1249 NBREF(node)->back_num = back_num;
1250 NBREF(node)->back_dynamic = (int* )NULL;
1251 if (by_name != 0)
1252 NBREF(node)->state |= NST_NAME_REF;
1253
1254#ifdef USE_BACKREF_WITH_LEVEL
1255 if (exist_level != 0) {
1256 NBREF(node)->state |= NST_NEST_LEVEL;
1257 NBREF(node)->nest_level = nest_level;
1258 }
1259#endif
1260
1261 for (i = 0; i < back_num; i++) {
1262 if (backrefs[i] <= env->num_mem &&
1263 IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
1264 NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */
1265 break;
1266 }
1267 }
1268
1269 if (back_num <= NODE_BACKREFS_SIZE) {
1270 for (i = 0; i < back_num; i++)
1271 NBREF(node)->back_static[i] = backrefs[i];
1272 }
1273 else {
1274 int* p = (int* )xmalloc(sizeof(int) * back_num);
1275 if (IS_NULL(p)) {
1276 onig_node_free(node);
1277 return NULL;
1278 }
1279 NBREF(node)->back_dynamic = p;
1280 for (i = 0; i < back_num; i++)
1281 p[i] = backrefs[i];
1282 }
1283 return node;
1284}
1285
1286#ifdef USE_SUBEXP_CALL
1287static Node*
1288node_new_call(UChar* name, UChar* name_end, int gnum)
1289{
1290 Node* node = node_new();
1291 CHECK_NULL_RETURN(node);
1292
1293 SET_NTYPE(node, NT_CALL);
1294 NCALL(node)->state = 0;
1295 NCALL(node)->target = NULL_NODE;
1296 NCALL(node)->name = name;
1297 NCALL(node)->name_end = name_end;
1298 NCALL(node)->group_num = gnum; /* call by number if gnum != 0 */
1299 return node;
1300}
1301#endif
1302
1303static Node*
1304node_new_quantifier(int lower, int upper, int by_number)
1305{
1306 Node* node = node_new();
1307 CHECK_NULL_RETURN(node);
1308
1309 SET_NTYPE(node, NT_QTFR);
1310 NQTFR(node)->state = 0;
1311 NQTFR(node)->target = NULL;
1312 NQTFR(node)->lower = lower;
1313 NQTFR(node)->upper = upper;
1314 NQTFR(node)->greedy = 1;
1315 NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY;
1316 NQTFR(node)->head_exact = NULL_NODE;
1317 NQTFR(node)->next_head_exact = NULL_NODE;
1318 NQTFR(node)->is_referred = 0;
1319 if (by_number != 0)
1320 NQTFR(node)->state |= NST_BY_NUMBER;
1321
1322#ifdef USE_COMBINATION_EXPLOSION_CHECK
1323 NQTFR(node)->comb_exp_check_num = 0;
1324#endif
1325
1326 return node;
1327}
1328
1329static Node*
1330node_new_enclose(int type)
1331{
1332 Node* node = node_new();
1333 CHECK_NULL_RETURN(node);
1334
1335 SET_NTYPE(node, NT_ENCLOSE);
1336 NENCLOSE(node)->type = type;
1337 NENCLOSE(node)->state = 0;
1338 NENCLOSE(node)->regnum = 0;
1339 NENCLOSE(node)->option = 0;
1340 NENCLOSE(node)->target = NULL;
1341 NENCLOSE(node)->call_addr = -1;
1342 NENCLOSE(node)->opt_count = 0;
1343 return node;
1344}
1345
1346extern Node*
1348{
1349 return node_new_enclose(type);
1350}
1351
1352static Node*
1353node_new_enclose_memory(OnigOptionType option, int is_named)
1354{
1355 Node* node = node_new_enclose(ENCLOSE_MEMORY);
1356 CHECK_NULL_RETURN(node);
1357 if (is_named != 0)
1359
1360#ifdef USE_SUBEXP_CALL
1361 NENCLOSE(node)->option = option;
1362#endif
1363 return node;
1364}
1365
1366static Node*
1367node_new_option(OnigOptionType option)
1368{
1369 Node* node = node_new_enclose(ENCLOSE_OPTION);
1370 CHECK_NULL_RETURN(node);
1371 NENCLOSE(node)->option = option;
1372 return node;
1373}
1374
1375extern int
1376onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
1377{
1378 ptrdiff_t addlen = end - s;
1379
1380 if (addlen > 0) {
1381 ptrdiff_t len = NSTR(node)->end - NSTR(node)->s;
1382
1383 if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
1384 UChar* p;
1385 ptrdiff_t capa = len + addlen + NODE_STR_MARGIN;
1386
1387 if (capa <= NSTR(node)->capa) {
1388 onig_strcpy(NSTR(node)->s + len, s, end);
1389 }
1390 else {
1391 if (NSTR(node)->s == NSTR(node)->buf)
1392 p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end,
1393 s, end, capa);
1394 else
1395 p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa);
1396
1398 NSTR(node)->s = p;
1399 NSTR(node)->capa = (int )capa;
1400 }
1401 }
1402 else {
1403 onig_strcpy(NSTR(node)->s + len, s, end);
1404 }
1405 NSTR(node)->end = NSTR(node)->s + len + addlen;
1406 }
1407
1408 return 0;
1409}
1410
1411extern int
1412onig_node_str_set(Node* node, const UChar* s, const UChar* end)
1413{
1414 onig_node_str_clear(node);
1415 return onig_node_str_cat(node, s, end);
1416}
1417
1418static int
1419node_str_cat_char(Node* node, UChar c)
1420{
1421 UChar s[1];
1422
1423 s[0] = c;
1424 return onig_node_str_cat(node, s, s + 1);
1425}
1426
1427static int
1428node_str_cat_codepoint(Node* node, OnigEncoding enc, OnigCodePoint c)
1429{
1431 int num = ONIGENC_CODE_TO_MBC(enc, c, buf);
1432 if (num < 0) return num;
1433 return onig_node_str_cat(node, buf, buf + num);
1434}
1435
1436#if 0
1437extern void
1438onig_node_conv_to_str_node(Node* node, int flag)
1439{
1440 SET_NTYPE(node, NT_STR);
1441 NSTR(node)->flag = flag;
1442 NSTR(node)->capa = 0;
1443 NSTR(node)->s = NSTR(node)->buf;
1444 NSTR(node)->end = NSTR(node)->buf;
1445}
1446#endif
1447
1448extern void
1450{
1451 if (NSTR(node)->capa != 0 &&
1452 IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
1453 xfree(NSTR(node)->s);
1454 }
1455
1456 NSTR(node)->capa = 0;
1457 NSTR(node)->flag = 0;
1458 NSTR(node)->s = NSTR(node)->buf;
1459 NSTR(node)->end = NSTR(node)->buf;
1460}
1461
1462static Node*
1463node_new_str(const UChar* s, const UChar* end)
1464{
1465 Node* node = node_new();
1466 CHECK_NULL_RETURN(node);
1467
1468 SET_NTYPE(node, NT_STR);
1469 NSTR(node)->capa = 0;
1470 NSTR(node)->flag = 0;
1471 NSTR(node)->s = NSTR(node)->buf;
1472 NSTR(node)->end = NSTR(node)->buf;
1473 if (onig_node_str_cat(node, s, end)) {
1474 onig_node_free(node);
1475 return NULL;
1476 }
1477 return node;
1478}
1479
1480extern Node*
1481onig_node_new_str(const UChar* s, const UChar* end)
1482{
1483 return node_new_str(s, end);
1484}
1485
1486static Node*
1487node_new_str_raw(UChar* s, UChar* end)
1488{
1489 Node* node = node_new_str(s, end);
1490 if (IS_NOT_NULL(node))
1491 NSTRING_SET_RAW(node);
1492 return node;
1493}
1494
1495static Node*
1496node_new_empty(void)
1497{
1498 return node_new_str(NULL, NULL);
1499}
1500
1501static Node*
1502node_new_str_raw_char(UChar c)
1503{
1504 UChar p[1];
1505
1506 p[0] = c;
1507 return node_new_str_raw(p, p + 1);
1508}
1509
1510static Node*
1511str_node_split_last_char(StrNode* sn, OnigEncoding enc)
1512{
1513 const UChar *p;
1514 Node* n = NULL_NODE;
1515
1516 if (sn->end > sn->s) {
1517 p = onigenc_get_prev_char_head(enc, sn->s, sn->end, sn->end);
1518 if (p && p > sn->s) { /* can be split. */
1519 n = node_new_str(p, sn->end);
1520 if (IS_NOT_NULL(n) && (sn->flag & NSTR_RAW) != 0)
1522 sn->end = (UChar* )p;
1523 }
1524 }
1525 return n;
1526}
1527
1528static int
1529str_node_can_be_split(StrNode* sn, OnigEncoding enc)
1530{
1531 if (sn->end > sn->s) {
1532 return ((enclen(enc, sn->s, sn->end) < sn->end - sn->s) ? 1 : 0);
1533 }
1534 return 0;
1535}
1536
1537#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
/* Shift the text of `sn` right by `num` bytes and fill the first `num`
 * bytes with `val`: the current text is copied to `buf`, copied back
 * starting at sn->s[num], sn->end is advanced by `num`, and the freed
 * prefix is overwritten with `val`.
 * NOTE(review): the embedded numbering jumps from 1540 to 1542, so the
 * declaration of `buf` is missing from this listing.
 * NOTE(review): the function is declared `int` but no return statement is
 * visible; a caller that uses the result would read an indeterminate value. */
1538static int
1539node_str_head_pad(StrNode* sn, int num, UChar val)
1540{
1542 int i, len;
1543
1544 len = sn->end - sn->s;
1545 onig_strcpy(buf, sn->s, sn->end);
1546 onig_strcpy(&(sn->s[num]), buf, buf + len);
1547 sn->end += num;
1548
1549 for (i = 0; i < num; i++) {
1550 sn->s[i] = val;
1551 }
1552}
1553#endif
1554
/* Read a run of decimal digits starting at *src.
 * Returns the parsed value, or -1 when adding the next digit would exceed
 * INT_MAX_LIMIT. *src is moved past the digits only on success; on
 * overflow it is left untouched. When no digit is present the result is 0
 * and *src does not move.
 * NOTE(review): the embedded numbering skips 1556 and 1561, so the line with
 * the function name/parameters and one line after the declarations are
 * missing here; presumably this is onig_scan_unsigned_number(), which other
 * code in this file calls with (UChar**, UChar*, OnigEncoding) -- confirm. */
1555extern int
1557{
1558 unsigned int num, val;
1559 OnigCodePoint c;
1560 UChar* p = *src;
1562
1563 num = 0;
1564 while (!PEND) {
1565 PFETCH(c);
1566 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
1567 val = (unsigned int )DIGITVAL(c);
/* overflow test done before multiplying so num*10+val never exceeds the limit */
1568 if ((INT_MAX_LIMIT - val) / 10UL < num)
1569 return -1; /* overflow */
1570
1571 num = num * 10 + val;
1572 }
1573 else {
1574 PUNFETCH;
1575 break;
1576 }
1577 }
1578 *src = p;
1579 return num;
1580}
1581
/* Read between `minlen` and `maxlen` hexadecimal digits starting at *src.
 * Returns the parsed value, -1 when the value would exceed INT_MAX_LIMIT,
 * or -2 when fewer than `minlen` digits were available. *src is advanced
 * only when a value is returned.
 * NOTE(review): the embedded numbering skips 1590, so one line after the
 * declarations is missing from this listing. */
1582static int
1583scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,
1584 int maxlen, OnigEncoding enc)
1585{
1586 OnigCodePoint c;
1587 unsigned int num, val;
1588 int restlen;
1589 UChar* p = *src;
1591
/* restlen = how many of the maxlen slots may legitimately stay unused */
1592 restlen = maxlen - minlen;
1593 num = 0;
1594 while (!PEND && maxlen-- != 0) {
1595 PFETCH(c);
1596 if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {
1597 val = (unsigned int )XDIGITVAL(enc,c);
1598 if ((INT_MAX_LIMIT - val) / 16UL < num)
1599 return -1; /* overflow */
1600
1601 num = (num << 4) + XDIGITVAL(enc,c);
1602 }
1603 else {
1604 PUNFETCH;
/* undo the decrement made by the loop condition for this non-digit */
1605 maxlen++;
1606 break;
1607 }
1608 }
1609 if (maxlen > restlen)
1610 return -2; /* not enough digits */
1611 *src = p;
1612 return num;
1613}
1614
/* Read up to `maxlen` octal digits ('0'..'7') starting at *src.
 * Returns the parsed value, or -1 when the value would exceed
 * INT_MAX_LIMIT. *src is advanced only when a value is returned.
 * NOTE(review): the embedded numbering skips 1622, so one line after the
 * declarations is missing from this listing. */
1615static int
1616scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
1617 OnigEncoding enc)
1618{
1619 OnigCodePoint c;
1620 unsigned int num, val;
1621 UChar* p = *src;
1623
1624 num = 0;
1625 while (!PEND && maxlen-- != 0) {
1626 PFETCH(c);
1627 if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {
1628 val = ODIGITVAL(c);
1629 if ((INT_MAX_LIMIT - val) / 8UL < num)
1630 return -1; /* overflow */
1631
1632 num = (num << 3) + val;
1633 }
1634 else {
1635 PUNFETCH;
1636 break;
1637 }
1638 }
1639 *src = p;
1640 return num;
1641}
1642
1643
/* Write one code point into `bbuf` at byte offset `pos` by handing its
 * address and SIZE_CODE_POINT to BBUF_WRITE. `code` must be an lvalue
 * because its address is taken. */
1644#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \
1645 BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
1646
/* Allocate a fresh code-range buffer in *pbuf and store a range count of 0
 * in its first slot. Returns 0 on success or the non-zero status `r`.
 * NOTE(review): the embedded numbering skips 1656 and 1660-1661, so the
 * declaration of `n`, the allocation check and the statement that assigns
 * `r` are missing from this listing. */
1647/* data format:
1648 [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
1649 (all data size is OnigCodePoint)
1650 */
1651static int
1652new_code_range(BBuf** pbuf)
1653{
1654#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)
1655 int r;
1657 BBuf* bbuf;
1658
1659 bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
1662 if (r) return r;
1663
1664 n = 0;
1665 BBUF_WRITE_CODE_POINT(bbuf, 0, n);
1666 return 0;
1667}
1668
/* Insert the code range [from, to] into the sorted range list in *pbuf,
 * merging it with every stored range it overlaps or is adjacent to.
 * - `from`/`to` are swapped when given in descending order.
 * - *pbuf is created with new_code_range() when it is NULL.
 * - When `checkdup` is non-zero and the new range overlaps stored data,
 *   CC_DUP_WARN() is invoked.
 * Returns 0 on success; a non-zero status from new_code_range() is passed
 * through. (The BBUF_* macros may also return on failure -- their
 * definitions are not visible here.)
 * NOTE(review): the embedded numbering skips 1718, so the statement
 * controlled by the ONIG_MAX_MULTI_BYTE_RANGES_NUM check is missing from
 * this listing; as listed, that `if` appears to guard the following block. */
1669static int
1670add_code_range_to_buf0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to,
1671 int checkdup)
1672{
1673 int r, inc_n, pos;
1674 OnigCodePoint low, high, bound, x;
1675 OnigCodePoint n, *data;
1676 BBuf* bbuf;
1677
1678 if (from > to) {
1679 n = from; from = to; to = n;
1680 }
1681
1682 if (IS_NULL(*pbuf)) {
1683 r = new_code_range(pbuf);
1684 if (r) return r;
1685 bbuf = *pbuf;
1686 n = 0;
1687 }
1688 else {
1689 bbuf = *pbuf;
1690 GET_CODE_POINT(n, bbuf->p);
1691 }
/* skip the leading count slot; data[] now addresses the from/to pairs */
1692 data = (OnigCodePoint* )(bbuf->p);
1693 data++;
1694
/* binary search for the first stored range that ends at or after from-1;
 * from == 0 is special-cased so that from-1 is never evaluated after wrap */
1695 bound = (from == 0) ? 0 : n;
1696 for (low = 0; low < bound; ) {
1697 x = (low + bound) >> 1;
1698 if (from - 1 > data[x*2 + 1])
1699 low = x + 1;
1700 else
1701 bound = x;
1702 }
1703
/* binary search for the first stored range that starts after to+1;
 * to == ONIG_LAST_CODE_POINT is special-cased to avoid evaluating to+1 */
1704 high = (to == ONIG_LAST_CODE_POINT) ? n : low;
1705 for (bound = n; high < bound; ) {
1706 x = (high + bound) >> 1;
1707 if (to + 1 >= data[x*2])
1708 high = x + 1;
1709 else
1710 bound = x;
1711 }
1712 /* data[(low-1)*2+1] << from <= data[low*2]
1713 * data[(high-1)*2+1] <= to << data[high*2]
1714 */
1715
/* ranges low..high-1 are absorbed by the new one, so the count changes by
 * 1 - (high - low) */
1716 inc_n = low + 1 - high;
1717 if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
1719
1720 if (inc_n != 1) {
1721 if (checkdup && from <= data[low*2+1]
1722 && (data[low*2] <= from || data[low*2+1] <= to))
1723 CC_DUP_WARN(env, from, to);
/* widen the new range so it covers everything it is merged with */
1724 if (from > data[low*2])
1725 from = data[low*2];
1726 if (to < data[(high - 1)*2 + 1])
1727 to = data[(high - 1)*2 + 1];
1728 }
1729
1730 if (inc_n != 0) {
1731 int from_pos = SIZE_CODE_POINT * (1 + high * 2);
1732 int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);
1733
1734 if (inc_n > 0) {
1735 if (high < n) {
1736 int size = (n - high) * 2 * SIZE_CODE_POINT;
1737 BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
1738 }
1739 }
1740 else {
1741 BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
1742 }
1743 }
1744
1745 pos = SIZE_CODE_POINT * (1 + low * 2);
1746 BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
1747 BBUF_WRITE_CODE_POINT(bbuf, pos, from);
1748 BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
1749 n += inc_n;
1750 BBUF_WRITE_CODE_POINT(bbuf, 0, n);
1751
1752 return 0;
1753}
1754
1755static int
1756add_code_range_to_buf(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
1757{
1758 return add_code_range_to_buf0(pbuf, env, from, to, 1);
1759}
1760
/* Add the range [from, to] to *pbuf after handling a descending range.
 * When from > to, one of two branches is taken: one returns 0 without
 * adding anything; otherwise the range is forwarded to
 * add_code_range_to_buf0() together with `checkdup`.
 * NOTE(review): the embedded numbering skips 1765 and 1768, so the inner
 * condition and the alternative branch are missing from this listing. */
1761static int
1762add_code_range0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to, int checkdup)
1763{
1764 if (from > to) {
1766 return 0;
1767 else
1769 }
1770
1771 return add_code_range_to_buf0(pbuf, env, from, to, checkdup);
1772}
1773
1774static int
1775add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
1776{
1777 return add_code_range0(pbuf, env, from, to, 1);
1778}
1779
1780static int
1781not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf, ScanEnv* env)
1782{
1783 int r, i, n;
1784 OnigCodePoint pre, from, *data, to = 0;
1785
1786 *pbuf = (BBuf* )NULL;
1787 if (IS_NULL(bbuf)) {
1788 set_all:
1789 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1790 }
1791
1792 data = (OnigCodePoint* )(bbuf->p);
1793 GET_CODE_POINT(n, data);
1794 data++;
1795 if (n <= 0) goto set_all;
1796
1797 r = 0;
1798 pre = MBCODE_START_POS(enc);
1799 for (i = 0; i < n; i++) {
1800 from = data[i*2];
1801 to = data[i*2+1];
1802 if (pre <= from - 1) {
1803 r = add_code_range_to_buf(pbuf, env, pre, from - 1);
1804 if (r != 0) return r;
1805 }
1806 if (to == ONIG_LAST_CODE_POINT) break;
1807 pre = to + 1;
1808 }
1809 if (to < ONIG_LAST_CODE_POINT) {
1810 r = add_code_range_to_buf(pbuf, env, to + 1, ONIG_LAST_CODE_POINT);
1811 }
1812 return r;
1813}
1814
/* Exchange two (buffer, negation-flag) pairs in place. All four arguments
 * must be assignable lvalues; the do/while(0) wrapper lets the macro be
 * used as a single statement. */
1815#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
1816 BBuf *tbuf; \
1817 int tnot; \
1818 tnot = not1; not1 = not2; not2 = tnot; \
1819 tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
1820} while (0)
1821
1822static int
1823or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
1824 BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
1825{
1826 int r;
1827 OnigCodePoint i, n1, *data1;
1828 OnigCodePoint from, to;
1829
1830 *pbuf = (BBuf* )NULL;
1831 if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
1832 if (not1 != 0 || not2 != 0)
1833 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1834 return 0;
1835 }
1836
1837 r = 0;
1838 if (IS_NULL(bbuf2))
1839 SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
1840
1841 if (IS_NULL(bbuf1)) {
1842 if (not1 != 0) {
1843 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1844 }
1845 else {
1846 if (not2 == 0) {
1847 return bbuf_clone(pbuf, bbuf2);
1848 }
1849 else {
1850 return not_code_range_buf(enc, bbuf2, pbuf, env);
1851 }
1852 }
1853 }
1854
1855 if (not1 != 0)
1856 SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
1857
1858 data1 = (OnigCodePoint* )(bbuf1->p);
1859 GET_CODE_POINT(n1, data1);
1860 data1++;
1861
1862 if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
1863 r = bbuf_clone(pbuf, bbuf2);
1864 }
1865 else if (not1 == 0) { /* 1 OR (not 2) */
1866 r = not_code_range_buf(enc, bbuf2, pbuf, env);
1867 }
1868 if (r != 0) return r;
1869
1870 for (i = 0; i < n1; i++) {
1871 from = data1[i*2];
1872 to = data1[i*2+1];
1873 r = add_code_range_to_buf(pbuf, env, from, to);
1874 if (r != 0) return r;
1875 }
1876 return 0;
1877}
1878
1879static int
1880and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to1,
1881 OnigCodePoint* data, int n)
1882{
1883 int i, r;
1884 OnigCodePoint from2, to2;
1885
1886 for (i = 0; i < n; i++) {
1887 from2 = data[i*2];
1888 to2 = data[i*2+1];
1889 if (from2 < from1) {
1890 if (to2 < from1) continue;
1891 else {
1892 from1 = to2 + 1;
1893 }
1894 }
1895 else if (from2 <= to1) {
1896 if (to2 < to1) {
1897 if (from1 <= from2 - 1) {
1898 r = add_code_range_to_buf(pbuf, env, from1, from2-1);
1899 if (r != 0) return r;
1900 }
1901 from1 = to2 + 1;
1902 }
1903 else {
1904 to1 = from2 - 1;
1905 }
1906 }
1907 else {
1908 from1 = from2;
1909 }
1910 if (from1 > to1) break;
1911 }
1912 if (from1 <= to1) {
1913 r = add_code_range_to_buf(pbuf, env, from1, to1);
1914 if (r != 0) return r;
1915 }
1916 return 0;
1917}
1918
1919static int
1920and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
1921{
1922 int r;
1923 OnigCodePoint i, j, n1, n2, *data1, *data2;
1924 OnigCodePoint from, to, from1, to1, from2, to2;
1925
1926 *pbuf = (BBuf* )NULL;
1927 if (IS_NULL(bbuf1)) {
1928 if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
1929 return bbuf_clone(pbuf, bbuf2);
1930 return 0;
1931 }
1932 else if (IS_NULL(bbuf2)) {
1933 if (not2 != 0)
1934 return bbuf_clone(pbuf, bbuf1);
1935 return 0;
1936 }
1937
1938 if (not1 != 0)
1939 SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
1940
1941 data1 = (OnigCodePoint* )(bbuf1->p);
1942 data2 = (OnigCodePoint* )(bbuf2->p);
1943 GET_CODE_POINT(n1, data1);
1944 GET_CODE_POINT(n2, data2);
1945 data1++;
1946 data2++;
1947
1948 if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
1949 for (i = 0; i < n1; i++) {
1950 from1 = data1[i*2];
1951 to1 = data1[i*2+1];
1952 for (j = 0; j < n2; j++) {
1953 from2 = data2[j*2];
1954 to2 = data2[j*2+1];
1955 if (from2 > to1) break;
1956 if (to2 < from1) continue;
1957 from = MAX(from1, from2);
1958 to = MIN(to1, to2);
1959 r = add_code_range_to_buf(pbuf, env, from, to);
1960 if (r != 0) return r;
1961 }
1962 }
1963 }
1964 else if (not1 == 0) { /* 1 AND (not 2) */
1965 for (i = 0; i < n1; i++) {
1966 from1 = data1[i*2];
1967 to1 = data1[i*2+1];
1968 r = and_code_range1(pbuf, env, from1, to1, data2, n2);
1969 if (r != 0) return r;
1970 }
1971 }
1972
1973 return 0;
1974}
1975
1976static int
1977and_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
1978{
1979 OnigEncoding enc = env->enc;
1980 int r, not1, not2;
1981 BBuf *buf1, *buf2, *pbuf = 0;
1982 BitSetRef bsr1, bsr2;
1983 BitSet bs1, bs2;
1984
1985 not1 = IS_NCCLASS_NOT(dest);
1986 bsr1 = dest->bs;
1987 buf1 = dest->mbuf;
1988 not2 = IS_NCCLASS_NOT(cc);
1989 bsr2 = cc->bs;
1990 buf2 = cc->mbuf;
1991
1992 if (not1 != 0) {
1993 bitset_invert_to(bsr1, bs1);
1994 bsr1 = bs1;
1995 }
1996 if (not2 != 0) {
1997 bitset_invert_to(bsr2, bs2);
1998 bsr2 = bs2;
1999 }
2000 bitset_and(bsr1, bsr2);
2001 if (bsr1 != dest->bs) {
2002 bitset_copy(dest->bs, bsr1);
2003 bsr1 = dest->bs;
2004 }
2005 if (not1 != 0) {
2006 bitset_invert(dest->bs);
2007 }
2008
2009 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
2010 if (not1 != 0 && not2 != 0) {
2011 r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf, env);
2012 }
2013 else {
2014 r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf, env);
2015 if (r == 0 && not1 != 0) {
2016 BBuf *tbuf = 0;
2017 r = not_code_range_buf(enc, pbuf, &tbuf, env);
2018 bbuf_free(pbuf);
2019 pbuf = tbuf;
2020 }
2021 }
2022 if (r != 0) {
2023 bbuf_free(pbuf);
2024 return r;
2025 }
2026
2027 dest->mbuf = pbuf;
2028 bbuf_free(buf1);
2029 return r;
2030 }
2031 return 0;
2032}
2033
2034static int
2035or_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
2036{
2037 OnigEncoding enc = env->enc;
2038 int r, not1, not2;
2039 BBuf *buf1, *buf2, *pbuf = 0;
2040 BitSetRef bsr1, bsr2;
2041 BitSet bs1, bs2;
2042
2043 not1 = IS_NCCLASS_NOT(dest);
2044 bsr1 = dest->bs;
2045 buf1 = dest->mbuf;
2046 not2 = IS_NCCLASS_NOT(cc);
2047 bsr2 = cc->bs;
2048 buf2 = cc->mbuf;
2049
2050 if (not1 != 0) {
2051 bitset_invert_to(bsr1, bs1);
2052 bsr1 = bs1;
2053 }
2054 if (not2 != 0) {
2055 bitset_invert_to(bsr2, bs2);
2056 bsr2 = bs2;
2057 }
2058 bitset_or(bsr1, bsr2);
2059 if (bsr1 != dest->bs) {
2060 bitset_copy(dest->bs, bsr1);
2061 bsr1 = dest->bs;
2062 }
2063 if (not1 != 0) {
2064 bitset_invert(dest->bs);
2065 }
2066
2067 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
2068 if (not1 != 0 && not2 != 0) {
2069 r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf, env);
2070 }
2071 else {
2072 r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf, env);
2073 if (r == 0 && not1 != 0) {
2074 BBuf *tbuf = 0;
2075 r = not_code_range_buf(enc, pbuf, &tbuf, env);
2076 bbuf_free(pbuf);
2077 pbuf = tbuf;
2078 }
2079 }
2080 if (r != 0) {
2081 bbuf_free(pbuf);
2082 return r;
2083 }
2084
2085 dest->mbuf = pbuf;
2086 bbuf_free(buf1);
2087 return r;
2088 }
2089 else
2090 return 0;
2091}
2092
2093static void UNKNOWN_ESC_WARN(ScanEnv *env, int c);
2094
/* Translate the character that follows a backslash into the code point it
 * denotes: n, t, r, f, a, b, e and v map to the corresponding control
 * codes; any other ASCII letter triggers UNKNOWN_ESC_WARN(). When no
 * mapping applies, `c` is returned unchanged.
 * NOTE(review): the embedded numbering skips 2098 and 2108, so the
 * statement that opens the block closed at 2117 and the condition guarding
 * the 'v' mapping are missing from this listing. */
2095static OnigCodePoint
2096conv_backslash_value(OnigCodePoint c, ScanEnv* env)
2097{
2099 switch (c) {
2100 case 'n': return '\n';
2101 case 't': return '\t';
2102 case 'r': return '\r';
2103 case 'f': return '\f';
2104 case 'a': return '\007';
2105 case 'b': return '\010';
2106 case 'e': return '\033';
2107 case 'v':
2109 return '\v';
2110 break;
2111
2112 default:
2113 if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
2114 UNKNOWN_ESC_WARN(env, c);
2115 break;
2116 }
2117 }
2118 return c;
2119}
2120
/* Decide whether a node may not be the target of a quantifier.
 * With USE_NO_INVALID_QUANTIFIER defined the check is compiled out and
 * always yields 0. Otherwise: anchors are invalid (1); enclosures are
 * allowed; an alternation is invalid when any branch is invalid; every
 * other node type yields 0. The NT_LIST case returns 0 on both of its
 * paths.
 * NOTE(review): the embedded numbering skips 2125, so the line carrying the
 * function name and parameter list is missing from this listing. */
2121#ifdef USE_NO_INVALID_QUANTIFIER
2122# define is_invalid_quantifier_target(node) 0
2123#else
2124static int
2126{
2127 switch (NTYPE(node)) {
2128 case NT_ANCHOR:
2129 return 1;
2130 break;
2131
2132 case NT_ENCLOSE:
2133 /* allow enclosed elements */
2134 /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */
2135 break;
2136
2137 case NT_LIST:
2138 do {
2139 if (! is_invalid_quantifier_target(NCAR(node))) return 0;
2140 } while (IS_NOT_NULL(node = NCDR(node)));
2141 return 0;
2142 break;
2143
2144 case NT_ALT:
2145 do {
2146 if (is_invalid_quantifier_target(NCAR(node))) return 1;
2147 } while (IS_NOT_NULL(node = NCDR(node)));
2148 break;
2149
2150 default:
2151 break;
2152 }
2153 return 0;
2154}
2155#endif
2156
2157/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
2158static int
2159popular_quantifier_num(QtfrNode* q)
2160{
2161 if (q->greedy) {
2162 if (q->lower == 0) {
2163 if (q->upper == 1) return 0;
2164 else if (IS_REPEAT_INFINITE(q->upper)) return 1;
2165 }
2166 else if (q->lower == 1) {
2167 if (IS_REPEAT_INFINITE(q->upper)) return 2;
2168 }
2169 }
2170 else {
2171 if (q->lower == 0) {
2172 if (q->upper == 1) return 3;
2173 else if (IS_REPEAT_INFINITE(q->upper)) return 4;
2174 }
2175 else if (q->lower == 1) {
2176 if (IS_REPEAT_INFINITE(q->upper)) return 5;
2177 }
2178 }
2179 return -1;
2180}
2181
2182
/* Actions for collapsing a quantifier nested directly inside another one,
 * followed by the lookup table that selects the action. The table is
 * indexed as ReduceTypeTable[child][parent] using the values returned by
 * popular_quantifier_num() (see the use in the reducer below it).
 * NOTE(review): the embedded numbering skips 2183 and 2191, so the opening
 * and closing lines of the enum declaration are missing from this listing. */
2184 RQ_ASIS = 0, /* as is */
2185 RQ_DEL = 1, /* delete parent */
2186 RQ_A, /* to '*' */
2187 RQ_AQ, /* to '*?' */
2188 RQ_QQ, /* to '??' */
2189 RQ_P_QQ, /* to '+)??' */
2190 RQ_PQ_Q /* to '+?)?' */
2192
2193static enum ReduceType const ReduceTypeTable[6][6] = {
2194/* '?', '*', '+', '??', '*?', '+?' p / c */
2195 {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */
2196 {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */
2197 {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */
2198 {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */
2199 {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */
2200 {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' */
2201};
2202
/* Simplify a quantifier (`pnode`) whose target is another quantifier
 * (`cnode`). Both are classified with popular_quantifier_num(); when either
 * is not one of the six common forms nothing is changed. Otherwise the
 * action from ReduceTypeTable is applied:
 *  - RQ_DEL            : the child node is copied over the parent;
 *  - RQ_A/RQ_AQ/RQ_QQ  : the parent adopts the child's target and gets new
 *                        bounds/greediness;
 *  - RQ_P_QQ/RQ_PQ_Q/RQ_ASIS : the child is kept as the parent's target
 *                        (with adjusted bounds for the first two) and the
 *                        function returns without freeing anything.
 * In the first two groups the child is detached from its target and freed.
 * NOTE(review): the embedded numbering skips 2204, so the line carrying the
 * function name and parameter list is missing from this listing. */
2203extern void
2205{
2206 int pnum, cnum;
2207 QtfrNode *p, *c;
2208
2209 p = NQTFR(pnode);
2210 c = NQTFR(cnode);
2211 pnum = popular_quantifier_num(p);
2212 cnum = popular_quantifier_num(c);
2213 if (pnum < 0 || cnum < 0) return ;
2214
2215 switch (ReduceTypeTable[cnum][pnum]) {
2216 case RQ_DEL:
2217 *pnode = *cnode;
2218 break;
2219 case RQ_A:
2220 p->target = c->target;
2221 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;
2222 break;
2223 case RQ_AQ:
2224 p->target = c->target;
2225 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;
2226 break;
2227 case RQ_QQ:
2228 p->target = c->target;
2229 p->lower = 0; p->upper = 1; p->greedy = 0;
2230 break;
2231 case RQ_P_QQ:
2232 p->target = cnode;
2233 p->lower = 0; p->upper = 1; p->greedy = 0;
2234 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;
2235 return ;
2236 break;
2237 case RQ_PQ_Q:
2238 p->target = cnode;
2239 p->lower = 0; p->upper = 1; p->greedy = 1;
2240 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;
2241 return ;
2242 break;
2243 case RQ_ASIS:
2244 p->target = cnode;
2245 return ;
2246 break;
2247 }
2248
/* the target now belongs to the parent (or to the copy made for RQ_DEL);
 * clear it so that freeing the child does not release it as well */
2249 c->target = NULL_NODE;
2250 onig_node_free(cnode);
2251}
2252
2253
/* Token kinds produced by the pattern tokenizer.
 * NOTE(review): the embedded numbering runs from 2255 to 2282 with large
 * gaps, so the enum's opening/closing lines and most enumerators are
 * missing from this listing; only the entries below survived. */
2255 TK_EOT = 0, /* end of token */
2267 TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */
2273 TK_CHAR_PROPERTY, /* \p{...}, \P{...} */
2277 /* in cc */
2281 TK_CC_AND, /* && */
2282 TK_CC_CC_OPEN /* [ */
2284
/* One token of the pattern. The union `u` carries the payload for the
 * token kind: a plain character, anchor data, repeat bounds, back-reference
 * data, call data, or character-property data.
 * NOTE(review): the embedded numbering has many gaps between 2285 and 2325,
 * so a number of members (and the closing line of one nested struct) are
 * missing from this listing; only the members below survived. */
2285typedef struct {
2288 int base; /* is number: 8, 16 (used in [....]) */
2290 union {
2292 int c;
2294 struct {
2297 } anchor;
2298 struct {
2303 } repeat;
2304 struct {
2305 int num;
2306 int ref1;
2307 int* refs;
2309#ifdef USE_BACKREF_WITH_LEVEL
2311 int level; /* \k<name+n> */
2312#endif
2313 } backref;
2314 struct {
2317 int gnum;
2318 int rel;
2320 struct {
2322 int not;
2323 } prop;
2324 } u;
2325} OnigToken;
2326
2327
/* Parse an interval quantifier body ({n}, {n,}, {n,m}, {,m}) starting at
 * *src.
 * On success tok is filled in as TK_INTERVAL with the lower/upper bounds,
 * *src is advanced past the closing '}' and the result is 0 for {n,m} or 2
 * for the fixed form {n}. When the text is not a valid interval the result
 * is 1 if the syntax has ONIG_SYN_ALLOW_INVALID_INTERVAL (the caller can
 * then treat the brace literally; *src is not moved), otherwise an error.
 * A bound that overflows yields ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE.
 * NOTE(review): the embedded numbering skips 2336, 2350, 2357, 2360, 2376,
 * 2395, 2403 and 2416, so several statements (among them the error returns
 * and two conditions) are missing from this listing. */
2328static int
2329fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
2330{
2331 int low, up, syn_allow, non_low = 0;
2332 int r = 0;
2333 OnigCodePoint c;
2334 OnigEncoding enc = env->enc;
2335 UChar* p = *src;
2337
2338 syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
2339
2340 if (PEND) {
2341 if (syn_allow)
2342 return 1; /* "....{" : OK! */
2343 else
2344 return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */
2345 }
2346
2347 if (! syn_allow) {
2348 c = PPEEK;
2349 if (c == ')' || c == '(' || c == '|') {
2351 }
2352 }
2353
2354 low = onig_scan_unsigned_number(&p, end, env->enc);
2355 if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
2356 if (low > ONIG_MAX_REPEAT_NUM)
2358
2359 if (p == *src) { /* can't read low */
2361 /* allow {,n} as {0,n} */
2362 low = 0;
2363 non_low = 1;
2364 }
2365 else
2366 goto invalid;
2367 }
2368
2369 if (PEND) goto invalid;
2370 PFETCH(c);
2371 if (c == ',') {
2372 UChar* prev = p;
2373 up = onig_scan_unsigned_number(&p, end, env->enc);
2374 if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
2375 if (up > ONIG_MAX_REPEAT_NUM)
2377
2378 if (p == prev) {
/* "{,}" has neither bound and is rejected */
2379 if (non_low != 0)
2380 goto invalid;
2381 up = REPEAT_INFINITE; /* {n,} : {n,infinite} */
2382 }
2383 }
2384 else {
2385 if (non_low != 0)
2386 goto invalid;
2387
2388 PUNFETCH;
2389 up = low; /* {n} : exact n times */
2390 r = 2; /* fixed */
2391 }
2392
2393 if (PEND) goto invalid;
2394 PFETCH(c);
2396 if (c != MC_ESC(env->syntax)) goto invalid;
2397 if (PEND) goto invalid;
2398 PFETCH(c);
2399 }
2400 if (c != '}') goto invalid;
2401
2402 if (!IS_REPEAT_INFINITE(up) && low > up) {
2404 }
2405
2406 tok->type = TK_INTERVAL;
2407 tok->u.repeat.lower = low;
2408 tok->u.repeat.upper = up;
2409 *src = p;
2410 return r; /* 0: normal {n,m}, 2: fixed {n} */
2411
2412 invalid:
2413 if (syn_allow)
2414 return 1; /* OK */
2415 else
2417}
2418
/* Decode the value of an escape sequence whose backslash has already been
 * consumed; *src points at the character after it.
 *  - M-x  : the value of x (itself possibly escaped, handled recursively)
 *           masked to 8 bits with the high bit set;
 *  - C-x / cx : '?' gives 0177, anything else (possibly escaped) is masked
 *           with 0x9f;
 *  - otherwise the character is mapped by conv_backslash_value().
 * On success the value is stored in *val, *src is advanced and 0 is
 * returned. Errors: ONIGERR_META_CODE_SYNTAX, ONIGERR_CONTROL_CODE_SYNTAX,
 * or a negative status from the recursive call.
 * NOTE(review): the embedded numbering skips 2428, 2433-2434, 2437,
 * 2450-2451, 2460 and 2462, so the end-of-pattern checks and the syntax
 * conditions that open the 'M', 'C' and 'c' branches are missing from this
 * listing. */
2419/* \M-, \C-, \c, or \... */
2420static int
2421fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)
2422{
2423 int v;
2424 OnigCodePoint c;
2425 OnigEncoding enc = env->enc;
2426 UChar* p = *src;
2427
2429
2430 PFETCH_S(c);
2431 switch (c) {
2432 case 'M':
2435 PFETCH_S(c);
2436 if (c != '-') return ONIGERR_META_CODE_SYNTAX;
2438 PFETCH_S(c);
2439 if (c == MC_ESC(env->syntax)) {
2440 v = fetch_escaped_value(&p, end, env, &c);
2441 if (v < 0) return v;
2442 }
2443 c = ((c & 0xff) | 0x80);
2444 }
2445 else
2446 goto backslash;
2447 break;
2448
2449 case 'C':
2452 PFETCH_S(c);
2453 if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
2454 goto control;
2455 }
2456 else
2457 goto backslash;
2458
2459 case 'c':
2461 control:
2463 PFETCH_S(c);
2464 if (c == '?') {
2465 c = 0177;
2466 }
2467 else {
2468 if (c == MC_ESC(env->syntax)) {
2469 v = fetch_escaped_value(&p, end, env, &c);
2470 if (v < 0) return v;
2471 }
2472 c &= 0x9f;
2473 }
2474 break;
2475 }
2476 /* fall through */
2477
2478 default:
2479 {
2480 backslash:
2481 c = conv_backslash_value(c, env);
2482 }
2483 break;
2484 }
2485
2486 *src = p;
2487 *val = c;
2488 return 0;
2489}
2490
2491static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
2492
2493static OnigCodePoint
2494get_name_end_code_point(OnigCodePoint start)
2495{
2496 switch (start) {
2497 case '<': return (OnigCodePoint )'>'; break;
2498 case '\'': return (OnigCodePoint )'\''; break;
2499 case '(': return (OnigCodePoint )')'; break;
2500 case '{': return (OnigCodePoint )'}'; break;
2501 default:
2502 break;
2503 }
2504
2505 return (OnigCodePoint )0;
2506}
2507
2508#ifdef USE_NAMED_GROUP
/* Predicate for characters allowed in a group name: every code point is
 * accepted when built with RUBY defined, otherwise only word characters
 * (ONIGENC_IS_CODE_WORD). */
2509# ifdef RUBY
2510# define ONIGENC_IS_CODE_NAME(enc, c) TRUE
2511# else
2512# define ONIGENC_IS_CODE_NAME(enc, c) ONIGENC_IS_CODE_WORD(enc, c)
2513# endif
2514
2515# ifdef USE_BACKREF_WITH_LEVEL
/* Parse a group reference that may carry a nesting level, starting at *src
 * and ending at the closing delimiter that matches `start_code`.
 * A purely numeric (optionally '-'-prefixed) name is converted to a number
 * and stored, with its sign, in *rback_num; an optional "+n" / "-n" suffix
 * is stored in *rlevel.
 * Returns 1 when a level was present, 0 when not (in both cases *src and
 * *rname_end are updated), ONIGERR_TOO_BIG_NUMBER on numeric overflow, or
 * the error code in `r` after reporting it through
 * onig_scan_env_set_error_string().
 * NOTE(review): the embedded numbering skips 2533, 2545, 2550, 2561, 2578,
 * 2583, 2593 and 2612, so the statements that assign the error codes (and
 * one line after the declarations) are missing from this listing. */
2516/*
2517 \k<name+n>, \k<name-n>
2518 \k<num+n>, \k<num-n>
2519 \k<-num+n>, \k<-num-n>
2520*/
2521static int
2522fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
2523 UChar** rname_end, ScanEnv* env,
2524 int* rback_num, int* rlevel)
2525{
2526 int r, sign, is_num, exist_level;
2527 OnigCodePoint end_code;
2528 OnigCodePoint c = 0;
2529 OnigEncoding enc = env->enc;
2530 UChar *name_end;
2531 UChar *pnum_head;
2532 UChar *p = *src;
2534
2535 *rback_num = 0;
2536 is_num = exist_level = 0;
2537 sign = 1;
2538 pnum_head = *src;
2539
2540 end_code = get_name_end_code_point(start_code);
2541
2542 name_end = end;
2543 r = 0;
2544 if (PEND) {
2546 }
2547 else {
2548 PFETCH(c);
2549 if (c == end_code)
2551
/* is_num: 0 = name, 1 = digits seen, 2 = only a leading '-' so far */
2552 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2553 is_num = 1;
2554 }
2555 else if (c == '-') {
2556 is_num = 2;
2557 sign = -1;
2558 pnum_head = p;
2559 }
2560 else if (!ONIGENC_IS_CODE_NAME(enc, c)) {
2562 }
2563 }
2564
2565 while (!PEND) {
2566 name_end = p;
2567 PFETCH(c);
2568 if (c == end_code || c == ')' || c == '+' || c == '-') {
2569 if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
2570 break;
2571 }
2572
2573 if (is_num != 0) {
2574 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2575 is_num = 1;
2576 }
2577 else {
2579 is_num = 0;
2580 }
2581 }
2582 else if (!ONIGENC_IS_CODE_NAME(enc, c)) {
2584 }
2585 }
2586
2587 if (r == 0 && c != end_code) {
2588 if (c == '+' || c == '-') {
2589 int level;
2590 int flag = (c == '-' ? -1 : 1);
2591
2592 if (PEND) {
2594 goto end;
2595 }
2596 PFETCH(c);
2597 if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
2598 PUNFETCH;
2599 level = onig_scan_unsigned_number(&p, end, enc);
2600 if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
2601 *rlevel = (level * flag);
2602 exist_level = 1;
2603
2604 if (!PEND) {
2605 PFETCH(c);
2606 if (c == end_code)
2607 goto end;
2608 }
2609 }
2610
2611 err:
2613 name_end = end;
2614 }
2615
2616 end:
2617 if (r == 0) {
2618 if (is_num != 0) {
2619 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2620 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2621 else if (*rback_num == 0) goto err;
2622
2623 *rback_num *= sign;
2624 }
2625
2626 *rname_end = name_end;
2627 *src = p;
2628 return (exist_level ? 1 : 0);
2629 }
2630 else {
2631 onig_scan_env_set_error_string(env, r, *src, name_end);
2632 return r;
2633 }
2634}
2635# endif /* USE_BACKREF_WITH_LEVEL */
2636
/* Parse a group name starting at *src and ending at the closing delimiter
 * that matches `start_code` (build with USE_NAMED_GROUP).
 * With ref == 1 a numeric name (optionally '-'-prefixed) is accepted and
 * its signed value stored in *rback_num; with ref == 0 a leading digit or
 * '-' is handled by a different branch whose error assignment is not
 * visible here.
 * Returns 0 on success (with *rname_end and *src updated),
 * ONIGERR_TOO_BIG_NUMBER on numeric overflow, or the error code in `r`
 * after reporting it through onig_scan_env_set_error_string(). On the
 * `teardown` path the rest of the name is skipped first so the reported
 * span covers the whole name.
 * NOTE(review): the embedded numbering skips 2663, 2668, 2674, 2685, 2690,
 * 2700, 2712, 2714, 2720, 2727 and 2736, so the statements that assign the
 * error codes are missing from this listing. */
2637/*
2638 ref: 0 -> define name (don't allow number name)
2639 1 -> reference name (allow number name)
2640*/
2641static int
2642fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
2643 UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
2644{
2645 int r, is_num, sign;
2646 OnigCodePoint end_code;
2647 OnigCodePoint c = 0;
2648 OnigEncoding enc = env->enc;
2649 UChar *name_end;
2650 UChar *pnum_head;
2651 UChar *p = *src;
2652
2653 *rback_num = 0;
2654
2655 end_code = get_name_end_code_point(start_code);
2656
2657 name_end = end;
2658 pnum_head = *src;
2659 r = 0;
2660 is_num = 0;
2661 sign = 1;
2662 if (PEND) {
2664 }
2665 else {
2666 PFETCH_S(c);
2667 if (c == end_code)
2669
/* is_num: 0 = name, 1 = digits seen, 2 = only a leading '-' so far */
2670 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2671 if (ref == 1)
2672 is_num = 1;
2673 else {
2675 is_num = 0;
2676 }
2677 }
2678 else if (c == '-') {
2679 if (ref == 1) {
2680 is_num = 2;
2681 sign = -1;
2682 pnum_head = p;
2683 }
2684 else {
2686 is_num = 0;
2687 }
2688 }
2689 else if (!ONIGENC_IS_CODE_NAME(enc, c)) {
2691 }
2692 }
2693
2694 if (r == 0) {
2695 while (!PEND) {
2696 name_end = p;
2697 PFETCH_S(c);
2698 if (c == end_code || c == ')') {
2699 if (is_num == 2) {
2701 goto teardown;
2702 }
2703 break;
2704 }
2705
2706 if (is_num != 0) {
2707 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2708 is_num = 1;
2709 }
2710 else {
2711 if (!ONIGENC_IS_CODE_WORD(enc, c))
2713 else
2715 goto teardown;
2716 }
2717 }
2718 else {
2719 if (!ONIGENC_IS_CODE_NAME(enc, c)) {
2721 goto teardown;
2722 }
2723 }
2724 }
2725
2726 if (c != end_code) {
2728 name_end = end;
2729 goto err;
2730 }
2731
2732 if (is_num != 0) {
2733 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2734 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2735 else if (*rback_num == 0) {
2737 goto err;
2738 }
2739
2740 *rback_num *= sign;
2741 }
2742
2743 *rname_end = name_end;
2744 *src = p;
2745 return 0;
2746 }
2747 else {
2748teardown:
2749 while (!PEND) {
2750 name_end = p;
2751 PFETCH_S(c);
2752 if (c == end_code || c == ')')
2753 break;
2754 }
2755 if (PEND)
2756 name_end = end;
2757
2758 err:
2759 onig_scan_env_set_error_string(env, r, *src, name_end);
2760 return r;
2761 }
2762}
2763#else
/* Variant of fetch_name() used when USE_NAMED_GROUP is not defined: only a
 * numeric group reference (optionally '-'-prefixed) is parsed, up to the
 * closing delimiter that matches `start_code`.
 * Returns 0 on success with the signed number in *rback_num and
 * *rname_end / *src updated, ONIGERR_TOO_BIG_NUMBER on numeric overflow,
 * or the error code in `r` after reporting it through
 * onig_scan_env_set_error_string().
 * NOTE(review): the embedded numbering skips 2775, 2788, 2793, 2804, 2814,
 * 2817 and 2825, so the statements that assign the error codes are missing
 * from this listing. */
2764static int
2765fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
2766 UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
2767{
2768 int r, is_num, sign;
2769 OnigCodePoint end_code;
2770 OnigCodePoint c = 0;
2771 UChar *name_end;
2772 OnigEncoding enc = env->enc;
2773 UChar *pnum_head;
2774 UChar *p = *src;
2776
2777 *rback_num = 0;
2778
2779 end_code = get_name_end_code_point(start_code);
2780
2781 *rname_end = name_end = end;
2782 r = 0;
2783 pnum_head = *src;
2784 is_num = 0;
2785 sign = 1;
2786
2787 if (PEND) {
2789 }
2790 else {
2791 PFETCH(c);
2792 if (c == end_code)
2794
2795 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2796 is_num = 1;
2797 }
2798 else if (c == '-') {
2799 is_num = 2;
2800 sign = -1;
2801 pnum_head = p;
2802 }
2803 else {
2805 }
2806 }
2807
2808 while (!PEND) {
2809 name_end = p;
2810
2811 PFETCH(c);
2812 if (c == end_code || c == ')') break;
2813 if (! ONIGENC_IS_CODE_DIGIT(enc, c))
2815 }
2816 if (r == 0 && c != end_code) {
2818 name_end = end;
2819 }
2820
2821 if (r == 0) {
2822 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2823 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2824 else if (*rback_num == 0) {
2826 goto err;
2827 }
2828 *rback_num *= sign;
2829
2830 *rname_end = name_end;
2831 *src = p;
2832 return 0;
2833 }
2834 else {
2835 err:
2836 onig_scan_env_set_error_string(env, r, *src, name_end);
2837 return r;
2838 }
2839}
2840#endif /* USE_NAMED_GROUP */
2841
2842
/* Format a printf-style syntax warning for the pattern in `env` and emit
 * it. With RUBY defined the text goes to rb_warn() when env->sourcefile is
 * NULL and to rb_compile_warn() (with file and line) otherwise; without
 * RUBY it is passed to the installed onig_warn callback.
 * NOTE(review): the embedded numbering skips 2847 and 2849, so the
 * declaration of `buf` and the first line of the formatting call (whose
 * trailing arguments are on 2850-2851) are missing from this listing. */
2843static void
2844onig_syntax_warn(ScanEnv *env, const char *fmt, ...)
2845{
2846 va_list args;
2848 va_start(args, fmt);
2850 env->pattern, env->pattern_end,
2851 (const UChar *)fmt, args);
2852 va_end(args);
2853#ifdef RUBY
2854 if (env->sourcefile == NULL)
2855 rb_warn("%s", (char *)buf);
2856 else
2857 rb_compile_warn(env->sourcefile, env->sourceline, "%s", (char *)buf);
2858#else
2859 (*onig_warn)((char* )buf);
2860#endif
2861}
2862
/* Warn that the character class contains the string `c` without an
 * escape. Does nothing when warnings are disabled (onig_warn is
 * onig_null_warn).
 * NOTE(review): the embedded numbering skips 2868-2869, so the condition
 * that opens the block closed at 2871 is missing from this listing. */
2863static void
2864CC_ESC_WARN(ScanEnv *env, UChar *c)
2865{
2866 if (onig_warn == onig_null_warn) return ;
2867
2870 onig_syntax_warn(env, "character class has '%s' without escape", c);
2871 }
2872}
2873
/* Warn that the regular expression contains the string `c` without an
 * escape. Does nothing when warnings are disabled (onig_warn is
 * onig_null_warn).
 * NOTE(review): the embedded numbering skips 2879, so the condition that
 * opens the block closed at 2881 is missing from this listing. */
2874static void
2875CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)
2876{
2877 if (onig_warn == onig_null_warn) return ;
2878
2880 onig_syntax_warn(env, "regular expression has '%s' without escape", c);
2881 }
2882}
2883
/* Fallback for builds where RTEST is not already defined: every value
 * counts as true, so the `!RTEST(ruby_verbose)` guards below never
 * suppress a warning (and the argument is never evaluated). */
2884#ifndef RTEST
2885# define RTEST(v) 1
2886#endif
2887
/* Warn about a duplicated range in a character class. Nothing is emitted
 * when warnings are disabled, when RTEST(ruby_verbose) is false, when the
 * syntax lacks ONIG_SYN_WARN_CC_DUP, or when the flag is already recorded
 * in env->warnings_flag. Without WARN_ALL_CC_DUP the flag is recorded
 * before warning, so the message appears once per `env`; with it, each
 * call reports the offending from/to values.
 * NOTE(review): the embedded numbering skips 2889, so the line carrying the
 * function name and parameter list is missing from this listing; presumably
 * this is CC_DUP_WARN(env, from, to) as called by the range-insertion code
 * -- confirm. */
2888static void
2890{
2891 if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ;
2892
2893 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_DUP) &&
2894 !(env->warnings_flag & ONIG_SYN_WARN_CC_DUP)) {
2895#ifdef WARN_ALL_CC_DUP
2896 onig_syntax_warn(env, "character class has duplicated range: %04x-%04x", from, to);
2897#else
2898 env->warnings_flag |= ONIG_SYN_WARN_CC_DUP;
2899 onig_syntax_warn(env, "character class has duplicated range");
2900#endif
2901 }
2902}
2903
2904static void
2905UNKNOWN_ESC_WARN(ScanEnv *env, int c)
2906{
2907 if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ;
2908 onig_syntax_warn(env, "Unknown escape \\%c is ignored", c);
2909}
2910
2911static UChar*
2912find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
2913 UChar **next, OnigEncoding enc)
2914{
2915 int i;
2916 OnigCodePoint x;
2917 UChar *q;
2918 UChar *p = from;
2919
2920 while (p < to) {
2921 x = ONIGENC_MBC_TO_CODE(enc, p, to);
2922 q = p + enclen(enc, p, to);
2923 if (x == s[0]) {
2924 for (i = 1; i < n && q < to; i++) {
2925 x = ONIGENC_MBC_TO_CODE(enc, q, to);
2926 if (x != s[i]) break;
2927 q += enclen(enc, q, to);
2928 }
2929 if (i >= n) {
2930 if (IS_NOT_NULL(next))
2931 *next = q;
2932 return p;
2933 }
2934 }
2935 p = q;
2936 }
2937 return NULL_UCHARP;
2938}
2939
/* Check whether the code point sequence s[0..n-1] occurs in [from, to),
 * ignoring any character that directly follows the escape character
 * MC_ESC(syn).
 * Returns 1 when the sequence is found. Returns 0 when the code point `bad`
 * is met first (outside an escape) or the end is reached.
 * NOTE(review): the embedded numbering skips 2942, so the rest of the
 * parameter list (which must declare `bad`, `enc` and `syn`, all used
 * below) is missing from this listing. */
2940static int
2941str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
2943{
2944 int i, in_esc;
2945 OnigCodePoint x;
2946 UChar *q;
2947 UChar *p = from;
2948
2949 in_esc = 0;
2950 while (p < to) {
2951 if (in_esc) {
/* the character after an escape is skipped without being examined */
2952 in_esc = 0;
2953 p += enclen(enc, p, to);
2954 }
2955 else {
2956 x = ONIGENC_MBC_TO_CODE(enc, p, to);
2957 q = p + enclen(enc, p, to);
2958 if (x == s[0]) {
2959 for (i = 1; i < n && q < to; i++) {
2960 x = ONIGENC_MBC_TO_CODE(enc, q, to);
2961 if (x != s[i]) break;
2962 q += enclen(enc, q, to);
2963 }
2964 if (i >= n) return 1;
/* partial match only: step over just the first character */
2965 p += enclen(enc, p, to);
2966 }
2967 else {
2968 x = ONIGENC_MBC_TO_CODE(enc, p, to);
2969 if (x == bad) return 0;
2970 else if (x == MC_ESC(syn)) in_esc = 1;
2971 p = q;
2972 }
2973 }
2974 }
2975 return 0;
2976}
2977
/*
 * fetch_token_in_cc: read the next token while scanning inside a character
 * class.
 *
 * Scanning starts at *src and never reads past end.  The function fills
 * tok (type, base, escaped and the tok->u member that matches the type),
 * stores the new scan position in *src and returns tok->type.
 * At end of input it returns TK_EOT without updating *src.
 * Malformed numeric escapes return an ONIGERR_* code directly, and a
 * negative result from fetch_escaped_value is passed through unchanged;
 * *src is not updated on those paths.
 *
 * NOTE(review): the line numbers embedded in this listing skip in several
 * places (for example 3006 to 3008 and 3064 to 3066), so some source lines
 * appear to be missing here.  The comments below describe only what the
 * visible lines do.
 */
2978static int
2979fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
2980{
2981 int num;
2982 OnigCodePoint c, c2;
2983 const OnigSyntaxType* syn = env->syntax;
2984 OnigEncoding enc = env->enc;
2985 UChar* prev;
2986 UChar* p = *src;
2988
2989 if (PEND) {
2990 tok->type = TK_EOT;
2991 return tok->type;
2992 }
2993
 /* Default result: the fetched code point is an ordinary, unescaped character. */
2994 PFETCH(c);
2995 tok->type = TK_CHAR;
2996 tok->base = 0;
2997 tok->u.c = c;
2998 tok->escaped = 0;
2999
3000 if (c == ']') {
3001 tok->type = TK_CC_CLOSE;
3002 }
3003 else if (c == '-') {
3004 tok->type = TK_CC_RANGE;
3005 }
3006 else if (c == MC_ESC(syn)) {
3008 goto end;
3009
3011
 /* Escape sequence: fetch the character after the escape and classify it. */
3012 PFETCH(c);
3013 tok->escaped = 1;
3014 tok->u.c = c;
3015 switch (c) {
 /* w, d, s select a character type; the upper-case forms set prop.not to 1. */
3016 case 'w':
3017 tok->type = TK_CHAR_TYPE;
3018 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3019 tok->u.prop.not = 0;
3020 break;
3021 case 'W':
3022 tok->type = TK_CHAR_TYPE;
3023 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3024 tok->u.prop.not = 1;
3025 break;
3026 case 'd':
3027 tok->type = TK_CHAR_TYPE;
3028 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3029 tok->u.prop.not = 0;
3030 break;
3031 case 'D':
3032 tok->type = TK_CHAR_TYPE;
3033 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3034 tok->u.prop.not = 1;
3035 break;
3036 case 's':
3037 tok->type = TK_CHAR_TYPE;
3038 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3039 tok->u.prop.not = 0;
3040 break;
3041 case 'S':
3042 tok->type = TK_CHAR_TYPE;
3043 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3044 tok->u.prop.not = 1;
3045 break;
 /* h and H are hex-digit types only when the syntax enables
  * ONIG_SYN_OP2_ESC_H_XDIGIT; otherwise the token stays an escaped TK_CHAR. */
3046 case 'h':
3047 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3048 tok->type = TK_CHAR_TYPE;
3049 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3050 tok->u.prop.not = 0;
3051 break;
3052 case 'H':
3053 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3054 tok->type = TK_CHAR_TYPE;
3055 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3056 tok->u.prop.not = 1;
3057 break;
3058
 /* p / P followed by an open brace: character property; P starts negated and
  * a leading caret inside the braces flips the negation flag again. */
3059 case 'p':
3060 case 'P':
3061 if (PEND) break;
3062
3063 c2 = PPEEK;
3064 if (c2 == '{' &&
3066 PINC;
3067 tok->type = TK_CHAR_PROPERTY;
3068 tok->u.prop.not = (c == 'P' ? 1 : 0);
3069
3071 PFETCH(c2);
3072 if (c2 == '^') {
3073 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
3074 }
3075 else
3076 PUNFETCH;
3077 }
3078 }
3079 else {
3080 onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
3081 }
3082 break;
3083
 /* x: either a braced hexadecimal code point (TK_CODE_POINT) or, under
  * ONIG_SYN_OP_ESC_X_HEX2, a short hexadecimal raw byte (TK_RAW_BYTE).
  * prev remembers where the digits start so the scan can be rewound. */
3084 case 'x':
3085 if (PEND) break;
3086
3087 prev = p;
3089 PINC;
3090 num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
3091 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3092 if (!PEND) {
3093 c2 = PPEEK;
3094 if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
3096 }
3097
 /* Accept only if something was read after the first character and a
  * closing brace follows. */
3098 if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
3099 PINC;
3100 tok->type = TK_CODE_POINT;
3101 tok->base = 16;
3102 tok->u.code = (OnigCodePoint )num;
3103 }
3104 else {
3105 /* can't read nothing or invalid format */
3106 p = prev;
3107 }
3108 }
3109 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
3110 num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
3111 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3112 if (p == prev) { /* can't read nothing. */
3113 num = 0; /* but, it's not error */
3114 }
3115 tok->type = TK_RAW_BYTE;
3116 tok->base = 16;
3117 tok->u.c = num;
3118 }
3119 break;
3120
 /* u: hexadecimal code point; a result below -1 from the scanner is reported
  * as too few digits, any other negative result as too big a number. */
3121 case 'u':
3122 if (PEND) break;
3123
3124 prev = p;
3126 num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
3127 if (num < -1) return ONIGERR_TOO_SHORT_DIGITS;
3128 else if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3129 if (p == prev) { /* can't read nothing. */
3130 num = 0; /* but, it's not error */
3131 }
3132 tok->type = TK_CODE_POINT;
3133 tok->base = 16;
3134 tok->u.code = (OnigCodePoint )num;
3135 }
3136 break;
3137
 /* o: braced octal code point, handled like the braced hexadecimal form. */
3138 case 'o':
3139 if (PEND) break;
3140
3141 prev = p;
3143 PINC;
3144 num = scan_unsigned_octal_number(&p, end, 11, enc);
3145 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3146 if (!PEND) {
3147 c2 = PPEEK;
3148 if (ONIGENC_IS_CODE_DIGIT(enc, c2) && c2 < '8')
3150 }
3151
3152 if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
3153 PINC;
3154 tok->type = TK_CODE_POINT;
3155 tok->base = 8;
3156 tok->u.code = (OnigCodePoint )num;
3157 }
3158 else {
3159 /* can't read nothing or invalid format */
3160 p = prev;
3161 }
3162 }
3163 break;
3164
 /* Octal digits: the digit is pushed back and rescanned as an octal raw
  * byte; values above 0xff are rejected. */
3165 case '0':
3166 case '1': case '2': case '3': case '4': case '5': case '6': case '7':
3168 PUNFETCH;
3169 prev = p;
3170 num = scan_unsigned_octal_number(&p, end, 3, enc);
3171 if (num < 0 || 0xff < num) return ONIGERR_TOO_BIG_NUMBER;
3172 if (p == prev) { /* can't read nothing. */
3173 num = 0; /* but, it's not error */
3174 }
3175 tok->type = TK_RAW_BYTE;
3176 tok->base = 8;
3177 tok->u.c = num;
3178 }
3179 break;
3180
 /* Any other escape: let fetch_escaped_value decode it.  The token becomes
  * TK_CODE_POINT only when the decoded value differs from the escaped
  * character itself. */
3181 default:
3182 PUNFETCH;
3183 num = fetch_escaped_value(&p, end, env, &c2);
3184 if (num < 0) return num;
3185 if ((OnigCodePoint )tok->u.c != c2) {
3186 tok->u.code = (OnigCodePoint )c2;
3187 tok->type = TK_CODE_POINT;
3188 }
3189 break;
3190 }
3191 }
3192 else if (c == '[') {
 /* An open bracket followed by a colon may start a POSIX bracket; it is
  * accepted only if str_exist_check_with_esc finds the closing colon and
  * bracket pair, otherwise it is handled at cc_in_cc. */
3193 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
3194 OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
3195 tok->backp = p; /* point at '[' is read */
3196 PINC;
3197 if (str_exist_check_with_esc(send, 2, p, end,
3198 (OnigCodePoint )']', enc, syn)) {
3199 tok->type = TK_POSIX_BRACKET_OPEN;
3200 }
3201 else {
3202 PUNFETCH;
3203 goto cc_in_cc;
3204 }
3205 }
3206 else {
3207 cc_in_cc:
3209 tok->type = TK_CC_CC_OPEN;
3210 }
3211 else {
3212 CC_ESC_WARN(env, (UChar* )"[");
3213 }
3214 }
3215 }
3216 else if (c == '&') {
 /* A second ampersand turns the pair into the TK_CC_AND operator. */
3218 !PEND && (PPEEK_IS('&'))) {
3219 PINC;
3220 tok->type = TK_CC_AND;
3221 }
3222 }
3223
 /* Publish the new scan position to the caller. */
3224 end:
3225 *src = p;
3226 return tok->type;
3227}
3228
3229#ifdef USE_NAMED_GROUP
/*
 * fetch_named_backref_token: parse the name or number of a back reference
 * starting at *src and fill tok as a TK_BACKREF token.
 *
 * c is the opening delimiter that was already consumed by the caller and is
 * forwarded to fetch_name_with_level when USE_BACKREF_WITH_LEVEL is defined.
 * On success *src is advanced past the reference and 0 is returned.
 * A negative result from the name fetcher is returned unchanged.
 *
 * NOTE(review): the embedded line numbers skip at 3259, 3262, 3265, 3275,
 * 3277, 3279 and 3284, so the statements guarded by several of the
 * conditions below are not visible in this listing.
 */
3230static int
3231fetch_named_backref_token(OnigCodePoint c, OnigToken* tok, UChar** src,
3232 UChar* end, ScanEnv* env)
3233{
3234 int r, num;
3235 const OnigSyntaxType* syn = env->syntax;
3236 UChar* prev;
3237 UChar* p = *src;
3238 UChar* name_end;
3239 int* backs;
3240 int back_num;
3241
 /* prev marks the start of the name for the lookup by name below. */
3242 prev = p;
3243
3244# ifdef USE_BACKREF_WITH_LEVEL
3245 name_end = NULL_UCHARP; /* no need. escape gcc warning. */
3246 r = fetch_name_with_level(c, &p, end, &name_end,
3247 env, &back_num, &tok->u.backref.level);
 /* A result of 1 records that a level was given. */
3248 if (r == 1) tok->u.backref.exist_level = 1;
3249 else tok->u.backref.exist_level = 0;
3250# else
3251 r = fetch_name(&p, end, &name_end, env, &back_num, 1);
3252# endif
3253 if (r < 0) return r;
3254
 /* A non-zero back_num means the reference was numeric; negative values are
  * relative and are converted with BACKREF_REL_TO_ABS. */
3255 if (back_num != 0) {
3256 if (back_num < 0) {
3257 back_num = BACKREF_REL_TO_ABS(back_num, env);
3258 if (back_num <= 0)
3260 }
3261
3263 if (back_num > env->num_mem ||
3264 IS_NULL(SCANENV_MEM_NODES(env)[back_num]))
3266 }
3267 tok->type = TK_BACKREF;
3268 tok->u.backref.by_name = 0;
3269 tok->u.backref.num = 1;
3270 tok->u.backref.ref1 = back_num;
3271 }
3272 else {
 /* Reference by name: resolve the name to its group numbers. */
3273 num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
3274 if (num <= 0) {
3276 ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
3278 }
3280 int i;
3281 for (i = 0; i < num; i++) {
3282 if (backs[i] > env->num_mem ||
3283 IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
3285 }
3286 }
3287
3288 tok->type = TK_BACKREF;
3289 tok->u.backref.by_name = 1;
 /* With a single group, or when the syntax asks for the left-most named
  * group, only backs[0] is stored; otherwise the whole array is kept. */
3290 if (num == 1 || IS_SYNTAX_BV(syn, ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP)) {
3291 tok->u.backref.num = 1;
3292 tok->u.backref.ref1 = backs[0];
3293 }
3294 else {
3295 tok->u.backref.num = num;
3296 tok->u.backref.refs = backs;
3297 }
3298 }
3299 *src = p;
3300 return 0;
3301}
3302#endif
3303
/*
 * fetch_token: read the next token of the pattern outside a character class.
 *
 * Scanning starts at *src and never reads past end.  tok->type defaults to
 * TK_STRING and tok->backp to the start of the token; the branches below
 * refine the type and fill the matching tok->u member.  On the normal path
 * the new scan position is written to *src and tok->type is returned.
 * At end of input TK_EOT is returned without updating *src.  Error codes
 * from helper functions and ONIGERR_* constants are returned directly, also
 * without updating *src.
 *
 * Group comments (open paren, question mark, hash) are skipped, and under
 * IS_EXTEND so are hash comments and whitespace, by jumping back to start.
 *
 * NOTE(review): the line numbers embedded in this listing skip in many
 * places (for example 3326 to 3328 and 3335 to 3337), so a number of
 * condition lines appear to be missing here.  The comments below describe
 * only what the visible lines do.
 */
3304static int
3305fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
3306{
3307 int r, num;
3308 OnigCodePoint c;
3309 OnigEncoding enc = env->enc;
3310 const OnigSyntaxType* syn = env->syntax;
3311 UChar* prev;
3312 UChar* p = *src;
3314
3315 start:
3316 if (PEND) {
3317 tok->type = TK_EOT;
3318 return tok->type;
3319 }
3320
3321 tok->type = TK_STRING;
3322 tok->base = 0;
3323 tok->backp = p;
3324
3325 PFETCH(c);
3326 if (IS_MC_ESC_CODE(c, syn)) {
3328
 /* Escaped token: backp is moved to the character after the escape. */
3329 tok->backp = p;
3330 PFETCH(c);
3331
3332 tok->u.c = c;
3333 tok->escaped = 1;
3334 switch (c) {
 /* Escaped quantifier characters share the greedy_check and
  * possessive_check tails below with their unescaped forms. */
3335 case '*':
3337 tok->type = TK_OP_REPEAT;
3338 tok->u.repeat.lower = 0;
3339 tok->u.repeat.upper = REPEAT_INFINITE;
3340 goto greedy_check;
3341 break;
3342
3343 case '+':
3344 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
3345 tok->type = TK_OP_REPEAT;
3346 tok->u.repeat.lower = 1;
3347 tok->u.repeat.upper = REPEAT_INFINITE;
3348 goto greedy_check;
3349 break;
3350
3351 case '?':
3353 tok->type = TK_OP_REPEAT;
3354 tok->u.repeat.lower = 0;
3355 tok->u.repeat.upper = 1;
 /* A following question mark makes the repeat non-greedy; otherwise a
  * following plus sign may make it possessive. */
3356 greedy_check:
3357 if (!PEND && PPEEK_IS('?') &&
3359 PFETCH(c);
3360 tok->u.repeat.greedy = 0;
3361 tok->u.repeat.possessive = 0;
3362 }
3363 else {
3364 possessive_check:
3365 if (!PEND && PPEEK_IS('+') &&
3367 tok->type != TK_INTERVAL) ||
3369 tok->type == TK_INTERVAL))) {
3370 PFETCH(c);
3371 tok->u.repeat.greedy = 1;
3372 tok->u.repeat.possessive = 1;
3373 }
3374 else {
3375 tok->u.repeat.greedy = 1;
3376 tok->u.repeat.possessive = 0;
3377 }
3378 }
3379 break;
3380
 /* Interval quantifier: fetch_range_quantifier returns a negative error,
  * 0 or 2 for a quantifier, or 1 when the brace is an ordinary character. */
3381 case '{':
3383 r = fetch_range_quantifier(&p, end, tok, env);
3384 if (r < 0) return r; /* error */
3385 if (r == 0) goto greedy_check;
3386 else if (r == 2) { /* {n} */
3388 goto possessive_check;
3389
3390 goto greedy_check;
3391 }
3392 /* r == 1 : normal char */
3393 break;
3394
3395 case '|':
3396 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
3397 tok->type = TK_ALT;
3398 break;
3399
3400 case '(':
3402 tok->type = TK_SUBEXP_OPEN;
3403 break;
3404
3405 case ')':
3407 tok->type = TK_SUBEXP_CLOSE;
3408 break;
3409
 /* Character-type escapes; the upper-case forms set prop.not to 1. */
3410 case 'w':
3411 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
3412 tok->type = TK_CHAR_TYPE;
3413 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3414 tok->u.prop.not = 0;
3415 break;
3416
3417 case 'W':
3418 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
3419 tok->type = TK_CHAR_TYPE;
3420 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3421 tok->u.prop.not = 1;
3422 break;
3423
 /* Word-boundary anchors; ascii_range is set only when the ASCII-range
  * option is on and the word-bound-all-range option is off. */
3424 case 'b':
3425 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
3426 tok->type = TK_ANCHOR;
3427 tok->u.anchor.subtype = ANCHOR_WORD_BOUND;
3428 tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option)
3429 && ! IS_WORD_BOUND_ALL_RANGE(env->option);
3430 break;
3431
3432 case 'B':
3433 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
3434 tok->type = TK_ANCHOR;
3435 tok->u.anchor.subtype = ANCHOR_NOT_WORD_BOUND;
3436 tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option)
3437 && ! IS_WORD_BOUND_ALL_RANGE(env->option);
3438 break;
3439
3440#ifdef USE_WORD_BEGIN_END
3441 case '<':
3443 tok->type = TK_ANCHOR;
3444 tok->u.anchor.subtype = ANCHOR_WORD_BEGIN;
3445 tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option);
3446 break;
3447
3448 case '>':
3450 tok->type = TK_ANCHOR;
3451 tok->u.anchor.subtype = ANCHOR_WORD_END;
3452 tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option);
3453 break;
3454#endif
3455
3456 case 's':
3458 tok->type = TK_CHAR_TYPE;
3459 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3460 tok->u.prop.not = 0;
3461 break;
3462
3463 case 'S':
3465 tok->type = TK_CHAR_TYPE;
3466 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3467 tok->u.prop.not = 1;
3468 break;
3469
3470 case 'd':
3471 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
3472 tok->type = TK_CHAR_TYPE;
3473 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3474 tok->u.prop.not = 0;
3475 break;
3476
3477 case 'D':
3478 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
3479 tok->type = TK_CHAR_TYPE;
3480 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3481 tok->u.prop.not = 1;
3482 break;
3483
3484 case 'h':
3485 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3486 tok->type = TK_CHAR_TYPE;
3487 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3488 tok->u.prop.not = 0;
3489 break;
3490
3491 case 'H':
3492 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3493 tok->type = TK_CHAR_TYPE;
3494 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3495 tok->u.prop.not = 1;
3496 break;
3497
 /* Buffer anchors; the backquote and single-quote escapes further down
  * jump to begin_buf and end_buf respectively. */
3498 case 'A':
3500 begin_buf:
3501 tok->type = TK_ANCHOR;
3502 tok->u.anchor.subtype = ANCHOR_BEGIN_BUF;
3503 break;
3504
3505 case 'Z':
3507 tok->type = TK_ANCHOR;
3508 tok->u.anchor.subtype = ANCHOR_SEMI_END_BUF;
3509 break;
3510
3511 case 'z':
3513 end_buf:
3514 tok->type = TK_ANCHOR;
3515 tok->u.anchor.subtype = ANCHOR_END_BUF;
3516 break;
3517
3518 case 'G':
3520 tok->type = TK_ANCHOR;
3521 tok->u.anchor.subtype = ANCHOR_BEGIN_POSITION;
3522 break;
3523
3524 case '`':
3526 goto begin_buf;
3527 break;
3528
3529 case '\'':
3531 goto end_buf;
3532 break;
3533
 /* x: braced hexadecimal code point, or a short hexadecimal raw byte under
  * ONIG_SYN_OP_ESC_X_HEX2.  prev allows the scan to be rewound. */
3534 case 'x':
3535 if (PEND) break;
3536
3537 prev = p;
3539 PINC;
3540 num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
3541 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3542 if (!PEND) {
3543 if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
3545 }
3546
3547 if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
3548 PINC;
3549 tok->type = TK_CODE_POINT;
3550 tok->u.code = (OnigCodePoint )num;
3551 }
3552 else {
3553 /* can't read nothing or invalid format */
3554 p = prev;
3555 }
3556 }
3557 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
3558 num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
3559 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3560 if (p == prev) { /* can't read nothing. */
3561 num = 0; /* but, it's not error */
3562 }
3563 tok->type = TK_RAW_BYTE;
3564 tok->base = 16;
3565 tok->u.c = num;
3566 }
3567 break;
3568
3569 case 'u':
3570 if (PEND) break;
3571
3572 prev = p;
3574 num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
3575 if (num < -1) return ONIGERR_TOO_SHORT_DIGITS;
3576 else if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3577 if (p == prev) { /* can't read nothing. */
3578 num = 0; /* but, it's not error */
3579 }
3580 tok->type = TK_CODE_POINT;
3581 tok->base = 16;
3582 tok->u.code = (OnigCodePoint )num;
3583 }
3584 break;
3585
3586 case 'o':
3587 if (PEND) break;
3588
3589 prev = p;
3591 PINC;
3592 num = scan_unsigned_octal_number(&p, end, 11, enc);
3593 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3594 if (!PEND) {
3595 OnigCodePoint c = PPEEK;
3596 if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8')
3598 }
3599
3600 if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
3601 PINC;
3602 tok->type = TK_CODE_POINT;
3603 tok->u.code = (OnigCodePoint )num;
3604 }
3605 else {
3606 /* can't read nothing or invalid format */
3607 p = prev;
3608 }
3609 }
3610 break;
3611
 /* Decimal digits: first try a numbered back reference; if that is not
  * applicable control reaches skip_backref, where 8 and 9 become ordinary
  * characters and other digits fall through to the octal handling. */
3612 case '1': case '2': case '3': case '4':
3613 case '5': case '6': case '7': case '8': case '9':
3614 PUNFETCH;
3615 prev = p;
3616 num = onig_scan_unsigned_number(&p, end, enc);
3617 if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
3618 goto skip_backref;
3619 }
3620
3622 (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
3624 if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))
3626 }
3627
3628 tok->type = TK_BACKREF;
3629 tok->u.backref.num = 1;
3630 tok->u.backref.ref1 = num;
3631 tok->u.backref.by_name = 0;
3632#ifdef USE_BACKREF_WITH_LEVEL
3633 tok->u.backref.exist_level = 0;
3634#endif
3635 break;
3636 }
3637
3638 skip_backref:
3639 if (c == '8' || c == '9') {
3640 /* normal char */
3641 p = prev; PINC;
3642 break;
3643 }
3644
3645 p = prev;
3646 /* fall through */
3647 case '0':
3649 prev = p;
3650 num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
3651 if (num < 0 || 0xff < num) return ONIGERR_TOO_BIG_NUMBER;
3652 if (p == prev) { /* can't read nothing. */
3653 num = 0; /* but, it's not error */
3654 }
3655 tok->type = TK_RAW_BYTE;
3656 tok->base = 8;
3657 tok->u.c = num;
3658 }
3659 else if (c != '0') {
3660 PINC;
3661 }
3662 break;
3663
3664#ifdef USE_NAMED_GROUP
 /* k: named or numbered back reference delimited by angle brackets or
  * single quotes; anything else only produces a warning. */
3665 case 'k':
3667 PFETCH(c);
3668 if (c == '<' || c == '\'') {
3669 r = fetch_named_backref_token(c, tok, &p, end, env);
3670 if (r < 0) return r;
3671 }
3672 else {
3673 PUNFETCH;
3674 onig_syntax_warn(env, "invalid back reference");
3675 }
3676 }
3677 break;
3678#endif
3679
3680#if defined(USE_SUBEXP_CALL) || defined(USE_NAMED_GROUP)
 /* g: with an open brace it is parsed as a back reference; with an angle
  * bracket or single quote it is a subexpression call (TK_CALL). */
3681 case 'g':
3682# ifdef USE_NAMED_GROUP
3684 PFETCH(c);
3685 if (c == '{') {
3686 r = fetch_named_backref_token(c, tok, &p, end, env);
3687 if (r < 0) return r;
3688 }
3689 else
3690 PUNFETCH;
3691 }
3692# endif
3693# ifdef USE_SUBEXP_CALL
3695 PFETCH(c);
3696 if (c == '<' || c == '\'') {
3697 int gnum = -1, rel = 0;
3698 UChar* name_end;
3699 OnigCodePoint cnext;
3700
3701 cnext = PPEEK;
3702 if (cnext == '0') {
3703 PINC;
3704 if (PPEEK_IS(get_name_end_code_point(c))) { /* \g<0>, \g'0' */
3705 PINC;
3706 name_end = p;
3707 gnum = 0;
3708 }
3709 }
3710 else if (cnext == '+') {
3711 PINC;
3712 rel = 1;
3713 }
3714 prev = p;
 /* gnum stays negative unless the group-zero form was recognised above. */
3715 if (gnum < 0) {
3716 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);
3717 if (r < 0) return r;
3718 }
3719
3720 tok->type = TK_CALL;
3721 tok->u.call.name = prev;
3722 tok->u.call.name_end = name_end;
3723 tok->u.call.gnum = gnum;
3724 tok->u.call.rel = rel;
3725 }
3726 else {
3727 onig_syntax_warn(env, "invalid subexp call");
3728 PUNFETCH;
3729 }
3730 }
3731# endif
3732 break;
3733#endif
3734
3735 case 'Q':
3737 tok->type = TK_QUOTE_OPEN;
3738 }
3739 break;
3740
 /* p / P followed by an open brace: character property; P starts negated and
  * a leading caret inside the braces flips the negation flag again. */
3741 case 'p':
3742 case 'P':
3743 if (PPEEK_IS('{') &&
3745 PINC;
3746 tok->type = TK_CHAR_PROPERTY;
3747 tok->u.prop.not = (c == 'P' ? 1 : 0);
3748
3750 PFETCH(c);
3751 if (c == '^') {
3752 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
3753 }
3754 else
3755 PUNFETCH;
3756 }
3757 }
3758 else {
3759 onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
3760 }
3761 break;
3762
3763 case 'R':
3765 tok->type = TK_LINEBREAK;
3766 }
3767 break;
3768
3769 case 'X':
3772 }
3773 break;
3774
3775 case 'K':
3777 tok->type = TK_KEEP;
3778 }
3779 break;
3780
 /* Any other escape: decode it with fetch_escaped_value.  If the value
  * equals the escaped character itself the token stays a string and p is
  * placed just after that character. */
3781 default:
3782 {
3783 OnigCodePoint c2;
3784
3785 PUNFETCH;
3786 num = fetch_escaped_value(&p, end, env, &c2);
3787 if (num < 0) return num;
3788 /* set_raw: */
3789 if ((OnigCodePoint )tok->u.c != c2) {
3790 tok->type = TK_CODE_POINT;
3791 tok->u.code = (OnigCodePoint )c2;
3792 }
3793 else { /* string */
3794 p = tok->backp + enclen(enc, tok->backp, end);
3795 }
3796 }
3797 break;
3798 }
3799 }
3800 else {
 /* Unescaped character. */
3801 tok->u.c = c;
3802 tok->escaped = 0;
3803
3804#ifdef USE_VARIABLE_META_CHARS
 /* Syntax-defined meta characters are mapped onto the labels of the
  * standard operators in the switch below. */
3805 if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
3807 if (c == MC_ANYCHAR(syn))
3808 goto any_char;
3809 else if (c == MC_ANYTIME(syn))
3810 goto anytime;
3811 else if (c == MC_ZERO_OR_ONE_TIME(syn))
3812 goto zero_or_one_time;
3813 else if (c == MC_ONE_OR_MORE_TIME(syn))
3814 goto one_or_more_time;
3815 else if (c == MC_ANYCHAR_ANYTIME(syn)) {
3816 tok->type = TK_ANYCHAR_ANYTIME;
3817 goto out;
3818 }
3819 }
3820#endif
3821
3822 switch (c) {
3823 case '.':
3824 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
3825#ifdef USE_VARIABLE_META_CHARS
3826 any_char:
3827#endif
3828 tok->type = TK_ANYCHAR;
3829 break;
3830
3831 case '*':
3833#ifdef USE_VARIABLE_META_CHARS
3834 anytime:
3835#endif
3836 tok->type = TK_OP_REPEAT;
3837 tok->u.repeat.lower = 0;
3838 tok->u.repeat.upper = REPEAT_INFINITE;
3839 goto greedy_check;
3840 break;
3841
3842 case '+':
3843 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
3844#ifdef USE_VARIABLE_META_CHARS
3845 one_or_more_time:
3846#endif
3847 tok->type = TK_OP_REPEAT;
3848 tok->u.repeat.lower = 1;
3849 tok->u.repeat.upper = REPEAT_INFINITE;
3850 goto greedy_check;
3851 break;
3852
3853 case '?':
3854 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
3855#ifdef USE_VARIABLE_META_CHARS
3856 zero_or_one_time:
3857#endif
3858 tok->type = TK_OP_REPEAT;
3859 tok->u.repeat.lower = 0;
3860 tok->u.repeat.upper = 1;
3861 goto greedy_check;
3862 break;
3863
3864 case '{':
3865 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
3866 r = fetch_range_quantifier(&p, end, tok, env);
3867 if (r < 0) return r; /* error */
3868 if (r == 0) goto greedy_check;
3869 else if (r == 2) { /* {n} */
3871 goto possessive_check;
3872
3873 goto greedy_check;
3874 }
3875 /* r == 1 : normal char */
3876 break;
3877
3878 case '|':
3879 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
3880 tok->type = TK_ALT;
3881 break;
3882
 /* Open paren: a following question mark may introduce a group comment,
  * a subexpression call or a capital-P named reference; otherwise the
  * question mark is pushed back and a plain TK_SUBEXP_OPEN is produced. */
3883 case '(':
3884 if (PPEEK_IS('?') &&
3886 PINC;
3887 if (PPEEK_IS('#')) {
3888 PFETCH(c);
 /* Skip the comment body up to the first unescaped close paren, then
  * restart tokenising after it. */
3889 while (1) {
3891 PFETCH(c);
3892 if (c == MC_ESC(syn)) {
3893 if (!PEND) PFETCH(c);
3894 }
3895 else {
3896 if (c == ')') break;
3897 }
3898 }
3899 goto start;
3900 }
3901#ifdef USE_PERL_SUBEXP_CALL
3902 /* (?&name), (?n), (?R), (?0), (?+n), (?-n) */
3903 c = PPEEK;
3904 if ((c == '&' || c == 'R' || ONIGENC_IS_CODE_DIGIT(enc, c)) &&
3906 /* (?&name), (?n), (?R), (?0) */
3907 int gnum;
3908 UChar *name;
3909 UChar *name_end;
3910
3911 if (c == 'R' || c == '0') {
3912 PINC; /* skip 'R' / '0' */
3913 if (!PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME;
3914 PINC; /* skip ')' */
3915 name_end = name = p;
3916 gnum = 0;
3917 }
3918 else {
3919 int numref = 1;
3920 if (c == '&') { /* (?&name) */
3921 PINC;
3922 numref = 0; /* don't allow number name */
3923 }
3924 name = p;
3925 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, numref);
3926 if (r < 0) return r;
3927 }
3928
3929 tok->type = TK_CALL;
3930 tok->u.call.name = name;
3931 tok->u.call.name_end = name_end;
3932 tok->u.call.gnum = gnum;
3933 tok->u.call.rel = 0;
3934 break;
3935 }
3936 else if ((c == '-' || c == '+') &&
3938 /* (?+n), (?-n) */
3939 int gnum;
3940 UChar *name;
3941 UChar *name_end;
3942 OnigCodePoint cnext;
3944
3945 PINC; /* skip '-' / '+' */
3946 cnext = PPEEK;
3947 if (ONIGENC_IS_CODE_DIGIT(enc, cnext)) {
 /* For the minus form the sign is pushed back so that it is part of the
  * text handed to fetch_name. */
3948 if (c == '-') PUNFETCH;
3949 name = p;
3950 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, 1);
3951 if (r < 0) return r;
3952
3953 tok->type = TK_CALL;
3954 tok->u.call.name = name;
3955 tok->u.call.name_end = name_end;
3956 tok->u.call.gnum = gnum;
3957 tok->u.call.rel = 1;
3958 break;
3959 }
3960 }
3961#endif /* USE_PERL_SUBEXP_CALL */
3962#ifdef USE_CAPITAL_P_NAMED_GROUP
3963 if (PPEEK_IS('P') &&
3965 int gnum;
3966 UChar *name;
3967 UChar *name_end;
3969
3970 PINC; /* skip 'P' */
3972 PFETCH(c);
3973 if (c == '=') { /* (?P=name): backref */
3974 r = fetch_named_backref_token((OnigCodePoint )'(', tok, &p, end, env);
3975 if (r < 0) return r;
3976 break;
3977 }
3978 else if (c == '>') { /* (?P>name): subexp call */
3979 name = p;
3980 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, 0);
3981 if (r < 0) return r;
3982
3983 tok->type = TK_CALL;
3984 tok->u.call.name = name;
3985 tok->u.call.name_end = name_end;
3986 tok->u.call.gnum = gnum;
3987 tok->u.call.rel = 0;
3988 break;
3989 }
3990 }
3991#endif /* USE_CAPITAL_P_NAMED_GROUP */
3992 PUNFETCH;
3993 }
3994
3995 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
3996 tok->type = TK_SUBEXP_OPEN;
3997 break;
3998
3999 case ')':
4000 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
4001 tok->type = TK_SUBEXP_CLOSE;
4002 break;
4003
4004 case '^':
4005 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
4006 tok->type = TK_ANCHOR;
4007 tok->u.anchor.subtype = (IS_SINGLELINE(env->option)
4009 break;
4010
4011 case '$':
4012 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
4013 tok->type = TK_ANCHOR;
4014 tok->u.anchor.subtype = (IS_SINGLELINE(env->option)
4016 break;
4017
4018 case '[':
4019 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
4020 tok->type = TK_CC_OPEN;
4021 break;
4022
4023 case ']':
4024 if (*src > env->pattern) /* /].../ is allowed. */
4025 CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
4026 break;
4027
 /* In extended mode a hash starts a comment that runs to the next newline
  * (or end of input); tokenising then restarts. */
4028 case '#':
4029 if (IS_EXTEND(env->option)) {
4030 while (!PEND) {
4031 PFETCH(c);
4032 if (ONIGENC_IS_CODE_NEWLINE(enc, c))
4033 break;
4034 }
4035 goto start;
4036 break;
4037 }
4038 break;
4039
 /* In extended mode whitespace is skipped. */
4040 case ' ': case '\t': case '\n': case '\r': case '\f':
4041 if (IS_EXTEND(env->option))
4042 goto start;
4043 break;
4044
4045 default:
4046 /* string */
4047 break;
4048 }
4049 }
4050
4051#ifdef USE_VARIABLE_META_CHARS
4052 out:
4053#endif
 /* Publish the new scan position to the caller. */
4054 *src = p;
4055 return tok->type;
4056}
4057
/*
 * add_ctype_to_cc_by_range: add the code points described by the range
 * table mbr to cc, or their complement when not is non-zero.
 *
 * Code points below sb_out are recorded in the bitset cc->bs; code points
 * from sb_out upward are recorded in cc->mbuf through add_code_range_to_buf.
 * ctype is not used here (it is marked ARG_UNUSED).
 * Returns 0 on success, or the first non-zero result of
 * add_code_range_to_buf.
 *
 * NOTE(review): embedded line 4089 is missing from this listing; it is
 * presumably the start argument of the add_code_range_to_buf call after
 * sb_end - confirm against the real source.
 */
4058static int
4059add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
4060 ScanEnv* env,
4061 OnigCodePoint sb_out, const OnigCodePoint mbr[])
4062{
4063 int i, r;
4064 OnigCodePoint j;
4065
4066 int n = ONIGENC_CODE_RANGE_NUM(mbr);
4067
4068 if (not == 0) {
 /* Walk the ranges and set bits until a code point reaches sb_out. */
4069 for (i = 0; i < n; i++) {
4070 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
4071 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
4072 if (j >= sb_out) {
 /* The current range straddles sb_out: its upper part goes to mbuf and
  * the range is then skipped by the loop after sb_end. */
4073 if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
4074 r = add_code_range_to_buf(&(cc->mbuf), env, j,
4075 ONIGENC_CODE_RANGE_TO(mbr, i));
4076 if (r != 0) return r;
4077 i++;
4078 }
4079
4080 goto sb_end;
4081 }
4082 BITSET_SET_BIT_CHKDUP(cc->bs, j);
4083 }
4084 }
4085
 /* Remaining ranges are added to mbuf. */
4086 sb_end:
4087 for ( ; i < n; i++) {
4088 r = add_code_range_to_buf(&(cc->mbuf), env,
4090 ONIGENC_CODE_RANGE_TO(mbr, i));
4091 if (r != 0) return r;
4092 }
4093 }
4094 else {
 /* Negated: prev tracks the first code point after the previous range, so
  * the gaps between ranges are what gets added. */
4095 OnigCodePoint prev = 0;
4096
4097 for (i = 0; i < n; i++) {
4098 for (j = prev;
4099 j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
4100 if (j >= sb_out) {
4101 goto sb_end2;
4102 }
4103 BITSET_SET_BIT_CHKDUP(cc->bs, j);
4104 }
4105 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
4106 }
 /* All ranges ended below sb_out: fill the rest of the bitset part. */
4107 for (j = prev; j < sb_out; j++) {
4108 BITSET_SET_BIT_CHKDUP(cc->bs, j);
4109 }
4110
 /* Second pass from sb_out upward: add each gap to mbuf, ending with the
  * tail up to 0x7fffffff. */
4111 sb_end2:
4112 prev = sb_out;
4113
4114 for (i = 0; i < n; i++) {
4115 if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
4116 r = add_code_range_to_buf(&(cc->mbuf), env, prev,
4117 ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
4118 if (r != 0) return r;
4119 }
4120 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
4121 }
4122 if (prev < 0x7fffffff) {
4123 r = add_code_range_to_buf(&(cc->mbuf), env, prev, 0x7fffffff);
4124 if (r != 0) return r;
4125 }
4126 }
4127
4128 return 0;
4129}
4130
/*
 * add_ctype_to_cc: add the characters of type ctype to cc, or those not of
 * that type when not is non-zero.  ascii_range restricts the type itself to
 * the ASCII range.
 *
 * The encoding is first asked for a code range table through
 * ONIGENC_GET_CTYPE_CODE_RANGE.  When that returns 0 the table is applied
 * with add_ctype_to_cc_by_range.  When it returns ONIG_NO_SUPPORT_CONFIG the
 * function falls back to testing each single-byte code individually.  Any
 * other result is returned as is.
 * Returns 0 on success, a non-zero result from a helper, or
 * ONIGERR_PARSER_BUG for a ctype not handled by the fallback switch.
 *
 * NOTE(review): embedded lines 4184-4194 and 4210-4211 are missing from this
 * listing; they presumably hold the case labels of the first two fallback
 * groups - confirm against the real source.
 */
4131static int
4132add_ctype_to_cc(CClassNode* cc, int ctype, int not, int ascii_range, ScanEnv* env)
4133{
4134 int maxcode;
4135 int c, r;
4136 const OnigCodePoint *ranges;
4137 OnigCodePoint sb_out;
4138 OnigEncoding enc = env->enc;
4139
4140 r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
4141 if (r == 0) {
4142 if (ascii_range) {
 /* Build the class in a scratch node, restrict it, then merge it into cc. */
4143 CClassNode ccwork;
4144 initialize_cclass(&ccwork);
4145 r = add_ctype_to_cc_by_range(&ccwork, ctype, not, env, sb_out,
4146 ranges);
4147 if (r == 0) {
4148 if (not) {
 /* Negated: additionally include everything from 0x80 upward. */
4149 r = add_code_range_to_buf0(&(ccwork.mbuf), env, 0x80, ONIG_LAST_CODE_POINT, FALSE);
4150 }
4151 else {
 /* Not negated: intersect with a class holding 0x00-0x7F.  That class is
  * built in mbuf when the encoding minimum length exceeds one byte and in
  * the bitset otherwise. */
4152 CClassNode ccascii;
4153 initialize_cclass(&ccascii);
4154 if (ONIGENC_MBC_MINLEN(env->enc) > 1) {
4155 r = add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F);
4156 }
4157 else {
4158 bitset_set_range(env, ccascii.bs, 0x00, 0x7F);
4159 r = 0;
4160 }
4161 if (r == 0) {
4162 r = and_cclass(&ccwork, &ccascii, env);
4163 }
4164 if (IS_NOT_NULL(ccascii.mbuf)) bbuf_free(ccascii.mbuf);
4165 }
4166 if (r == 0) {
4167 r = or_cclass(cc, &ccwork, env);
4168 }
4169 if (IS_NOT_NULL(ccwork.mbuf)) bbuf_free(ccwork.mbuf);
4170 }
4171 }
4172 else {
4173 r = add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges);
4174 }
4175 return r;
4176 }
4177 else if (r != ONIG_NO_SUPPORT_CONFIG) {
4178 return r;
4179 }
4180
 /* Fallback: test single-byte codes one by one.  maxcode is the exclusive
  * upper bound of codes that may count as members of the type. */
4181 maxcode = ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
4182 r = 0;
4183 switch (ctype) {
 /* First group: membership is decided over all single-byte codes; only
  * the negated form adds the multi-byte range. */
4195 if (not != 0) {
4196 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
4197 if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
4198 BITSET_SET_BIT_CHKDUP(cc->bs, c);
4199 }
4200 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
4201 }
4202 else {
4203 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
4204 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
4205 BITSET_SET_BIT_CHKDUP(cc->bs, c);
4206 }
4207 }
4208 break;
4209
 /* Second group: codes at or above maxcode never count as members, so the
  * negated form includes them; the multi-byte range is added to the
  * negated form under ascii_range and to the plain form otherwise. */
4212 if (not != 0) {
4213 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
4214 if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)
4215 || c >= maxcode)
4216 BITSET_SET_BIT_CHKDUP(cc->bs, c);
4217 }
4218 if (ascii_range)
4219 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
4220 }
4221 else {
4222 for (c = 0; c < maxcode; c++) {
4223 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
4224 BITSET_SET_BIT_CHKDUP(cc->bs, c);
4225 }
4226 if (! ascii_range)
4227 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
4228 }
4229 break;
4230
4231 case ONIGENC_CTYPE_WORD:
4232 if (not == 0) {
4233 for (c = 0; c < maxcode; c++) {
4234 if (ONIGENC_IS_CODE_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c);
4235 }
4236 if (! ascii_range)
4237 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
4238 }
4239 else {
4240 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
4241 if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */
4242 && (! ONIGENC_IS_CODE_WORD(enc, c) || c >= maxcode))
4243 BITSET_SET_BIT_CHKDUP(cc->bs, c);
4244 }
4245 if (ascii_range)
4246 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
4247 }
4248 break;
4249
4250 default:
4251 return ONIGERR_PARSER_BUG;
4252 break;
4253 }
4254
4255 return r;
4256}
4257
/*
 * parse_posix_bracket: try to parse a POSIX bracket name at *src and add the
 * matching character type to cc (and, under the conditions below, to asc_cc
 * when it is non-null).
 *
 * A leading caret negates the class.  The name is looked up in the PBS
 * table with an ASCII comparison.  On success *src is advanced and 0 is
 * returned.  A non-zero result from add_ctype_to_cc is returned unchanged.
 * When the text is not a POSIX bracket the visible tail returns 1, which is
 * not an error.
 *
 * NOTE(review): the PBS table contents and embedded lines 4300, 4305 and
 * 4336 are missing from this listing, so the statements guarded by the
 * conditions just before those gaps are not visible here.
 */
4258static int
4259parse_posix_bracket(CClassNode* cc, CClassNode* asc_cc,
4260 UChar** src, UChar* end, ScanEnv* env)
4261{
4262#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
4263#define POSIX_BRACKET_NAME_MIN_LEN 4
4264
4265 static const PosixBracketEntryType PBS[] = {
4280 };
4281
4282 const PosixBracketEntryType *pb;
4283 int not, i, r;
4284 int ascii_range;
4285 OnigCodePoint c;
4286 OnigEncoding enc = env->enc;
4287 UChar *p = *src;
4288
4289 if (PPEEK_IS('^')) {
4290 PINC_S;
4291 not = 1;
4292 }
4293 else
4294 not = 0;
4295
 /* Too short to hold a minimum-length name plus three more characters. */
4296 if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
4297 goto not_posix_bracket;
4298
4299 ascii_range = IS_ASCII_RANGE(env->option) &&
4301 for (pb = PBS; pb < PBS + numberof(PBS); pb++) {
4302 if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
 /* Name matched: step over it and check for the closing colon and bracket. */
4303 p = (UChar* )onigenc_step(enc, p, end, pb->len);
4304 if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
4306
4307 r = add_ctype_to_cc(cc, pb->ctype, not, ascii_range, env);
4308 if (r != 0) return r;
4309
 /* asc_cc receives the type only for non-word, non-ASCII ctypes when
  * ascii_range is off. */
4310 if (IS_NOT_NULL(asc_cc)) {
4311 if (pb->ctype != ONIGENC_CTYPE_WORD &&
4312 pb->ctype != ONIGENC_CTYPE_ASCII &&
4313 !ascii_range)
4314 r = add_ctype_to_cc(asc_cc, pb->ctype, not, ascii_range, env);
4315 if (r != 0) return r;
4316 }
4317
 /* Advance two characters (presumably the closing colon and bracket
  * checked above) and publish the position. */
4318 PINC_S; PINC_S;
4319 *src = p;
4320 return 0;
4321 }
4322 }
4323
 /* Not a known name: look ahead, for at most
  * POSIX_BRACKET_CHECK_LIMIT_LENGTH characters, for a colon or bracket. */
4324 not_posix_bracket:
4325 c = 0;
4326 i = 0;
4327 while (!PEND && ((c = PPEEK) != ':') && c != ']') {
4328 PINC_S;
4329 if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
4330 }
4331 if (c == ':' && ! PEND) {
4332 PINC_S;
4333 if (! PEND) {
4334 PFETCH_S(c);
4335 if (c == ']')
4337 }
4338 }
4339
4340 return 1; /* 1: is not POSIX bracket, but no error. */
4341}
4342
/*
 * fetch_char_property_to_ctype: scan a property name starting at *src up to
 * its closing brace and convert it to a ctype.
 *
 * When a closing brace is found, the text between the start and the brace
 * is passed to ONIGENC_PROPERTY_NAME_TO_CTYPE.  If that result is not
 * negative, *src is advanced past the brace and the result is returned.
 * Scanning stops early on a negative result or on an open or close paren,
 * an open brace or a vertical bar; the function then returns r.
 *
 * NOTE(review): embedded lines 4365 and 4370 are missing from this listing;
 * they presumably set r or report the error before the break and the final
 * return - confirm against the real source.
 */
4343static int
4344fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
4345{
4346 int r;
4347 OnigCodePoint c;
4348 OnigEncoding enc = env->enc;
4349 UChar *prev, *start, *p = *src;
4350
4351 r = 0;
4352 start = prev = p;
4353
4354 while (!PEND) {
 /* prev is the position of the character about to be fetched, so at a
  * closing brace start..prev is the name without the brace. */
4355 prev = p;
4356 PFETCH_S(c);
4357 if (c == '}') {
4358 r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
4359 if (r < 0) break;
4360
4361 *src = p;
4362 return r;
4363 }
4364 else if (c == '(' || c == ')' || c == '{' || c == '|') {
4366 break;
4367 }
4368 }
4369
4371 return r;
4372}
4373
4374static int cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env);
4375
/*
 * parse_char_property: build a character class node for a character
 * property token.
 *
 * The property name at *src is converted to a ctype with
 * fetch_char_property_to_ctype; a negative result is returned unchanged.
 * A new class node is stored in *np, the ctype is added to it, and the
 * class is marked negated when tok->u.prop.not is non-zero.  Under
 * IS_IGNORECASE the class is case folded unless the ctype is
 * ONIGENC_CTYPE_ASCII.
 * Returns 0 on success or a non-zero result from add_ctype_to_cc or
 * cclass_case_fold.
 *
 * NOTE(review): embedded line 4387 is missing from this listing; it
 * presumably checks the result of node_new_cclass - confirm against the
 * real source.
 */
4376static int
4377parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
4378 ScanEnv* env)
4379{
4380 int r, ctype;
4381 CClassNode* cc;
4382
4383 ctype = fetch_char_property_to_ctype(src, end, env);
4384 if (ctype < 0) return ctype;
4385
4386 *np = node_new_cclass();
4388 cc = NCCLASS(*np);
 /* The type is always added un-negated; negation is a flag on the class. */
4389 r = add_ctype_to_cc(cc, ctype, 0, 0, env);
4390 if (r != 0) return r;
4391 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
4392
4393 if (IS_IGNORECASE(env->option)) {
4394 if (ctype != ONIGENC_CTYPE_ASCII)
4395 r = cclass_case_fold(np, cc, cc, env);
4396 }
4397 return r;
4398}
4399
4400
4405 CCS_START
4407
4411 CCV_CLASS
4413
/*
 * next_state_class: update the character-class parser state after a whole
 * class item has been read.
 *
 * If a single value is still pending (state CCS_VALUE with a type other
 * than CCV_CLASS), *vs is flushed into cc: into the bitset for CCV_SB, or
 * into mbuf for CCV_CODE_POINT.  The same value is mirrored into asc_cc
 * when that is non-null.  Afterwards *state becomes CCS_VALUE and *type
 * becomes CCV_CLASS.
 * Returns 0, or a negative result from add_code_range / add_code_range0.
 *
 * NOTE(review): embedded line 4422 is missing from this listing, so the
 * statement executed when *state is CCS_RANGE is not visible here.
 */
4414static int
4415next_state_class(CClassNode* cc, CClassNode* asc_cc,
4416 OnigCodePoint* vs, enum CCVALTYPE* type,
4417 enum CCSTATE* state, ScanEnv* env)
4418{
4419 int r;
4420
4421 if (*state == CCS_RANGE)
4423
4424 if (*state == CCS_VALUE && *type != CCV_CLASS) {
4425 if (*type == CCV_SB) {
4426 BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
4427 if (IS_NOT_NULL(asc_cc))
4428 BITSET_SET_BIT(asc_cc->bs, (int )(*vs));
4429 }
4430 else if (*type == CCV_CODE_POINT) {
4431 r = add_code_range(&(cc->mbuf), env, *vs, *vs);
4432 if (r < 0) return r;
4433 if (IS_NOT_NULL(asc_cc)) {
4434 r = add_code_range0(&(asc_cc->mbuf), env, *vs, *vs, 0);
4435 if (r < 0) return r;
4436 }
4437 }
4438 }
4439
4440 *state = CCS_VALUE;
4441 *type = CCV_CLASS;
4442 return 0;
4443}
4444
/*
 * next_state_val: update the character-class parser state after a single
 * value to, of kind intype, has been read.
 *
 * CCS_VALUE: the pending value *from is flushed into cc (bitset for CCV_SB,
 *   mbuf for CCV_CODE_POINT) and mirrored into asc_cc when it is non-null.
 * CCS_RANGE: the range from *from to to is added and the state becomes
 *   CCS_COMPLETE.
 * CCS_COMPLETE and CCS_START: the state becomes CCS_VALUE.
 *
 * On the paths that reach the end of the function, to, to_israw and intype
 * become the new pending value (*from, *from_israw, *type) and 0 is
 * returned.  A negative result from add_code_range / add_code_range0 is
 * returned unchanged.
 *
 * NOTE(review): embedded lines 4475, 4478, 4481, 4498 and 4501 are missing
 * from this listing, so the handling of out-of-range values and of an empty
 * range (*from greater than to) is only partly visible here.
 */
4445static int
4446next_state_val(CClassNode* cc, CClassNode* asc_cc,
4447 OnigCodePoint *from, OnigCodePoint to,
4448 int* from_israw, int to_israw,
4449 enum CCVALTYPE intype, enum CCVALTYPE* type,
4450 enum CCSTATE* state, ScanEnv* env)
4451{
4452 int r;
4453
4454 switch (*state) {
4455 case CCS_VALUE:
4456 if (*type == CCV_SB) {
4457 BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*from));
4458 if (IS_NOT_NULL(asc_cc))
4459 BITSET_SET_BIT(asc_cc->bs, (int )(*from));
4460 }
4461 else if (*type == CCV_CODE_POINT) {
4462 r = add_code_range(&(cc->mbuf), env, *from, *from);
4463 if (r < 0) return r;
4464 if (IS_NOT_NULL(asc_cc)) {
4465 r = add_code_range0(&(asc_cc->mbuf), env, *from, *from, 0);
4466 if (r < 0) return r;
4467 }
4468 }
4469 break;
4470
4471 case CCS_RANGE:
 /* Both ends of the range have the same kind. */
4472 if (intype == *type) {
4473 if (intype == CCV_SB) {
4474 if (*from > 0xff || to > 0xff)
4476
4477 if (*from > to) {
4479 goto ccs_range_end;
4480 else
4482 }
4483 bitset_set_range(env, cc->bs, (int )*from, (int )to);
4484 if (IS_NOT_NULL(asc_cc))
4485 bitset_set_range(env, asc_cc->bs, (int )*from, (int )to);
4486 }
4487 else {
4488 r = add_code_range(&(cc->mbuf), env, *from, to);
4489 if (r < 0) return r;
4490 if (IS_NOT_NULL(asc_cc)) {
4491 r = add_code_range0(&(asc_cc->mbuf), env, *from, to, 0);
4492 if (r < 0) return r;
4493 }
4494 }
4495 }
4496 else {
 /* The ends differ in kind: set the bitset part up to 0xff and also hand
  * the whole range to add_code_range. */
4497 if (*from > to) {
4499 goto ccs_range_end;
4500 else
4502 }
4503 bitset_set_range(env, cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));
4504 r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to);
4505 if (r < 0) return r;
4506 if (IS_NOT_NULL(asc_cc)) {
4507 bitset_set_range(env, asc_cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));
4508 r = add_code_range0(&(asc_cc->mbuf), env, (OnigCodePoint )*from, to, 0);
4509 if (r < 0) return r;
4510 }
4511 }
4512 ccs_range_end:
4513 *state = CCS_COMPLETE;
4514 break;
4515
4516 case CCS_COMPLETE:
4517 case CCS_START:
4518 *state = CCS_VALUE;
4519 break;
4520
4521 default:
4522 break;
4523 }
4524
 /* The value just read becomes the pending value for the next call. */
4525 *from_israw = to_israw;
4526 *from = to;
4527 *type = intype;
4528 return 0;
4529}
4530
4531static int
4532code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
4533 ScanEnv* env)
4534{
4535 int in_esc;
4536 OnigCodePoint code;
4537 OnigEncoding enc = env->enc;
4538 UChar* p = from;
4539
4540 in_esc = 0;
4541 while (! PEND) {
4542 if (ignore_escaped && in_esc) {
4543 in_esc = 0;
4544 }
4545 else {
4546 PFETCH_S(code);
4547 if (code == c) return 1;
4548 if (code == MC_ESC(env->syntax)) in_esc = 1;
4549 }
4550 }
4551 return 0;
4552}
4553
/* Parse one bracket expression ("[...]") into a character-class node.
 *
 * np      : receives the new class node (set to NULL_NODE first).
 * asc_np  : receives a second class node that is only created when
 *           IS_IGNORECASE(env->option) holds; otherwise stays NULL_NODE.
 * tok     : token scratch area, refilled through fetch_token_in_cc().
 * src/end : *src is the read position; it is advanced to the position after
 *           the class on success only.
 *
 * Returns 0 on success, otherwise the (negative) code produced by the
 * failing helper.  Tokens are consumed until TK_CC_CLOSE; single values and
 * ranges are fed through next_state_val(), classes through
 * next_state_class(), nested "[...]" through a recursive call merged with
 * or_cclass(), and "&&" through and_cclass() against the class collected so
 * far.
 *
 * NOTE(review): this listing has gaps in its embedded line numbering, so
 * several statements (mostly the bodies of error checks) are not visible.
 * NOTE(review): env->parse_depth is incremented on entry and decremented on
 * the success path only; the `err:` path and the direct `return`s shown
 * below leave it incremented -- confirm callers treat any error as fatal.
 */
4554static int
4555parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* end,
4556 ScanEnv* env)
4557{
4558 int r, neg, len, fetched, and_start;
4559 OnigCodePoint v, vs;
4560 UChar *p;
4561 Node* node;
4562 Node* asc_node;
4563 CClassNode *cc, *prev_cc;
4564 CClassNode *asc_cc, *asc_prev_cc;
4565 CClassNode work_cc, asc_work_cc;
4566
4567 enum CCSTATE state;
4568 enum CCVALTYPE val_type, in_type;
4569 int val_israw, in_israw;
4570
4571 *np = *asc_np = NULL_NODE;
4572 env->parse_depth++;
4573 if (env->parse_depth > ParseDepthLimit)
4575 prev_cc = asc_prev_cc = (CClassNode* )NULL;
4576 r = fetch_token_in_cc(tok, src, end, env);
 /* a leading unescaped '^' negates the whole class */
4577 if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
4578 neg = 1;
4579 r = fetch_token_in_cc(tok, src, end, env);
4580 }
4581 else {
4582 neg = 0;
4583 }
4584
4585 if (r < 0) return r;
 /* a ']' in first position is re-typed as an ordinary character */
4586 if (r == TK_CC_CLOSE) {
4587 if (! code_exist_check((OnigCodePoint )']',
4588 *src, env->pattern_end, 1, env))
4590
4591 CC_ESC_WARN(env, (UChar* )"]");
4592 r = tok->type = TK_CHAR; /* allow []...] */
4593 }
4594
4595 *np = node = node_new_cclass();
4597 cc = NCCLASS(node);
4598
 /* the companion class is built only under the ignore-case option */
4599 if (IS_IGNORECASE(env->option)) {
4600 *asc_np = asc_node = node_new_cclass();
4601 CHECK_NULL_RETURN_MEMERR(asc_node);
4602 asc_cc = NCCLASS(asc_node);
4603 }
4604 else {
4605 asc_node = NULL_NODE;
4606 asc_cc = NULL;
4607 }
4608
4609 and_start = 0;
4610 state = CCS_START;
4611 p = *src;
 /* main loop: one token per pass; `fetched` means a case already read the
  * next token itself, so the bottom of the loop must not fetch again */
4612 while (r != TK_CC_CLOSE) {
4613 fetched = 0;
4614 switch (r) {
4615 case TK_CHAR:
4616 if ((tok->u.code >= SINGLE_BYTE_SIZE) ||
4617 (len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c)) > 1) {
4618 in_type = CCV_CODE_POINT;
4619 }
4620 else if (len < 0) {
4621 r = len;
4622 goto err;
4623 }
4624 else {
4625 sb_char:
4626 in_type = CCV_SB;
4627 }
4628 v = (OnigCodePoint )tok->u.c;
4629 in_israw = 0;
4630 goto val_entry2;
4631 break;
4632
4633 case TK_RAW_BYTE:
4634 /* tok->base != 0 : octal or hexadec. */
4635 if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
4638 UChar* psave = p;
4639 int i, base = tok->base;
4640
 /* gather following raw-byte tokens of the same base into buf, up to the
  * encoding's maximum character length */
4641 buf[0] = (UChar )tok->u.c;
4642 for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
4643 r = fetch_token_in_cc(tok, &p, end, env);
4644 if (r < 0) goto err;
4645 if (r != TK_RAW_BYTE || tok->base != base) {
4646 fetched = 1;
4647 break;
4648 }
4649 buf[i] = (UChar )tok->u.c;
4650 }
4651
4652 if (i < ONIGENC_MBC_MINLEN(env->enc)) {
4654 goto err;
4655 }
4656
4657 len = enclen(env->enc, buf, buf + i);
4658 if (i < len) {
4660 goto err;
4661 }
4662 else if (i > len) { /* fetch back */
4663 p = psave;
4664 for (i = 1; i < len; i++) {
4665 (void)fetch_token_in_cc(tok, &p, end, env);
4666 /* no need to check the return value (already checked above) */
4667 }
4668 fetched = 0;
4669 }
4670
4671 if (i == 1) {
4672 v = (OnigCodePoint )buf[0];
4673 goto raw_single;
4674 }
4675 else {
4676 v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
4677 in_type = CCV_CODE_POINT;
4678 }
4679 }
4680 else {
4681 v = (OnigCodePoint )tok->u.c;
4682 raw_single:
4683 in_type = CCV_SB;
4684 }
4685 in_israw = 1;
4686 goto val_entry2;
4687 break;
4688
4689 case TK_CODE_POINT:
4690 v = tok->u.code;
4691 in_israw = 1;
 /* val_entry: shared entry for values whose kind is derived from `len`;
  * val_entry2: shared entry once in_type is already known */
4692 val_entry:
4694 if (len < 0) {
4695 r = len;
4696 goto err;
4697 }
4698 in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
4699 val_entry2:
4700 r = next_state_val(cc, asc_cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
4701 &state, env);
4702 if (r != 0) goto err;
4703 break;
4704
4706 r = parse_posix_bracket(cc, asc_cc, &p, end, env);
4707 if (r < 0) goto err;
4708 if (r == 1) { /* is not POSIX bracket */
4709 CC_ESC_WARN(env, (UChar* )"[");
4710 p = tok->backp;
4711 v = (OnigCodePoint )tok->u.c;
4712 in_israw = 0;
4713 goto val_entry;
4714 }
4715 goto next_class;
4716 break;
4717
4718 case TK_CHAR_TYPE:
 /* NOTE(review): the failures below `return r` directly instead of going
  * through `err:` */
4719 r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not,
4720 IS_ASCII_RANGE(env->option), env);
4721 if (r != 0) return r;
4722 if (IS_NOT_NULL(asc_cc)) {
4723 if (tok->u.prop.ctype != ONIGENC_CTYPE_WORD)
4724 r = add_ctype_to_cc(asc_cc, tok->u.prop.ctype, tok->u.prop.not,
4725 IS_ASCII_RANGE(env->option), env);
4726 if (r != 0) return r;
4727 }
4728
4729 next_class:
4730 r = next_state_class(cc, asc_cc, &vs, &val_type, &state, env);
4731 if (r != 0) goto err;
4732 break;
4733
4734 case TK_CHAR_PROPERTY:
4735 {
4736 int ctype;
4737
4738 ctype = fetch_char_property_to_ctype(&p, end, env);
4739 if (ctype < 0) return ctype;
4740 r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, 0, env);
4741 if (r != 0) return r;
4742 if (IS_NOT_NULL(asc_cc)) {
4743 if (ctype != ONIGENC_CTYPE_ASCII)
4744 r = add_ctype_to_cc(asc_cc, ctype, tok->u.prop.not, 0, env);
4745 if (r != 0) return r;
4746 }
4747 goto next_class;
4748 }
4749 break;
4750
4751 case TK_CC_RANGE:
 /* '-' : its meaning depends on the state machine position */
4752 if (state == CCS_VALUE) {
4753 r = fetch_token_in_cc(tok, &p, end, env);
4754 if (r < 0) goto err;
4755 fetched = 1;
4756 if (r == TK_CC_CLOSE) { /* allow [x-] */
4757 range_end_val:
4758 v = (OnigCodePoint )'-';
4759 in_israw = 0;
4760 goto val_entry;
4761 }
4762 else if (r == TK_CC_AND) {
4763 CC_ESC_WARN(env, (UChar* )"-");
4764 goto range_end_val;
4765 }
4766
4767 if (val_type == CCV_CLASS) {
4769 goto err;
4770 }
4771
4772 state = CCS_RANGE;
4773 }
4774 else if (state == CCS_START) {
4775 /* [-xa] is allowed */
4776 v = (OnigCodePoint )tok->u.c;
4777 in_israw = 0;
4778
4779 r = fetch_token_in_cc(tok, &p, end, env);
4780 if (r < 0) goto err;
4781 fetched = 1;
4782 /* [--x] or [a&&-x] is warned. */
4783 if (r == TK_CC_RANGE || and_start != 0)
4784 CC_ESC_WARN(env, (UChar* )"-");
4785
4786 goto val_entry;
4787 }
4788 else if (state == CCS_RANGE) {
4789 CC_ESC_WARN(env, (UChar* )"-");
4790 goto sb_char; /* [!--x] is allowed */
4791 }
4792 else { /* CCS_COMPLETE */
4793 r = fetch_token_in_cc(tok, &p, end, env);
4794 if (r < 0) goto err;
4795 fetched = 1;
4796 if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
4797 else if (r == TK_CC_AND) {
4798 CC_ESC_WARN(env, (UChar* )"-");
4799 goto range_end_val;
4800 }
4801
4803 CC_ESC_WARN(env, (UChar* )"-");
4804 goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */
4805 }
4807 goto err;
4808 }
4809 break;
4810
4811 case TK_CC_CC_OPEN: /* [ */
4812 {
4813 Node *anode, *aasc_node;
4814 CClassNode* acc;
4815
 /* nested class: parse recursively, OR it into the current class, then
  * release the temporary nodes */
4816 r = parse_char_class(&anode, &aasc_node, tok, &p, end, env);
4817 if (r == 0) {
4818 acc = NCCLASS(anode);
4819 r = or_cclass(cc, acc, env);
4820 }
4821 if (r == 0 && IS_NOT_NULL(aasc_node)) {
4822 acc = NCCLASS(aasc_node);
4823 r = or_cclass(asc_cc, acc, env);
4824 }
4825 onig_node_free(anode);
4826 onig_node_free(aasc_node);
4827 if (r != 0) goto err;
4828 }
4829 break;
4830
4831 case TK_CC_AND: /* && */
4832 {
 /* flush a pending value before starting the right-hand operand */
4833 if (state == CCS_VALUE) {
4834 r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type,
4835 &val_type, &state, env);
4836 if (r != 0) goto err;
4837 }
4838 /* initialize local variables */
4839 and_start = 1;
4840 state = CCS_START;
4841
 /* first "&&": keep the node's class as prev_cc and continue collecting
  * into the stack-allocated work class; later "&&": fold the work class
  * into prev_cc and reuse it */
4842 if (IS_NOT_NULL(prev_cc)) {
4843 r = and_cclass(prev_cc, cc, env);
4844 if (r != 0) goto err;
4845 bbuf_free(cc->mbuf);
4846 if (IS_NOT_NULL(asc_cc)) {
4847 r = and_cclass(asc_prev_cc, asc_cc, env);
4848 if (r != 0) goto err;
4849 bbuf_free(asc_cc->mbuf);
4850 }
4851 }
4852 else {
4853 prev_cc = cc;
4854 cc = &work_cc;
4855 if (IS_NOT_NULL(asc_cc)) {
4856 asc_prev_cc = asc_cc;
4857 asc_cc = &asc_work_cc;
4858 }
4859 }
4860 initialize_cclass(cc);
4861 if (IS_NOT_NULL(asc_cc))
4862 initialize_cclass(asc_cc);
4863 }
4864 break;
4865
4866 case TK_EOT:
4868 goto err;
4869 break;
4870 default:
4872 goto err;
4873 break;
4874 }
4875
4876 if (fetched)
4877 r = tok->type;
4878 else {
4879 r = fetch_token_in_cc(tok, &p, end, env);
4880 if (r < 0) goto err;
4881 }
4882 }
4883
 /* closing ']' reached: flush a still-pending single value */
4884 if (state == CCS_VALUE) {
4885 r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type,
4886 &val_type, &state, env);
4887 if (r != 0) goto err;
4888 }
4889
 /* fold the last "&&" operand back into the node's own class */
4890 if (IS_NOT_NULL(prev_cc)) {
4891 r = and_cclass(prev_cc, cc, env);
4892 if (r != 0) goto err;
4893 bbuf_free(cc->mbuf);
4894 cc = prev_cc;
4895 if (IS_NOT_NULL(asc_cc)) {
4896 r = and_cclass(asc_prev_cc, asc_cc, env);
4897 if (r != 0) goto err;
4898 bbuf_free(asc_cc->mbuf);
4899 asc_cc = asc_prev_cc;
4900 }
4901 }
4902
4903 if (neg != 0) {
4905 if (IS_NOT_NULL(asc_cc))
4906 NCCLASS_SET_NOT(asc_cc);
4907 }
4908 else {
4910 if (IS_NOT_NULL(asc_cc))
4911 NCCLASS_CLEAR_NOT(asc_cc);
4912 }
4913 if (IS_NCCLASS_NOT(cc) &&
4915 int is_empty;
4916
4917 is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
4918 if (is_empty != 0)
4919 BITSET_IS_EMPTY(cc->bs, is_empty);
4920
4921 if (is_empty == 0) {
4922#define NEWLINE_CODE 0x0a
4923
4925 if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
4927 else {
4928 r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
4929 if (r < 0) goto err;
4930 }
4931 }
4932 }
4933 }
4934 *src = p;
4935 env->parse_depth--;
4936 return 0;
4937
4938 err:
 /* only the code-range buffers of the stack work classes are released
  * here; the nodes stored in *np / *asc_np are left to the caller */
4939 if (cc != NCCLASS(*np))
4940 bbuf_free(cc->mbuf);
4941 if (IS_NOT_NULL(asc_cc) && (asc_cc != NCCLASS(*asc_np)))
4942 bbuf_free(asc_cc->mbuf);
4943 return r;
4944}
4945
/* Forward declaration: parse_enclose() below calls parse_subexp() before
 * its definition appears in this file. */
4946static int parse_subexp(Node** top, OnigToken* tok, int term,
4947 UChar** src, UChar* end, ScanEnv* env);
4948
/* Parse a parenthesised construct whose opening '(' has already been read.
 *
 * np      : receives the resulting node (set to NULL first).
 * tok     : token scratch area, refilled via fetch_token().
 * term    : terminator token passed through to parse_subexp().
 * src/end : *src is the read position; advanced on success.
 *
 * Return value as visible in this listing:
 *   1  plain grouping "(?:...)"  -- *np is the parsed sub-expression
 *   2  option-only form "(?imx)" -- *np is an option node without target
 *   0  any other construct, with the parsed body attached as target
 *  <0  error code
 *
 * The character after "(?" selects the construct: ':' group, '=' / '!'
 * look-ahead, '>' atomic group, '~' absent operator, '<' look-behind or
 * named group, '\'' / 'P' named group, '@' capture history, '(' condition,
 * '^' and option letters for inline options.  Without "(?" the visible tail
 * creates a numbered capture group.
 *
 * NOTE(review): this listing has many gaps in its embedded line numbering;
 * most syntax checks and several error returns are not visible, so the
 * comments below describe only what can be seen.
 */
4949static int
4950parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
4951 ScanEnv* env)
4952{
4953 int r = 0, num;
4954 Node *target, *work1 = NULL, *work2 = NULL;
4955 OnigOptionType option;
4956 OnigCodePoint c;
4957 OnigEncoding enc = env->enc;
4958
4959#ifdef USE_NAMED_GROUP
4960 int list_capture;
4961#endif
4962
4963 UChar* p = *src;
4965
4966 *np = NULL;
4968
4969 option = env->option;
4970 if (PPEEK_IS('?') &&
4972 PINC;
4974
4975 PFETCH(c);
4976 switch (c) {
4977 case ':': /* (?:...) grouping only */
4978 group:
4979 r = fetch_token(tok, &p, end, env);
4980 if (r < 0) return r;
4981 r = parse_subexp(np, tok, term, &p, end, env);
4982 if (r < 0) return r;
4983 *src = p;
4984 return 1; /* group */
4985 break;
4986
4987 case '=':
4989 break;
4990 case '!': /* preceding read */
4992 break;
4993 case '>': /* (?>...) stop backtrack */
4994 *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
4995 break;
4996 case '~': /* (?~...) absent operator */
4998 *np = node_new_enclose(ENCLOSE_ABSENT);
4999 }
5000 else {
5002 }
5003 break;
5004
5005#ifdef USE_NAMED_GROUP
5006 case '\'':
5008 goto named_group1;
5009 }
5010 else
5012 break;
5013
5014# ifdef USE_CAPITAL_P_NAMED_GROUP
5015 case 'P': /* (?P<name>...) */
5016 if (!PEND &&
5018 PFETCH(c);
5019 if (c == '<') goto named_group1;
5020 }
5022 break;
5023# endif
5024#endif
5025
5026 case '<': /* look behind (?<=...), (?<!...) */
5028 PFETCH(c);
5029 if (c == '=')
5031 else if (c == '!')
5033#ifdef USE_NAMED_GROUP
5034 else { /* (?<name>...) */
5036 UChar *name;
5037 UChar *name_end;
5038
5039 PUNFETCH;
5040 c = '<';
5041
 /* named_group1: entry for ordinary named groups;
  * named_group2: entry used by "(?@<name>...)" with list_capture preset */
5042 named_group1:
5043 list_capture = 0;
5044
5045# ifdef USE_CAPTURE_HISTORY
5046 named_group2:
5047# endif
5048 name = p;
5049 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
5050 if (r < 0) return r;
5051
 /* allocate a group number, then bind the name to it */
5052 num = scan_env_add_mem_entry(env);
5053 if (num < 0) return num;
5054 if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)
5056
5057 r = name_add(env->reg, name, name_end, num, env);
5058 if (r != 0) return r;
5059 *np = node_new_enclose_memory(env->option, 1);
5061 NENCLOSE(*np)->regnum = num;
5062 if (list_capture != 0)
5063 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
5064 env->num_named++;
5065 }
5066 else {
5068 }
5069 }
5070#else
5071 else {
5073 }
5074#endif
5075 break;
5076
5077#ifdef USE_CAPTURE_HISTORY
5078 case '@':
5080# ifdef USE_NAMED_GROUP
5081 if (!PEND &&
5083 PFETCH(c);
5084 if (c == '<' || c == '\'') {
5085 list_capture = 1;
5086 goto named_group2; /* (?@<name>...) */
5087 }
5088 PUNFETCH;
5089 }
5090# endif
5091 *np = node_new_enclose_memory(env->option, 0);
5093 num = scan_env_add_mem_entry(env);
5094 if (num < 0) return num;
5095 if (num >= (int )BIT_STATUS_BITS_NUM)
5097
5098 NENCLOSE(*np)->regnum = num;
5099 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
5100 }
5101 else {
5103 }
5104 break;
5105#endif /* USE_CAPTURE_HISTORY */
5106
5107 case '(': /* conditional expression: (?(cond)yes), (?(cond)yes|no) */
5108 if (!PEND &&
5110 UChar *name = NULL;
5111 UChar *name_end;
5112 PFETCH(c);
5113 if (ONIGENC_IS_CODE_DIGIT(enc, c)) { /* (n) */
5114 PUNFETCH;
5115 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &num, 1);
5116 if (r < 0) return r;
5117#if 0
5118 /* Relative number is not currently supported. (same as Perl) */
5119 if (num < 0) {
5120 num = BACKREF_REL_TO_ABS(num, env);
5121 if (num <= 0)
5123 }
5124#endif
5126 if (num > env->num_mem ||
5129 }
5130 }
5131#ifdef USE_NAMED_GROUP
5132 else if (c == '<' || c == '\'') { /* (<name>), ('name') */
5133 name = p;
5134 r = fetch_named_backref_token(c, tok, &p, end, env);
5135 if (r < 0) return r;
5136 if (!PPEEK_IS(')')) return ONIGERR_UNDEFINED_GROUP_OPTION;
5137 PINC;
5138
5140 num = tok->u.backref.ref1;
5141 }
5142 else {
5143 /* FIXME:
5144 * Use left most named group for now. This is the same as Perl.
5145 * However this should use the same strategy as normal back-
5146 * references on Ruby syntax; search right to left. */
5147 int len = tok->u.backref.num;
5148 num = len > 1 ? tok->u.backref.refs[0] : tok->u.backref.ref1;
5149 }
5150 }
5151#endif
5152 else
5154 *np = node_new_enclose(ENCLOSE_CONDITION);
5156 NENCLOSE(*np)->regnum = num;
 /* `name` is non-NULL only when the condition was written as a name */
5157 if (IS_NOT_NULL(name)) NENCLOSE(*np)->state |= NST_NAME_REF;
5158 }
5159 else
5161 break;
5162
5163#if 0
5164 case '|': /* branch reset: (?|...) */
5166 /* TODO */
5167 }
5168 else
5170 break;
5171#endif
5172
5173 case '^': /* loads default options */
5174 if (!PEND && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
5175 /* d-imsx */
5176 ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
5177 ONOFF(option, ONIG_OPTION_IGNORECASE, 1);
5178 ONOFF(option, ONIG_OPTION_SINGLELINE, 0);
5179 ONOFF(option, ONIG_OPTION_MULTILINE, 1);
5180 ONOFF(option, ONIG_OPTION_EXTEND, 1);
5181 PFETCH(c);
5182 }
5183#if 0
5184 else if (!PEND && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
5185 /* d-imx */
5186 ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
5189 ONOFF(option, ONIG_OPTION_IGNORECASE, 1);
5190 ONOFF(option, ONIG_OPTION_MULTILINE, 1);
5191 ONOFF(option, ONIG_OPTION_EXTEND, 1);
5192 PFETCH(c);
5193 }
5194#endif
5195 else {
5197 }
5198 /* fall through */
5199#ifdef USE_POSIXLINE_OPTION
5200 case 'p':
5201#endif
5202 case '-': case 'i': case 'm': case 's': case 'x':
5203 case 'a': case 'd': case 'l': case 'u':
5204 {
5205 int neg = 0;
5206
 /* consume option letters one at a time until ')' or ':' ends the list;
  * `neg` is set once '-' has been seen */
5207 while (1) {
5208 switch (c) {
5209 case ':':
5210 case ')':
5211 break;
5212
5213 case '-': neg = 1; break;
5214 case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break;
5215 case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
5216 case 's':
5219 }
5220 else
5222 break;
5223
5224 case 'm':
5226 ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
5227 }
5228 else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
5230 }
5231 else
5233 break;
5234#ifdef USE_POSIXLINE_OPTION
5235 case 'p':
5237 break;
5238#endif
5239
5240 case 'a': /* limits \d, \s, \w and POSIX brackets to ASCII range */
5243 (neg == 0)) {
5244 ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
5247 }
5248 else
5250 break;
5251
5252 case 'u':
5255 (neg == 0)) {
5256 ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
5259 }
5260 else
5262 break;
5263
5264 case 'd':
5266 (neg == 0)) {
5267 ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
5268 }
5269 else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY) &&
5270 (neg == 0)) {
5271 ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
5274 }
5275 else
5277 break;
5278
5279 case 'l':
5280 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) && (neg == 0)) {
5281 ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
5282 }
5283 else
5285 break;
5286
5287 default:
5289 }
5290
5291 if (c == ')') {
5292 *np = node_new_option(option);
5294 *src = p;
5295 return 2; /* option only */
5296 }
5297 else if (c == ':') {
5298 OnigOptionType prev = env->option;
5299
 /* the new options apply only while the body is parsed; env->option is
  * restored on both the success and the failure path */
5300 env->option = option;
5301 r = fetch_token(tok, &p, end, env);
5302 if (r < 0) {
5303 env->option = prev;
5304 return r;
5305 }
5306 r = parse_subexp(&target, tok, term, &p, end, env);
5307 env->option = prev;
5308 if (r < 0) return r;
5309 *np = node_new_option(option);
5311 NENCLOSE(*np)->target = target;
5312 *src = p;
5313 return 0;
5314 }
5315
5317 PFETCH(c);
5318 }
5319 }
5320 break;
5321
5322 default:
5324 }
5325 }
5326 else {
5328 goto group;
5329
5330 *np = node_new_enclose_memory(env->option, 0);
5332 num = scan_env_add_mem_entry(env);
5333 if (num < 0) return num;
5334 NENCLOSE(*np)->regnum = num;
5335 }
5336
 /* common tail: parse the body and attach it to the node created above */
5338 r = fetch_token(tok, &p, end, env);
5339 if (r < 0) return r;
5340 r = parse_subexp(&target, tok, term, &p, end, env);
5341 if (r < 0) {
5342 onig_node_free(target);
5343 return r;
5344 }
5345
5346 if (NTYPE(*np) == NT_ANCHOR)
5347 NANCHOR(*np)->target = target;
5348 else {
5349 NENCLOSE(*np)->target = target;
5350 if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) {
5351 /* Don't move this to previous of parse_subexp() */
5352 r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);
5353 if (r != 0) return r;
5354 }
5355 else if (NENCLOSE(*np)->type == ENCLOSE_CONDITION) {
5356 if (NTYPE(target) != NT_ALT) {
5357 /* convert (?(cond)yes) to (?(cond)yes|empty) */
5358 work1 = node_new_empty();
5359 if (IS_NULL(work1)) goto err;
5360 work2 = onig_node_new_alt(work1, NULL_NODE);
5361 if (IS_NULL(work2)) goto err;
5362 work1 = onig_node_new_alt(target, work2);
5363 if (IS_NULL(work1)) goto err;
5364 NENCLOSE(*np)->target = work1;
5365 }
5366 }
5367 }
5368
5369 *src = p;
5370 return 0;
5371
5372 err:
 /* reached only from the allocation failures above */
5373 onig_node_free(work1);
5374 onig_node_free(work2);
5375 onig_node_free(*np);
5376 *np = NULL;
5377 return ONIGERR_MEMORY;
5378}
5379
/* Printable spellings of the common quantifiers, used in the warnings of
 * set_quantifier(); indexed there by the value returned from
 * popular_quantifier_num(). */
5380static const char* const PopularQStr[] = {
5381 "?", "*", "+", "??", "*?", "+?"
5382};
5383
/* Printable spelling of the replacement reported when two nested
 * quantifiers are reduced; indexed in set_quantifier() by an entry of
 * ReduceTypeTable.  The first two entries are empty strings. */
5384static const char* const ReduceQStr[] = {
5385 "", "", "*", "*?", "??", "+ and ??", "+? and ?"
5386};
5387
/* Attach `target` to the quantifier node `qnode`, simplifying where the
 * visible code allows it.
 *
 * group : non-zero when the target came from a group; string targets are
 *         then never split.
 *
 * Returns
 *   1  the quantifier is {1,1}; nothing is attached
 *   2  target is a string that was split: only its last character (the
 *      node returned by str_node_split_last_char) became the target
 *   0  otherwise (target attached, or nested quantifiers were reduced via
 *      onig_reduce_nested_quantifier)
 *
 * NOTE(review): one line of the warning condition (embedded number 5421)
 * is missing from this listing.
 */
5388static int
5389set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
5390{
5391 QtfrNode* qn;
5392
5393 qn = NQTFR(qnode);
5394 if (qn->lower == 1 && qn->upper == 1) {
5395 return 1;
5396 }
5397
5398 switch (NTYPE(target)) {
5399 case NT_STR:
 /* quantifier binds to the last character of an ungrouped string */
5400 if (! group) {
5401 StrNode* sn = NSTR(target);
5402 if (str_node_can_be_split(sn, env->enc)) {
5403 Node* n = str_node_split_last_char(sn, env->enc);
5404 if (IS_NOT_NULL(n)) {
5405 qn->target = n;
5406 return 2;
5407 }
5408 }
5409 }
5410 break;
5411
5412 case NT_QTFR:
5413 { /* check redundant double repeat. */
5414 /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
5415 QtfrNode* qnt = NQTFR(target);
5416 int nestq_num = popular_quantifier_num(qn);
5417 int targetq_num = popular_quantifier_num(qnt);
5418
5419#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
5420 if (nestq_num >= 0 && targetq_num >= 0 &&
5422 switch (ReduceTypeTable[targetq_num][nestq_num]) {
5423 case RQ_ASIS:
5424 break;
5425
5426 case RQ_DEL:
5427 if (onig_warn != onig_null_warn) {
5428 onig_syntax_warn(env, "regular expression has redundant nested repeat operator '%s'",
5429 PopularQStr[targetq_num]);
5430 }
5431 goto warn_exit;
5432 break;
5433
5434 default:
5435 if (onig_warn != onig_null_warn) {
5436 onig_syntax_warn(env, "nested repeat operator '%s' and '%s' was replaced with '%s' in regular expression",
5437 PopularQStr[targetq_num], PopularQStr[nestq_num],
5438 ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
5439 }
5440 goto warn_exit;
5441 break;
5442 }
5443 }
5444
5445 warn_exit:
5446#endif
5447 if (targetq_num >= 0) {
5448 if (nestq_num >= 0) {
 /* both are common quantifiers: merge them and skip the plain attach */
5449 onig_reduce_nested_quantifier(qnode, target);
5450 goto q_exit;
5451 }
5452 else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
5453 /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
5454 if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
5455 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
5456 }
5457 }
5458 }
5459 }
5460 break;
5461
5462 default:
5463 break;
5464 }
5465
5466 qn->target = target;
5467 q_exit:
5468 return 0;
5469}
5470
5471
/* Only compiled when CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS is NOT
 * defined; the top of this file defines that macro. */
5472#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
/* For a class that carries the NOT flag, invert its stored contents in
 * place: the bitset is inverted and, for encodings that are not
 * single-byte, the code-range buffer is replaced by the buffer produced by
 * not_code_range_buf().
 * Returns 0 on success or the non-zero code from not_code_range_buf().
 *
 * NOTE(review): one line after the multibyte block (embedded number 5490)
 * is missing from this listing.
 */
5473static int
5474clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
5475{
5476 BBuf *tbuf;
5477 int r;
5478
5479 if (IS_NCCLASS_NOT(cc)) {
5480 bitset_invert(cc->bs);
5481
5482 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
5483 r = not_code_range_buf(enc, cc->mbuf, &tbuf);
5484 if (r != 0) return r;
5485
 /* swap in the inverted range buffer and release the old one */
5486 bbuf_free(cc->mbuf);
5487 cc->mbuf = tbuf;
5488 }
5489
5491 }
5492
5493 return 0;
5494}
5495#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
5496
5497typedef struct {
5504
/* Callback handed to ONIGENC_APPLY_ALL_CASE_FOLD by cclass_case_fold().
 *
 * from   : source code point of one case-fold mapping.
 * to     : folded code point(s); to_len is their count.
 * arg    : IApplyCaseFoldArg* carrying env, cc, asc_cc and the tail pointer
 *          of the alternative list being built.
 *
 * to_len == 1: when `from` is matched by cc (taking the NOT flag into
 *   account) and add_flag is set, the single folded code point is added to
 *   cc -- to the bitset, or to the code-range buffer when the encoding's
 *   minimum length is > 1 or the code point is >= SINGLE_BYTE_SIZE.
 * to_len  > 1: when `from` is in cc, a string node holding the folded
 *   sequence is built and appended to the alternative list via iarg->ptail.
 *
 * Returns 0 on success or a negative error code.
 *
 * NOTE(review): several lines (embedded numbers 5571, 5575, 5583, 5599),
 * including the declaration of `buf`, are missing from this listing.
 */
5505static int
5506i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
5507 int to_len, void* arg)
5508{
5509 IApplyCaseFoldArg* iarg;
5510 ScanEnv* env;
5511 CClassNode* cc;
5512 CClassNode* asc_cc;
5513 BitSetRef bs;
5514 int add_flag, r;
5515
5516 iarg = (IApplyCaseFoldArg* )arg;
5517 env = iarg->env;
5518 cc = iarg->cc;
5519 asc_cc = iarg->asc_cc;
5520 bs = cc->bs;
5521
 /* add_flag decides whether a single-code-point fold may be added:
  * never without asc_cc; always when `from` and the first target are both
  * ASCII or both non-ASCII; otherwise only if `from` is matched by asc_cc */
5522 if (IS_NULL(asc_cc)) {
5523 add_flag = 0;
5524 }
5525 else if (ONIGENC_IS_ASCII_CODE(from) == ONIGENC_IS_ASCII_CODE(*to)) {
5526 add_flag = 1;
5527 }
5528 else {
5529 add_flag = onig_is_code_in_cc(env->enc, from, asc_cc);
5530 if (IS_NCCLASS_NOT(asc_cc))
5531 add_flag = !add_flag;
5532 }
5533
5534 if (to_len == 1) {
5535 int is_in = onig_is_code_in_cc(env->enc, from, cc);
5536#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
5537 if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
5538 (is_in == 0 && IS_NCCLASS_NOT(cc))) {
5539 if (add_flag) {
5540 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
5541 r = add_code_range0(&(cc->mbuf), env, *to, *to, 0);
5542 if (r < 0) return r;
5543 }
5544 else {
5545 BITSET_SET_BIT(bs, *to);
5546 }
5547 }
5548 }
5549#else
5550 if (is_in != 0) {
5551 if (add_flag) {
5552 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
5553 if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
5554 r = add_code_range0(&(cc->mbuf), env, *to, *to, 0);
5555 if (r < 0) return r;
5556 }
5557 else {
5558 if (IS_NCCLASS_NOT(cc)) {
5559 BITSET_CLEAR_BIT(bs, *to);
5560 }
5561 else {
5562 BITSET_SET_BIT(bs, *to);
5563 }
5564 }
5565 }
5566 }
5567#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
5568 }
5569 else {
5570 int r, i, len;
5572 Node *snode = NULL_NODE;
5573
5574 if (onig_is_code_in_cc(env->enc, from, cc)
5576 && !IS_NCCLASS_NOT(cc)
5577#endif
5578 ) {
 /* encode every folded code point and concatenate them into one string */
5579 for (i = 0; i < to_len; i++) {
5580 len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
5581 if (i == 0) {
5582 snode = onig_node_new_str(buf, buf + len);
5584
5585 /* char-class expanded multi-char only
5586 compare with string folded at match time. */
5587 NSTRING_SET_AMBIG(snode);
5588 }
5589 else {
5590 r = onig_node_str_cat(snode, buf, buf + len);
5591 if (r < 0) {
5592 onig_node_free(snode);
5593 return r;
5594 }
5595 }
5596 }
5597
 /* append the string as a new alternative and advance the tail pointer */
5598 *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
5600 iarg->ptail = &(NCDR((*(iarg->ptail))));
5601 }
5602 }
5603
5604 return 0;
5605}
5606
/* Apply all case-fold mappings of the encoding to the class `cc`.
 *
 * Runs i_apply_case_fold() over every mapping through
 * ONIGENC_APPLY_ALL_CASE_FOLD.  When that produced multi-character
 * alternatives (iarg.alt_root is non-NULL), *np is replaced by an
 * alternation node whose first branch is the old *np and whose remainder is
 * the collected list.
 *
 * Returns 0 on success, the non-zero code from the fold iteration, or
 * ONIGERR_MEMORY when the alternation node cannot be allocated.
 *
 * NOTE(review): the first statement of both failure branches (embedded
 * numbers 5622 and 5628) is missing from this listing.
 */
5607static int
5608cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env)
5609{
5610 int r;
5611 IApplyCaseFoldArg iarg;
5612
5613 iarg.env = env;
5614 iarg.cc = cc;
5615 iarg.asc_cc = asc_cc;
5616 iarg.alt_root = NULL_NODE;
 /* ptail starts at the list root so the first append fills alt_root */
5617 iarg.ptail = &(iarg.alt_root);
5618
5619 r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
5620 i_apply_case_fold, &iarg);
5621 if (r != 0) {
5623 return r;
5624 }
5625 if (IS_NOT_NULL(iarg.alt_root)) {
5626 Node* work = onig_node_new_alt(*np, iarg.alt_root);
5627 if (IS_NULL(work)) {
5629 return ONIGERR_MEMORY;
5630 }
5631 *np = work;
5632 }
5633 return r;
5634}
5635
/* Build the node tree for a generic line break and store it in *np.
 *
 * The tree is an atomic group (ENCLOSE_STOP_BACKTRACK) around the
 * alternation  "\x0D\x0A" | [\x0A-\x0D]; for Unicode encodings the class
 * additionally contains U+0085, U+2028 and U+2029.
 *
 * Returns ONIG_NORMAL on success, the negative result of
 * ONIGENC_CODE_TO_MBC if encoding CR or LF fails, and ONIGERR_MEMORY for
 * every failure that goes through `err:`.
 * NOTE(review): failures of add_code_range() also end in `err:` and are
 * therefore reported as ONIGERR_MEMORY whatever code they returned.
 * NOTE(review): the declaration of `buf` (embedded number 5646) is missing
 * from this listing.
 */
5636static int
5637node_linebreak(Node** np, ScanEnv* env)
5638{
5639 /* same as (?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}]) */
5640 Node* left = NULL;
5641 Node* right = NULL;
5642 Node* target1 = NULL;
5643 Node* target2 = NULL;
5644 CClassNode* cc;
5645 int num1, num2, r;
5647
5648 /* \x0D\x0A */
5649 num1 = ONIGENC_CODE_TO_MBC(env->enc, 0x0D, buf);
5650 if (num1 < 0) return num1;
5651 num2 = ONIGENC_CODE_TO_MBC(env->enc, 0x0A, buf + num1);
5652 if (num2 < 0) return num2;
5653 left = node_new_str_raw(buf, buf + num1 + num2);
5654 if (IS_NULL(left)) goto err;
5655
5656 /* [\x0A-\x0D] or [\x0A-\x0D\x{85}\x{2028}\x{2029}] */
5657 right = node_new_cclass();
5658 if (IS_NULL(right)) goto err;
5659 cc = NCCLASS(right);
 /* encodings with a minimum length > 1 store the range in the code-range
  * buffer instead of the bitset */
5660 if (ONIGENC_MBC_MINLEN(env->enc) > 1) {
5661 r = add_code_range(&(cc->mbuf), env, 0x0A, 0x0D);
5662 if (r != 0) goto err;
5663 }
5664 else {
5665 bitset_set_range(env, cc->bs, 0x0A, 0x0D);
5666 }
5667
5668 /* TODO: move this block to enc/unicode.c */
5669 if (ONIGENC_IS_UNICODE(env->enc)) {
5670 /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */
5671 r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);
5672 if (r != 0) goto err;
5673 r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);
5674 if (r != 0) goto err;
5675 }
5676
5677 /* ...|... */
 /* each local is cleared once its node is owned by a parent, so that the
  * `err:` cleanup never frees a node twice */
5678 target1 = onig_node_new_alt(right, NULL_NODE);
5679 if (IS_NULL(target1)) goto err;
5680 right = NULL;
5681 target2 = onig_node_new_alt(left, target1);
5682 if (IS_NULL(target2)) goto err;
5683 left = NULL;
5684 target1 = NULL;
5685
5686 /* (?>...) */
5687 *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
5688 if (IS_NULL(*np)) goto err;
5689 NENCLOSE(*np)->target = target2;
5690 return ONIG_NORMAL;
5691
5692 err:
5693 onig_node_free(left);
5694 onig_node_free(right);
5695 onig_node_free(target1);
5696 onig_node_free(target2);
5697 return ONIGERR_MEMORY;
5698}
5699
5700static int
5701propname2ctype(ScanEnv* env, const char* propname)
5702{
5703 UChar* name = (UChar* )propname;
5704 UChar* name_end = name + strlen(propname);
5705 int ctype = env->enc->property_name_to_ctype(ONIG_ENCODING_ASCII,
5706 name, name_end);
5707 if (ctype < 0) {
5708 onig_scan_env_set_error_string(env, ctype, name, name_end);
5709 }
5710 return ctype;
5711}
5712
5713static int
5714add_property_to_cc(CClassNode* cc, const char* propname, int not, ScanEnv* env)
5715{
5716 int ctype = propname2ctype(env, propname);
5717 if (ctype < 0) return ctype;
5718 return add_ctype_to_cc(cc, ctype, not, 0, env);
5719}
5720
5721/*
5722 * helper methods for node_extended_grapheme_cluster (/\X/)
5723 */
5724static int
5725create_property_node(Node **np, ScanEnv* env, const char* propname)
5726{
5727 int r;
5728 CClassNode* cc;
5729
5730 *np = node_new_cclass();
5731 if (IS_NULL(*np)) return ONIGERR_MEMORY;
5732 cc = NCCLASS(*np);
5733 r = add_property_to_cc(cc, propname, 0, env);
5734 if (r != 0)
5735 onig_node_free(*np);
5736 return r;
5737}
5738
5739static int
5740quantify_node(Node **np, int lower, int upper)
5741{
5742 Node* tmp = node_new_quantifier(lower, upper, 0);
5743 if (IS_NULL(tmp)) return ONIGERR_MEMORY;
5744 NQTFR(tmp)->target = *np;
5745 *np = tmp;
5746 return 0;
5747}
5748
5749static int
5750quantify_property_node(Node **np, ScanEnv* env, const char* propname, char repetitions)
5751{
5752 int r;
5753 int lower = 0;
5754 int upper = REPEAT_INFINITE;
5755
5756 r = create_property_node(np, env, propname);
5757 if (r != 0) return r;
5758 switch (repetitions) {
5759 case '?': upper = 1; break;
5760 case '+': lower = 1; break;
5761 case '*': break;
5762 case '2': lower = upper = 2; break;
5763 default : return ONIGERR_PARSER_BUG;
5764 }
5765 return quantify_node(np, lower, upper);
5766}
5767
5768#define LIST 0
5769#define ALT 1
5770
5771/* IMPORTANT: Make sure node_array ends with NULL_NODE */
5772static int
5773create_node_from_array(int kind, Node **np, Node **node_array)
5774{
5775 Node* tmp = NULL_NODE;
5776 int i = 0;
5777
5778 while (node_array[i] != NULL_NODE) i++;
5779 while (--i >= 0) {
5780 *np = kind==LIST ? node_new_list(node_array[i], tmp)
5781 : onig_node_new_alt(node_array[i], tmp);
5782 if (IS_NULL(*np)) {
5783 while (i >= 0) {
5784 onig_node_free(node_array[i]);
5785 node_array[i--] = NULL_NODE;
5786 }
5787 onig_node_free(tmp);
5788 return ONIGERR_MEMORY;
5789 }
5790 else
5791 node_array[i] = NULL_NODE;
5792 tmp = *np;
5793 }
5794 return 0;
5795}
5796
/* Evaluate `call`, store the result in the enclosing function's local `r`
 * and jump to its `err` label when the result is non-zero.
 * Wrapped in do { } while (0) so the expansion is a single statement and
 * stays correct as the body of an unbraced if / else. */
#define R_ERR(call) do { r = (call); if (r != 0) goto err; } while (0)
5798
5799/* Memory layout for common node array:
5800 * The main purpose is to be able to easily free all leftover nodes
5801 * after an error. As a side effect, we share some memory.
5802 *
5803 * The layout is as shown below (each line corresponds to one call of
5804 * create_node_from_array()). Because create_node_from_array sets all
5805 * nodes of the source to NULL_NODE, we can overlap the target array
5806 * as long as we do not override the actual target location.
5807 *
5808 * Target Array name Index
5809 *
5810 * node_array 0 1 2 3 4 5 6 7 8 9 A B C D E F
5811 * top_alts alts[5] 0 1 2 3 4*
5812 * alts+1 list[4] 0 1 2 3*
5813 * list+1 core_alts[7] 0 1 2 3 4 5 6*
5814 * core_alts+0 H_list[4] 0 1 2 3*
5815 * H_list+1 H_alt2[4] 0 1 2 3*
5816 * H_alt2+1 H_list2[3] 0 1 2*
5817 * core_alts+4 XP_list[4] 0 1 2 3*
5818 * XP_list+1 Ex_list[4] 0 1 2 3*
5819 */
5820#define NODE_COMMON_SIZE 16
5821
5822static int
5823node_extended_grapheme_cluster(Node** np, ScanEnv* env)
5824{
5825 Node* tmp = NULL;
5826 Node* np1 = NULL;
5827 Node* top_alt = NULL;
5828 int r = 0;
5829 int num1;
5830 int i;
5831 int any_target_position;
5833 OnigOptionType option;
5834 /* node_common is function-global so that we can free all nodes
5835 * in case of error. Unused slots are set to NULL_NODE at all times. */
5836 Node *node_common[NODE_COMMON_SIZE];
5837 Node **alts = node_common+0; /* size: 5 */
5838
5839 for (i=0; i<NODE_COMMON_SIZE; i++)
5840 node_common[i] = NULL_NODE;
5841
5842 /* CRLF, common for both Unicode and non-Unicode */
5843 /* \x0D\x0A */
5844 r = ONIGENC_CODE_TO_MBC(env->enc, 0x0D, buf);
5845 if (r < 0) goto err;
5846 num1 = r;
5847 r = ONIGENC_CODE_TO_MBC(env->enc, 0x0A, buf + num1);
5848 if (r < 0) goto err;
5849 alts[0] = node_new_str_raw(buf, buf + num1 + r);
5850 if (IS_NULL(alts[0])) goto err;
5851
5852#ifdef USE_UNICODE_PROPERTIES
5853 if (ONIGENC_IS_UNICODE(env->enc)) { /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */
5854 CClassNode* cc;
5855
5856 if (propname2ctype(env, "Grapheme_Cluster_Break=Extend") < 0) goto err;
5857 /* Unicode 11.0.0
5858 * CRLF (already done)
5859 * | [Control CR LF]
5860 * | precore* core postcore*
5861 * | . (to catch invalid stuff, because this seems to be spec for String#grapheme_clusters) */
5862
5863 /* [Control CR LF] (CR and LF are not in the spec, but this is a conformed fix) */
5864 alts[1] = node_new_cclass();
5865 if (IS_NULL(alts[1])) goto err;
5866 cc = NCCLASS(alts[1]);
5867 R_ERR(add_property_to_cc(cc, "Grapheme_Cluster_Break=Control", 0, env));
5868 if (ONIGENC_MBC_MINLEN(env->enc) > 1) { /* UTF-16/UTF-32 */
5869 R_ERR(add_code_range(&(cc->mbuf), env, 0x000A, 0x000A)); /* CR */
5870 R_ERR(add_code_range(&(cc->mbuf), env, 0x000D, 0x000D)); /* LF */
5871 }
5872 else {
5873 BITSET_SET_BIT(cc->bs, 0x0a);
5874 BITSET_SET_BIT(cc->bs, 0x0d);
5875 }
5876
5877 /* precore* core postcore* */
5878 {
5879 Node **list = alts + 3; /* size: 4 */
5880
5881 /* precore*; precore := Prepend */
5882 R_ERR(quantify_property_node(list+0, env, "Grapheme_Cluster_Break=Prepend", '*'));
5883
5884 /* core := hangul-syllable
5885 * | ri-sequence
5886 * | xpicto-sequence
5887 * | [^Control CR LF] */
5888 {
5889 Node **core_alts = list + 2; /* size: 7 */
5890
5891 /* hangul-syllable :=
5892 * L* (V+ | LV V* | LVT) T*
5893 * | L+
5894 * | T+ */
5895 /* hangul-syllable is an alternative (would be called H_alt)
5896 * inside an alternative, but we flatten it into core_alts */
5897
5898 /* L* (V+ | LV V* | LVT) T* */
5899 {
5900 Node **H_list = core_alts + 1; /* size: 4 */
5901 R_ERR(quantify_property_node(H_list+0, env, "Grapheme_Cluster_Break=L", '*'));
5902
5903 /* V+ | LV V* | LVT */
5904 {
5905 Node **H_alt2 = H_list + 2; /* size: 4 */
5906 R_ERR(quantify_property_node(H_alt2+0, env, "Grapheme_Cluster_Break=V", '+'));
5907
5908 /* LV V* */
5909 {
5910 Node **H_list2 = H_alt2 + 2; /* size: 3 */
5911
5912 R_ERR(create_property_node(H_list2+0, env, "Grapheme_Cluster_Break=LV"));
5913 R_ERR(quantify_property_node(H_list2+1, env, "Grapheme_Cluster_Break=V", '*'));
5914 R_ERR(create_node_from_array(LIST, H_alt2+1, H_list2));
5915 }
5916
5917 R_ERR(create_property_node(H_alt2+2, env, "Grapheme_Cluster_Break=LVT"));
5918 R_ERR(create_node_from_array(ALT, H_list+1, H_alt2));
5919 }
5920
5921 R_ERR(quantify_property_node(H_list+2, env, "Grapheme_Cluster_Break=T", '*'));
5922 R_ERR(create_node_from_array(LIST, core_alts+0, H_list));
5923 }
5924
5925 R_ERR(quantify_property_node(core_alts+1, env, "Grapheme_Cluster_Break=L", '+'));
5926 R_ERR(quantify_property_node(core_alts+2, env, "Grapheme_Cluster_Break=T", '+'));
5927 /* end of hangul-syllable */
5928
5929 /* ri-sequence := RI RI */
5930 R_ERR(quantify_property_node(core_alts+3, env, "Regional_Indicator", '2'));
5931
5932 /* xpicto-sequence := \p{Extended_Pictographic} (Extend* ZWJ \p{Extended_Pictographic})* */
5933 {
5934 Node **XP_list = core_alts + 5; /* size: 3 */
5935 R_ERR(create_property_node(XP_list+0, env, "Extended_Pictographic"));
5936
5937 /* (Extend* ZWJ \p{Extended_Pictographic})* */
5938 {
5939 Node **Ex_list = XP_list + 2; /* size: 4 */
5940 /* assert(Ex_list+4 == node_common+NODE_COMMON_SIZE); */
5941 R_ERR(quantify_property_node(Ex_list+0, env, "Grapheme_Cluster_Break=Extend", '*'));
5942
5943 /* ZWJ (ZERO WIDTH JOINER) */
5944 r = ONIGENC_CODE_TO_MBC(env->enc, 0x200D, buf);
5945 if (r < 0) goto err;
5946 Ex_list[1] = node_new_str_raw(buf, buf + r);
5947 if (IS_NULL(Ex_list[1])) goto err;
5948
5949 R_ERR(create_property_node(Ex_list+2, env, "Extended_Pictographic"));
5950 R_ERR(create_node_from_array(LIST, XP_list+1, Ex_list));
5951 }
5952 R_ERR(quantify_node(XP_list+1, 0, REPEAT_INFINITE)); /* TODO: Check about node freeing */
5953
5954 R_ERR(create_node_from_array(LIST, core_alts+4, XP_list));
5955 }
5956
5957 /* [^Control CR LF] */
5958 core_alts[5] = node_new_cclass();
5959 if (IS_NULL(core_alts[5])) goto err;
5960 cc = NCCLASS(core_alts[5]);
5961 if (ONIGENC_MBC_MINLEN(env->enc) > 1) { /* UTF-16/UTF-32 */
5962 BBuf *inverted_buf = NULL;
5963
5964 /* TODO: fix false warning */
5965 const int dup_not_warned = env->warnings_flag | ~ONIG_SYN_WARN_CC_DUP;
5966 env->warnings_flag |= ONIG_SYN_WARN_CC_DUP;
5967
5968 /* Start with a positive buffer and invert at the end.
5969 * Otherwise, adding single-character ranges work the wrong way. */
5970 R_ERR(add_property_to_cc(cc, "Grapheme_Cluster_Break=Control", 0, env));
5971 R_ERR(add_code_range(&(cc->mbuf), env, 0x000A, 0x000A)); /* CR */
5972 R_ERR(add_code_range(&(cc->mbuf), env, 0x000D, 0x000D)); /* LF */
5973 R_ERR(not_code_range_buf(env->enc, cc->mbuf, &inverted_buf, env));
5974 cc->mbuf = inverted_buf; /* TODO: check what to do with buffer before inversion */
5975
5976 env->warnings_flag &= dup_not_warned; /* TODO: fix false warning */
5977 }
5978 else {
5979 R_ERR(add_property_to_cc(cc, "Grapheme_Cluster_Break=Control", 1, env));
5980 BITSET_CLEAR_BIT(cc->bs, 0x0a);
5981 BITSET_CLEAR_BIT(cc->bs, 0x0d);
5982 }
5983
5984 R_ERR(create_node_from_array(ALT, list+1, core_alts));
5985 }
5986
5987 /* postcore*; postcore = [Extend ZWJ SpacingMark] */
5988 R_ERR(create_property_node(list+2, env, "Grapheme_Cluster_Break=Extend"));
5989 cc = NCCLASS(list[2]);
5990 R_ERR(add_property_to_cc(cc, "Grapheme_Cluster_Break=SpacingMark", 0, env));
5991 R_ERR(add_code_range(&(cc->mbuf), env, 0x200D, 0x200D));
5992 R_ERR(quantify_node(list+2, 0, REPEAT_INFINITE));
5993
5994 R_ERR(create_node_from_array(LIST, alts+2, list));
5995 }
5996
5997 any_target_position = 3;
5998 }
5999 else
6000#endif /* USE_UNICODE_PROPERTIES */
6001 {
6002 any_target_position = 1;
6003 }
6004
6005 /* PerlSyntax: (?s:.), RubySyntax: (?m:.), common for both Unicode and non-Unicode */
6006 /* Not in Unicode spec (UAX #29), but added to catch invalid stuff,
6007 * because this is Ruby spec for String#grapheme_clusters. */
6008 np1 = node_new_anychar();
6009 if (IS_NULL(np1)) goto err;
6010
6011 option = env->option;
6012 ONOFF(option, ONIG_OPTION_MULTILINE, 0);
6013 tmp = node_new_option(option);
6014 if (IS_NULL(tmp)) goto err;
6015 NENCLOSE(tmp)->target = np1;
6016 alts[any_target_position] = tmp;
6017 np1 = NULL;
6018
6019 R_ERR(create_node_from_array(ALT, &top_alt, alts));
6020
6021 /* (?>): For efficiency, because there is no text piece
6022 * that is not in a grapheme cluster, and there is only one way
6023 * to split a string into grapheme clusters. */
6024 tmp = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
6025 if (IS_NULL(tmp)) goto err;
6026 NENCLOSE(tmp)->target = top_alt;
6027 np1 = tmp;
6028
6029#ifdef USE_UNICODE_PROPERTIES
6030 if (ONIGENC_IS_UNICODE(env->enc)) {
6031 /* Don't ignore case. */
6032 option = env->option;
6033 ONOFF(option, ONIG_OPTION_IGNORECASE, 1);
6034 *np = node_new_option(option);
6035 if (IS_NULL(*np)) goto err;
6036 NENCLOSE(*np)->target = np1;
6037 }
6038 else
6039#endif
6040 {
6041 *np = np1;
6042 }
6043 return ONIG_NORMAL;
6044
6045 err:
6046 onig_node_free(np1);
6047 for (i=0; i<NODE_COMMON_SIZE; i++)
6048 onig_node_free(node_common[i]);
6049 return (r == 0) ? ONIGERR_MEMORY : r;
6050}
6051#undef R_ERR
6052
/*
 * Count the bits set in the low 32 bits of `bits`.
 *
 * Adjacent bit fields are summed pairwise in five steps (1, 2, 4, 8 and
 * 16 bits wide), so no loop is needed.
 */
static int
countbits(unsigned int bits)
{
  bits = (bits & 0x55555555) + ((bits >> 1) & 0x55555555);
  bits = (bits & 0x33333333) + ((bits >> 2) & 0x33333333);
  bits = (bits & 0x0f0f0f0f) + ((bits >> 4) & 0x0f0f0f0f);
  bits = (bits & 0x00ff00ff) + ((bits >> 8) & 0x00ff00ff);
  return (int )((bits & 0x0000ffff) + ((bits >>16) & 0x0000ffff));
}
6062
6063static int
6064is_onechar_cclass(CClassNode* cc, OnigCodePoint* code)
6065{
6066 const OnigCodePoint not_found = ONIG_LAST_CODE_POINT;
6067 OnigCodePoint c = not_found;
6068 int i;
6069 BBuf *bbuf = cc->mbuf;
6070
6071 if (IS_NCCLASS_NOT(cc)) return 0;
6072
6073 /* check bbuf */
6074 if (IS_NOT_NULL(bbuf)) {
6075 OnigCodePoint n, *data;
6076 GET_CODE_POINT(n, bbuf->p);
6077 data = (OnigCodePoint* )(bbuf->p) + 1;
6078 if ((n == 1) && (data[0] == data[1])) {
6079 /* only one char found in the bbuf, save the code point. */
6080 c = data[0];
6081 if (((c < SINGLE_BYTE_SIZE) && BITSET_AT(cc->bs, c))) {
6082 /* skip if c is included in the bitset */
6083 c = not_found;
6084 }
6085 }
6086 else {
6087 return 0; /* the bbuf contains multiple chars */
6088 }
6089 }
6090
6091 /* check bitset */
6092 for (i = 0; i < BITSET_SIZE; i++) {
6093 Bits b1 = cc->bs[i];
6094 if (b1 != 0) {
6095 if (((b1 & (b1 - 1)) == 0) && (c == not_found)) {
6096 c = BITS_IN_ROOM * i + countbits(b1 - 1);
6097 } else {
6098 return 0; /* the character class contains multiple chars */
6099 }
6100 }
6101 }
6102
6103 if (c != not_found) {
6104 *code = c;
6105 return 1;
6106 }
6107
6108 /* the character class contains no char. */
6109 return 0;
6110}
6111
6112
6113static int
6114parse_exp(Node** np, OnigToken* tok, int term,
6115 UChar** src, UChar* end, ScanEnv* env)
6116{
6117 int r, len, group = 0;
6118 Node* qn;
6119 Node** targetp;
6120
6121 *np = NULL;
6122 if (tok->type == (enum TokenSyms )term)
6123 goto end_of_token;
6124
6125 switch (tok->type) {
6126 case TK_ALT:
6127 case TK_EOT:
6128 end_of_token:
6129 *np = node_new_empty();
6130 return tok->type;
6131 break;
6132
6133 case TK_SUBEXP_OPEN:
6134 r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);
6135 if (r < 0) return r;
6136 if (r == 1) group = 1;
6137 else if (r == 2) { /* option only */
6138 Node* target;
6139 OnigOptionType prev = env->option;
6140
6141 env->option = NENCLOSE(*np)->option;
6142 r = fetch_token(tok, src, end, env);
6143 if (r < 0) {
6144 env->option = prev;
6145 return r;
6146 }
6147 r = parse_subexp(&target, tok, term, src, end, env);
6148 env->option = prev;
6149 if (r < 0) {
6150 onig_node_free(target);
6151 return r;
6152 }
6153 NENCLOSE(*np)->target = target;
6154 return tok->type;
6155 }
6156 break;
6157
6158 case TK_SUBEXP_CLOSE:
6161
6162 if (tok->escaped) goto tk_raw_byte;
6163 else goto tk_byte;
6164 break;
6165
6166 case TK_LINEBREAK:
6167 r = node_linebreak(np, env);
6168 if (r < 0) return r;
6169 break;
6170
6172 r = node_extended_grapheme_cluster(np, env);
6173 if (r < 0) return r;
6174 break;
6175
6176 case TK_KEEP:
6179 break;
6180
6181 case TK_STRING:
6182 tk_byte:
6183 {
6184 *np = node_new_str(tok->backp, *src);
6186
6187 string_loop:
6188 while (1) {
6189 r = fetch_token(tok, src, end, env);
6190 if (r < 0) return r;
6191 if (r == TK_STRING) {
6192 r = onig_node_str_cat(*np, tok->backp, *src);
6193 }
6194#ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
6195 else if (r == TK_CODE_POINT) {
6196 r = node_str_cat_codepoint(*np, env->enc, tok->u.code);
6197 }
6198#endif
6199 else {
6200 break;
6201 }
6202 if (r < 0) return r;
6203 }
6204
6205 string_end:
6206 targetp = np;
6207 goto repeat;
6208 }
6209 break;
6210
6211 case TK_RAW_BYTE:
6212 tk_raw_byte:
6213 {
6214 *np = node_new_str_raw_char((UChar )tok->u.c);
6216 len = 1;
6217 while (1) {
6218 if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
6219 if (len == enclen(env->enc, NSTR(*np)->s, NSTR(*np)->end)) {
6220 r = fetch_token(tok, src, end, env);
6221 NSTRING_CLEAR_RAW(*np);
6222 goto string_end;
6223 }
6224 }
6225
6226 r = fetch_token(tok, src, end, env);
6227 if (r < 0) return r;
6228 if (r != TK_RAW_BYTE) {
6229 /* Don't use this, it is wrong for little endian encodings. */
6230#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
6231 int rem;
6232 if (len < ONIGENC_MBC_MINLEN(env->enc)) {
6233 rem = ONIGENC_MBC_MINLEN(env->enc) - len;
6234 (void )node_str_head_pad(NSTR(*np), rem, (UChar )0);
6235 if (len + rem == enclen(env->enc, NSTR(*np)->s)) {
6236 NSTRING_CLEAR_RAW(*np);
6237 goto string_end;
6238 }
6239 }
6240#endif
6242 }
6243
6244 r = node_str_cat_char(*np, (UChar )tok->u.c);
6245 if (r < 0) return r;
6246
6247 len++;
6248 }
6249 }
6250 break;
6251
6252 case TK_CODE_POINT:
6253 {
6254 *np = node_new_empty();
6256 r = node_str_cat_codepoint(*np, env->enc, tok->u.code);
6257 if (r != 0) return r;
6258#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
6259 NSTRING_SET_RAW(*np);
6260#else
6261 goto string_loop;
6262#endif
6263 }
6264 break;
6265
6266 case TK_QUOTE_OPEN:
6267 {
6268 OnigCodePoint end_op[2];
6269 UChar *qstart, *qend, *nextp;
6270
6271 end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
6272 end_op[1] = (OnigCodePoint )'E';
6273 qstart = *src;
6274 qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
6275 if (IS_NULL(qend)) {
6276 nextp = qend = end;
6277 }
6278 *np = node_new_str(qstart, qend);
6280 *src = nextp;
6281 }
6282 break;
6283
6284 case TK_CHAR_TYPE:
6285 {
6286 switch (tok->u.prop.ctype) {
6287 case ONIGENC_CTYPE_WORD:
6288 *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not,
6289 IS_ASCII_RANGE(env->option));
6291 break;
6292
6296 {
6297 CClassNode* cc;
6298
6299 *np = node_new_cclass();
6301 cc = NCCLASS(*np);
6302 r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0,
6303 IS_ASCII_RANGE(env->option), env);
6304 if (r != 0) return r;
6305 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
6306 }
6307 break;
6308
6309 default:
6310 return ONIGERR_PARSER_BUG;
6311 break;
6312 }
6313 }
6314 break;
6315
6316 case TK_CHAR_PROPERTY:
6317 r = parse_char_property(np, tok, src, end, env);
6318 if (r != 0) return r;
6319 break;
6320
6321 case TK_CC_OPEN:
6322 {
6323 Node *asc_node;
6324 CClassNode* cc;
6325 OnigCodePoint code;
6326
6327 r = parse_char_class(np, &asc_node, tok, src, end, env);
6328 if (r != 0) {
6329 onig_node_free(asc_node);
6330 return r;
6331 }
6332
6333 cc = NCCLASS(*np);
6334 if (is_onechar_cclass(cc, &code)) {
6335 onig_node_free(*np);
6336 onig_node_free(asc_node);
6337 *np = node_new_empty();
6339 r = node_str_cat_codepoint(*np, env->enc, code);
6340 if (r != 0) return r;
6341 goto string_loop;
6342 }
6343 if (IS_IGNORECASE(env->option)) {
6344 r = cclass_case_fold(np, cc, NCCLASS(asc_node), env);
6345 if (r != 0) {
6346 onig_node_free(asc_node);
6347 return r;
6348 }
6349 }
6350 onig_node_free(asc_node);
6351 }
6352 break;
6353
6354 case TK_ANYCHAR:
6355 *np = node_new_anychar();
6357 break;
6358
6359 case TK_ANYCHAR_ANYTIME:
6360 *np = node_new_anychar();
6362 qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
6364 NQTFR(qn)->target = *np;
6365 *np = qn;
6366 break;
6367
6368 case TK_BACKREF:
6369 len = tok->u.backref.num;
6370 *np = node_new_backref(len,
6371 (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
6372 tok->u.backref.by_name,
6374 tok->u.backref.exist_level,
6375 tok->u.backref.level,
6376#endif
6377 env);
6379 break;
6380
6381#ifdef USE_SUBEXP_CALL
6382 case TK_CALL:
6383 {
6384 int gnum = tok->u.call.gnum;
6385
6386 if (gnum < 0 || tok->u.call.rel != 0) {
6387 if (gnum > 0) gnum--;
6388 gnum = BACKREF_REL_TO_ABS(gnum, env);
6389 if (gnum <= 0)
6391 }
6392 *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);
6394 env->num_call++;
6395 }
6396 break;
6397#endif
6398
6399 case TK_ANCHOR:
6400 *np = onig_node_new_anchor(tok->u.anchor.subtype);
6402 NANCHOR(*np)->ascii_range = tok->u.anchor.ascii_range;
6403 break;
6404
6405 case TK_OP_REPEAT:
6406 case TK_INTERVAL:
6410 else
6411 *np = node_new_empty();
6412 }
6413 else {
6414 goto tk_byte;
6415 }
6416 break;
6417
6418 default:
6419 return ONIGERR_PARSER_BUG;
6420 break;
6421 }
6422
6423 {
6424 targetp = np;
6425
6426 re_entry:
6427 r = fetch_token(tok, src, end, env);
6428 if (r < 0) return r;
6429
6430 repeat:
6431 if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
6432 if (is_invalid_quantifier_target(*targetp))
6434
6435 qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
6436 (r == TK_INTERVAL ? 1 : 0));
6438 NQTFR(qn)->greedy = tok->u.repeat.greedy;
6439 r = set_quantifier(qn, *targetp, group, env);
6440 if (r < 0) {
6441 onig_node_free(qn);
6442 return r;
6443 }
6444
6445 if (tok->u.repeat.possessive != 0) {
6446 Node* en;
6447 en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
6448 if (IS_NULL(en)) {
6449 onig_node_free(qn);
6450 return ONIGERR_MEMORY;
6451 }
6452 NENCLOSE(en)->target = qn;
6453 qn = en;
6454 }
6455
6456 if (r == 0) {
6457 *targetp = qn;
6458 }
6459 else if (r == 1) {
6460 onig_node_free(qn);
6461 }
6462 else if (r == 2) { /* split case: /abc+/ */
6463 Node *tmp;
6464
6465 *targetp = node_new_list(*targetp, NULL);
6466 if (IS_NULL(*targetp)) {
6467 onig_node_free(qn);
6468 return ONIGERR_MEMORY;
6469 }
6470 tmp = NCDR(*targetp) = node_new_list(qn, NULL);
6471 if (IS_NULL(tmp)) {
6472 onig_node_free(qn);
6473 return ONIGERR_MEMORY;
6474 }
6475 targetp = &(NCAR(tmp));
6476 }
6477 goto re_entry;
6478 }
6479 }
6480
6481 return r;
6482}
6483
6484static int
6485parse_branch(Node** top, OnigToken* tok, int term,
6486 UChar** src, UChar* end, ScanEnv* env)
6487{
6488 int r;
6489 Node *node, **headp;
6490
6491 *top = NULL;
6492 r = parse_exp(&node, tok, term, src, end, env);
6493 if (r < 0) {
6494 onig_node_free(node);
6495 return r;
6496 }
6497
6498 if (r == TK_EOT || r == term || r == TK_ALT) {
6499 *top = node;
6500 }
6501 else {
6502 *top = node_new_list(node, NULL);
6503 headp = &(NCDR(*top));
6504 while (r != TK_EOT && r != term && r != TK_ALT) {
6505 r = parse_exp(&node, tok, term, src, end, env);
6506 if (r < 0) {
6507 onig_node_free(node);
6508 return r;
6509 }
6510
6511 if (NTYPE(node) == NT_LIST) {
6512 *headp = node;
6513 while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);
6514 headp = &(NCDR(node));
6515 }
6516 else {
6517 *headp = node_new_list(node, NULL);
6518 headp = &(NCDR(*headp));
6519 }
6520 }
6521 }
6522
6523 return r;
6524}
6525
6526/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
6527static int
6528parse_subexp(Node** top, OnigToken* tok, int term,
6529 UChar** src, UChar* end, ScanEnv* env)
6530{
6531 int r;
6532 Node *node, **headp;
6533
6534 *top = NULL;
6535 env->parse_depth++;
6536 if (env->parse_depth > ParseDepthLimit)
6538 r = parse_branch(&node, tok, term, src, end, env);
6539 if (r < 0) {
6540 onig_node_free(node);
6541 return r;
6542 }
6543
6544 if (r == term) {
6545 *top = node;
6546 }
6547 else if (r == TK_ALT) {
6548 *top = onig_node_new_alt(node, NULL);
6549 headp = &(NCDR(*top));
6550 while (r == TK_ALT) {
6551 r = fetch_token(tok, src, end, env);
6552 if (r < 0) return r;
6553 r = parse_branch(&node, tok, term, src, end, env);
6554 if (r < 0) {
6555 onig_node_free(node);
6556 return r;
6557 }
6558
6559 *headp = onig_node_new_alt(node, NULL);
6560 headp = &(NCDR(*headp));
6561 }
6562
6563 if (tok->type != (enum TokenSyms )term)
6564 goto err;
6565 }
6566 else {
6567 onig_node_free(node);
6568 err:
6569 if (term == TK_SUBEXP_CLOSE)
6571 else
6572 return ONIGERR_PARSER_BUG;
6573 }
6574
6575 env->parse_depth--;
6576 return r;
6577}
6578
6579static int
6580parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
6581{
6582 int r;
6583 OnigToken tok;
6584
6585 r = fetch_token(&tok, src, end, env);
6586 if (r < 0) return r;
6587 r = parse_subexp(top, &tok, TK_EOT, src, end, env);
6588 if (r < 0) return r;
6589
6590#ifdef USE_SUBEXP_CALL
6591 if (env->num_call > 0) {
6592 /* Capture the pattern itself. It is used for (?R), (?0) and \g<0>. */
6593 const int num = 0;
6594 Node* np;
6595 np = node_new_enclose_memory(env->option, 0);
6597 NENCLOSE(np)->regnum = num;
6598 NENCLOSE(np)->target = *top;
6599 r = scan_env_set_mem_node(env, num, np);
6600 if (r != 0) {
6601 onig_node_free(np);
6602 return r;
6603 }
6604 *top = np;
6605 }
6606#endif
6607 return 0;
6608}
6609
6610extern int
6611onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
6612 regex_t* reg, ScanEnv* env)
6613{
6614 int r;
6615 UChar* p;
6616
6617#ifdef USE_NAMED_GROUP
6618 names_clear(reg);
6619#endif
6620
6621 scan_env_clear(env);
6622 env->option = reg->options;
6623 env->case_fold_flag = reg->case_fold_flag;
6624 env->enc = reg->enc;
6625 env->syntax = reg->syntax;
6626 env->pattern = (UChar* )pattern;
6627 env->pattern_end = (UChar* )end;
6628 env->reg = reg;
6629
6630 *root = NULL;
6631 p = (UChar* )pattern;
6632 r = parse_regexp(root, &p, (UChar* )end, env);
6633 reg->num_mem = env->num_mem;
6634 return r;
6635}
6636
6637extern void
6639 UChar* arg, UChar* arg_end)
6640{
6641 env->error = arg;
6642 env->error_end = arg_end;
6643}
#define bad(x)
Definition: _sdbm.c:123
struct rb_encoding_entry * list
Definition: encoding.c:56
void rb_warn(const char *fmt,...)
Definition: error.c:315
const char term
Definition: id.c:37
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:39
unsigned int top
Definition: nkf.c:4323
const char * name
Definition: nkf.c:208
#define ARG_UNUSED
Definition: nkf.h:181
#define ONIG_SYN_OP_ESC_C_CONTROL
Definition: onigmo.h:546
#define ONIG_SYN_OP_LPAREN_SUBEXP
Definition: onigmo.h:531
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME
Definition: onigmo.h:688
#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
Definition: onigmo.h:552
#define ONIGENC_CTYPE_GRAPH
Definition: onigmo.h:299
#define ONIGERR_END_PATTERN_AT_META
Definition: onigmo.h:647
#define ONIG_SYN_OP_ASTERISK_ZERO_INF
Definition: onigmo.h:521
#define ONIGENC_APPLY_ALL_CASE_FOLD(enc, case_fold_flag, f, arg)
Definition: onigmo.h:338
#define ONIGENC_CTYPE_ASCII
Definition: onigmo.h:308
#define ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP
Definition: onigmo.h:599
#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS
Definition: onigmo.h:669
#define ONIG_OPTION_DONT_CAPTURE_GROUP
Definition: onigmo.h:459
#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT
Definition: onigmo.h:553
#define ONIGENC_CTYPE_DIGIT
Definition: onigmo.h:298
#define ONIG_SYN_OP_QMARK_NON_GREEDY
Definition: onigmo.h:544
#define ONIG_NO_SUPPORT_CONFIG
Definition: onigmo.h:626
#define ONIGENC_CODE_TO_MBC(enc, code, buf)
Definition: onigmo.h:368
#define ONIG_SYN_OP_BRACKET_CC
Definition: onigmo.h:536
#define ONIG_SYN_OP_ESC_VBAR_ALT
Definition: onigmo.h:530
#define ONIGERR_PARSE_DEPTH_LIMIT_OVER
Definition: onigmo.h:636
#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
Definition: onigmo.h:567
#define ONIG_SYN_OP2_QMARK_VBAR_BRANCH_RESET
Definition: onigmo.h:580
#define ONIG_SYN_OP2_OPTION_PERL
Definition: onigmo.h:554
#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY
Definition: onigmo.h:597
#define ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP
Definition: onigmo.h:582
#define ONIGENC_IS_CODE_CTYPE(enc, code, ctype)
Definition: onigmo.h:372
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID
Definition: onigmo.h:655
#define ONIG_MAX_REPEAT_NUM
Definition: onigmo.h:440
#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY
Definition: onigmo.h:568
#define ONIGENC_CTYPE_XDIGIT
Definition: onigmo.h:305
#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
Definition: onigmo.h:538
#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT
Definition: onigmo.h:569
#define ONIGENC_CODE_RANGE_FROM(range, i)
Definition: onigmo.h:140
#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP
Definition: onigmo.h:532
#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC
Definition: onigmo.h:603
#define ONIG_IS_OPTION_ON(options, option)
Definition: onigmo.h:476
#define ONIG_SYN_OP_ESC_BRACE_INTERVAL
Definition: onigmo.h:528
#define ONIG_INEFFECTIVE_META_CHAR
Definition: onigmo.h:619
#define ONIG_REGION_NOTPOS
Definition: onigmo.h:734
#define ONIG_SYNTAX_RUBY
Definition: onigmo.h:511
#define ONIGENC_MBC_TO_CODE(enc, p, end)
Definition: onigmo.h:366
#define ONIG_SYN_WARN_CC_DUP
Definition: onigmo.h:609
#define ONIGERR_META_CODE_SYNTAX
Definition: onigmo.h:649
#define ONIG_SYN_OP_BRACE_INTERVAL
Definition: onigmo.h:527
#define ONIG_SYN_OP_ESC_B_WORD_BOUND
Definition: onigmo.h:539
#define ONIGERR_PARSER_BUG
Definition: onigmo.h:631
#define ONIG_SYN_OP_DECIMAL_BACKREF
Definition: onigmo.h:535
#define ONIG_SYN_OP_ESC_W_WORD
Definition: onigmo.h:537
#define ONIGENC_CTYPE_ALNUM
Definition: onigmo.h:307
#define ONIG_SYN_OP2_QMARK_TILDE_ABSENT
Definition: onigmo.h:583
#define ONIGENC_CTYPE_ALPHA
Definition: onigmo.h:295
#define ONIGENC_IS_CODE_XDIGIT(enc, code)
Definition: onigmo.h:398
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME
Definition: onigmo.h:596
#define ONIGENC_CTYPE_SPACE
Definition: onigmo.h:303
#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR
Definition: onigmo.h:533
#define ONIGENC_IS_UNICODE(enc)
Definition: onigmo.h:327
#define ONIGERR_END_PATTERN_AT_ESCAPE
Definition: onigmo.h:646
#define ONIG_OPTION_MULTILINE
Definition: onigmo.h:453
#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE
Definition: onigmo.h:678
#define ONIGENC_CTYPE_PUNCT
Definition: onigmo.h:302
#define ONIG_SYN_OP2_QMARK_LPAREN_CONDITION
Definition: onigmo.h:581
#define ONIGERR_INVALID_GROUP_NAME
Definition: onigmo.h:680
#define ONIG_SYN_OP_ESC_X_BRACE_HEX8
Definition: onigmo.h:549
#define ONIGENC_IS_CODE_DIGIT(enc, code)
Definition: onigmo.h:396
#define ONIGERR_EMPTY_CHAR_CLASS
Definition: onigmo.h:644
#define UChar
Definition: onigmo.h:76
#define ONIGENC_CODE_TO_MBC_MAXLEN
Definition: onigmo.h:289
#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV
Definition: onigmo.h:592
#define ONIGENC_CTYPE_WORD
Definition: onigmo.h:306
#define ONIGERR_UNDEFINED_GROUP_OPTION
Definition: onigmo.h:660
#define ONIG_NORMAL
Definition: onigmo.h:624
#define ONIGERR_END_PATTERN_AT_CONTROL
Definition: onigmo.h:648
#define ONIGERR_END_PATTERN_AT_LEFT_BRACE
Definition: onigmo.h:642
#define ONIG_OPTION_POSIX_BRACKET_ALL_RANGE
Definition: onigmo.h:468
#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, p, end)
Definition: onigmo.h:369
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED
Definition: onigmo.h:607
#define ONIGERR_UNDEFINED_NAME_REFERENCE
Definition: onigmo.h:682
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL
Definition: onigmo.h:557
#define ONIGENC_CTYPE_UPPER
Definition: onigmo.h:304
#define ONIG_OPTION_ASCII_RANGE
Definition: onigmo.h:467
#define ONIG_SYN_OP_LINE_ANCHOR
Definition: onigmo.h:542
#define ONIG_SYN_OP2_ESC_V_VTAB
Definition: onigmo.h:565
#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE
Definition: onigmo.h:668
void(* OnigWarnFunc)(const char *s)
Definition: onigmo.h:749
#define ONIGENC_IS_CODE_WORD(enc, code)
Definition: onigmo.h:400
#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS
Definition: onigmo.h:653
unsigned int OnigCodePoint
Definition: onigmo.h:80
#define ONIG_OPTION_IGNORECASE
Definition: onigmo.h:451
#define ONIG_SYN_OP2_OPTION_RUBY
Definition: onigmo.h:555
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT
Definition: onigmo.h:608
#define ONIGERR_INVALID_POSIX_BRACKET_TYPE
Definition: onigmo.h:661
#define ONIG_SYN_OP_ESC_OCTAL3
Definition: onigmo.h:547
#define ONIGERR_MEMORY
Definition: onigmo.h:629
#define ONIG_SYN_OP_PLUS_ONE_INF
Definition: onigmo.h:523
#define ONIG_SYN_OP_DOT_ANYCHAR
Definition: onigmo.h:520
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE
Definition: onigmo.h:691
#define ONIGERR_TOO_SHORT_DIGITS
Definition: onigmo.h:677
#define ONIG_ENCODING_ASCII
Definition: onigmo.h:225
#define ONIG_OPTION_CAPTURE_GROUP
Definition: onigmo.h:460
#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS
Definition: onigmo.h:657
#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE
Definition: onigmo.h:526
#define ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER
Definition: onigmo.h:574
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS
Definition: onigmo.h:645
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT
Definition: onigmo.h:556
#define ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP
Definition: onigmo.h:577
#define ONIGENC_MBC_MAXLEN(enc)
Definition: onigmo.h:362
#define ONIGENC_MBC_MINLEN(enc)
Definition: onigmo.h:364
#define ONIGERR_EMPTY_GROUP_NAME
Definition: onigmo.h:679
#define ONIG_SYN_OP2_ESC_H_XDIGIT
Definition: onigmo.h:571
#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY
Definition: onigmo.h:562
#define ONIG_SYN_OP_ESC_CONTROL_CHARS
Definition: onigmo.h:545
#define ONIGENC_CTYPE_CNTRL
Definition: onigmo.h:297
#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP
Definition: onigmo.h:559
#define ONIGENC_CTYPE_PRINT
Definition: onigmo.h:301
#define ONIG_SYN_OP_ESC_O_BRACE_OCTAL
Definition: onigmo.h:550
#define ONIG_SYN_OP2_CCLASS_SET_OP
Definition: onigmo.h:558
#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES
Definition: onigmo.h:671
#define ONIGERR_END_PATTERN_IN_GROUP
Definition: onigmo.h:659
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
Definition: onigmo.h:595
#define ONIGENC_CTYPE_BLANK
Definition: onigmo.h:296
#define ONIG_SYN_ALLOW_INVALID_INTERVAL
Definition: onigmo.h:591
#define ONIGERR_MULTIPLEX_DEFINED_NAME
Definition: onigmo.h:684
#define ONIGENC_CTYPE_LOWER
Definition: onigmo.h:300
#define ONIG_SYN_OP_ESC_D_DIGIT
Definition: onigmo.h:541
#define ONIG_SYN_OP2_ESC_G_BRACE_BACKREF
Definition: onigmo.h:578
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC
Definition: onigmo.h:605
#define ONIG_SYN_OP_POSIX_BRACKET
Definition: onigmo.h:543
#define ONIG_SYN_OP_ESC_S_WHITE_SPACE
Definition: onigmo.h:540
#define ONIGENC_CODE_RANGE_NUM(range)
Definition: onigmo.h:139
#define ONIG_MAX_BACKREF_NUM
Definition: onigmo.h:439
#define ONIG_SYN_STRICT_CHECK_BACKREF
Definition: onigmo.h:593
#define ONIGENC_CODE_RANGE_TO(range, i)
Definition: onigmo.h:141
#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF
Definition: onigmo.h:560
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
Definition: onigmo.h:594
#define ONIG_SYN_OP2_ESC_U_HEX4
Definition: onigmo.h:566
ONIG_EXTERN OnigUChar * onigenc_get_prev_char_head(OnigEncoding enc, const OnigUChar *start, const OnigUChar *s, const OnigUChar *end)
#define ONIGENC_IS_SINGLEBYTE(enc)
Definition: onigmo.h:318
#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR
Definition: onigmo.h:534
#define ONIG_SYN_OP_ESC_PLUS_ONE_INF
Definition: onigmo.h:524
#define ONIG_SYN_OP_VBAR_ALT
Definition: onigmo.h:529
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED
Definition: onigmo.h:654
#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE
Definition: onigmo.h:667
#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE
Definition: onigmo.h:651
#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS
Definition: onigmo.h:658
#define ONIGERR_INVALID_CONDITION_PATTERN
Definition: onigmo.h:664
#define ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK
Definition: onigmo.h:573
unsigned int OnigOptionType
Definition: onigmo.h:445
#define ONIG_OPTION_WORD_BOUND_ALL_RANGE
Definition: onigmo.h:469
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, sbout, ranges)
Definition: onigmo.h:403
#define ONIGERR_INVALID_BACKREF
Definition: onigmo.h:674
ONIG_EXTERN int onigenc_strlen(OnigEncoding enc, const OnigUChar *p, const OnigUChar *end)
#define ONIG_SYN_OP2_QMARK_SUBEXP_CALL
Definition: onigmo.h:579
#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF
Definition: onigmo.h:522
#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS
Definition: onigmo.h:589
#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN
Definition: onigmo.h:663
#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP
Definition: onigmo.h:590
#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL
Definition: onigmo.h:561
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
Definition: onigmo.h:367
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC
Definition: onigmo.h:602
#define ONIG_MAX_CAPTURE_GROUP_NUM
Definition: onigmo.h:438
#define ONIG_SYN_OP_QMARK_ZERO_ONE
Definition: onigmo.h:525
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY
Definition: onigmo.h:687
#define ONIGERR_INVALID_CODE_POINT_VALUE
Definition: onigmo.h:689
#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS
Definition: onigmo.h:519
#define ONIG_SYN_OP_ESC_X_HEX2
Definition: onigmo.h:548
#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL
Definition: onigmo.h:563
#define ONIG_OPTION_SINGLELINE
Definition: onigmo.h:455
#define ONIG_MAX_MULTI_BYTE_RANGES_NUM
Definition: onigmo.h:441
#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META
Definition: onigmo.h:564
#define ONIGERR_TOO_BIG_NUMBER
Definition: onigmo.h:666
#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING
Definition: onigmo.h:672
#define ONIGERR_CONTROL_CODE_SYNTAX
Definition: onigmo.h:650
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
Definition: onigmo.h:604
#define ONIG_OPTION_EXTEND
Definition: onigmo.h:452
#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS
Definition: onigmo.h:588
#define ONIGERR_TOO_MANY_CAPTURE_GROUPS
Definition: onigmo.h:676
#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME
Definition: onigmo.h:681
#define ONIGENC_IS_CODE_NEWLINE(enc, code)
Definition: onigmo.h:374
#define NULL
#define RTEST(v)
unsigned long st_data_t
void rb_compile_warn(const char *, int, const char *,...) __attribute__((format(printf
size_t strlen(const char *)
long int ptrdiff_t
#define xfree
const struct rb_call_cache * cc
#define MIN(a, b)
#define xrealloc
#define numberof(array)
int fprintf(FILE *__restrict__, const char *__restrict__,...) __attribute__((__format__(__printf__
const char size_t n
#define ruby_verbose
__inline__ const void *__restrict__ src
#define xmalloc
uint32_t i
__inline__ const void *__restrict__ size_t len
const VALUE int int int int int int VALUE char * fmt
#define va_end(v)
__gnuc_va_list va_list
return cc call
const char * s2
int VALUE v
#define va_start(v, l)
#define FALSE
unsigned int size
struct rb_call_cache buf
st_data_t st_index_t
__inline__ int
int st_foreach_callback_func(st_data_t, st_data_t, st_data_t)
if((__builtin_expect(!!(!me), 0)))
int fputs(const char *__restrict__, FILE *__restrict__)
#define MAX(a, b)
int onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:6120
UChar * onigenc_step(OnigEncoding enc, const UChar *p, const UChar *end, int n)
Definition: regenc.c:113
int onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar *p, const UChar *end, const UChar *sascii, int n)
Definition: regenc.c:860
#define POSIX_BRACKET_ENTRY_INIT(name, ctype)
Definition: regenc.h:124
#define enclen(enc, p, e)
Definition: regenc.h:93
#define ONIGENC_IS_ASCII_CODE(code)
Definition: regenc.h:216
void onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, UChar *pat, UChar *pat_end, const UChar *fmt, va_list args)
Definition: regerror.c:314
#define INT_MAX_LIMIT
Definition: regint.h:373
st_data_t hash_data_type
Definition: regint.h:925
#define IS_MC_ESC_CODE(code, syn)
Definition: regint.h:755
#define MC_ANYTIME(syn)
Definition: regint.h:750
#define BBUF_MOVE_RIGHT(buf, from, to, n)
Definition: regint.h:497
#define ANCHOR_BEGIN_LINE
Definition: regint.h:528
#define BBUF_MOVE_LEFT_REDUCE(buf, from, to)
Definition: regint.h:509
#define IS_ASCII_RANGE(option)
Definition: regint.h:393
#define DIGITVAL(code)
Definition: regint.h:375
#define CHECK_NULL_RETURN_MEMERR(p)
Definition: regint.h:301
#define ANCHOR_PREC_READ_NOT
Definition: regint.h:539
#define BBUF_ENSURE_SIZE(buf, size)
Definition: regint.h:465
#define BIT_STATUS_BITS_NUM
Definition: regint.h:354
#define MC_ONE_OR_MORE_TIME(syn)
Definition: regint.h:752
#define BITSET_CLEAR_BIT(bs, pos)
Definition: regint.h:437
#define ANCHOR_BEGIN_POSITION
Definition: regint.h:529
#define ONIG_LAST_CODE_POINT
Definition: regint.h:304
#define BITSET_AT(bs, pos)
Definition: regint.h:435
unsigned char Bits
Definition: regint.h:420
#define CHECK_NULL_RETURN(p)
Definition: regint.h:300
#define ANCHOR_LOOK_BEHIND
Definition: regint.h:540
#define IS_NOT_NULL(p)
Definition: regint.h:299
#define ANCHOR_END_LINE
Definition: regint.h:532
#define ANCHOR_BEGIN_BUF
Definition: regint.h:527
#define ANCHOR_WORD_BOUND
Definition: regint.h:534
#define SYN_GNU_REGEX_OP
Definition: regint.h:767
#define DEFAULT_PARSE_DEPTH_LIMIT
Definition: regint.h:88
#define ANCHOR_WORD_BEGIN
Definition: regint.h:536
#define NULL_UCHARP
Definition: regint.h:302
#define BBUF_INIT(buf, size)
Definition: regint.h:447
#define IS_REPEAT_INFINITE(n)
Definition: regint.h:409
#define ANCHOR_LOOK_BEHIND_NOT
Definition: regint.h:541
#define BITSET_SIZE
Definition: regint.h:415
#define SINGLE_BYTE_SIZE
Definition: regint.h:413
#define MC_ANYCHAR(syn)
Definition: regint.h:749
#define GET_CODE_POINT(code, p)
Definition: regint.h:697
#define IS_NULL(p)
Definition: regint.h:298
#define ANCHOR_SEMI_END_BUF
Definition: regint.h:531
#define ODIGITVAL(code)
Definition: regint.h:376
#define IS_WORD_BOUND_ALL_RANGE(option)
Definition: regint.h:395
#define SIZE_CODE_POINT
Definition: regint.h:683
#define ANCHOR_PREC_READ
Definition: regint.h:538
#define NCCLASS_CLEAR_NOT(nd)
Definition: regint.h:795
#define BITS_IN_ROOM
Definition: regint.h:414
void hash_table_type
Definition: regint.h:919
#define IS_NCCLASS_NOT(nd)
Definition: regint.h:796
Bits BitSet[BITSET_SIZE]
Definition: regint.h:422
#define REPEAT_INFINITE
Definition: regint.h:408
#define ANCHOR_KEEP
Definition: regint.h:546
#define ANCHOR_NOT_WORD_BOUND
Definition: regint.h:535
#define BITSET_SET_BIT(bs, pos)
Definition: regint.h:436
#define IS_SINGLELINE(option)
Definition: regint.h:381
#define IS_POSIX_BRACKET_ALL_RANGE(option)
Definition: regint.h:394
#define IS_EXTEND(option)
Definition: regint.h:384
#define USE_BACKREF_WITH_LEVEL
Definition: regint.h:73
#define BIT_STATUS_CLEAR(stats)
Definition: regint.h:355
#define BITSET_CLEAR(bs)
Definition: regint.h:427
#define XDIGITVAL(enc, code)
Definition: regint.h:377
#define SYN_GNU_REGEX_BV
Definition: regint.h:780
#define xmemcpy
Definition: regint.h:202
Bits * BitSetRef
Definition: regint.h:423
#define IS_IGNORECASE(option)
Definition: regint.h:383
#define MC_ESC(syn)
Definition: regint.h:748
#define MC_ANYCHAR_ANYTIME(syn)
Definition: regint.h:753
#define BIT_STATUS_ON_AT_SIMPLE(stats, n)
Definition: regint.h:367
#define ANCHOR_WORD_END
Definition: regint.h:537
#define ANCHOR_END_BUF
Definition: regint.h:530
#define MC_ZERO_OR_ONE_TIME(syn)
Definition: regint.h:751
#define NCCLASS_SET_NOT(nd)
Definition: regint.h:794
#define is_invalid_quantifier_target(node)
Definition: regparse.c:2122
st_table NameTable
Definition: regparse.c:464
#define INIT_MULTI_BYTE_RANGE_SIZE
#define PINC
Definition: regparse.c:301
int onig_name_to_group_numbers(regex_t *reg, const UChar *name, const UChar *name_end, int **nums)
Definition: regparse.c:887
int onig_foreach_name(regex_t *reg, int(*func)(const UChar *, const UChar *, int, int *, regex_t *, void *), void *arg)
Definition: regparse.c:576
int onig_noname_group_capture_is_active(const regex_t *reg)
Definition: regparse.c:963
Node * onig_node_new_list(Node *left, Node *right)
Definition: regparse.c:1186
Node * onig_node_new_anchor(int type)
Definition: regparse.c:1222
ReduceType
Definition: regparse.c:2183
@ RQ_QQ
Definition: regparse.c:2188
@ RQ_PQ_Q
Definition: regparse.c:2190
@ RQ_ASIS
Definition: regparse.c:2184
@ RQ_AQ
Definition: regparse.c:2187
@ RQ_P_QQ
Definition: regparse.c:2189
@ RQ_DEL
Definition: regparse.c:2185
@ RQ_A
Definition: regparse.c:2186
#define INIT_SCANENV_MEMNODES_ALLOC_SIZE
Definition: regparse.c:980
#define POSIX_BRACKET_NAME_MIN_LEN
void onig_null_warn(const char *s ARG_UNUSED)
Definition: regparse.c:87
#define PPEEK
Definition: regparse.c:319
void onig_set_warn_func(OnigWarnFunc f)
Definition: regparse.c:101
#define R_ERR(call)
Definition: regparse.c:5797
unsigned int onig_get_parse_depth_limit(void)
Definition: regparse.c:117
int onig_st_insert_strend(hash_table_type *table, const UChar *str_key, const UChar *end_key, hash_data_type value)
Definition: regparse.c:430
#define PEND
Definition: regparse.c:299
#define PFETCH_READY
Definition: regparse.c:297
#define ONOFF(v, f, negative)
Definition: regparse.c:160
int onig_node_str_cat(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1376
#define PPEEK_IS(c)
Definition: regparse.c:320
#define POSIX_BRACKET_CHECK_LIMIT_LENGTH
int onig_parse_make_tree(Node **root, const UChar *pattern, const UChar *end, regex_t *reg, ScanEnv *env)
Definition: regparse.c:6611
#define WARN_BUFSIZE
Definition: regparse.c:34
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf)
Definition: regparse.c:168
#define INIT_NAME_BACKREFS_ALLOC_NUM
Definition: regparse.c:451
hash_table_type * onig_st_init_strend_table_with_size(st_index_t size)
Definition: regparse.c:406
int onig_number_of_names(const regex_t *reg)
Definition: regparse.c:623
#define MBCODE_START_POS(enc)
Definition: regparse.c:162
const OnigSyntaxType * OnigDefaultSyntax
Definition: regparse.c:85
#define PFETCH(c)
Definition: regparse.c:305
void onig_scan_env_set_error_string(ScanEnv *env, int ecode ARG_UNUSED, UChar *arg, UChar *arg_end)
Definition: regparse.c:6638
Node * onig_node_list_add(Node *list, Node *x)
Definition: regparse.c:1192
int onig_name_to_backref_number(regex_t *reg, const UChar *name, const UChar *name_end, const OnigRegion *region)
Definition: regparse.c:909
#define NODE_COMMON_SIZE
Definition: regparse.c:5820
#define ALT
Definition: regparse.c:5769
#define NEWLINE_CODE
void onig_node_free(Node *node)
Definition: regparse.c:1062
Node * onig_node_new_enclose(int type)
Definition: regparse.c:1347
#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2)
Definition: regparse.c:1815
const OnigSyntaxType OnigSyntaxRuby
Definition: regparse.c:39
Node * onig_node_new_alt(Node *left, Node *right)
Definition: regparse.c:1210
Node * onig_node_new_str(const UChar *s, const UChar *end)
Definition: regparse.c:1481
#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
Definition: regparse.c:36
#define BACKREF_REL_TO_ABS(rel_no, env)
Definition: regparse.c:157
#define ONIGENC_IS_CODE_NAME(enc, c)
Definition: regparse.c:2510
int onig_set_parse_depth_limit(unsigned int depth)
Definition: regparse.c:123
#define BITSET_IS_EMPTY(bs, empty)
Definition: regparse.c:181
TokenSyms
Definition: regparse.c:2254
@ TK_CC_AND
Definition: regparse.c:2281
@ TK_ANYCHAR
Definition: regparse.c:2260
@ TK_ANYCHAR_ANYTIME
Definition: regparse.c:2267
@ TK_SUBEXP_CLOSE
Definition: regparse.c:2270
@ TK_CC_RANGE
Definition: regparse.c:2279
@ TK_STRING
Definition: regparse.c:2258
@ TK_ANCHOR
Definition: regparse.c:2264
@ TK_EOT
Definition: regparse.c:2255
@ TK_QUOTE_OPEN
Definition: regparse.c:2272
@ TK_CC_OPEN
Definition: regparse.c:2271
@ TK_LINEBREAK
Definition: regparse.c:2274
@ TK_BACKREF
Definition: regparse.c:2262
@ TK_CHAR_TYPE
Definition: regparse.c:2261
@ TK_SUBEXP_OPEN
Definition: regparse.c:2269
@ TK_INTERVAL
Definition: regparse.c:2266
@ TK_POSIX_BRACKET_OPEN
Definition: regparse.c:2280
@ TK_CODE_POINT
Definition: regparse.c:2259
@ TK_KEEP
Definition: regparse.c:2276
@ TK_CALL
Definition: regparse.c:2263
@ TK_CHAR_PROPERTY
Definition: regparse.c:2273
@ TK_CC_CC_OPEN
Definition: regparse.c:2282
@ TK_ALT
Definition: regparse.c:2268
@ TK_EXTENDED_GRAPHEME_CLUSTER
Definition: regparse.c:2275
@ TK_RAW_BYTE
Definition: regparse.c:2256
@ TK_CC_CLOSE
Definition: regparse.c:2278
@ TK_OP_REPEAT
Definition: regparse.c:2265
@ TK_CHAR
Definition: regparse.c:2257
int onig_node_str_set(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1412
void onig_node_str_clear(Node *node)
Definition: regparse.c:1449
#define PFETCH_S(c)
Definition: regparse.c:314
#define BITSET_SET_BIT_CHKDUP(bs, pos)
Definition: regparse.c:176
int onig_names_free(regex_t *reg)
Definition: regparse.c:525
void onig_reduce_nested_quantifier(Node *pnode, Node *cnode)
Definition: regparse.c:2204
#define PUNFETCH
Definition: regparse.c:300
st_data_t HashDataType
Definition: regparse.c:465
#define PINC_S
Definition: regparse.c:311
#define BBUF_WRITE_CODE_POINT(bbuf, pos, code)
Definition: regparse.c:1644
int onig_st_lookup_strend(hash_table_type *table, const UChar *str_key, const UChar *end_key, hash_data_type *value)
Definition: regparse.c:418
void onig_strcpy(UChar *dest, const UChar *src, const UChar *end)
Definition: regparse.c:259
CCSTATE
Definition: regparse.c:4401
@ CCS_COMPLETE
Definition: regparse.c:4404
@ CCS_START
Definition: regparse.c:4405
@ CCS_RANGE
Definition: regparse.c:4403
@ CCS_VALUE
Definition: regparse.c:4402
void onig_set_verb_warn_func(OnigWarnFunc f)
Definition: regparse.c:106
#define LIST
Definition: regparse.c:5768
int onig_renumber_name_table(regex_t *reg, GroupNumRemap *map)
Definition: regparse.c:611
int onig_scan_unsigned_number(UChar **src, const UChar *end, OnigEncoding enc)
Definition: regparse.c:1556
CCVALTYPE
Definition: regparse.c:4408
@ CCV_CLASS
Definition: regparse.c:4411
@ CCV_SB
Definition: regparse.c:4409
@ CCV_CODE_POINT
Definition: regparse.c:4410
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf)
Definition: regparse.c:165
#define NST_RECURSION
Definition: regparse.h:135
#define NSTRING_SET_RAW(node)
Definition: regparse.h:109
int onig_strncmp(const UChar *s1, const UChar *s2, int n)
#define IS_SYNTAX_BV(syn, bvm)
Definition: regparse.h:332
#define ENCLOSE_OPTION
Definition: regparse.h:95
#define NT_CANY
Definition: regparse.h:41
#define NSTR(node)
Definition: regparse.h:76
#define NT_ENCLOSE
Definition: regparse.h:44
#define NENCLOSE(node)
Definition: regparse.h:81
#define IS_SYNTAX_OP2(syn, opm)
Definition: regparse.h:331
#define NT_QTFR
Definition: regparse.h:43
#define ENCLOSE_MEMORY
Definition: regparse.h:94
#define NT_CALL
Definition: regparse.h:48
#define NBREF(node)
Definition: regparse.h:79
#define NST_NEST_LEVEL
Definition: regparse.h:141
#define NODE_STR_MARGIN
Definition: regparse.h:100
#define NT_ANCHOR
Definition: regparse.h:45
#define ENCLOSE_ABSENT
Definition: regparse.h:98
#define NT_CTYPE
Definition: regparse.h:40
#define NCTYPE(node)
Definition: regparse.h:78
#define NSTR_RAW
Definition: regparse.h:104
#define NULL_NODE
Definition: regparse.h:283
#define NSTRING_CLEAR_RAW(node)
Definition: regparse.h:110
#define NST_NAMED_GROUP
Definition: regparse.h:138
#define ENCLOSE_CONDITION
Definition: regparse.h:97
#define SET_ENCLOSE_STATUS(node, f)
Definition: regparse.h:144
#define NCCLASS(node)
Definition: regparse.h:77
#define SCANENV_MEM_NODES(senv)
Definition: regparse.h:286
#define ENCLOSE_STOP_BACKTRACK
Definition: regparse.h:96
#define NODE_STR_BUF_SIZE
Definition: regparse.h:101
#define NT_CCLASS
Definition: regparse.h:39
#define NSTRING_SET_AMBIG(node)
Definition: regparse.h:111
#define NQ_TARGET_ISNOT_EMPTY
Definition: regparse.h:122
#define SET_NTYPE(node, ntype)
Definition: regparse.h:70
#define NST_BY_NUMBER
Definition: regparse.h:142
#define NT_LIST
Definition: regparse.h:46
#define NST_NAME_REF
Definition: regparse.h:139
void onig_node_conv_to_str_node(Node *node, int raw)
#define NT_BREF
Definition: regparse.h:42
#define NCDR(node)
Definition: regparse.h:87
#define NCAR(node)
Definition: regparse.h:86
#define NTYPE(node)
Definition: regparse.h:69
#define NT_STR
Definition: regparse.h:38
#define NQTFR(node)
Definition: regparse.h:80
#define SCANENV_MEMNODES_SIZE
Definition: regparse.h:285
#define NT_ALT
Definition: regparse.h:47
#define NCALL(node)
Definition: regparse.h:84
#define IS_SYNTAX_OP(syn, opm)
Definition: regparse.h:330
#define NODE_BACKREFS_SIZE
Definition: regparse.h:102
#define NANCHOR(node)
Definition: regparse.h:82
#define tok(p)
Definition: ripper.c:13259
#define f
Definition: regint.h:441
unsigned int alloc
Definition: regint.h:444
UChar * p
Definition: regint.h:442
unsigned int used
Definition: regint.h:443
BitSet bs
Definition: regint.h:807
BBuf * mbuf
Definition: regint.h:808
ScanEnv * env
Definition: regparse.c:5498
CClassNode * asc_cc
Definition: regparse.c:5500
CClassNode * cc
Definition: regparse.c:5499
int ret
Definition: regparse.c:556
regex_t * reg
Definition: regparse.c:554
void * arg
Definition: regparse.c:555
OnigEncoding enc
Definition: regparse.c:557
int(* func)(const UChar *, const UChar *, int, int *, regex_t *, void *)
Definition: regparse.c:553
Definition: regparse.c:453
int back_alloc
Definition: regparse.c:457
int * back_refs
Definition: regparse.c:459
size_t name_len
Definition: regparse.c:455
int back_ref1
Definition: regparse.c:458
int back_num
Definition: regparse.c:456
UChar * name
Definition: regparse.c:454
int ref1
Definition: regparse.c:2306
int ascii_range
Definition: regparse.c:2296
int subtype
Definition: regparse.c:2295
OnigCodePoint code
Definition: regparse.c:2293
int * refs
Definition: regparse.c:2307
enum TokenSyms type
Definition: regparse.c:2286
int ctype
Definition: regparse.c:2321
int exist_level
Definition: regparse.c:2310
int upper
Definition: regparse.c:2300
int escaped
Definition: regparse.c:2287
UChar * name
Definition: regparse.c:2315
UChar * s
Definition: regparse.c:2291
int base
Definition: regparse.c:2288
int by_name
Definition: regparse.c:2308
UChar * name_end
Definition: regparse.c:2316
int lower
Definition: regparse.c:2299
int possessive
Definition: regparse.c:2302
int gnum
Definition: regparse.c:2317
UChar * backp
Definition: regparse.c:2289
int greedy
Definition: regparse.c:2301
int level
Definition: regparse.c:2311
Definition: regenc.h:118
int ctype
Definition: regenc.h:121
short int len
Definition: regenc.h:119
const UChar name[6]
Definition: regenc.h:120
int lower
Definition: regparse.h:183
struct _Node * target
Definition: regparse.h:182
int greedy
Definition: regparse.h:185
int upper
Definition: regparse.h:184
UChar * s
Definition: regparse.h:172
unsigned int flag
Definition: regparse.h:174
UChar * end
Definition: regparse.h:173
OnigEncoding enc
Definition: onigmo.h:776
void * name_table
Definition: onigmo.h:778
OnigCaseFoldType case_fold_flag
Definition: onigmo.h:779
const OnigSyntaxType * syntax
Definition: onigmo.h:777
OnigOptionType options
Definition: onigmo.h:772
OnigPosition * beg
Definition: onigmo.h:719
const UChar * s
Definition: regparse.c:362
const UChar * end
Definition: regparse.c:363
#define neg(x)
Definition: time.c:141
#define env