Ruby 2.7.6p219 (2022-04-12 revision c9c2245c0a25176072e02db9254f0e0c84c805cd)
regexec.c
Go to the documentation of this file.
1/**********************************************************************
2 regexec.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regint.h"
32
33#ifdef RUBY
34# undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
35#else
36# define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
37#endif
38
39#ifndef USE_TOKEN_THREADED_VM
40# ifdef __GNUC__
41# define USE_TOKEN_THREADED_VM 1
42# else
43# define USE_TOKEN_THREADED_VM 0
44# endif
45#endif
46
47#ifdef RUBY
48# define ENC_DUMMY_FLAG (1<<24)
49static inline int
51{
52 return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG);
53}
54# undef ONIGENC_IS_MBC_ASCII_WORD
55# define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
56 (rb_enc_asciicompat(enc) ? (ISALNUM(*s) || *s=='_') : \
57 onigenc_ascii_is_code_ctype( \
58 ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
59#endif /* RUBY */
60
61#ifdef USE_CRNL_AS_LINE_TERMINATOR
62# define ONIGENC_IS_MBC_CRNL(enc,p,end) \
63 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
64 ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10)
65# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
66 is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))
67static int
68is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start,
69 const UChar *end, OnigOptionType option, int check_prev)
70{
71 if (IS_NEWLINE_CRLF(option)) {
72 if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) {
73 if (check_prev) {
74 const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end);
75 if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d)
76 return 0;
77 else
78 return 1;
79 }
80 else
81 return 1;
82 }
83 else {
84 const UChar *pnext = p + enclen(enc, p, end);
85 if (pnext < end &&
86 ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d &&
87 ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a)
88 return 1;
89 if (ONIGENC_IS_MBC_NEWLINE(enc, p, end))
90 return 1;
91 return 0;
92 }
93 }
94 else {
95 return ONIGENC_IS_MBC_NEWLINE(enc, p, end);
96 }
97}
98#else /* USE_CRNL_AS_LINE_TERMINATOR */
99# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
100 ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
101#endif /* USE_CRNL_AS_LINE_TERMINATOR */
102
103#ifdef USE_CAPTURE_HISTORY
104static void history_tree_free(OnigCaptureTreeNode* node);
105
106static void
107history_tree_clear(OnigCaptureTreeNode* node)
108{
109 int i;
110
111 if (IS_NOT_NULL(node)) {
112 for (i = 0; i < node->num_childs; i++) {
113 if (IS_NOT_NULL(node->childs[i])) {
114 history_tree_free(node->childs[i]);
115 }
116 }
117 for (i = 0; i < node->allocated; i++) {
118 node->childs[i] = (OnigCaptureTreeNode* )0;
119 }
120 node->num_childs = 0;
121 node->beg = ONIG_REGION_NOTPOS;
122 node->end = ONIG_REGION_NOTPOS;
123 node->group = -1;
124 xfree(node->childs);
125 node->childs = (OnigCaptureTreeNode** )0;
126 }
127}
128
129static void
130history_tree_free(OnigCaptureTreeNode* node)
131{
132 history_tree_clear(node);
133 xfree(node);
134}
135
136static void
137history_root_free(OnigRegion* r)
138{
139 if (IS_NOT_NULL(r->history_root)) {
140 history_tree_free(r->history_root);
141 r->history_root = (OnigCaptureTreeNode* )0;
142 }
143}
144
145static OnigCaptureTreeNode*
146history_node_new(void)
147{
148 OnigCaptureTreeNode* node;
149
150 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
151 CHECK_NULL_RETURN(node);
152 node->childs = (OnigCaptureTreeNode** )0;
153 node->allocated = 0;
154 node->num_childs = 0;
155 node->group = -1;
156 node->beg = ONIG_REGION_NOTPOS;
157 node->end = ONIG_REGION_NOTPOS;
158
159 return node;
160}
161
162static int
163history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
164{
165# define HISTORY_TREE_INIT_ALLOC_SIZE 8
166
167 if (parent->num_childs >= parent->allocated) {
168 int n, i;
169
170 if (IS_NULL(parent->childs)) {
171 n = HISTORY_TREE_INIT_ALLOC_SIZE;
172 parent->childs =
173 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
174 CHECK_NULL_RETURN_MEMERR(parent->childs);
175 }
176 else {
177 OnigCaptureTreeNode** tmp;
178 n = parent->allocated * 2;
179 tmp =
180 (OnigCaptureTreeNode** )xrealloc(parent->childs,
181 sizeof(OnigCaptureTreeNode*) * n);
182 if (tmp == 0) {
183 history_tree_clear(parent);
184 return ONIGERR_MEMORY;
185 }
186 parent->childs = tmp;
187 }
188 for (i = parent->allocated; i < n; i++) {
189 parent->childs[i] = (OnigCaptureTreeNode* )0;
190 }
191 parent->allocated = n;
192 }
193
194 parent->childs[parent->num_childs] = child;
195 parent->num_childs++;
196 return 0;
197}
198
199static OnigCaptureTreeNode*
200history_tree_clone(OnigCaptureTreeNode* node)
201{
202 int i, r;
203 OnigCaptureTreeNode *clone, *child;
204
205 clone = history_node_new();
206 CHECK_NULL_RETURN(clone);
207
208 clone->beg = node->beg;
209 clone->end = node->end;
210 for (i = 0; i < node->num_childs; i++) {
211 child = history_tree_clone(node->childs[i]);
212 if (IS_NULL(child)) {
213 history_tree_free(clone);
214 return (OnigCaptureTreeNode* )0;
215 }
216 r = history_tree_add_child(clone, child);
217 if (r != 0) {
218 history_tree_free(child);
219 history_tree_free(clone);
220 return (OnigCaptureTreeNode* )0;
221 }
222 }
223
224 return clone;
225}
226
227extern OnigCaptureTreeNode*
228onig_get_capture_tree(OnigRegion* region)
229{
230 return region->history_root;
231}
232#endif /* USE_CAPTURE_HISTORY */
233
234extern void
236{
237 int i;
238
239 for (i = 0; i < region->num_regs; i++) {
240 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
241 }
242#ifdef USE_CAPTURE_HISTORY
243 history_root_free(region);
244#endif
245}
246
247extern int
249{
250 region->num_regs = n;
251
252 if (n < ONIG_NREGION)
253 n = ONIG_NREGION;
254
255 if (region->allocated == 0) {
256 region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
257 if (region->beg == 0)
258 return ONIGERR_MEMORY;
259
260 region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
261 if (region->end == 0) {
262 xfree(region->beg);
263 return ONIGERR_MEMORY;
264 }
265
266 region->allocated = n;
267 }
268 else if (region->allocated < n) {
269 OnigPosition *tmp;
270
271 region->allocated = 0;
272 tmp = (OnigPosition* )xrealloc(region->beg, n * sizeof(OnigPosition));
273 if (tmp == 0) {
274 xfree(region->beg);
275 xfree(region->end);
276 return ONIGERR_MEMORY;
277 }
278 region->beg = tmp;
279 tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition));
280 if (tmp == 0) {
281 xfree(region->beg);
282 xfree(region->end);
283 return ONIGERR_MEMORY;
284 }
285 region->end = tmp;
286
287 region->allocated = n;
288 }
289
290 return 0;
291}
292
293static int
294onig_region_resize_clear(OnigRegion* region, int n)
295{
296 int r;
297
298 r = onig_region_resize(region, n);
299 if (r != 0) return r;
300 onig_region_clear(region);
301 return 0;
302}
303
304extern int
305onig_region_set(OnigRegion* region, int at, int beg, int end)
306{
307 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
308
309 if (at >= region->allocated) {
310 int r = onig_region_resize(region, at + 1);
311 if (r < 0) return r;
312 }
313
314 region->beg[at] = beg;
315 region->end[at] = end;
316 return 0;
317}
318
319extern void
321{
322 region->num_regs = 0;
323 region->allocated = 0;
324 region->beg = (OnigPosition* )0;
325 region->end = (OnigPosition* )0;
326#ifdef USE_CAPTURE_HISTORY
327 region->history_root = (OnigCaptureTreeNode* )0;
328#endif
329}
330
331extern OnigRegion*
333{
334 OnigRegion* r;
335
336 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
337 if (r)
339 return r;
340}
341
342extern void
343onig_region_free(OnigRegion* r, int free_self)
344{
345 if (r) {
346 if (r->allocated > 0) {
347 if (r->beg) xfree(r->beg);
348 if (r->end) xfree(r->end);
349 r->allocated = 0;
350 }
351#ifdef USE_CAPTURE_HISTORY
352 history_root_free(r);
353#endif
354 if (free_self) xfree(r);
355 }
356}
357
358extern void
360{
361#define RREGC_SIZE (sizeof(int) * from->num_regs)
362 int i, r;
363
364 if (to == from) return;
365
366 r = onig_region_resize(to, from->num_regs);
367 if (r) return;
368
369 for (i = 0; i < from->num_regs; i++) {
370 to->beg[i] = from->beg[i];
371 to->end[i] = from->end[i];
372 }
373 to->num_regs = from->num_regs;
374
375#ifdef USE_CAPTURE_HISTORY
376 history_root_free(to);
377
378 if (IS_NOT_NULL(from->history_root)) {
379 to->history_root = history_tree_clone(from->history_root);
380 }
381#endif
382}
383
384
386#define INVALID_STACK_INDEX -1
387
388/* stack type */
389/* used by normal-POP */
390#define STK_ALT 0x0001
391#define STK_LOOK_BEHIND_NOT 0x0002
392#define STK_POS_NOT 0x0003
393/* handled by normal-POP */
394#define STK_MEM_START 0x0100
395#define STK_MEM_END 0x8200
396#define STK_REPEAT_INC 0x0300
397#define STK_STATE_CHECK_MARK 0x1000
398/* avoided by normal-POP */
399#define STK_NULL_CHECK_START 0x3000
400#define STK_NULL_CHECK_END 0x5000 /* for recursive call */
401#define STK_MEM_END_MARK 0x8400
402#define STK_POS 0x0500 /* used when POP-POS */
403#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
404#define STK_REPEAT 0x0700
405#define STK_CALL_FRAME 0x0800
406#define STK_RETURN 0x0900
407#define STK_VOID 0x0a00 /* for fill a blank */
408#define STK_ABSENT_POS 0x0b00 /* for absent */
409#define STK_ABSENT 0x0c00 /* absent inner loop marker */
410
411/* stack type check mask */
412#define STK_MASK_POP_USED 0x00ff
413#define STK_MASK_TO_VOID_TARGET 0x10ff
414#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
415
416#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
417# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
418 (msa).stack_p = (void* )0;\
419 (msa).options = (arg_option);\
420 (msa).region = (arg_region);\
421 (msa).start = (arg_start);\
422 (msa).gpos = (arg_gpos);\
423 (msa).best_len = ONIG_MISMATCH;\
424} while(0)
425#else
426# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
427 (msa).stack_p = (void* )0;\
428 (msa).options = (arg_option);\
429 (msa).region = (arg_region);\
430 (msa).start = (arg_start);\
431 (msa).gpos = (arg_gpos);\
432} while(0)
433#endif
434
435#ifdef USE_COMBINATION_EXPLOSION_CHECK
436
437# define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
438
439# define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
440 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
441 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
442 offset = ((offset) * (state_num)) >> 3;\
443 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
444 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
445 (msa).state_check_buff = (void* )xmalloc(size);\
446 CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
447 }\
448 else \
449 (msa).state_check_buff = (void* )xalloca(size);\
450 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
451 (size_t )(size - (offset))); \
452 (msa).state_check_buff_size = size;\
453 }\
454 else {\
455 (msa).state_check_buff = (void* )0;\
456 (msa).state_check_buff_size = 0;\
457 }\
458 }\
459 else {\
460 (msa).state_check_buff = (void* )0;\
461 (msa).state_check_buff_size = 0;\
462 }\
463 } while(0)
464
465# define MATCH_ARG_FREE(msa) do {\
466 if ((msa).stack_p) xfree((msa).stack_p);\
467 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
468 if ((msa).state_check_buff) xfree((msa).state_check_buff);\
469 }\
470} while(0)
471#else /* USE_COMBINATION_EXPLOSION_CHECK */
472# define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
473#endif /* USE_COMBINATION_EXPLOSION_CHECK */
474
475
476
477#define MAX_PTR_NUM 100
478
479#define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num) do {\
480 if (ptr_num > MAX_PTR_NUM) {\
481 alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
482 heap_addr = alloc_addr;\
483 if (msa->stack_p) {\
484 stk_alloc = (OnigStackType* )(msa->stack_p);\
485 stk_base = stk_alloc;\
486 stk = stk_base;\
487 stk_end = stk_base + msa->stack_n;\
488 } else {\
489 stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
490 stk_base = stk_alloc;\
491 stk = stk_base;\
492 stk_end = stk_base + (stack_num);\
493 }\
494 } else if (msa->stack_p) {\
495 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
496 heap_addr = NULL;\
497 stk_alloc = (OnigStackType* )(msa->stack_p);\
498 stk_base = stk_alloc;\
499 stk = stk_base;\
500 stk_end = stk_base + msa->stack_n;\
501 }\
502 else {\
503 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
504 + sizeof(OnigStackType) * (stack_num));\
505 heap_addr = NULL;\
506 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
507 stk_base = stk_alloc;\
508 stk = stk_base;\
509 stk_end = stk_base + (stack_num);\
510 }\
511} while(0)
512
513#define STACK_SAVE do{\
514 if (stk_base != stk_alloc) {\
515 msa->stack_p = stk_base;\
516 msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\
517 };\
518} while(0)
519
520static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
521
522extern unsigned int
524{
525 return MatchStackLimitSize;
526}
527
528extern int
530{
531 MatchStackLimitSize = size;
532 return 0;
533}
534
535static int
536stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
537 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
538{
539 size_t n;
540 OnigStackType *x, *stk_base, *stk_end, *stk;
541
542 stk_base = *arg_stk_base;
543 stk_end = *arg_stk_end;
544 stk = *arg_stk;
545
546 n = stk_end - stk_base;
547 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
548 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
549 if (IS_NULL(x)) {
551 return ONIGERR_MEMORY;
552 }
553 xmemcpy(x, stk_base, n * sizeof(OnigStackType));
554 n *= 2;
555 }
556 else {
557 unsigned int limit_size = MatchStackLimitSize;
558 n *= 2;
559 if (limit_size != 0 && n > limit_size) {
560 if ((unsigned int )(stk_end - stk_base) == limit_size)
562 else
563 n = limit_size;
564 }
565 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
566 if (IS_NULL(x)) {
568 return ONIGERR_MEMORY;
569 }
570 }
571 *arg_stk = x + (stk - stk_base);
572 *arg_stk_base = x;
573 *arg_stk_end = x + n;
574 return 0;
575}
576
577#define STACK_ENSURE(n) do {\
578 if (stk_end - stk < (n)) {\
579 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
580 if (r != 0) {\
581 STACK_SAVE;\
582 if (xmalloc_base) xfree(xmalloc_base);\
583 return r;\
584 }\
585 }\
586} while(0)
587
588#define STACK_AT(index) (stk_base + (index))
589#define GET_STACK_INDEX(stk) ((stk) - stk_base)
590
591#define STACK_PUSH_TYPE(stack_type) do {\
592 STACK_ENSURE(1);\
593 stk->type = (stack_type);\
594 STACK_INC;\
595} while(0)
596
597#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
598
599#ifdef USE_COMBINATION_EXPLOSION_CHECK
600# define STATE_CHECK_POS(s,snum) \
601 (((s) - str) * num_comb_exp_check + ((snum) - 1))
602# define STATE_CHECK_VAL(v,snum) do {\
603 if (state_check_buff != NULL) {\
604 int x = STATE_CHECK_POS(s,snum);\
605 (v) = state_check_buff[x/8] & (1<<(x%8));\
606 }\
607 else (v) = 0;\
608} while(0)
609
610
611# define ELSE_IF_STATE_CHECK_MARK(stk) \
612 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
613 int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
614 state_check_buff[x/8] |= (1<<(x%8)); \
615 }
616
617# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
618 STACK_ENSURE(1);\
619 stk->type = (stack_type);\
620 stk->u.state.pcode = (pat);\
621 stk->u.state.pstr = (s);\
622 stk->u.state.pstr_prev = (sprev);\
623 stk->u.state.state_check = 0;\
624 stk->u.state.pkeep = (keep);\
625 STACK_INC;\
626} while(0)
627
628# define STACK_PUSH_ENSURED(stack_type,pat) do {\
629 stk->type = (stack_type);\
630 stk->u.state.pcode = (pat);\
631 stk->u.state.state_check = 0;\
632 STACK_INC;\
633} while(0)
634
635# define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
636 STACK_ENSURE(1);\
637 stk->type = STK_ALT;\
638 stk->u.state.pcode = (pat);\
639 stk->u.state.pstr = (s);\
640 stk->u.state.pstr_prev = (sprev);\
641 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
642 stk->u.state.pkeep = (keep);\
643 STACK_INC;\
644} while(0)
645
646# define STACK_PUSH_STATE_CHECK(s,snum) do {\
647 if (state_check_buff != NULL) {\
648 STACK_ENSURE(1);\
649 stk->type = STK_STATE_CHECK_MARK;\
650 stk->u.state.pstr = (s);\
651 stk->u.state.state_check = (snum);\
652 STACK_INC;\
653 }\
654} while(0)
655
656#else /* USE_COMBINATION_EXPLOSION_CHECK */
657
658# define ELSE_IF_STATE_CHECK_MARK(stk)
659
660# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
661 STACK_ENSURE(1);\
662 stk->type = (stack_type);\
663 stk->u.state.pcode = (pat);\
664 stk->u.state.pstr = (s);\
665 stk->u.state.pstr_prev = (sprev);\
666 stk->u.state.pkeep = (keep);\
667 STACK_INC;\
668} while(0)
669
670# define STACK_PUSH_ENSURED(stack_type,pat) do {\
671 stk->type = (stack_type);\
672 stk->u.state.pcode = (pat);\
673 STACK_INC;\
674} while(0)
675#endif /* USE_COMBINATION_EXPLOSION_CHECK */
676
677#define STACK_PUSH_ALT(pat,s,sprev,keep) STACK_PUSH(STK_ALT,pat,s,sprev,keep)
678#define STACK_PUSH_POS(s,sprev,keep) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
679#define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
680#define STACK_PUSH_ABSENT STACK_PUSH_TYPE(STK_ABSENT)
681#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
682#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
683 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)
684
685#define STACK_PUSH_REPEAT(id, pat) do {\
686 STACK_ENSURE(1);\
687 stk->type = STK_REPEAT;\
688 stk->u.repeat.num = (id);\
689 stk->u.repeat.pcode = (pat);\
690 stk->u.repeat.count = 0;\
691 STACK_INC;\
692} while(0)
693
694#define STACK_PUSH_REPEAT_INC(sindex) do {\
695 STACK_ENSURE(1);\
696 stk->type = STK_REPEAT_INC;\
697 stk->u.repeat_inc.si = (sindex);\
698 STACK_INC;\
699} while(0)
700
701#define STACK_PUSH_MEM_START(mnum, s) do {\
702 STACK_ENSURE(1);\
703 stk->type = STK_MEM_START;\
704 stk->u.mem.num = (mnum);\
705 stk->u.mem.pstr = (s);\
706 stk->u.mem.start = mem_start_stk[mnum];\
707 stk->u.mem.end = mem_end_stk[mnum];\
708 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
709 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
710 STACK_INC;\
711} while(0)
712
713#define STACK_PUSH_MEM_END(mnum, s) do {\
714 STACK_ENSURE(1);\
715 stk->type = STK_MEM_END;\
716 stk->u.mem.num = (mnum);\
717 stk->u.mem.pstr = (s);\
718 stk->u.mem.start = mem_start_stk[mnum];\
719 stk->u.mem.end = mem_end_stk[mnum];\
720 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
721 STACK_INC;\
722} while(0)
723
724#define STACK_PUSH_MEM_END_MARK(mnum) do {\
725 STACK_ENSURE(1);\
726 stk->type = STK_MEM_END_MARK;\
727 stk->u.mem.num = (mnum);\
728 STACK_INC;\
729} while(0)
730
731#define STACK_GET_MEM_START(mnum, k) do {\
732 int level = 0;\
733 k = stk;\
734 while (k > stk_base) {\
735 k--;\
736 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
737 && k->u.mem.num == (mnum)) {\
738 level++;\
739 }\
740 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
741 if (level == 0) break;\
742 level--;\
743 }\
744 }\
745} while(0)
746
747#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
748 int level = 0;\
749 while (k < stk) {\
750 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
751 if (level == 0) (start) = k->u.mem.pstr;\
752 level++;\
753 }\
754 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
755 level--;\
756 if (level == 0) {\
757 (end) = k->u.mem.pstr;\
758 break;\
759 }\
760 }\
761 k++;\
762 }\
763} while(0)
764
765#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
766 STACK_ENSURE(1);\
767 stk->type = STK_NULL_CHECK_START;\
768 stk->u.null_check.num = (cnum);\
769 stk->u.null_check.pstr = (s);\
770 STACK_INC;\
771} while(0)
772
773#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
774 STACK_ENSURE(1);\
775 stk->type = STK_NULL_CHECK_END;\
776 stk->u.null_check.num = (cnum);\
777 STACK_INC;\
778} while(0)
779
780#define STACK_PUSH_CALL_FRAME(pat) do {\
781 STACK_ENSURE(1);\
782 stk->type = STK_CALL_FRAME;\
783 stk->u.call_frame.ret_addr = (pat);\
784 STACK_INC;\
785} while(0)
786
787#define STACK_PUSH_RETURN do {\
788 STACK_ENSURE(1);\
789 stk->type = STK_RETURN;\
790 STACK_INC;\
791} while(0)
792
793#define STACK_PUSH_ABSENT_POS(start, end) do {\
794 STACK_ENSURE(1);\
795 stk->type = STK_ABSENT_POS;\
796 stk->u.absent_pos.abs_pstr = (start);\
797 stk->u.absent_pos.end_pstr = (end);\
798 STACK_INC;\
799} while(0)
800
801
802#ifdef ONIG_DEBUG
803# define STACK_BASE_CHECK(p, at) \
804 if ((p) < stk_base) {\
805 fprintf(stderr, "at %s\n", at);\
806 goto stack_error;\
807 }
808#else
809# define STACK_BASE_CHECK(p, at)
810#endif
811
812#define STACK_POP_ONE do {\
813 stk--;\
814 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
815} while(0)
816
817#define STACK_POP do {\
818 switch (pop_level) {\
819 case STACK_POP_LEVEL_FREE:\
820 while (1) {\
821 stk--;\
822 STACK_BASE_CHECK(stk, "STACK_POP"); \
823 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
824 ELSE_IF_STATE_CHECK_MARK(stk);\
825 }\
826 break;\
827 case STACK_POP_LEVEL_MEM_START:\
828 while (1) {\
829 stk--;\
830 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
831 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
832 else if (stk->type == STK_MEM_START) {\
833 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
834 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
835 }\
836 ELSE_IF_STATE_CHECK_MARK(stk);\
837 }\
838 break;\
839 default:\
840 while (1) {\
841 stk--;\
842 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
843 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
844 else if (stk->type == STK_MEM_START) {\
845 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
846 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
847 }\
848 else if (stk->type == STK_REPEAT_INC) {\
849 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
850 }\
851 else if (stk->type == STK_MEM_END) {\
852 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
853 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
854 }\
855 ELSE_IF_STATE_CHECK_MARK(stk);\
856 }\
857 break;\
858 }\
859} while(0)
860
861#define STACK_POP_TIL_POS_NOT do {\
862 while (1) {\
863 stk--;\
864 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
865 if (stk->type == STK_POS_NOT) break;\
866 else if (stk->type == STK_MEM_START) {\
867 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
868 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
869 }\
870 else if (stk->type == STK_REPEAT_INC) {\
871 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
872 }\
873 else if (stk->type == STK_MEM_END) {\
874 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
875 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
876 }\
877 ELSE_IF_STATE_CHECK_MARK(stk);\
878 }\
879} while(0)
880
881#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
882 while (1) {\
883 stk--;\
884 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
885 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
886 else if (stk->type == STK_MEM_START) {\
887 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
888 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
889 }\
890 else if (stk->type == STK_REPEAT_INC) {\
891 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
892 }\
893 else if (stk->type == STK_MEM_END) {\
894 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
895 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
896 }\
897 ELSE_IF_STATE_CHECK_MARK(stk);\
898 }\
899} while(0)
900
901#define STACK_POP_TIL_ABSENT do {\
902 while (1) {\
903 stk--;\
904 STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
905 if (stk->type == STK_ABSENT) break;\
906 else if (stk->type == STK_MEM_START) {\
907 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
908 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
909 }\
910 else if (stk->type == STK_REPEAT_INC) {\
911 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
912 }\
913 else if (stk->type == STK_MEM_END) {\
914 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
915 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
916 }\
917 ELSE_IF_STATE_CHECK_MARK(stk);\
918 }\
919} while(0)
920
921#define STACK_POP_ABSENT_POS(start, end) do {\
922 stk--;\
923 STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
924 (start) = stk->u.absent_pos.abs_pstr;\
925 (end) = stk->u.absent_pos.end_pstr;\
926} while(0)
927
928#define STACK_POS_END(k) do {\
929 k = stk;\
930 while (1) {\
931 k--;\
932 STACK_BASE_CHECK(k, "STACK_POS_END"); \
933 if (IS_TO_VOID_TARGET(k)) {\
934 k->type = STK_VOID;\
935 }\
936 else if (k->type == STK_POS) {\
937 k->type = STK_VOID;\
938 break;\
939 }\
940 }\
941} while(0)
942
943#define STACK_STOP_BT_END do {\
944 OnigStackType *k = stk;\
945 while (1) {\
946 k--;\
947 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
948 if (IS_TO_VOID_TARGET(k)) {\
949 k->type = STK_VOID;\
950 }\
951 else if (k->type == STK_STOP_BT) {\
952 k->type = STK_VOID;\
953 break;\
954 }\
955 }\
956} while(0)
957
958#define STACK_NULL_CHECK(isnull,id,s) do {\
959 OnigStackType* k = stk;\
960 while (1) {\
961 k--;\
962 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
963 if (k->type == STK_NULL_CHECK_START) {\
964 if (k->u.null_check.num == (id)) {\
965 (isnull) = (k->u.null_check.pstr == (s));\
966 break;\
967 }\
968 }\
969 }\
970} while(0)
971
972#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
973 int level = 0;\
974 OnigStackType* k = stk;\
975 while (1) {\
976 k--;\
977 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
978 if (k->type == STK_NULL_CHECK_START) {\
979 if (k->u.null_check.num == (id)) {\
980 if (level == 0) {\
981 (isnull) = (k->u.null_check.pstr == (s));\
982 break;\
983 }\
984 else level--;\
985 }\
986 }\
987 else if (k->type == STK_NULL_CHECK_END) {\
988 level++;\
989 }\
990 }\
991} while(0)
992
993#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
994 OnigStackType* k = stk;\
995 while (1) {\
996 k--;\
997 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
998 if (k->type == STK_NULL_CHECK_START) {\
999 if (k->u.null_check.num == (id)) {\
1000 if (k->u.null_check.pstr != (s)) {\
1001 (isnull) = 0;\
1002 break;\
1003 }\
1004 else {\
1005 UChar* endp;\
1006 (isnull) = 1;\
1007 while (k < stk) {\
1008 if (k->type == STK_MEM_START) {\
1009 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1010 (isnull) = 0; break;\
1011 }\
1012 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1013 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1014 else\
1015 endp = (UChar* )k->u.mem.end;\
1016 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1017 (isnull) = 0; break;\
1018 }\
1019 else if (endp != s) {\
1020 (isnull) = -1; /* empty, but position changed */ \
1021 }\
1022 }\
1023 k++;\
1024 }\
1025 break;\
1026 }\
1027 }\
1028 }\
1029 }\
1030} while(0)
1031
1032#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
1033 int level = 0;\
1034 OnigStackType* k = stk;\
1035 while (1) {\
1036 k--;\
1037 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
1038 if (k->type == STK_NULL_CHECK_START) {\
1039 if (k->u.null_check.num == (id)) {\
1040 if (level == 0) {\
1041 if (k->u.null_check.pstr != (s)) {\
1042 (isnull) = 0;\
1043 break;\
1044 }\
1045 else {\
1046 UChar* endp;\
1047 (isnull) = 1;\
1048 while (k < stk) {\
1049 if (k->type == STK_MEM_START) {\
1050 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1051 (isnull) = 0; break;\
1052 }\
1053 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1054 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1055 else\
1056 endp = (UChar* )k->u.mem.end;\
1057 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1058 (isnull) = 0; break;\
1059 }\
1060 else if (endp != s) {\
1061 (isnull) = -1; /* empty, but position changed */ \
1062 }\
1063 }\
1064 k++;\
1065 }\
1066 break;\
1067 }\
1068 }\
1069 else {\
1070 level--;\
1071 }\
1072 }\
1073 }\
1074 else if (k->type == STK_NULL_CHECK_END) {\
1075 if (k->u.null_check.num == (id)) level++;\
1076 }\
1077 }\
1078} while(0)
1079
1080#define STACK_GET_REPEAT(id, k) do {\
1081 int level = 0;\
1082 k = stk;\
1083 while (1) {\
1084 k--;\
1085 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
1086 if (k->type == STK_REPEAT) {\
1087 if (level == 0) {\
1088 if (k->u.repeat.num == (id)) {\
1089 break;\
1090 }\
1091 }\
1092 }\
1093 else if (k->type == STK_CALL_FRAME) level--;\
1094 else if (k->type == STK_RETURN) level++;\
1095 }\
1096} while(0)
1097
1098#define STACK_RETURN(addr) do {\
1099 int level = 0;\
1100 OnigStackType* k = stk;\
1101 while (1) {\
1102 k--;\
1103 STACK_BASE_CHECK(k, "STACK_RETURN"); \
1104 if (k->type == STK_CALL_FRAME) {\
1105 if (level == 0) {\
1106 (addr) = k->u.call_frame.ret_addr;\
1107 break;\
1108 }\
1109 else level--;\
1110 }\
1111 else if (k->type == STK_RETURN)\
1112 level++;\
1113 }\
1114} while(0)
1115
1116
1117#define STRING_CMP(s1,s2,len) do {\
1118 while (len-- > 0) {\
1119 if (*s1++ != *s2++) goto fail;\
1120 }\
1121} while(0)
1122
1123#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
1124 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
1125 goto fail; \
1126} while(0)
1127
1128static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
1129 UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end)
1130{
1133 UChar *p1, *p2, *end1, *s2;
1134 int len1, len2;
1135
1136 s2 = *ps2;
1137 end1 = s1 + mblen;
1138 while (s1 < end1) {
1139 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1);
1140 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2);
1141 if (len1 != len2) return 0;
1142 p1 = buf1;
1143 p2 = buf2;
1144 while (len1-- > 0) {
1145 if (*p1 != *p2) return 0;
1146 p1++;
1147 p2++;
1148 }
1149 }
1150
1151 *ps2 = s2;
1152 return 1;
1153}
1154
1155#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1156 is_fail = 0;\
1157 while (len-- > 0) {\
1158 if (*s1++ != *s2++) {\
1159 is_fail = 1; break;\
1160 }\
1161 }\
1162} while(0)
1163
1164#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
1165 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
1166 is_fail = 1; \
1167 else \
1168 is_fail = 0; \
1169} while(0)
1170
1171
1172#define IS_EMPTY_STR (str == end)
1173#define ON_STR_BEGIN(s) ((s) == str)
1174#define ON_STR_END(s) ((s) == end)
1175#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1176# define DATA_ENSURE_CHECK1 (s < right_range)
1177# define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1178# define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1179# define ABSENT_END_POS right_range
1180#else
1181# define DATA_ENSURE_CHECK1 (s < end)
1182# define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1183# define DATA_ENSURE(n) if (s + (n) > end) goto fail
1184# define ABSENT_END_POS end
1185#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1186
1187
1188#ifdef USE_CAPTURE_HISTORY
1189static int
1190make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
1191 OnigStackType* stk_top, UChar* str, regex_t* reg)
1192{
1193 int n, r;
1194 OnigCaptureTreeNode* child;
1195 OnigStackType* k = *kp;
1196
1197 while (k < stk_top) {
1198 if (k->type == STK_MEM_START) {
1199 n = k->u.mem.num;
1201 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1202 child = history_node_new();
1204 child->group = n;
1205 child->beg = k->u.mem.pstr - str;
1206 r = history_tree_add_child(node, child);
1207 if (r != 0) {
1208 history_tree_free(child);
1209 return r;
1210 }
1211 *kp = (k + 1);
1212 r = make_capture_history_tree(child, kp, stk_top, str, reg);
1213 if (r != 0) return r;
1214
1215 k = *kp;
1216 child->end = k->u.mem.pstr - str;
1217 }
1218 }
1219 else if (k->type == STK_MEM_END) {
1220 if (k->u.mem.num == node->group) {
1221 node->end = k->u.mem.pstr - str;
1222 *kp = k;
1223 return 0;
1224 }
1225 }
1226 k++;
1227 }
1228
1229 return 1; /* 1: root node ending. */
1230}
1231#endif /* USE_CAPTURE_HISTORY */
1232
1233#ifdef USE_BACKREF_WITH_LEVEL
1234static int mem_is_in_memp(int mem, int num, UChar* memp)
1235{
1236 int i;
1237 MemNumType m;
1238
1239 for (i = 0; i < num; i++) {
1240 GET_MEMNUM_INC(m, memp);
1241 if (mem == (int )m) return 1;
1242 }
1243 return 0;
1244}
1245
1246static int backref_match_at_nested_level(regex_t* reg,
1247 OnigStackType* top, OnigStackType* stk_base,
1248 int ignore_case, int case_fold_flag,
1249 int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
1250{
1251 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1252 int level;
1253 OnigStackType* k;
1254
1255 level = 0;
1256 k = top;
1257 k--;
1258 while (k >= stk_base) {
1259 if (k->type == STK_CALL_FRAME) {
1260 level--;
1261 }
1262 else if (k->type == STK_RETURN) {
1263 level++;
1264 }
1265 else if (level == nest) {
1266 if (k->type == STK_MEM_START) {
1267 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1268 pstart = k->u.mem.pstr;
1269 if (pend != NULL_UCHARP) {
1270 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
1271 p = pstart;
1272 ss = *s;
1273
1274 if (ignore_case != 0) {
1275 if (string_cmp_ic(reg->enc, case_fold_flag,
1276 pstart, &ss, pend - pstart, send) == 0)
1277 return 0; /* or goto next_mem; */
1278 }
1279 else {
1280 while (p < pend) {
1281 if (*p++ != *ss++) return 0; /* or goto next_mem; */
1282 }
1283 }
1284
1285 *s = ss;
1286 return 1;
1287 }
1288 }
1289 }
1290 else if (k->type == STK_MEM_END) {
1291 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1292 pend = k->u.mem.pstr;
1293 }
1294 }
1295 }
1296 k--;
1297 }
1298
1299 return 0;
1300}
1301#endif /* USE_BACKREF_WITH_LEVEL */
1302
1303
1304#ifdef ONIG_DEBUG_STATISTICS
1305
1306# ifdef _WIN32
1307# include <windows.h>
1308static LARGE_INTEGER ts, te, freq;
1309# define GETTIME(t) QueryPerformanceCounter(&(t))
1310# define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \
1311 * 1000000 / freq.QuadPart)
1312# else /* _WIN32 */
1313
1314# define USE_TIMEOFDAY
1315
1316# ifdef USE_TIMEOFDAY
1317# ifdef HAVE_SYS_TIME_H
1318# include <sys/time.h>
1319# endif
1320# ifdef HAVE_UNISTD_H
1321# include <unistd.h>
1322# endif
1323static struct timeval ts, te;
1324# define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
1325# define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1326 (((te).tv_sec - (ts).tv_sec)*1000000))
1327# else /* USE_TIMEOFDAY */
1328# ifdef HAVE_SYS_TIMES_H
1329# include <sys/times.h>
1330# endif
1331static struct tms ts, te;
1332# define GETTIME(t) times(&(t))
1333# define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
1334# endif /* USE_TIMEOFDAY */
1335
1336# endif /* _WIN32 */
1337
1338static int OpCounter[256];
1339static int OpPrevCounter[256];
1340static unsigned long OpTime[256];
1341static int OpCurr = OP_FINISH;
1342static int OpPrevTarget = OP_FAIL;
1343static int MaxStackDepth = 0;
1344
1345# define MOP_IN(opcode) do {\
1346 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1347 OpCurr = opcode;\
1348 OpCounter[opcode]++;\
1349 GETTIME(ts);\
1350} while(0)
1351
1352# define MOP_OUT do {\
1353 GETTIME(te);\
1354 OpTime[OpCurr] += TIMEDIFF(te, ts);\
1355} while(0)
1356
1357extern void
1358onig_statistics_init(void)
1359{
1360 int i;
1361 for (i = 0; i < 256; i++) {
1362 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
1363 }
1364 MaxStackDepth = 0;
1365# ifdef _WIN32
1366 QueryPerformanceFrequency(&freq);
1367# endif
1368}
1369
1370extern void
1371onig_print_statistics(FILE* f)
1372{
1373 int i;
1374 fprintf(f, " count prev time\n");
1375 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
1376 fprintf(f, "%8d: %8d: %10lu: %s\n",
1377 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
1378 }
1379 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
1380}
1381
1382# define STACK_INC do {\
1383 stk++;\
1384 if (stk - stk_base > MaxStackDepth) \
1385 MaxStackDepth = stk - stk_base;\
1386} while(0)
1387
1388#else /* ONIG_DEBUG_STATISTICS */
1389# define STACK_INC stk++
1390
1391# define MOP_IN(opcode)
1392# define MOP_OUT
1393#endif /* ONIG_DEBUG_STATISTICS */
1394
1395
1396#ifdef ONIG_DEBUG_MATCH
1397static char *
1398stack_type_str(int stack_type)
1399{
1400 switch (stack_type) {
1401 case STK_ALT: return "Alt ";
1402 case STK_LOOK_BEHIND_NOT: return "LBNot ";
1403 case STK_POS_NOT: return "PosNot";
1404 case STK_MEM_START: return "MemS ";
1405 case STK_MEM_END: return "MemE ";
1406 case STK_REPEAT_INC: return "RepInc";
1407 case STK_STATE_CHECK_MARK: return "StChMk";
1408 case STK_NULL_CHECK_START: return "NulChS";
1409 case STK_NULL_CHECK_END: return "NulChE";
1410 case STK_MEM_END_MARK: return "MemEMk";
1411 case STK_POS: return "Pos ";
1412 case STK_STOP_BT: return "StopBt";
1413 case STK_REPEAT: return "Rep ";
1414 case STK_CALL_FRAME: return "Call ";
1415 case STK_RETURN: return "Ret ";
1416 case STK_VOID: return "Void ";
1417 case STK_ABSENT_POS: return "AbsPos";
1418 case STK_ABSENT: return "Absent";
1419 default: return " ";
1420 }
1421}
1422#endif
1423
1424/* match data(str - end) from position (sstart). */
1425/* if sstart == str then set sprev to NULL. */
1426static OnigPosition
1427match_at(regex_t* reg, const UChar* str, const UChar* end,
1428#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1429 const UChar* right_range,
1430#endif
1431 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
1432{
1433 static const UChar FinishCode[] = { OP_FINISH };
1434
1435 int i, num_mem, pop_level;
1436 ptrdiff_t n, best_len;
1437 LengthType tlen, tlen2;
1438 MemNumType mem;
1439 RelAddrType addr;
1440 OnigOptionType option = reg->options;
1441 OnigEncoding encode = reg->enc;
1442 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
1443 UChar *s, *q, *sbegin;
1444 UChar *p = reg->p;
1445 UChar *pkeep;
1446 char *alloca_base;
1447 char *xmalloc_base = NULL;
1448 OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
1449 OnigStackType *stkp; /* used as any purpose. */
1450 OnigStackIndex si;
1451 OnigStackIndex *repeat_stk;
1452 OnigStackIndex *mem_start_stk, *mem_end_stk;
1453#ifdef USE_COMBINATION_EXPLOSION_CHECK
1454 int scv;
1455 unsigned char* state_check_buff = msa->state_check_buff;
1456 int num_comb_exp_check = reg->num_comb_exp_check;
1457#endif
1458
1459#if USE_TOKEN_THREADED_VM
1460# define OP_OFFSET 1
1461# define VM_LOOP JUMP;
1462# define VM_LOOP_END
1463# define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
1464# define DEFAULT L_DEFAULT:
1465# define NEXT sprev = sbegin; JUMP
1466# define JUMP RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
1467
1468 RB_GNUC_EXTENSION static const void *oplabels[] = {
1469 &&L_OP_FINISH, /* matching process terminator (no more alternative) */
1470 &&L_OP_END, /* pattern code terminator (success end) */
1471
1472 &&L_OP_EXACT1, /* single byte, N = 1 */
1473 &&L_OP_EXACT2, /* single byte, N = 2 */
1474 &&L_OP_EXACT3, /* single byte, N = 3 */
1475 &&L_OP_EXACT4, /* single byte, N = 4 */
1476 &&L_OP_EXACT5, /* single byte, N = 5 */
1477 &&L_OP_EXACTN, /* single byte */
1478 &&L_OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
1479 &&L_OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
1480 &&L_OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
1481 &&L_OP_EXACTMB2N, /* mb-length = 2 */
1482 &&L_OP_EXACTMB3N, /* mb-length = 3 */
1483 &&L_OP_EXACTMBN, /* other length */
1484
1485 &&L_OP_EXACT1_IC, /* single byte, N = 1, ignore case */
1486 &&L_OP_EXACTN_IC, /* single byte, ignore case */
1487
1488 &&L_OP_CCLASS,
1489 &&L_OP_CCLASS_MB,
1490 &&L_OP_CCLASS_MIX,
1491 &&L_OP_CCLASS_NOT,
1492 &&L_OP_CCLASS_MB_NOT,
1493 &&L_OP_CCLASS_MIX_NOT,
1494
1495 &&L_OP_ANYCHAR, /* "." */
1496 &&L_OP_ANYCHAR_ML, /* "." multi-line */
1497 &&L_OP_ANYCHAR_STAR, /* ".*" */
1498 &&L_OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
1499 &&L_OP_ANYCHAR_STAR_PEEK_NEXT,
1500 &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT,
1501
1502 &&L_OP_WORD,
1503 &&L_OP_NOT_WORD,
1504 &&L_OP_WORD_BOUND,
1505 &&L_OP_NOT_WORD_BOUND,
1506# ifdef USE_WORD_BEGIN_END
1507 &&L_OP_WORD_BEGIN,
1508 &&L_OP_WORD_END,
1509# else
1510 &&L_DEFAULT,
1511 &&L_DEFAULT,
1512# endif
1513 &&L_OP_ASCII_WORD,
1514 &&L_OP_NOT_ASCII_WORD,
1515 &&L_OP_ASCII_WORD_BOUND,
1516 &&L_OP_NOT_ASCII_WORD_BOUND,
1517# ifdef USE_WORD_BEGIN_END
1518 &&L_OP_ASCII_WORD_BEGIN,
1519 &&L_OP_ASCII_WORD_END,
1520# else
1521 &&L_DEFAULT,
1522 &&L_DEFAULT,
1523# endif
1524
1525 &&L_OP_BEGIN_BUF,
1526 &&L_OP_END_BUF,
1527 &&L_OP_BEGIN_LINE,
1528 &&L_OP_END_LINE,
1529 &&L_OP_SEMI_END_BUF,
1530 &&L_OP_BEGIN_POSITION,
1531
1532 &&L_OP_BACKREF1,
1533 &&L_OP_BACKREF2,
1534 &&L_OP_BACKREFN,
1535 &&L_OP_BACKREFN_IC,
1536 &&L_OP_BACKREF_MULTI,
1537 &&L_OP_BACKREF_MULTI_IC,
1538# ifdef USE_BACKREF_WITH_LEVEL
1539 &&L_OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
1540# else
1541 &&L_DEFAULT,
1542# endif
1543 &&L_OP_MEMORY_START,
1544 &&L_OP_MEMORY_START_PUSH, /* push back-tracker to stack */
1545 &&L_OP_MEMORY_END_PUSH, /* push back-tracker to stack */
1546# ifdef USE_SUBEXP_CALL
1547 &&L_OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
1548# else
1549 &&L_DEFAULT,
1550# endif
1551 &&L_OP_MEMORY_END,
1552# ifdef USE_SUBEXP_CALL
1553 &&L_OP_MEMORY_END_REC, /* push marker to stack */
1554# else
1555 &&L_DEFAULT,
1556# endif
1557
1558 &&L_OP_KEEP,
1559
1560 &&L_OP_FAIL, /* pop stack and move */
1561 &&L_OP_JUMP,
1562 &&L_OP_PUSH,
1563 &&L_OP_POP,
1564# ifdef USE_OP_PUSH_OR_JUMP_EXACT
1565 &&L_OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
1566# else
1567 &&L_DEFAULT,
1568# endif
1569 &&L_OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
1570 &&L_OP_REPEAT, /* {n,m} */
1571 &&L_OP_REPEAT_NG, /* {n,m}? (non greedy) */
1572 &&L_OP_REPEAT_INC,
1573 &&L_OP_REPEAT_INC_NG, /* non greedy */
1574 &&L_OP_REPEAT_INC_SG, /* search and get in stack */
1575 &&L_OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
1576 &&L_OP_NULL_CHECK_START, /* null loop checker start */
1577 &&L_OP_NULL_CHECK_END, /* null loop checker end */
1578# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
1579 &&L_OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
1580# else
1581 &&L_DEFAULT,
1582# endif
1583# ifdef USE_SUBEXP_CALL
1584 &&L_OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
1585# else
1586 &&L_DEFAULT,
1587# endif
1588
1589 &&L_OP_PUSH_POS, /* (?=...) start */
1590 &&L_OP_POP_POS, /* (?=...) end */
1591 &&L_OP_PUSH_POS_NOT, /* (?!...) start */
1592 &&L_OP_FAIL_POS, /* (?!...) end */
1593 &&L_OP_PUSH_STOP_BT, /* (?>...) start */
1594 &&L_OP_POP_STOP_BT, /* (?>...) end */
1595 &&L_OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
1596 &&L_OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
1597 &&L_OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
1598 &&L_OP_PUSH_ABSENT_POS, /* (?~...) start */
1599 &&L_OP_ABSENT, /* (?~...) start of inner loop */
1600 &&L_OP_ABSENT_END, /* (?~...) end */
1601
1602# ifdef USE_SUBEXP_CALL
1603 &&L_OP_CALL, /* \g<name> */
1604 &&L_OP_RETURN,
1605# else
1606 &&L_DEFAULT,
1607 &&L_DEFAULT,
1608# endif
1609 &&L_OP_CONDITION,
1610
1611# ifdef USE_COMBINATION_EXPLOSION_CHECK
1612 &&L_OP_STATE_CHECK_PUSH, /* combination explosion check and push */
1613 &&L_OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
1614 &&L_OP_STATE_CHECK, /* check only */
1615# else
1616 &&L_DEFAULT,
1617 &&L_DEFAULT,
1618 &&L_DEFAULT,
1619# endif
1620# ifdef USE_COMBINATION_EXPLOSION_CHECK
1621 &&L_OP_STATE_CHECK_ANYCHAR_STAR,
1622 &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR,
1623# else
1624 &&L_DEFAULT,
1625 &&L_DEFAULT,
1626# endif
1627 /* no need: IS_DYNAMIC_OPTION() == 0 */
1628# if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
1629 &&L_OP_SET_OPTION_PUSH, /* set option and push recover option */
1630 &&L_OP_SET_OPTION /* set option */
1631# else
1632 &&L_DEFAULT,
1633 &&L_DEFAULT
1634# endif
1635 };
1636#else /* USE_TOKEN_THREADED_VM */
1637
1638# define OP_OFFSET 0
1639# define VM_LOOP \
1640 while (1) { \
1641 OPCODE_EXEC_HOOK; \
1642 sbegin = s; \
1643 switch (*p++) {
1644# define VM_LOOP_END } sprev = sbegin; }
1645# define CASE(x) case x:
1646# define DEFAULT default:
1647# define NEXT break
1648# define JUMP continue; break
1649#endif /* USE_TOKEN_THREADED_VM */
1650
1651
1652#ifdef USE_SUBEXP_CALL
1653/* Stack #0 is used to store the pattern itself and used for (?R), \g<0>,
1654 etc. Additional space is required. */
1655# define ADD_NUMMEM 1
1656#else
1657/* Stack #0 not is used. */
1658# define ADD_NUMMEM 0
1659#endif
1660
1661 n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2;
1662
1663 STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
1664 pop_level = reg->stack_pop_level;
1665 num_mem = reg->num_mem;
1666 repeat_stk = (OnigStackIndex* )alloca_base;
1667
1668 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
1669 mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM);
1670 {
1671 OnigStackIndex *pp = mem_start_stk;
1672 for (; pp < repeat_stk + n; pp += 2) {
1673 pp[0] = INVALID_STACK_INDEX;
1674 pp[1] = INVALID_STACK_INDEX;
1675 }
1676 }
1677#ifndef USE_SUBEXP_CALL
1678 mem_start_stk--; /* for index start from 1,
1679 mem_start_stk[1]..mem_start_stk[num_mem] */
1680 mem_end_stk--; /* for index start from 1,
1681 mem_end_stk[1]..mem_end_stk[num_mem] */
1682#endif
1683
1684#ifdef ONIG_DEBUG_MATCH
1685 fprintf(stderr, "match_at: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), start: %"PRIuPTR" (%p), sprev: %"PRIuPTR" (%p)\n",
1686 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev);
1687 fprintf(stderr, "size: %d, start offset: %d\n",
1688 (int )(end - str), (int )(sstart - str));
1689 fprintf(stderr, "\n ofs> str stk:type addr:opcode\n");
1690#endif
1691
1692 STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode); /* bottom stack */
1693 best_len = ONIG_MISMATCH;
1694 s = (UChar* )sstart;
1695 pkeep = (UChar* )sstart;
1696
1697
1698#ifdef ONIG_DEBUG_MATCH
1699# define OPCODE_EXEC_HOOK \
1700 if (s) { \
1701 UChar *op, *q, *bp, buf[50]; \
1702 int len; \
1703 op = p - OP_OFFSET; \
1704 fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \
1705 bp = buf; \
1706 q = s; \
1707 if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \
1708 for (i = 0; i < 7 && q < end; i++) { \
1709 len = enclen(encode, q, end); \
1710 while (len-- > 0) *bp++ = *q++; \
1711 } \
1712 if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \
1713 } \
1714 xmemcpy(bp, "\"", 1); bp += 1; \
1715 *bp = 0; \
1716 fputs((char* )buf, stderr); \
1717 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \
1718 fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \
1719 stk - stk_base - 1, \
1720 (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
1721 (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
1722 onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
1723 fprintf(stderr, "\n"); \
1724 }
1725#else
1726# define OPCODE_EXEC_HOOK ((void) 0)
1727#endif
1728
1729
1730 VM_LOOP {
1732 n = s - sstart;
1733 if (n > best_len) {
1734 OnigRegion* region;
1735#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1736 if (IS_FIND_LONGEST(option)) {
1737 if (n > msa->best_len) {
1738 msa->best_len = n;
1739 msa->best_s = (UChar* )sstart;
1740 }
1741 else
1742 goto end_best_len;
1743 }
1744#endif
1745 best_len = n;
1746 region = msa->region;
1747 if (region) {
1748 region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
1749 region->end[0] = s - str;
1750 for (i = 1; i <= num_mem; i++) {
1751 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1752 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1753 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1754 else
1755 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
1756
1757 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
1758 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1759 : (UChar* )((void* )mem_end_stk[i])) - str;
1760 }
1761 else {
1762 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
1763 }
1764 }
1765
1766#ifdef USE_CAPTURE_HISTORY
1767 if (reg->capture_history != 0) {
1768 int r;
1769 OnigCaptureTreeNode* node;
1770
1771 if (IS_NULL(region->history_root)) {
1772 region->history_root = node = history_node_new();
1774 }
1775 else {
1776 node = region->history_root;
1777 history_tree_clear(node);
1778 }
1779
1780 node->group = 0;
1781 node->beg = ((pkeep > s) ? s : pkeep) - str;
1782 node->end = s - str;
1783
1784 stkp = stk_base;
1785 r = make_capture_history_tree(region->history_root, &stkp,
1786 stk, (UChar* )str, reg);
1787 if (r < 0) {
1788 best_len = r; /* error code */
1789 goto finish;
1790 }
1791 }
1792#endif /* USE_CAPTURE_HISTORY */
1793 } /* if (region) */
1794 } /* n > best_len */
1795
1796#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1797 end_best_len:
1798#endif
1799 MOP_OUT;
1800
1801 if (IS_FIND_CONDITION(option)) {
1802 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
1803 best_len = ONIG_MISMATCH;
1804 goto fail; /* for retry */
1805 }
1806 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
1807 goto fail; /* for retry */
1808 }
1809 }
1810
1811 /* default behavior: return first-matching result. */
1812 goto finish;
1813 NEXT;
1814
1816 DATA_ENSURE(1);
1817 if (*p != *s) goto fail;
1818 p++; s++;
1819 MOP_OUT;
1820 NEXT;
1821
1823 {
1824 int len;
1826
1827 DATA_ENSURE(1);
1828 len = ONIGENC_MBC_CASE_FOLD(encode,
1829 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1830 case_fold_flag,
1831 &s, end, lowbuf);
1832 DATA_ENSURE(0);
1833 q = lowbuf;
1834 while (len-- > 0) {
1835 if (*p != *q) {
1836 goto fail;
1837 }
1838 p++; q++;
1839 }
1840 }
1841 MOP_OUT;
1842 NEXT;
1843
1845 DATA_ENSURE(2);
1846 if (*p != *s) goto fail;
1847 p++; s++;
1848 if (*p != *s) goto fail;
1849 sprev = s;
1850 p++; s++;
1851 MOP_OUT;
1852 JUMP;
1853
1855 DATA_ENSURE(3);
1856 if (*p != *s) goto fail;
1857 p++; s++;
1858 if (*p != *s) goto fail;
1859 p++; s++;
1860 if (*p != *s) goto fail;
1861 sprev = s;
1862 p++; s++;
1863 MOP_OUT;
1864 JUMP;
1865
1867 DATA_ENSURE(4);
1868 if (*p != *s) goto fail;
1869 p++; s++;
1870 if (*p != *s) goto fail;
1871 p++; s++;
1872 if (*p != *s) goto fail;
1873 p++; s++;
1874 if (*p != *s) goto fail;
1875 sprev = s;
1876 p++; s++;
1877 MOP_OUT;
1878 JUMP;
1879
1881 DATA_ENSURE(5);
1882 if (*p != *s) goto fail;
1883 p++; s++;
1884 if (*p != *s) goto fail;
1885 p++; s++;
1886 if (*p != *s) goto fail;
1887 p++; s++;
1888 if (*p != *s) goto fail;
1889 p++; s++;
1890 if (*p != *s) goto fail;
1891 sprev = s;
1892 p++; s++;
1893 MOP_OUT;
1894 JUMP;
1895
1897 GET_LENGTH_INC(tlen, p);
1898 DATA_ENSURE(tlen);
1899 while (tlen-- > 0) {
1900 if (*p++ != *s++) goto fail;
1901 }
1902 sprev = s - 1;
1903 MOP_OUT;
1904 JUMP;
1905
1907 {
1908 int len;
1909 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1910
1911 GET_LENGTH_INC(tlen, p);
1912 endp = p + tlen;
1913
1914 while (p < endp) {
1915 sprev = s;
1916 DATA_ENSURE(1);
1917 len = ONIGENC_MBC_CASE_FOLD(encode,
1918 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1919 case_fold_flag,
1920 &s, end, lowbuf);
1921 DATA_ENSURE(0);
1922 q = lowbuf;
1923 while (len-- > 0) {
1924 if (*p != *q) goto fail;
1925 p++; q++;
1926 }
1927 }
1928 }
1929
1930 MOP_OUT;
1931 JUMP;
1932
1934 DATA_ENSURE(2);
1935 if (*p != *s) goto fail;
1936 p++; s++;
1937 if (*p != *s) goto fail;
1938 p++; s++;
1939 MOP_OUT;
1940 NEXT;
1941
1943 DATA_ENSURE(4);
1944 if (*p != *s) goto fail;
1945 p++; s++;
1946 if (*p != *s) goto fail;
1947 p++; s++;
1948 sprev = s;
1949 if (*p != *s) goto fail;
1950 p++; s++;
1951 if (*p != *s) goto fail;
1952 p++; s++;
1953 MOP_OUT;
1954 JUMP;
1955
1957 DATA_ENSURE(6);
1958 if (*p != *s) goto fail;
1959 p++; s++;
1960 if (*p != *s) goto fail;
1961 p++; s++;
1962 if (*p != *s) goto fail;
1963 p++; s++;
1964 if (*p != *s) goto fail;
1965 p++; s++;
1966 sprev = s;
1967 if (*p != *s) goto fail;
1968 p++; s++;
1969 if (*p != *s) goto fail;
1970 p++; s++;
1971 MOP_OUT;
1972 JUMP;
1973
1975 GET_LENGTH_INC(tlen, p);
1976 DATA_ENSURE(tlen * 2);
1977 while (tlen-- > 0) {
1978 if (*p != *s) goto fail;
1979 p++; s++;
1980 if (*p != *s) goto fail;
1981 p++; s++;
1982 }
1983 sprev = s - 2;
1984 MOP_OUT;
1985 JUMP;
1986
1988 GET_LENGTH_INC(tlen, p);
1989 DATA_ENSURE(tlen * 3);
1990 while (tlen-- > 0) {
1991 if (*p != *s) goto fail;
1992 p++; s++;
1993 if (*p != *s) goto fail;
1994 p++; s++;
1995 if (*p != *s) goto fail;
1996 p++; s++;
1997 }
1998 sprev = s - 3;
1999 MOP_OUT;
2000 JUMP;
2001
2003 GET_LENGTH_INC(tlen, p); /* mb-len */
2004 GET_LENGTH_INC(tlen2, p); /* string len */
2005 tlen2 *= tlen;
2006 DATA_ENSURE(tlen2);
2007 while (tlen2-- > 0) {
2008 if (*p != *s) goto fail;
2009 p++; s++;
2010 }
2011 sprev = s - tlen;
2012 MOP_OUT;
2013 JUMP;
2014
2016 DATA_ENSURE(1);
2017 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
2018 p += SIZE_BITSET;
2019 s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
2020 MOP_OUT;
2021 NEXT;
2022
2024 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
2025
2026 cclass_mb:
2027 GET_LENGTH_INC(tlen, p);
2028 {
2029 OnigCodePoint code;
2030 UChar *ss;
2031 int mb_len;
2032
2033 DATA_ENSURE(1);
2034 mb_len = enclen(encode, s, end);
2035 DATA_ENSURE(mb_len);
2036 ss = s;
2037 s += mb_len;
2038 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2039
2040#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2041 if (! onig_is_in_code_range(p, code)) goto fail;
2042#else
2043 q = p;
2044 ALIGNMENT_RIGHT(q);
2045 if (! onig_is_in_code_range(q, code)) goto fail;
2046#endif
2047 }
2048 p += tlen;
2049 MOP_OUT;
2050 NEXT;
2051
2053 DATA_ENSURE(1);
2054 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2055 p += SIZE_BITSET;
2056 goto cclass_mb;
2057 }
2058 else {
2059 if (BITSET_AT(((BitSetRef )p), *s) == 0)
2060 goto fail;
2061
2062 p += SIZE_BITSET;
2063 GET_LENGTH_INC(tlen, p);
2064 p += tlen;
2065 s++;
2066 }
2067 MOP_OUT;
2068 NEXT;
2069
2071 DATA_ENSURE(1);
2072 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
2073 p += SIZE_BITSET;
2074 s += enclen(encode, s, end);
2075 MOP_OUT;
2076 NEXT;
2077
2079 DATA_ENSURE(1);
2080 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2081 s++;
2082 GET_LENGTH_INC(tlen, p);
2083 p += tlen;
2084 goto cc_mb_not_success;
2085 }
2086
2087 cclass_mb_not:
2088 GET_LENGTH_INC(tlen, p);
2089 {
2090 OnigCodePoint code;
2091 UChar *ss;
2092 int mb_len = enclen(encode, s, end);
2093
2094 if (! DATA_ENSURE_CHECK(mb_len)) {
2095 DATA_ENSURE(1);
2096 s = (UChar* )end;
2097 p += tlen;
2098 goto cc_mb_not_success;
2099 }
2100
2101 ss = s;
2102 s += mb_len;
2103 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2104
2105#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2106 if (onig_is_in_code_range(p, code)) goto fail;
2107#else
2108 q = p;
2109 ALIGNMENT_RIGHT(q);
2110 if (onig_is_in_code_range(q, code)) goto fail;
2111#endif
2112 }
2113 p += tlen;
2114
2115 cc_mb_not_success:
2116 MOP_OUT;
2117 NEXT;
2118
2120 DATA_ENSURE(1);
2121 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2122 p += SIZE_BITSET;
2123 goto cclass_mb_not;
2124 }
2125 else {
2126 if (BITSET_AT(((BitSetRef )p), *s) != 0)
2127 goto fail;
2128
2129 p += SIZE_BITSET;
2130 GET_LENGTH_INC(tlen, p);
2131 p += tlen;
2132 s++;
2133 }
2134 MOP_OUT;
2135 NEXT;
2136
2138 DATA_ENSURE(1);
2139 n = enclen(encode, s, end);
2140 DATA_ENSURE(n);
2141 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2142 s += n;
2143 MOP_OUT;
2144 NEXT;
2145
2147 DATA_ENSURE(1);
2148 n = enclen(encode, s, end);
2149 DATA_ENSURE(n);
2150 s += n;
2151 MOP_OUT;
2152 NEXT;
2153
2155 while (DATA_ENSURE_CHECK1) {
2156 STACK_PUSH_ALT(p, s, sprev, pkeep);
2157 n = enclen(encode, s, end);
2158 DATA_ENSURE(n);
2159 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2160 sprev = s;
2161 s += n;
2162 }
2163 MOP_OUT;
2164 JUMP;
2165
2167 while (DATA_ENSURE_CHECK1) {
2168 STACK_PUSH_ALT(p, s, sprev, pkeep);
2169 n = enclen(encode, s, end);
2170 if (n > 1) {
2171 DATA_ENSURE(n);
2172 sprev = s;
2173 s += n;
2174 }
2175 else {
2176 sprev = s;
2177 s++;
2178 }
2179 }
2180 MOP_OUT;
2181 JUMP;
2182
2184 while (DATA_ENSURE_CHECK1) {
2185 if (*p == *s) {
2186 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2187 }
2188 n = enclen(encode, s, end);
2189 DATA_ENSURE(n);
2190 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2191 sprev = s;
2192 s += n;
2193 }
2194 p++;
2195 MOP_OUT;
2196 NEXT;
2197
2199 while (DATA_ENSURE_CHECK1) {
2200 if (*p == *s) {
2201 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2202 }
2203 n = enclen(encode, s, end);
2204 if (n > 1) {
2205 DATA_ENSURE(n);
2206 sprev = s;
2207 s += n;
2208 }
2209 else {
2210 sprev = s;
2211 s++;
2212 }
2213 }
2214 p++;
2215 MOP_OUT;
2216 NEXT;
2217
2218#ifdef USE_COMBINATION_EXPLOSION_CHECK
2221 while (DATA_ENSURE_CHECK1) {
2222 STATE_CHECK_VAL(scv, mem);
2223 if (scv) goto fail;
2224
2225 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2226 n = enclen(encode, s, end);
2227 DATA_ENSURE(n);
2228 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2229 sprev = s;
2230 s += n;
2231 }
2232 MOP_OUT;
2233 NEXT;
2234
2237
2239 while (DATA_ENSURE_CHECK1) {
2240 STATE_CHECK_VAL(scv, mem);
2241 if (scv) goto fail;
2242
2243 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2244 n = enclen(encode, s, end);
2245 if (n > 1) {
2246 DATA_ENSURE(n);
2247 sprev = s;
2248 s += n;
2249 }
2250 else {
2251 sprev = s;
2252 s++;
2253 }
2254 }
2255 MOP_OUT;
2256 NEXT;
2257#endif /* USE_COMBINATION_EXPLOSION_CHECK */
2258
2260 DATA_ENSURE(1);
2261 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2262 goto fail;
2263
2264 s += enclen(encode, s, end);
2265 MOP_OUT;
2266 NEXT;
2267
2269 DATA_ENSURE(1);
2270 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2271 goto fail;
2272
2273 s += enclen(encode, s, end);
2274 MOP_OUT;
2275 NEXT;
2276
2278 DATA_ENSURE(1);
2279 if (ONIGENC_IS_MBC_WORD(encode, s, end))
2280 goto fail;
2281
2282 s += enclen(encode, s, end);
2283 MOP_OUT;
2284 NEXT;
2285
2287 DATA_ENSURE(1);
2288 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2289 goto fail;
2290
2291 s += enclen(encode, s, end);
2292 MOP_OUT;
2293 NEXT;
2294
2296 if (ON_STR_BEGIN(s)) {
2297 DATA_ENSURE(1);
2298 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2299 goto fail;
2300 }
2301 else if (ON_STR_END(s)) {
2302 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
2303 goto fail;
2304 }
2305 else {
2306 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2307 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
2308 goto fail;
2309 }
2310 MOP_OUT;
2311 JUMP;
2312
2314 if (ON_STR_BEGIN(s)) {
2315 DATA_ENSURE(1);
2316 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2317 goto fail;
2318 }
2319 else if (ON_STR_END(s)) {
2320 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2321 goto fail;
2322 }
2323 else {
2324 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
2325 == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2326 goto fail;
2327 }
2328 MOP_OUT;
2329 JUMP;
2330
2332 if (ON_STR_BEGIN(s)) {
2333 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
2334 goto fail;
2335 }
2336 else if (ON_STR_END(s)) {
2337 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
2338 goto fail;
2339 }
2340 else {
2341 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2342 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
2343 goto fail;
2344 }
2345 MOP_OUT;
2346 JUMP;
2347
2349 if (ON_STR_BEGIN(s)) {
2350 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2351 goto fail;
2352 }
2353 else if (ON_STR_END(s)) {
2354 if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2355 goto fail;
2356 }
2357 else {
2358 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
2359 != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2360 goto fail;
2361 }
2362 MOP_OUT;
2363 JUMP;
2364
2365#ifdef USE_WORD_BEGIN_END
2367 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
2368 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2369 MOP_OUT;
2370 JUMP;
2371 }
2372 }
2373 goto fail;
2374 NEXT;
2375
2377 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
2378 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
2379 MOP_OUT;
2380 JUMP;
2381 }
2382 }
2383 goto fail;
2384 NEXT;
2385
2387 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2388 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
2389 MOP_OUT;
2390 JUMP;
2391 }
2392 }
2393 goto fail;
2394 NEXT;
2395
2397 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
2398 if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
2399 MOP_OUT;
2400 JUMP;
2401 }
2402 }
2403 goto fail;
2404 NEXT;
2405#endif
2406
2408 if (! ON_STR_BEGIN(s)) goto fail;
2409 if (IS_NOTBOS(msa->options)) goto fail;
2410
2411 MOP_OUT;
2412 JUMP;
2413
2415 if (! ON_STR_END(s)) goto fail;
2416 if (IS_NOTEOS(msa->options)) goto fail;
2417
2418 MOP_OUT;
2419 JUMP;
2420
2422 if (ON_STR_BEGIN(s)) {
2423 if (IS_NOTBOL(msa->options)) goto fail;
2424 MOP_OUT;
2425 JUMP;
2426 }
2427 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
2429 && !(IS_NEWLINE_CRLF(option)
2430 && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
2431#endif
2432 && !ON_STR_END(s)) {
2433 MOP_OUT;
2434 JUMP;
2435 }
2436 goto fail;
2437 NEXT;
2438
2440 if (ON_STR_END(s)) {
2441#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2442 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
2443#endif
2444 if (IS_NOTEOL(msa->options)) goto fail;
2445 MOP_OUT;
2446 JUMP;
2447#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2448 }
2449#endif
2450 }
2451 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
2452 MOP_OUT;
2453 JUMP;
2454 }
2455 goto fail;
2456 NEXT;
2457
2459 if (ON_STR_END(s)) {
2460#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2461 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
2462#endif
2463 if (IS_NOTEOL(msa->options)) goto fail;
2464 MOP_OUT;
2465 JUMP;
2466#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2467 }
2468#endif
2469 }
2470 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
2471 UChar* ss = s + enclen(encode, s, end);
2472 if (ON_STR_END(ss)) {
2473 MOP_OUT;
2474 JUMP;
2475 }
2476#ifdef USE_CRNL_AS_LINE_TERMINATOR
2477 else if (IS_NEWLINE_CRLF(option)
2478 && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2479 ss += enclen(encode, ss, end);
2480 if (ON_STR_END(ss)) {
2481 MOP_OUT;
2482 JUMP;
2483 }
2484 }
2485#endif
2486 }
2487 goto fail;
2488 NEXT;
2489
2491 if (s != msa->gpos)
2492 goto fail;
2493
2494 MOP_OUT;
2495 JUMP;
2496
2498 GET_MEMNUM_INC(mem, p);
2499 STACK_PUSH_MEM_START(mem, s);
2500 MOP_OUT;
2501 JUMP;
2502
2504 GET_MEMNUM_INC(mem, p);
2505 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
2506 mem_end_stk[mem] = INVALID_STACK_INDEX;
2507 MOP_OUT;
2508 JUMP;
2509
2511 GET_MEMNUM_INC(mem, p);
2512 STACK_PUSH_MEM_END(mem, s);
2513 MOP_OUT;
2514 JUMP;
2515
2517 GET_MEMNUM_INC(mem, p);
2518 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2519 MOP_OUT;
2520 JUMP;
2521
2523 pkeep = s;
2524 MOP_OUT;
2525 JUMP;
2526
2527#ifdef USE_SUBEXP_CALL
2529 GET_MEMNUM_INC(mem, p);
2530 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
2531 STACK_PUSH_MEM_END(mem, s);
2532 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2533 MOP_OUT;
2534 JUMP;
2535
2537 GET_MEMNUM_INC(mem, p);
2538 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2539 STACK_GET_MEM_START(mem, stkp);
2540
2541 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2542 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2543 else
2544 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
2545
2547 MOP_OUT;
2548 JUMP;
2549#endif
2550
2552 mem = 1;
2553 goto backref;
2554 NEXT;
2555
2557 mem = 2;
2558 goto backref;
2559 NEXT;
2560
2562 GET_MEMNUM_INC(mem, p);
2563 backref:
2564 {
2565 int len;
2566 UChar *pstart, *pend;
2567
2568 /* if you want to remove following line,
2569 you should check in parse and compile time. */
2570 if (mem > num_mem) goto fail;
2571 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2572 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2573
2574 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2575 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2576 else
2577 pstart = (UChar* )((void* )mem_start_stk[mem]);
2578
2579 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2580 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2581 : (UChar* )((void* )mem_end_stk[mem]));
2582 n = pend - pstart;
2583 DATA_ENSURE(n);
2584 sprev = s;
2585 STRING_CMP(pstart, s, n);
2586 while (sprev + (len = enclen(encode, sprev, end)) < s)
2587 sprev += len;
2588
2589 MOP_OUT;
2590 JUMP;
2591 }
2592
2594 GET_MEMNUM_INC(mem, p);
2595 {
2596 int len;
2597 UChar *pstart, *pend;
2598
2599 /* if you want to remove following line,
2600 you should check in parse and compile time. */
2601 if (mem > num_mem) goto fail;
2602 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2603 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2604
2605 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2606 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2607 else
2608 pstart = (UChar* )((void* )mem_start_stk[mem]);
2609
2610 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2611 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2612 : (UChar* )((void* )mem_end_stk[mem]));
2613 n = pend - pstart;
2614 DATA_ENSURE(n);
2615 sprev = s;
2616 STRING_CMP_IC(case_fold_flag, pstart, &s, (int)n, end);
2617 while (sprev + (len = enclen(encode, sprev, end)) < s)
2618 sprev += len;
2619
2620 MOP_OUT;
2621 JUMP;
2622 }
2623 NEXT;
2624
2626 {
2627 int len, is_fail;
2628 UChar *pstart, *pend, *swork;
2629
2630 GET_LENGTH_INC(tlen, p);
2631 for (i = 0; i < tlen; i++) {
2632 GET_MEMNUM_INC(mem, p);
2633
2634 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2635 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2636
2637 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2638 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2639 else
2640 pstart = (UChar* )((void* )mem_start_stk[mem]);
2641
2642 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2643 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2644 : (UChar* )((void* )mem_end_stk[mem]));
2645 n = pend - pstart;
2646 DATA_ENSURE(n);
2647 sprev = s;
2648 swork = s;
2649 STRING_CMP_VALUE(pstart, swork, n, is_fail);
2650 if (is_fail) continue;
2651 s = swork;
2652 while (sprev + (len = enclen(encode, sprev, end)) < s)
2653 sprev += len;
2654
2655 p += (SIZE_MEMNUM * (tlen - i - 1));
2656 break; /* success */
2657 }
2658 if (i == tlen) goto fail;
2659 MOP_OUT;
2660 JUMP;
2661 }
2662 NEXT;
2663
2665 {
2666 int len, is_fail;
2667 UChar *pstart, *pend, *swork;
2668
2669 GET_LENGTH_INC(tlen, p);
2670 for (i = 0; i < tlen; i++) {
2671 GET_MEMNUM_INC(mem, p);
2672
2673 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2674 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2675
2676 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2677 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2678 else
2679 pstart = (UChar* )((void* )mem_start_stk[mem]);
2680
2681 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2682 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2683 : (UChar* )((void* )mem_end_stk[mem]));
2684 n = pend - pstart;
2685 DATA_ENSURE(n);
2686 sprev = s;
2687 swork = s;
2688 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
2689 if (is_fail) continue;
2690 s = swork;
2691 while (sprev + (len = enclen(encode, sprev, end)) < s)
2692 sprev += len;
2693
2694 p += (SIZE_MEMNUM * (tlen - i - 1));
2695 break; /* success */
2696 }
2697 if (i == tlen) goto fail;
2698 MOP_OUT;
2699 JUMP;
2700 }
2701
2702#ifdef USE_BACKREF_WITH_LEVEL
2704 {
2705 int len;
2706 OnigOptionType ic;
2707 LengthType level;
2708
2709 GET_OPTION_INC(ic, p);
2710 GET_LENGTH_INC(level, p);
2711 GET_LENGTH_INC(tlen, p);
2712
2713 sprev = s;
2714 if (backref_match_at_nested_level(reg, stk, stk_base, ic,
2715 case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
2716 while (sprev + (len = enclen(encode, sprev, end)) < s)
2717 sprev += len;
2718
2719 p += (SIZE_MEMNUM * tlen);
2720 }
2721 else
2722 goto fail;
2723
2724 MOP_OUT;
2725 JUMP;
2726 }
2727
2728#endif
2729
2730#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2732 GET_OPTION_INC(option, p);
2733 STACK_PUSH_ALT(p, s, sprev, pkeep);
2735 MOP_OUT;
2736 JUMP;
2737
2739 GET_OPTION_INC(option, p);
2740 MOP_OUT;
2741 JUMP;
2742#endif
2743
2745 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2747 MOP_OUT;
2748 JUMP;
2749
2751 {
2752 int isnull;
2753
2754 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2755 STACK_NULL_CHECK(isnull, mem, s);
2756 if (isnull) {
2757#ifdef ONIG_DEBUG_MATCH
2758 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n",
2759 (int )mem, (uintptr_t )s, s);
2760#endif
2761 null_check_found:
2762 /* empty loop founded, skip next instruction */
2763 switch (*p++) {
2764 case OP_JUMP:
2765 case OP_PUSH:
2766 p += SIZE_RELADDR;
2767 break;
2768 case OP_REPEAT_INC:
2769 case OP_REPEAT_INC_NG:
2770 case OP_REPEAT_INC_SG:
2772 p += SIZE_MEMNUM;
2773 break;
2774 default:
2775 goto unexpected_bytecode_error;
2776 break;
2777 }
2778 }
2779 }
2780 MOP_OUT;
2781 JUMP;
2782
2783#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2785 {
2786 int isnull;
2787
2788 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2789 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
2790 if (isnull) {
2791# ifdef ONIG_DEBUG_MATCH
2792 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n",
2793 (int )mem, (uintptr_t )s, s);
2794# endif
2795 if (isnull == -1) goto fail;
2796 goto null_check_found;
2797 }
2798 }
2799 MOP_OUT;
2800 JUMP;
2801#endif
2802
2803#ifdef USE_SUBEXP_CALL
2806 {
2807 int isnull;
2808
2809 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2810# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2811 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
2812# else
2813 STACK_NULL_CHECK_REC(isnull, mem, s);
2814# endif
2815 if (isnull) {
2816# ifdef ONIG_DEBUG_MATCH
2817 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n",
2818 (int )mem, (uintptr_t )s, s);
2819# endif
2820 if (isnull == -1) goto fail;
2821 goto null_check_found;
2822 }
2823 else {
2825 }
2826 }
2827 MOP_OUT;
2828 JUMP;
2829#endif
2830
2832 GET_RELADDR_INC(addr, p);
2833 p += addr;
2834 MOP_OUT;
2836 JUMP;
2837
2839 GET_RELADDR_INC(addr, p);
2840 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2841 MOP_OUT;
2842 JUMP;
2843
2844#ifdef USE_COMBINATION_EXPLOSION_CHECK
2847 STATE_CHECK_VAL(scv, mem);
2848 if (scv) goto fail;
2849
2850 GET_RELADDR_INC(addr, p);
2851 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
2852 MOP_OUT;
2853 JUMP;
2854
2857 GET_RELADDR_INC(addr, p);
2858 STATE_CHECK_VAL(scv, mem);
2859 if (scv) {
2860 p += addr;
2861 }
2862 else {
2863 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
2864 }
2865 MOP_OUT;
2866 JUMP;
2867
2870 STATE_CHECK_VAL(scv, mem);
2871 if (scv) goto fail;
2872
2873 STACK_PUSH_STATE_CHECK(s, mem);
2874 MOP_OUT;
2875 JUMP;
2876#endif /* USE_COMBINATION_EXPLOSION_CHECK */
2877
2880 MOP_OUT;
2881 JUMP;
2882
2883#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2885 GET_RELADDR_INC(addr, p);
2886 if (*p == *s && DATA_ENSURE_CHECK1) {
2887 p++;
2888 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2889 MOP_OUT;
2890 JUMP;
2891 }
2892 p += (addr + 1);
2893 MOP_OUT;
2894 JUMP;
2895#endif
2896
2898 GET_RELADDR_INC(addr, p);
2899 if (*p == *s) {
2900 p++;
2901 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2902 MOP_OUT;
2903 JUMP;
2904 }
2905 p++;
2906 MOP_OUT;
2907 JUMP;
2908
2910 {
2911 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2912 GET_RELADDR_INC(addr, p);
2913
2914 STACK_ENSURE(1);
2915 repeat_stk[mem] = GET_STACK_INDEX(stk);
2916 STACK_PUSH_REPEAT(mem, p);
2917
2918 if (reg->repeat_range[mem].lower == 0) {
2919 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2920 }
2921 }
2922 MOP_OUT;
2923 JUMP;
2924
2926 {
2927 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2928 GET_RELADDR_INC(addr, p);
2929
2930 STACK_ENSURE(1);
2931 repeat_stk[mem] = GET_STACK_INDEX(stk);
2932 STACK_PUSH_REPEAT(mem, p);
2933
2934 if (reg->repeat_range[mem].lower == 0) {
2935 STACK_PUSH_ALT(p, s, sprev, pkeep);
2936 p += addr;
2937 }
2938 }
2939 MOP_OUT;
2940 JUMP;
2941
2943 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2944 si = repeat_stk[mem];
2945 stkp = STACK_AT(si);
2946
2947 repeat_inc:
2948 stkp->u.repeat.count++;
2949 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
2950 /* end of repeat. Nothing to do. */
2951 }
2952 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2953 STACK_PUSH_ALT(p, s, sprev, pkeep);
2954 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
2955 }
2956 else {
2957 p = stkp->u.repeat.pcode;
2958 }
2960 MOP_OUT;
2962 JUMP;
2963
2965 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2966 STACK_GET_REPEAT(mem, stkp);
2967 si = GET_STACK_INDEX(stkp);
2968 goto repeat_inc;
2969 NEXT;
2970
2972 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2973 si = repeat_stk[mem];
2974 stkp = STACK_AT(si);
2975
2976 repeat_inc_ng:
2977 stkp->u.repeat.count++;
2978 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
2979 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2980 UChar* pcode = stkp->u.repeat.pcode;
2981
2983 STACK_PUSH_ALT(pcode, s, sprev, pkeep);
2984 }
2985 else {
2986 p = stkp->u.repeat.pcode;
2988 }
2989 }
2990 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
2992 }
2993 MOP_OUT;
2995 JUMP;
2996
2998 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2999 STACK_GET_REPEAT(mem, stkp);
3000 si = GET_STACK_INDEX(stkp);
3001 goto repeat_inc_ng;
3002 NEXT;
3003
3005 STACK_PUSH_POS(s, sprev, pkeep);
3006 MOP_OUT;
3007 JUMP;
3008
3010 {
3011 STACK_POS_END(stkp);
3012 s = stkp->u.state.pstr;
3013 sprev = stkp->u.state.pstr_prev;
3014 }
3015 MOP_OUT;
3016 JUMP;
3017
3019 GET_RELADDR_INC(addr, p);
3020 STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
3021 MOP_OUT;
3022 JUMP;
3023
3026 goto fail;
3027 NEXT;
3028
3031 MOP_OUT;
3032 JUMP;
3033
3036 MOP_OUT;
3037 JUMP;
3038
3040 GET_LENGTH_INC(tlen, p);
3041 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
3042 if (IS_NULL(s)) goto fail;
3043 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3044 MOP_OUT;
3045 JUMP;
3046
3048 GET_RELADDR_INC(addr, p);
3049 GET_LENGTH_INC(tlen, p);
3050 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
3051 if (IS_NULL(q)) {
3052 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
3053 If you want to change to fail, replace following line. */
3054 p += addr;
3055 /* goto fail; */
3056 }
3057 else {
3058 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
3059 s = q;
3060 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3061 }
3062 MOP_OUT;
3063 JUMP;
3064
3067 goto fail;
3068 NEXT;
3069
3071 /* Save the absent-start-pos and the original end-pos. */
3073 MOP_OUT;
3074 JUMP;
3075
3077 {
3078 const UChar* aend = ABSENT_END_POS;
3079 UChar* absent;
3080 UChar* selfp = p - 1;
3081
3082 STACK_POP_ABSENT_POS(absent, ABSENT_END_POS); /* Restore end-pos. */
3083 GET_RELADDR_INC(addr, p);
3084#ifdef ONIG_DEBUG_MATCH
3085 fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
3086#endif
3087 if ((absent > aend) && (s > absent)) {
3088 /* An empty match occurred in (?~...) at the start point.
3089 * Never match. */
3090 STACK_POP;
3091 goto fail;
3092 }
3093 else if ((s >= aend) && (s > absent)) {
3094 if (s > aend) {
3095 /* Only one (or less) character matched in the last iteration.
3096 * This is not a possible point. */
3097 goto fail;
3098 }
3099 /* All possible points were found. Try matching after (?~...). */
3100 DATA_ENSURE(0);
3101 p += addr;
3102 }
3103 else {
3104 STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */
3105 n = enclen(encode, s, end);
3106 STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */
3107 STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */
3109 ABSENT_END_POS = aend;
3110 }
3111 }
3112 MOP_OUT;
3113 JUMP;
3114
3116 /* The pattern inside (?~...) was matched.
3117 * Set the end-pos temporary and go to next iteration. */
3118 if (sprev < ABSENT_END_POS)
3119 ABSENT_END_POS = sprev;
3120#ifdef ONIG_DEBUG_MATCH
3121 fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS);
3122#endif
3124 goto fail;
3125 NEXT;
3126
3127#ifdef USE_SUBEXP_CALL
3129 GET_ABSADDR_INC(addr, p);
3131 p = reg->p + addr;
3132 MOP_OUT;
3133 JUMP;
3134
3136 STACK_RETURN(p);
3138 MOP_OUT;
3139 JUMP;
3140#endif
3141
3143 GET_MEMNUM_INC(mem, p);
3144 GET_RELADDR_INC(addr, p);
3145 if ((mem > num_mem) ||
3146 (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
3147 (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
3148 p += addr;
3149 }
3150 MOP_OUT;
3151 JUMP;
3152
3154 goto finish;
3155 NEXT;
3156
3157 CASE(OP_FAIL)
3158 if (0) {
3159 /* fall */
3160 fail:
3161 MOP_OUT;
3162 }
3163 MOP_IN(OP_FAIL);
3164 STACK_POP;
3165 p = stk->u.state.pcode;
3166 s = stk->u.state.pstr;
3167 sprev = stk->u.state.pstr_prev;
3168 pkeep = stk->u.state.pkeep;
3169
3170#ifdef USE_COMBINATION_EXPLOSION_CHECK
3171 if (stk->u.state.state_check != 0) {
3173 stk++;
3174 }
3175#endif
3176
3177 MOP_OUT;
3178 JUMP;
3179
3180 DEFAULT
3181 goto bytecode_error;
3182 } VM_LOOP_END
3183
3184 finish:
3185 STACK_SAVE;
3186 if (xmalloc_base) xfree(xmalloc_base);
3187 return best_len;
3188
3189#ifdef ONIG_DEBUG
3190 stack_error:
3191 STACK_SAVE;
3192 if (xmalloc_base) xfree(xmalloc_base);
3193 return ONIGERR_STACK_BUG;
3194#endif
3195
3196 bytecode_error:
3197 STACK_SAVE;
3198 if (xmalloc_base) xfree(xmalloc_base);
3200
3201 unexpected_bytecode_error:
3202 STACK_SAVE;
3203 if (xmalloc_base) xfree(xmalloc_base);
3205}
3206
3207
3208static UChar*
3209slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
3210 const UChar* text, const UChar* text_end, UChar* text_range)
3211{
3212 UChar *t, *p, *s, *end;
3213
3214 end = (UChar* )text_end;
3215 end -= target_end - target - 1;
3216 if (end > text_range)
3217 end = text_range;
3218
3219 s = (UChar* )text;
3220
3221 if (enc->max_enc_len == enc->min_enc_len) {
3222 int n = enc->max_enc_len;
3223
3224 while (s < end) {
3225 if (*s == *target) {
3226 p = s + 1;
3227 t = target + 1;
3228 if (target_end == t || memcmp(t, p, target_end - t) == 0)
3229 return s;
3230 }
3231 s += n;
3232 }
3233 return (UChar* )NULL;
3234 }
3235 while (s < end) {
3236 if (*s == *target) {
3237 p = s + 1;
3238 t = target + 1;
3239 if (target_end == t || memcmp(t, p, target_end - t) == 0)
3240 return s;
3241 }
3242 s += enclen(enc, s, text_end);
3243 }
3244
3245 return (UChar* )NULL;
3246}
3247
3248static int
3249str_lower_case_match(OnigEncoding enc, int case_fold_flag,
3250 const UChar* t, const UChar* tend,
3251 const UChar* p, const UChar* end)
3252{
3253 int lowlen;
3255
3256 while (t < tend) {
3257 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
3258 q = lowbuf;
3259 while (lowlen > 0) {
3260 if (*t++ != *q++) return 0;
3261 lowlen--;
3262 }
3263 }
3264
3265 return 1;
3266}
3267
3268static UChar*
3269slow_search_ic(OnigEncoding enc, int case_fold_flag,
3270 UChar* target, UChar* target_end,
3271 const UChar* text, const UChar* text_end, UChar* text_range)
3272{
3273 UChar *s, *end;
3274
3275 end = (UChar* )text_end;
3276 end -= target_end - target - 1;
3277 if (end > text_range)
3278 end = text_range;
3279
3280 s = (UChar* )text;
3281
3282 while (s < end) {
3283 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3284 s, text_end))
3285 return s;
3286
3287 s += enclen(enc, s, text_end);
3288 }
3289
3290 return (UChar* )NULL;
3291}
3292
3293static UChar*
3294slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
3295 const UChar* text, const UChar* adjust_text,
3296 const UChar* text_end, const UChar* text_start)
3297{
3298 UChar *t, *p, *s;
3299
3300 s = (UChar* )text_end;
3301 s -= (target_end - target);
3302 if (s > text_start)
3303 s = (UChar* )text_start;
3304 else
3305 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
3306
3307 while (s >= text) {
3308 if (*s == *target) {
3309 p = s + 1;
3310 t = target + 1;
3311 while (t < target_end) {
3312 if (*t != *p++)
3313 break;
3314 t++;
3315 }
3316 if (t == target_end)
3317 return s;
3318 }
3319 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3320 }
3321
3322 return (UChar* )NULL;
3323}
3324
3325static UChar*
3326slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
3327 UChar* target, UChar* target_end,
3328 const UChar* text, const UChar* adjust_text,
3329 const UChar* text_end, const UChar* text_start)
3330{
3331 UChar *s;
3332
3333 s = (UChar* )text_end;
3334 s -= (target_end - target);
3335 if (s > text_start)
3336 s = (UChar* )text_start;
3337 else
3338 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
3339
3340 while (s >= text) {
3341 if (str_lower_case_match(enc, case_fold_flag,
3342 target, target_end, s, text_end))
3343 return s;
3344
3345 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3346 }
3347
3348 return (UChar* )NULL;
3349}
3350
3351#ifndef USE_SUNDAY_QUICK_SEARCH
3352/* Boyer-Moore-Horspool search applied to a multibyte string */
3353static UChar*
3354bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
3355 const UChar* text, const UChar* text_end,
3356 const UChar* text_range)
3357{
3358 const UChar *s, *se, *t, *p, *end;
3359 const UChar *tail;
3360 ptrdiff_t skip, tlen1;
3361
3362# ifdef ONIG_DEBUG_SEARCH
3363 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3364 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3365# endif
3366
3367 tail = target_end - 1;
3368 tlen1 = tail - target;
3369 end = text_range;
3370 if (end + tlen1 > text_end)
3371 end = text_end - tlen1;
3372
3373 s = text;
3374
3375 if (IS_NULL(reg->int_map)) {
3376 while (s < end) {
3377 p = se = s + tlen1;
3378 t = tail;
3379 while (*p == *t) {
3380 if (t == target) return (UChar* )s;
3381 p--; t--;
3382 }
3383 skip = reg->map[*se];
3384 t = s;
3385 do {
3386 s += enclen(reg->enc, s, end);
3387 } while ((s - t) < skip && s < end);
3388 }
3389 }
3390 else {
3391# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3392 while (s < end) {
3393 p = se = s + tlen1;
3394 t = tail;
3395 while (*p == *t) {
3396 if (t == target) return (UChar* )s;
3397 p--; t--;
3398 }
3399 skip = reg->int_map[*se];
3400 t = s;
3401 do {
3402 s += enclen(reg->enc, s, end);
3403 } while ((s - t) < skip && s < end);
3404 }
3405# endif
3406 }
3407
3408 return (UChar* )NULL;
3409}
3410
3411/* Boyer-Moore-Horspool search */
3412static UChar*
3413bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
3414 const UChar* text, const UChar* text_end, const UChar* text_range)
3415{
3416 const UChar *s, *t, *p, *end;
3417 const UChar *tail;
3418
3419# ifdef ONIG_DEBUG_SEARCH
3420 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3421 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3422# endif
3423
3424 end = text_range + (target_end - target) - 1;
3425 if (end > text_end)
3426 end = text_end;
3427
3428 tail = target_end - 1;
3429 s = text + (target_end - target) - 1;
3430 if (IS_NULL(reg->int_map)) {
3431 while (s < end) {
3432 p = s;
3433 t = tail;
3434# ifdef ONIG_DEBUG_SEARCH
3435 fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n",
3436 (intptr_t )(s - text), s);
3437# endif
3438 while (*p == *t) {
3439 if (t == target) return (UChar* )p;
3440 p--; t--;
3441 }
3442 s += reg->map[*s];
3443 }
3444 }
3445 else { /* see int_map[] */
3446# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3447 while (s < end) {
3448 p = s;
3449 t = tail;
3450 while (*p == *t) {
3451 if (t == target) return (UChar* )p;
3452 p--; t--;
3453 }
3454 s += reg->int_map[*s];
3455 }
3456# endif
3457 }
3458 return (UChar* )NULL;
3459}
3460
3461/* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */
3462static UChar*
3463bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3464 const UChar* text, const UChar* text_end,
3465 const UChar* text_range)
3466{
3467 const UChar *s, *se, *t, *end;
3468 const UChar *tail;
3469 ptrdiff_t skip, tlen1;
3470 OnigEncoding enc = reg->enc;
3471 int case_fold_flag = reg->case_fold_flag;
3472
3473# ifdef ONIG_DEBUG_SEARCH
3474 fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
3475 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
3476# endif
3477
3478 tail = target_end - 1;
3479 tlen1 = tail - target;
3480 end = text_range;
3481 if (end + tlen1 > text_end)
3482 end = text_end - tlen1;
3483
3484 s = text;
3485
3486 if (IS_NULL(reg->int_map)) {
3487 while (s < end) {
3488 se = s + tlen1;
3489 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3490 s, se + 1))
3491 return (UChar* )s;
3492 skip = reg->map[*se];
3493 t = s;
3494 do {
3495 s += enclen(reg->enc, s, end);
3496 } while ((s - t) < skip && s < end);
3497 }
3498 }
3499 else {
3500# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3501 while (s < end) {
3502 se = s + tlen1;
3503 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3504 s, se + 1))
3505 return (UChar* )s;
3506 skip = reg->int_map[*se];
3507 t = s;
3508 do {
3509 s += enclen(reg->enc, s, end);
3510 } while ((s - t) < skip && s < end);
3511 }
3512# endif
3513 }
3514
3515 return (UChar* )NULL;
3516}
3517
3518/* Boyer-Moore-Horspool search (ignore case) */
3519static UChar*
3520bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3521 const UChar* text, const UChar* text_end, const UChar* text_range)
3522{
3523 const UChar *s, *p, *end;
3524 const UChar *tail;
3525 OnigEncoding enc = reg->enc;
3526 int case_fold_flag = reg->case_fold_flag;
3527
3528# ifdef ONIG_DEBUG_SEARCH
3529 fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
3530 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
3531# endif
3532
3533 end = text_range + (target_end - target) - 1;
3534 if (end > text_end)
3535 end = text_end;
3536
3537 tail = target_end - 1;
3538 s = text + (target_end - target) - 1;
3539 if (IS_NULL(reg->int_map)) {
3540 while (s < end) {
3541 p = s - (target_end - target) + 1;
3542 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3543 p, s + 1))
3544 return (UChar* )p;
3545 s += reg->map[*s];
3546 }
3547 }
3548 else { /* see int_map[] */
3549# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3550 while (s < end) {
3551 p = s - (target_end - target) + 1;
3552 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3553 p, s + 1))
3554 return (UChar* )p;
3555 s += reg->int_map[*s];
3556 }
3557# endif
3558 }
3559 return (UChar* )NULL;
3560}
3561
3562#else /* USE_SUNDAY_QUICK_SEARCH */
3563
3564/* Sunday's quick search applied to a multibyte string */
3565static UChar*
3566bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
3567 const UChar* text, const UChar* text_end,
3568 const UChar* text_range)
3569{
3570 const UChar *s, *se, *t, *p, *end;
3571 const UChar *tail;
3572 ptrdiff_t skip, tlen1;
3573 OnigEncoding enc = reg->enc;
3574
3575# ifdef ONIG_DEBUG_SEARCH
3576 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3577 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3578# endif
3579
3580 tail = target_end - 1;
3581 tlen1 = tail - target;
3582 end = text_range;
3583 if (end + tlen1 > text_end)
3584 end = text_end - tlen1;
3585
3586 s = text;
3587
3588 if (IS_NULL(reg->int_map)) {
3589 while (s < end) {
3590 p = se = s + tlen1;
3591 t = tail;
3592 while (*p == *t) {
3593 if (t == target) return (UChar* )s;
3594 p--; t--;
3595 }
3596 if (s + 1 >= end) break;
3597 skip = reg->map[se[1]];
3598 t = s;
3599 do {
3600 s += enclen(enc, s, end);
3601 } while ((s - t) < skip && s < end);
3602 }
3603 }
3604 else {
3605# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3606 while (s < end) {
3607 p = se = s + tlen1;
3608 t = tail;
3609 while (*p == *t) {
3610 if (t == target) return (UChar* )s;
3611 p--; t--;
3612 }
3613 if (s + 1 >= end) break;
3614 skip = reg->int_map[se[1]];
3615 t = s;
3616 do {
3617 s += enclen(enc, s, end);
3618 } while ((s - t) < skip && s < end);
3619 }
3620# endif
3621 }
3622
3623 return (UChar* )NULL;
3624}
3625
3626/* Sunday's quick search */
3627static UChar*
3628bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
3629 const UChar* text, const UChar* text_end, const UChar* text_range)
3630{
3631 const UChar *s, *t, *p, *end;
3632 const UChar *tail;
3633 ptrdiff_t tlen1;
3634
3635# ifdef ONIG_DEBUG_SEARCH
3636 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3637 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3638# endif
3639
3640 tail = target_end - 1;
3641 tlen1 = tail - target;
3642 end = text_range + tlen1;
3643 if (end > text_end)
3644 end = text_end;
3645
3646 s = text + tlen1;
3647 if (IS_NULL(reg->int_map)) {
3648 while (s < end) {
3649 p = s;
3650 t = tail;
3651 while (*p == *t) {
3652 if (t == target) return (UChar* )p;
3653 p--; t--;
3654 }
3655 if (s + 1 >= end) break;
3656 s += reg->map[s[1]];
3657 }
3658 }
3659 else { /* see int_map[] */
3660# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3661 while (s < end) {
3662 p = s;
3663 t = tail;
3664 while (*p == *t) {
3665 if (t == target) return (UChar* )p;
3666 p--; t--;
3667 }
3668 if (s + 1 >= end) break;
3669 s += reg->int_map[s[1]];
3670 }
3671# endif
3672 }
3673 return (UChar* )NULL;
3674}
3675
3676/* Sunday's quick search applied to a multibyte string (ignore case) */
3677static UChar*
3678bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3679 const UChar* text, const UChar* text_end,
3680 const UChar* text_range)
3681{
3682 const UChar *s, *se, *t, *end;
3683 const UChar *tail;
3684 ptrdiff_t skip, tlen1;
3685 OnigEncoding enc = reg->enc;
3686 int case_fold_flag = reg->case_fold_flag;
3687
3688# ifdef ONIG_DEBUG_SEARCH
3689 fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3690 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3691# endif
3692
3693 tail = target_end - 1;
3694 tlen1 = tail - target;
3695 end = text_range;
3696 if (end + tlen1 > text_end)
3697 end = text_end - tlen1;
3698
3699 s = text;
3700
3701 if (IS_NULL(reg->int_map)) {
3702 while (s < end) {
3703 se = s + tlen1;
3704 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3705 s, se + 1))
3706 return (UChar* )s;
3707 if (s + 1 >= end) break;
3708 skip = reg->map[se[1]];
3709 t = s;
3710 do {
3711 s += enclen(enc, s, end);
3712 } while ((s - t) < skip && s < end);
3713 }
3714 }
3715 else {
3716# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3717 while (s < end) {
3718 se = s + tlen1;
3719 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3720 s, se + 1))
3721 return (UChar* )s;
3722 if (s + 1 >= end) break;
3723 skip = reg->int_map[se[1]];
3724 t = s;
3725 do {
3726 s += enclen(enc, s, end);
3727 } while ((s - t) < skip && s < end);
3728 }
3729# endif
3730 }
3731
3732 return (UChar* )NULL;
3733}
3734
3735/* Sunday's quick search (ignore case) */
3736static UChar*
3737bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3738 const UChar* text, const UChar* text_end, const UChar* text_range)
3739{
3740 const UChar *s, *p, *end;
3741 const UChar *tail;
3742 ptrdiff_t tlen1;
3743 OnigEncoding enc = reg->enc;
3744 int case_fold_flag = reg->case_fold_flag;
3745
3746# ifdef ONIG_DEBUG_SEARCH
3747 fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3748 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3749# endif
3750
3751 tail = target_end - 1;
3752 tlen1 = tail - target;
3753 end = text_range + tlen1;
3754 if (end > text_end)
3755 end = text_end;
3756
3757 s = text + tlen1;
3758 if (IS_NULL(reg->int_map)) {
3759 while (s < end) {
3760 p = s - tlen1;
3761 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3762 p, s + 1))
3763 return (UChar* )p;
3764 if (s + 1 >= end) break;
3765 s += reg->map[s[1]];
3766 }
3767 }
3768 else { /* see int_map[] */
3769# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3770 while (s < end) {
3771 p = s - tlen1;
3772 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3773 p, s + 1))
3774 return (UChar* )p;
3775 if (s + 1 >= end) break;
3776 s += reg->int_map[s[1]];
3777 }
3778# endif
3779 }
3780 return (UChar* )NULL;
3781}
3782#endif /* USE_SUNDAY_QUICK_SEARCH */
3783
3784#ifdef USE_INT_MAP_BACKWARD
3785static int
3786set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
3787 int** skip)
3788{
3789 int i, len;
3790
3791 if (IS_NULL(*skip)) {
3792 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
3793 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
3794 }
3795
3796 len = (int )(end - s);
3797 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
3798 (*skip)[i] = len;
3799
3800 for (i = len - 1; i > 0; i--)
3801 (*skip)[s[i]] = i;
3802
3803 return 0;
3804}
3805
3806static UChar*
3807bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
3808 const UChar* text, const UChar* adjust_text,
3809 const UChar* text_end, const UChar* text_start)
3810{
3811 const UChar *s, *t, *p;
3812
3813 s = text_end - (target_end - target);
3814 if (text_start < s)
3815 s = text_start;
3816 else
3817 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
3818
3819 while (s >= text) {
3820 p = s;
3821 t = target;
3822 while (t < target_end && *p == *t) {
3823 p++; t++;
3824 }
3825 if (t == target_end)
3826 return (UChar* )s;
3827
3828 s -= reg->int_map_backward[*s];
3829 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
3830 }
3831
3832 return (UChar* )NULL;
3833}
3834#endif
3835
3836static UChar*
3837map_search(OnigEncoding enc, UChar map[],
3838 const UChar* text, const UChar* text_range, const UChar* text_end)
3839{
3840 const UChar *s = text;
3841
3842 while (s < text_range) {
3843 if (map[*s]) return (UChar* )s;
3844
3845 s += enclen(enc, s, text_end);
3846 }
3847 return (UChar* )NULL;
3848}
3849
3850static UChar*
3851map_search_backward(OnigEncoding enc, UChar map[],
3852 const UChar* text, const UChar* adjust_text,
3853 const UChar* text_start, const UChar* text_end)
3854{
3855 const UChar *s = text_start;
3856
3857 while (s >= text) {
3858 if (map[*s]) return (UChar* )s;
3859
3860 s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3861 }
3862 return (UChar* )NULL;
3863}
3864
3865extern OnigPosition
3866onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
3867 OnigOptionType option)
3868{
3869 ptrdiff_t r;
3870 UChar *prev;
3871 OnigMatchArg msa;
3872
3873 MATCH_ARG_INIT(msa, option, region, at, at);
3874#ifdef USE_COMBINATION_EXPLOSION_CHECK
3875 {
3876 int offset = at - str;
3877 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3878 }
3879#endif
3880
3881 if (region) {
3882 r = onig_region_resize_clear(region, reg->num_mem + 1);
3883 }
3884 else
3885 r = 0;
3886
3887 if (r == 0) {
3888 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
3889 r = match_at(reg, str, end,
3890#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3891 end,
3892#endif
3893 at, prev, &msa);
3894 }
3895
3896 MATCH_ARG_FREE(msa);
3897 return r;
3898}
3899
3900static int
3901forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
3902 UChar* range, UChar** low, UChar** high, UChar** low_prev)
3903{
3904 UChar *p, *pprev = (UChar* )NULL;
3905
3906#ifdef ONIG_DEBUG_SEARCH
3907 fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
3908 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
3909#endif
3910
3911 p = s;
3912 if (reg->dmin > 0) {
3913 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
3914 p += reg->dmin;
3915 }
3916 else {
3917 UChar *q = p + reg->dmin;
3918
3919 if (q >= end) return 0; /* fail */
3920 while (p < q) p += enclen(reg->enc, p, end);
3921 }
3922 }
3923
3924 retry:
3925 switch (reg->optimize) {
3927 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
3928 break;
3930 p = slow_search_ic(reg->enc, reg->case_fold_flag,
3931 reg->exact, reg->exact_end, p, end, range);
3932 break;
3933
3935 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
3936 break;
3937
3939 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
3940 break;
3941
3943 p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
3944 break;
3945
3947 p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
3948 break;
3949
3950 case ONIG_OPTIMIZE_MAP:
3951 p = map_search(reg->enc, reg->map, p, range, end);
3952 break;
3953 }
3954
3955 if (p && p < range) {
3956 if (p - reg->dmin < s) {
3957 retry_gate:
3958 pprev = p;
3959 p += enclen(reg->enc, p, end);
3960 goto retry;
3961 }
3962
3963 if (reg->sub_anchor) {
3964 UChar* prev;
3965
3966 switch (reg->sub_anchor) {
3967 case ANCHOR_BEGIN_LINE:
3968 if (!ON_STR_BEGIN(p)) {
3969 prev = onigenc_get_prev_char_head(reg->enc,
3970 (pprev ? pprev : str), p, end);
3971 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
3972 goto retry_gate;
3973 }
3974 break;
3975
3976 case ANCHOR_END_LINE:
3977 if (ON_STR_END(p)) {
3978#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3979 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
3980 (pprev ? pprev : str), p);
3981 if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
3982 goto retry_gate;
3983#endif
3984 }
3985 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
3986 goto retry_gate;
3987 break;
3988 }
3989 }
3990
3991 if (reg->dmax == 0) {
3992 *low = p;
3993 if (low_prev) {
3994 if (*low > s)
3995 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
3996 else
3997 *low_prev = onigenc_get_prev_char_head(reg->enc,
3998 (pprev ? pprev : str), p, end);
3999 }
4000 }
4001 else {
4002 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4003 if (p < str + reg->dmax) {
4004 *low = (UChar* )str;
4005 if (low_prev)
4006 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
4007 }
4008 else {
4009 *low = p - reg->dmax;
4010 if (*low > s) {
4012 *low, end, (const UChar** )low_prev);
4013 if (low_prev && IS_NULL(*low_prev))
4014 *low_prev = onigenc_get_prev_char_head(reg->enc,
4015 (pprev ? pprev : s), *low, end);
4016 }
4017 else {
4018 if (low_prev)
4019 *low_prev = onigenc_get_prev_char_head(reg->enc,
4020 (pprev ? pprev : str), *low, end);
4021 }
4022 }
4023 }
4024 }
4025 /* no needs to adjust *high, *high is used as range check only */
4026 *high = p - reg->dmin;
4027
4028#ifdef ONIG_DEBUG_SEARCH
4030 "forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n",
4031 *low - str, *high - str, reg->dmin, reg->dmax);
4032#endif
4033 return 1; /* success */
4034 }
4035
4036 return 0; /* fail */
4037}
4038
4039#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
4040
4041static int
4042backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
4043 UChar* s, const UChar* range, UChar* adjrange,
4044 UChar** low, UChar** high)
4045{
4046 UChar *p;
4047
4048 range += reg->dmin;
4049 p = s;
4050
4051 retry:
4052 switch (reg->optimize) {
4054 exact_method:
4055 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
4056 range, adjrange, end, p);
4057 break;
4058
4062 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
4063 reg->exact, reg->exact_end,
4064 range, adjrange, end, p);
4065 break;
4066
4069#ifdef USE_INT_MAP_BACKWARD
4070 if (IS_NULL(reg->int_map_backward)) {
4071 int r;
4073 goto exact_method;
4074
4075 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
4076 &(reg->int_map_backward));
4077 if (r) return r;
4078 }
4079 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
4080 end, p);
4081#else
4082 goto exact_method;
4083#endif
4084 break;
4085
4086 case ONIG_OPTIMIZE_MAP:
4087 p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
4088 break;
4089 }
4090
4091 if (p) {
4092 if (reg->sub_anchor) {
4093 UChar* prev;
4094
4095 switch (reg->sub_anchor) {
4096 case ANCHOR_BEGIN_LINE:
4097 if (!ON_STR_BEGIN(p)) {
4098 prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
4099 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
4100 p = prev;
4101 goto retry;
4102 }
4103 }
4104 break;
4105
4106 case ANCHOR_END_LINE:
4107 if (ON_STR_END(p)) {
4108#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4109 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
4110 if (IS_NULL(prev)) goto fail;
4111 if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
4112 p = prev;
4113 goto retry;
4114 }
4115#endif
4116 }
4117 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
4118 p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
4119 if (IS_NULL(p)) goto fail;
4120 goto retry;
4121 }
4122 break;
4123 }
4124 }
4125
4126 /* no needs to adjust *high, *high is used as range check only */
4127 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4128 *low = p - reg->dmax;
4129 *high = p - reg->dmin;
4130 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
4131 }
4132
4133#ifdef ONIG_DEBUG_SEARCH
4134 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
4135 (int )(*low - str), (int )(*high - str));
4136#endif
4137 return 1; /* success */
4138 }
4139
4140 fail:
4141#ifdef ONIG_DEBUG_SEARCH
4142 fprintf(stderr, "backward_search_range: fail.\n");
4143#endif
4144 return 0; /* fail */
4145}
4146
4147
4148extern OnigPosition
4149onig_search(regex_t* reg, const UChar* str, const UChar* end,
4150 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
4151{
4152 return onig_search_gpos(reg, str, end, start, start, range, region, option);
4153}
4154
4155extern OnigPosition
4156onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
4157 const UChar* global_pos,
4158 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
4159{
4160 ptrdiff_t r;
4161 UChar *s, *prev;
4162 OnigMatchArg msa;
4163#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4164 const UChar *orig_start = start;
4165 const UChar *orig_range = range;
4166#endif
4167
4168#ifdef ONIG_DEBUG_SEARCH
4170 "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
4171 (uintptr_t )str, str, end - str, start - str, range - str);
4172#endif
4173
4174 if (region) {
4175 r = onig_region_resize_clear(region, reg->num_mem + 1);
4176 if (r) goto finish_no_msa;
4177 }
4178
4179 if (start > end || start < str) goto mismatch_no_msa;
4180
4181
4182#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4183# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4184# define MATCH_AND_RETURN_CHECK(upper_range) \
4185 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4186 if (r != ONIG_MISMATCH) {\
4187 if (r >= 0) {\
4188 if (! IS_FIND_LONGEST(reg->options)) {\
4189 goto match;\
4190 }\
4191 }\
4192 else goto finish; /* error */ \
4193 }
4194# else
4195# define MATCH_AND_RETURN_CHECK(upper_range) \
4196 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4197 if (r != ONIG_MISMATCH) {\
4198 if (r >= 0) {\
4199 goto match;\
4200 }\
4201 else goto finish; /* error */ \
4202 }
4203# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
4204#else
4205# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4206# define MATCH_AND_RETURN_CHECK(none) \
4207 r = match_at(reg, str, end, s, prev, &msa);\
4208 if (r != ONIG_MISMATCH) {\
4209 if (r >= 0) {\
4210 if (! IS_FIND_LONGEST(reg->options)) {\
4211 goto match;\
4212 }\
4213 }\
4214 else goto finish; /* error */ \
4215 }
4216# else
4217# define MATCH_AND_RETURN_CHECK(none) \
4218 r = match_at(reg, str, end, s, prev, &msa);\
4219 if (r != ONIG_MISMATCH) {\
4220 if (r >= 0) {\
4221 goto match;\
4222 }\
4223 else goto finish; /* error */ \
4224 }
4225# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
4226#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
4227
4228
4229 /* anchor optimize: resume search range */
4230 if (reg->anchor != 0 && str < end) {
4231 UChar *min_semi_end, *max_semi_end;
4232
4233 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
4234 /* search start-position only */
4235 begin_position:
4236 if (range > start)
4237 {
4238 if (global_pos > start)
4239 {
4240 if (global_pos < range)
4241 range = global_pos + 1;
4242 }
4243 else
4244 range = start + 1;
4245 }
4246 else
4247 range = start;
4248 }
4249 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
4250 /* search str-position only */
4251 if (range > start) {
4252 if (start != str) goto mismatch_no_msa;
4253 range = str + 1;
4254 }
4255 else {
4256 if (range <= str) {
4257 start = str;
4258 range = str;
4259 }
4260 else
4261 goto mismatch_no_msa;
4262 }
4263 }
4264 else if (reg->anchor & ANCHOR_END_BUF) {
4265 min_semi_end = max_semi_end = (UChar* )end;
4266
4267 end_buf:
4268 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
4269 goto mismatch_no_msa;
4270
4271 if (range > start) {
4272 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
4273 start = min_semi_end - reg->anchor_dmax;
4274 if (start < end)
4275 start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
4276 }
4277 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
4278 range = max_semi_end - reg->anchor_dmin + 1;
4279 }
4280
4281 if (start > range) goto mismatch_no_msa;
4282 /* If start == range, match with empty at end.
4283 Backward search is used. */
4284 }
4285 else {
4286 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
4287 range = min_semi_end - reg->anchor_dmax;
4288 }
4289 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
4290 start = max_semi_end - reg->anchor_dmin;
4291 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
4292 }
4293 if (range > start) goto mismatch_no_msa;
4294 }
4295 }
4296 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
4297 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
4298
4299 max_semi_end = (UChar* )end;
4300 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
4301 min_semi_end = pre_end;
4302
4303#ifdef USE_CRNL_AS_LINE_TERMINATOR
4304 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
4305 if (IS_NOT_NULL(pre_end) &&
4306 IS_NEWLINE_CRLF(reg->options) &&
4307 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
4308 min_semi_end = pre_end;
4309 }
4310#endif
4311 if (min_semi_end > str && start <= min_semi_end) {
4312 goto end_buf;
4313 }
4314 }
4315 else {
4316 min_semi_end = (UChar* )end;
4317 goto end_buf;
4318 }
4319 }
4320 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
4321 goto begin_position;
4322 }
4323 }
4324 else if (str == end) { /* empty string */
4325 static const UChar address_for_empty_string[] = "";
4326
4327#ifdef ONIG_DEBUG_SEARCH
4328 fprintf(stderr, "onig_search: empty string.\n");
4329#endif
4330
4331 if (reg->threshold_len == 0) {
4332 start = end = str = address_for_empty_string;
4333 s = (UChar* )start;
4334 prev = (UChar* )NULL;
4335
4336 MATCH_ARG_INIT(msa, option, region, start, start);
4337#ifdef USE_COMBINATION_EXPLOSION_CHECK
4338 msa.state_check_buff = (void* )0;
4339 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
4340#endif
4342 goto mismatch;
4343 }
4344 goto mismatch_no_msa;
4345 }
4346
4347#ifdef ONIG_DEBUG_SEARCH
4348 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
4349 (int )(end - str), (int )(start - str), (int )(range - str));
4350#endif
4351
4352 MATCH_ARG_INIT(msa, option, region, start, global_pos);
4353#ifdef USE_COMBINATION_EXPLOSION_CHECK
4354 {
4355 int offset = (MIN(start, range) - str);
4356 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
4357 }
4358#endif
4359
4360 s = (UChar* )start;
4361 if (range > start) { /* forward search */
4362 if (s > str)
4363 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4364 else
4365 prev = (UChar* )NULL;
4366
4367 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
4368 UChar *sch_range, *low, *high, *low_prev;
4369
4370 sch_range = (UChar* )range;
4371 if (reg->dmax != 0) {
4372 if (reg->dmax == ONIG_INFINITE_DISTANCE)
4373 sch_range = (UChar* )end;
4374 else {
4375 sch_range += reg->dmax;
4376 if (sch_range > end) sch_range = (UChar* )end;
4377 }
4378 }
4379
4380 if ((end - start) < reg->threshold_len)
4381 goto mismatch;
4382
4383 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4384 do {
4385 if (! forward_search_range(reg, str, end, s, sch_range,
4386 &low, &high, &low_prev)) goto mismatch;
4387 if (s < low) {
4388 s = low;
4389 prev = low_prev;
4390 }
4391 while (s <= high) {
4392 MATCH_AND_RETURN_CHECK(orig_range);
4393 prev = s;
4394 s += enclen(reg->enc, s, end);
4395 }
4396 } while (s < range);
4397 goto mismatch;
4398 }
4399 else { /* check only. */
4400 if (! forward_search_range(reg, str, end, s, sch_range,
4401 &low, &high, (UChar** )NULL)) goto mismatch;
4402
4403 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
4404 do {
4405 MATCH_AND_RETURN_CHECK(orig_range);
4406 prev = s;
4407 s += enclen(reg->enc, s, end);
4408
4409 if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
4410 while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
4411 && s < range) {
4412 prev = s;
4413 s += enclen(reg->enc, s, end);
4414 }
4415 }
4416 } while (s < range);
4417 goto mismatch;
4418 }
4419 }
4420 }
4421
4422 do {
4423 MATCH_AND_RETURN_CHECK(orig_range);
4424 prev = s;
4425 s += enclen(reg->enc, s, end);
4426 } while (s < range);
4427
4428 if (s == range) { /* because empty match with /$/. */
4429 MATCH_AND_RETURN_CHECK(orig_range);
4430 }
4431 }
4432 else { /* backward search */
4433 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
4434 UChar *low, *high, *adjrange, *sch_start;
4435
4436 if (range < end)
4437 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
4438 else
4439 adjrange = (UChar* )end;
4440
4441 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
4442 (end - range) >= reg->threshold_len) {
4443 do {
4444 sch_start = s + reg->dmax;
4445 if (sch_start > end) sch_start = (UChar* )end;
4446 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
4447 &low, &high) <= 0)
4448 goto mismatch;
4449
4450 if (s > high)
4451 s = high;
4452
4453 while (s >= low) {
4454 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4455 MATCH_AND_RETURN_CHECK(orig_start);
4456 s = prev;
4457 }
4458 } while (s >= range);
4459 goto mismatch;
4460 }
4461 else { /* check only. */
4462 if ((end - range) < reg->threshold_len) goto mismatch;
4463
4464 sch_start = s;
4465 if (reg->dmax != 0) {
4466 if (reg->dmax == ONIG_INFINITE_DISTANCE)
4467 sch_start = (UChar* )end;
4468 else {
4469 sch_start += reg->dmax;
4470 if (sch_start > end) sch_start = (UChar* )end;
4471 else
4472 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
4473 start, sch_start, end);
4474 }
4475 }
4476 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
4477 &low, &high) <= 0) goto mismatch;
4478 }
4479 }
4480
4481 do {
4482 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4483 MATCH_AND_RETURN_CHECK(orig_start);
4484 s = prev;
4485 } while (s >= range);
4486 }
4487
4488 mismatch:
4489#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4490 if (IS_FIND_LONGEST(reg->options)) {
4491 if (msa.best_len >= 0) {
4492 s = msa.best_s;
4493 goto match;
4494 }
4495 }
4496#endif
4497 r = ONIG_MISMATCH;
4498
4499 finish:
4500 MATCH_ARG_FREE(msa);
4501
4502 /* If result is mismatch and no FIND_NOT_EMPTY option,
4503 then the region is not set in match_at(). */
4504 if (IS_FIND_NOT_EMPTY(reg->options) && region) {
4505 onig_region_clear(region);
4506 }
4507
4508#ifdef ONIG_DEBUG
4509 if (r != ONIG_MISMATCH)
4510 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
4511#endif
4512 return r;
4513
4514 mismatch_no_msa:
4515 r = ONIG_MISMATCH;
4516 finish_no_msa:
4517#ifdef ONIG_DEBUG
4518 if (r != ONIG_MISMATCH)
4519 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
4520#endif
4521 return r;
4522
4523 match:
4524 MATCH_ARG_FREE(msa);
4525 return s - str;
4526}
4527
4528extern OnigPosition
4529onig_scan(regex_t* reg, const UChar* str, const UChar* end,
4530 OnigRegion* region, OnigOptionType option,
4531 int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*),
4532 void* callback_arg)
4533{
4534 OnigPosition r;
4536 int rs;
4537 const UChar* start;
4538
4539 n = 0;
4540 start = str;
4541 while (1) {
4542 r = onig_search(reg, str, end, start, end, region, option);
4543 if (r >= 0) {
4544 rs = scan_callback(n, r, region, callback_arg);
4545 n++;
4546 if (rs != 0)
4547 return rs;
4548
4549 if (region->end[0] == start - str) {
4550 if (start >= end) break;
4551 start += enclen(reg->enc, start, end);
4552 }
4553 else
4554 start = str + region->end[0];
4555
4556 if (start > end)
4557 break;
4558 }
4559 else if (r == ONIG_MISMATCH) {
4560 break;
4561 }
4562 else { /* error */
4563 return r;
4564 }
4565 }
4566
4567 return n;
4568}
4569
4570extern OnigEncoding
4572{
4573 return reg->enc;
4574}
4575
4576extern OnigOptionType
4578{
4579 return reg->options;
4580}
4581
4582extern OnigCaseFoldType
4584{
4585 return reg->case_fold_flag;
4586}
4587
4588extern const OnigSyntaxType*
4590{
4591 return reg->syntax;
4592}
4593
4594extern int
4596{
4597 return reg->num_mem;
4598}
4599
4600extern int
4602{
4603#ifdef USE_CAPTURE_HISTORY
4604 int i, n;
4605
4606 n = 0;
4607 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
4608 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
4609 n++;
4610 }
4611 return n;
4612#else
4613 return 0;
4614#endif
4615}
4616
4617extern void
4619{
4620 *to = *from;
4621}
#define range(low, item, hi)
Definition: date_strftime.c:21
#define fail()
#define rb_enc_asciicompat(enc)
Definition: encoding.h:245
char str[HTML_ESCAPE_MAX_LEN+1]
Definition: escape.c:18
unsigned int top
Definition: nkf.c:4323
const char * name
Definition: nkf.c:208
#define ARG_UNUSED
Definition: nkf.h:181
#define ONIG_MISMATCH
Definition: onigmo.h:625
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end)
Definition: onigmo.h:336
#define ONIG_INFINITE_DISTANCE
Definition: onigmo.h:85
#define ONIGERR_UNDEFINED_BYTECODE
Definition: onigmo.h:633
ONIG_EXTERN OnigUChar * onigenc_get_right_adjust_char_head(OnigEncoding enc, const OnigUChar *start, const OnigUChar *s, const OnigUChar *end)
#define ONIG_REGION_NOTPOS
Definition: onigmo.h:734
unsigned int OnigCaseFoldType
Definition: onigmo.h:95
#define ONIGENC_MBC_TO_CODE(enc, p, end)
Definition: onigmo.h:366
#define ONIGERR_INVALID_ARGUMENT
Definition: onigmo.h:640
#define UChar
Definition: onigmo.h:76
ptrdiff_t OnigPosition
Definition: onigmo.h:83
#define ONIG_NREGION
Definition: onigmo.h:437
unsigned int OnigCodePoint
Definition: onigmo.h:80
#define ONIGERR_MEMORY
Definition: onigmo.h:629
#define ONIGENC_STEP_BACK(enc, start, s, end, n)
Definition: onigmo.h:342
#define ONIG_MAX_CAPTURE_HISTORY_GROUP
Definition: onigmo.h:700
#define ONIGENC_MBC_MINLEN(enc)
Definition: onigmo.h:364
#define ONIGENC_MBC_CASE_FOLD(enc, flag, pp, end, buf)
Definition: onigmo.h:332
#define ONIG_CHAR_TABLE_SIZE
Definition: onigmo.h:753
#define ONIGENC_MBC_CASE_FOLD_MAXLEN
Definition: onigmo.h:290
ONIG_EXTERN OnigUChar * onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, const OnigUChar *start, const OnigUChar *s, const OnigUChar *end, const OnigUChar **prev)
ONIG_EXTERN OnigUChar * onigenc_get_prev_char_head(OnigEncoding enc, const OnigUChar *start, const OnigUChar *s, const OnigUChar *end)
#define ONIGENC_IS_SINGLEBYTE(enc)
Definition: onigmo.h:318
#define ONIGERR_UNEXPECTED_BYTECODE
Definition: onigmo.h:634
unsigned int OnigOptionType
Definition: onigmo.h:445
#define ONIGENC_IS_MBC_NEWLINE(enc, p, end)
Definition: onigmo.h:365
#define ONIGENC_IS_MBC_HEAD(enc, p, e)
Definition: onigmo.h:319
#define ONIGERR_STACK_BUG
Definition: onigmo.h:632
#define ONIGERR_MATCH_STACK_LIMIT_OVER
Definition: onigmo.h:635
size_t OnigDistance
Definition: onigmo.h:82
#define ONIGENC_IS_MBC_WORD(enc, s, end)
Definition: onigmo.h:322
#define NULL
int memcmp(const void *, const void *, size_t)
Definition: memcmp.c:7
long int ptrdiff_t
#define xfree
#define MIN(a, b)
#define xrealloc
__intptr_t intptr_t
int fprintf(FILE *__restrict__, const char *__restrict__,...) __attribute__((__format__(__printf__
const char size_t n
#define stderr
#define xmalloc
uint32_t i
__inline__ const void *__restrict__ size_t len
#define PRIdPTR
const char * s2
#define PRIuPTR
unsigned int size
__uintptr_t uintptr_t
#define RB_GNUC_EXTENSION
#define PRIdPTRDIFF
__inline__ int
int mblen(const char *, size_t)
int onig_is_in_code_range(const UChar *p, OnigCodePoint code)
Definition: regcomp.c:6076
#define USE_CRNL_AS_LINE_TERMINATOR
Definition: regenc.h:132
#define enclen(enc, p, e)
Definition: regenc.h:93
unsigned int onig_get_match_stack_limit_size(void)
Definition: regexec.c:523
void onig_region_copy(OnigRegion *to, const OnigRegion *from)
Definition: regexec.c:359
int onig_number_of_capture_histories(const regex_t *reg)
Definition: regexec.c:4601
#define ABSENT_END_POS
Definition: regexec.c:1184
#define STACK_PUSH_LOOK_BEHIND_NOT(pat, s, sprev, keep)
Definition: regexec.c:682
void onig_region_init(OnigRegion *region)
Definition: regexec.c:320
#define STACK_NULL_CHECK_REC(isnull, id, s)
Definition: regexec.c:972
#define STACK_STOP_BT_END
Definition: regexec.c:943
#define STACK_PUSH_REPEAT(id, pat)
Definition: regexec.c:685
#define VM_LOOP
#define STACK_PUSH_ABSENT
Definition: regexec.c:680
int onig_region_set(OnigRegion *region, int at, int beg, int end)
Definition: regexec.c:305
#define STACK_PUSH_MEM_START(mnum, s)
Definition: regexec.c:701
#define DATA_ENSURE_CHECK(n)
Definition: regexec.c:1182
#define STK_CALL_FRAME
Definition: regexec.c:405
#define NEXT
#define MOP_IN(opcode)
Definition: regexec.c:1391
#define STK_VOID
Definition: regexec.c:407
OnigPosition onig_match(regex_t *reg, const UChar *str, const UChar *end, const UChar *at, OnigRegion *region, OnigOptionType option)
Definition: regexec.c:3866
#define STRING_CMP_VALUE(s1, s2, len, is_fail)
Definition: regexec.c:1155
#define STACK_PUSH_NULL_CHECK_START(cnum, s)
Definition: regexec.c:765
#define STK_NULL_CHECK_END
Definition: regexec.c:400
OnigOptionType onig_get_options(const regex_t *reg)
Definition: regexec.c:4577
#define DEFAULT
#define DATA_ENSURE(n)
Definition: regexec.c:1183
OnigPosition onig_search(regex_t *reg, const UChar *str, const UChar *end, const UChar *start, const UChar *range, OnigRegion *region, OnigOptionType option)
Definition: regexec.c:4149
#define STACK_POP_TIL_LOOK_BEHIND_NOT
Definition: regexec.c:881
void onig_region_free(OnigRegion *r, int free_self)
Definition: regexec.c:343
#define STACK_PUSH_POS(s, sprev, keep)
Definition: regexec.c:678
#define STACK_PUSH_CALL_FRAME(pat)
Definition: regexec.c:780
#define STK_ABSENT_POS
Definition: regexec.c:408
#define ONIGENC_IS_MBC_ASCII_WORD(enc, s, end)
Definition: regexec.c:55
#define STACK_POP_ABSENT_POS(start, end)
Definition: regexec.c:921
#define STACK_PUSH_MEM_END(mnum, s)
Definition: regexec.c:713
#define STK_REPEAT
Definition: regexec.c:404
#define STACK_PUSH_POS_NOT(pat, s, sprev, keep)
Definition: regexec.c:679
int onig_number_of_captures(const regex_t *reg)
Definition: regexec.c:4595
#define STK_MEM_END_MARK
Definition: regexec.c:401
#define ADD_NUMMEM
#define CASE(x)
#define STACK_POP_TIL_POS_NOT
Definition: regexec.c:861
#define STACK_SAVE
Definition: regexec.c:513
#define STACK_NULL_CHECK_MEMST(isnull, id, s, reg)
Definition: regexec.c:993
#define MATCH_AND_RETURN_CHECK(none)
int onig_region_resize(OnigRegion *region, int n)
Definition: regexec.c:248
#define STK_STOP_BT
Definition: regexec.c:403
#define STRING_CMP_IC(case_fold_flag, s1, ps2, len, text_end)
Definition: regexec.c:1123
#define STACK_PUSH_RETURN
Definition: regexec.c:787
#define STACK_PUSH_STOP_BT
Definition: regexec.c:681
#define GET_STACK_INDEX(stk)
Definition: regexec.c:589
#define DATA_ENSURE_CHECK1
Definition: regexec.c:1181
#define STK_MEM_END
Definition: regexec.c:395
#define STACK_GET_MEM_START(mnum, k)
Definition: regexec.c:731
#define VM_LOOP_END
#define STACK_NULL_CHECK_MEMST_REC(isnull, id, s, reg)
Definition: regexec.c:1032
#define STK_MEM_START
Definition: regexec.c:394
#define STK_REPEAT_INC
Definition: regexec.c:396
#define STACK_AT(index)
Definition: regexec.c:588
#define STACK_PUSH_ENSURED(stack_type, pat)
Definition: regexec.c:670
OnigEncoding onig_get_encoding(const regex_t *reg)
Definition: regexec.c:4571
#define STK_RETURN
Definition: regexec.c:406
#define STK_STATE_CHECK_MARK
Definition: regexec.c:397
#define ONIGENC_IS_MBC_CRNL(enc, p, end)
Definition: regexec.c:62
#define STK_POS
Definition: regexec.c:402
#define ON_STR_END(s)
Definition: regexec.c:1174
#define STACK_POP_ONE
Definition: regexec.c:812
#define ON_STR_BEGIN(s)
Definition: regexec.c:1173
#define ONIGENC_IS_MBC_NEWLINE_EX(enc, p, start, end, option, check_prev)
Definition: regexec.c:65
void onig_copy_encoding(OnigEncodingType *to, OnigEncoding from)
Definition: regexec.c:4618
#define STRING_CMP(s1, s2, len)
Definition: regexec.c:1117
#define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num)
Definition: regexec.c:479
OnigCaseFoldType onig_get_case_fold_flag(const regex_t *reg)
Definition: regexec.c:4583
#define STACK_PUSH_ALT(pat, s, sprev, keep)
Definition: regexec.c:677
#define STACK_POP
Definition: regexec.c:817
OnigPosition onig_search_gpos(regex_t *reg, const UChar *str, const UChar *end, const UChar *global_pos, const UChar *start, const UChar *range, OnigRegion *region, OnigOptionType option)
Definition: regexec.c:4156
#define ENC_DUMMY_FLAG
Definition: regexec.c:48
#define STACK_PUSH_ABSENT_POS(start, end)
Definition: regexec.c:793
#define STACK_POS_END(k)
Definition: regexec.c:928
#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos)
Definition: regexec.c:417
OnigPosition onig_scan(regex_t *reg, const UChar *str, const UChar *end, OnigRegion *region, OnigOptionType option, int(*scan_callback)(OnigPosition, OnigPosition, OnigRegion *, void *), void *callback_arg)
Definition: regexec.c:4529
#define STACK_NULL_CHECK(isnull, id, s)
Definition: regexec.c:958
#define STK_ABSENT
Definition: regexec.c:409
const OnigSyntaxType * onig_get_syntax(const regex_t *reg)
Definition: regexec.c:4589
#define STACK_GET_REPEAT(id, k)
Definition: regexec.c:1080
#define MOP_OUT
Definition: regexec.c:1392
#define STACK_PUSH_REPEAT_INC(sindex)
Definition: regexec.c:694
#define INVALID_STACK_INDEX
stack
Definition: regexec.c:386
#define STACK_ENSURE(n)
Definition: regexec.c:577
void onig_region_clear(OnigRegion *region)
Definition: regexec.c:235
#define STACK_RETURN(addr)
Definition: regexec.c:1098
OnigRegion * onig_region_new(void)
Definition: regexec.c:332
#define IS_EMPTY_STR
Definition: regexec.c:1172
#define STACK_PUSH_MEM_END_MARK(mnum)
Definition: regexec.c:724
#define STACK_POP_TIL_ABSENT
Definition: regexec.c:901
#define MATCH_ARG_FREE(msa)
Definition: regexec.c:472
#define STK_POS_NOT
Definition: regexec.c:392
#define JUMP
#define STK_LOOK_BEHIND_NOT
Definition: regexec.c:391
#define STACK_PUSH_NULL_CHECK_END(cnum)
Definition: regexec.c:773
#define STK_ALT
Definition: regexec.c:390
#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD
Definition: regexec.c:4039
int onig_set_match_stack_limit_size(unsigned int size)
Definition: regexec.c:529
#define STRING_CMP_VALUE_IC(case_fold_flag, s1, ps2, len, text_end, is_fail)
Definition: regexec.c:1164
#define STK_NULL_CHECK_START
Definition: regexec.c:399
#define ANCHOR_BEGIN_LINE
Definition: regint.h:528
#define CHECK_NULL_RETURN_MEMERR(p)
Definition: regint.h:301
#define ANCHOR_PREC_READ_NOT
Definition: regint.h:539
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC
Definition: regint.h:349
#define INIT_MATCH_STACK_SIZE
Definition: regint.h:86
#define ONIG_OPTIMIZE_MAP
Definition: regint.h:347
#define ONIG_OPTIMIZE_EXACT
Definition: regint.h:343
#define GET_OPTION_INC(option, p)
Definition: regint.h:692
int LengthType
Definition: regint.h:669
short int MemNumType
Definition: regint.h:671
#define IS_NOTBOL(option)
Definition: regint.h:389
#define ANCHOR_BEGIN_POSITION
Definition: regint.h:529
#define GET_MEMNUM_INC(num, p)
Definition: regint.h:690
#define IS_FIND_LONGEST(option)
Definition: regint.h:385
#define ONIG_OPTIMIZE_EXACT_BM
Definition: regint.h:344
#define BITSET_AT(bs, pos)
Definition: regint.h:435
#define CHECK_NULL_RETURN(p)
Definition: regint.h:300
#define SIZE_RELADDR
Definition: regint.h:676
#define ANCHOR_LOOK_BEHIND
Definition: regint.h:540
#define SIZE_BITSET
Definition: regint.h:425
#define IS_NOT_NULL(p)
Definition: regint.h:299
#define ANCHOR_END_LINE
Definition: regint.h:532
#define ANCHOR_BEGIN_BUF
Definition: regint.h:527
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV
Definition: regint.h:345
intptr_t OnigStackIndex
Definition: regint.h:811
#define IS_NOTEOL(option)
Definition: regint.h:390
#define NULL_UCHARP
Definition: regint.h:302
#define SIZE_OP_FAIL
Definition: regint.h:720
#define ANCHOR_ANYCHAR_STAR_ML
Definition: regint.h:544
#define GET_STATE_CHECK_NUM_INC(num, p)
Definition: regint.h:694
#define GET_LENGTH_INC(len, p)
Definition: regint.h:689
#define IS_FIND_NOT_EMPTY(option)
Definition: regint.h:386
#define IS_NOTEOS(option)
Definition: regint.h:392
#define IS_NULL(p)
Definition: regint.h:298
#define ANCHOR_SEMI_END_BUF
Definition: regint.h:531
int RelAddrType
Definition: regint.h:667
#define ALIGNMENT_RIGHT(addr)
Definition: regint.h:329
#define ONIG_OPTIMIZE_EXACT_IC
Definition: regint.h:346
#define ONIG_OPTIMIZE_NONE
Definition: regint.h:342
#define SIZE_MEMNUM
Definition: regint.h:679
@ OP_EXACTMB3N
Definition: regint.h:563
@ OP_END
Definition: regint.h:551
@ OP_LOOK_BEHIND
Definition: regint.h:644
@ OP_CALL
Definition: regint.h:651
@ OP_ASCII_WORD
Definition: regint.h:590
@ OP_STATE_CHECK_PUSH_OR_JUMP
Definition: regint.h:657
@ OP_PUSH_POS_NOT
Definition: regint.h:640
@ OP_STATE_CHECK_ANYCHAR_STAR
Definition: regint.h:659
@ OP_REPEAT_INC_NG
Definition: regint.h:630
@ OP_STATE_CHECK
Definition: regint.h:658
@ OP_MEMORY_END_REC
Definition: regint.h:617
@ OP_REPEAT_INC
Definition: regint.h:629
@ OP_ANYCHAR_ML
Definition: regint.h:577
@ OP_POP_POS
Definition: regint.h:639
@ OP_WORD_END
Definition: regint.h:588
@ OP_WORD_BEGIN
Definition: regint.h:587
@ OP_POP_STOP_BT
Definition: regint.h:643
@ OP_ANYCHAR_STAR_PEEK_NEXT
Definition: regint.h:580
@ OP_BACKREFN
Definition: regint.h:606
@ OP_PUSH_LOOK_BEHIND_NOT
Definition: regint.h:645
@ OP_PUSH_STOP_BT
Definition: regint.h:642
@ OP_EXACTMBN
Definition: regint.h:564
@ OP_EXACTMB2N
Definition: regint.h:562
@ OP_MEMORY_START
Definition: regint.h:612
@ OP_SET_OPTION
Definition: regint.h:664
@ OP_NULL_CHECK_START
Definition: regint.h:633
@ OP_BEGIN_LINE
Definition: regint.h:599
@ OP_WORD_BOUND
Definition: regint.h:585
@ OP_NOT_ASCII_WORD_BOUND
Definition: regint.h:593
@ OP_ANYCHAR
Definition: regint.h:576
@ OP_SET_OPTION_PUSH
Definition: regint.h:663
@ OP_EXACT4
Definition: regint.h:556
@ OP_EXACT5
Definition: regint.h:557
@ OP_REPEAT
Definition: regint.h:627
@ OP_EXACT2
Definition: regint.h:554
@ OP_MEMORY_END
Definition: regint.h:616
@ OP_ANYCHAR_ML_STAR
Definition: regint.h:579
@ OP_EXACTN
Definition: regint.h:558
@ OP_PUSH_OR_JUMP_EXACT1
Definition: regint.h:625
@ OP_ANYCHAR_STAR
Definition: regint.h:578
@ OP_JUMP
Definition: regint.h:622
@ OP_END_LINE
Definition: regint.h:600
@ OP_MEMORY_END_PUSH_REC
Definition: regint.h:615
@ OP_BEGIN_POSITION
Definition: regint.h:602
@ OP_PUSH_POS
Definition: regint.h:638
@ OP_NOT_ASCII_WORD
Definition: regint.h:591
@ OP_NULL_CHECK_END_MEMST_PUSH
Definition: regint.h:636
@ OP_END_BUF
Definition: regint.h:598
@ OP_EXACT1
Definition: regint.h:553
@ OP_CCLASS
Definition: regint.h:569
@ OP_WORD
Definition: regint.h:583
@ OP_FINISH
Definition: regint.h:550
@ OP_ASCII_WORD_END
Definition: regint.h:595
@ OP_BACKREF1
Definition: regint.h:604
@ OP_PUSH
Definition: regint.h:623
@ OP_BACKREFN_IC
Definition: regint.h:607
@ OP_PUSH_ABSENT_POS
Definition: regint.h:647
@ OP_MEMORY_END_PUSH
Definition: regint.h:614
@ OP_ASCII_WORD_BOUND
Definition: regint.h:592
@ OP_STATE_CHECK_PUSH
Definition: regint.h:656
@ OP_CCLASS_MIX_NOT
Definition: regint.h:574
@ OP_CCLASS_MB
Definition: regint.h:570
@ OP_SEMI_END_BUF
Definition: regint.h:601
@ OP_STATE_CHECK_ANYCHAR_ML_STAR
Definition: regint.h:660
@ OP_CONDITION
Definition: regint.h:654
@ OP_PUSH_IF_PEEK_NEXT
Definition: regint.h:626
@ OP_ABSENT
Definition: regint.h:648
@ OP_NOT_WORD
Definition: regint.h:584
@ OP_ASCII_WORD_BEGIN
Definition: regint.h:594
@ OP_REPEAT_NG
Definition: regint.h:628
@ OP_BACKREF_MULTI_IC
Definition: regint.h:609
@ OP_REPEAT_INC_SG
Definition: regint.h:631
@ OP_EXACT3
Definition: regint.h:555
@ OP_EXACTMB2N1
Definition: regint.h:559
@ OP_CCLASS_MB_NOT
Definition: regint.h:573
@ OP_FAIL_POS
Definition: regint.h:641
@ OP_EXACT1_IC
Definition: regint.h:566
@ OP_EXACTMB2N2
Definition: regint.h:560
@ OP_BEGIN_BUF
Definition: regint.h:597
@ OP_ANYCHAR_ML_STAR_PEEK_NEXT
Definition: regint.h:581
@ OP_RETURN
Definition: regint.h:652
@ OP_MEMORY_START_PUSH
Definition: regint.h:613
@ OP_KEEP
Definition: regint.h:619
@ OP_FAIL
Definition: regint.h:621
@ OP_NOT_WORD_BOUND
Definition: regint.h:586
@ OP_REPEAT_INC_NG_SG
Definition: regint.h:632
@ OP_FAIL_LOOK_BEHIND_NOT
Definition: regint.h:646
@ OP_BACKREF2
Definition: regint.h:605
@ OP_EXACTMB2N3
Definition: regint.h:561
@ OP_NULL_CHECK_END
Definition: regint.h:634
@ OP_BACKREF_MULTI
Definition: regint.h:608
@ OP_CCLASS_NOT
Definition: regint.h:572
@ OP_NULL_CHECK_END_MEMST
Definition: regint.h:635
@ OP_BACKREF_WITH_LEVEL
Definition: regint.h:610
@ OP_CCLASS_MIX
Definition: regint.h:571
@ OP_EXACTN_IC
Definition: regint.h:567
@ OP_ABSENT_END
Definition: regint.h:649
@ OP_POP
Definition: regint.h:624
#define IS_FIND_CONDITION(option)
Definition: regint.h:387
#define IS_NOTBOS(option)
Definition: regint.h:391
#define ANCHOR_ANYCHAR_STAR
Definition: regint.h:543
#define ONIG_OPTIMIZE_EXACT_BM_IC
Definition: regint.h:348
#define xmemcpy
Definition: regint.h:202
#define GET_ABSADDR_INC(addr, p)
Definition: regint.h:688
Bits * BitSetRef
Definition: regint.h:423
#define CHECK_INTERRUPT_IN_MATCH_AT
Definition: regint.h:174
#define DEFAULT_MATCH_STACK_LIMIT_SIZE
Definition: regint.h:87
#define BIT_STATUS_AT(stats, n)
Definition: regint.h:357
#define IS_NEWLINE_CRLF(option)
Definition: regint.h:396
#define GET_RELADDR_INC(addr, p)
Definition: regint.h:687
#define ANCHOR_END_BUF
Definition: regint.h:530
#define SIZE_OP_SET_OPTION
Definition: regint.h:718
VALUE stack_type
Definition: ripper.c:187
#define f
UChar * pkeep
Definition: regint.h:823
union _OnigStackType::@198 u
UChar * pstr_prev
Definition: regint.h:819
OnigStackIndex end
Definition: regint.h:838
struct _OnigStackType::@198::@199 state
struct _OnigStackType::@198::@200 repeat
UChar * pstr
Definition: regint.h:818
struct _OnigStackType::@198::@202 mem
unsigned int type
Definition: regint.h:814
UChar * pcode
Definition: regint.h:817
OnigOptionType options
Definition: regint.h:861
OnigRegion * region
Definition: regint.h:862
UChar * best_s
Definition: regint.h:867
OnigPosition best_len
Definition: regint.h:866
const UChar * gpos
Definition: regint.h:864
void * stack_p
Definition: regint.h:859
OnigDistance dmin
Definition: onigmo.h:793
unsigned char * exact_end
Definition: onigmo.h:789
OnigEncoding enc
Definition: onigmo.h:776
unsigned int capture_history
Definition: onigmo.h:766
unsigned int bt_mem_start
Definition: onigmo.h:767
OnigCaseFoldType case_fold_flag
Definition: onigmo.h:779
const OnigSyntaxType * syntax
Definition: onigmo.h:777
int num_comb_exp_check
Definition: onigmo.h:764
OnigRepeatRange * repeat_range
Definition: onigmo.h:774
int * int_map_backward
Definition: onigmo.h:792
unsigned int bt_mem_end
Definition: onigmo.h:768
int stack_pop_level
Definition: onigmo.h:769
OnigDistance anchor_dmax
Definition: onigmo.h:786
unsigned char map[ONIG_CHAR_TABLE_SIZE]
Definition: onigmo.h:790
OnigOptionType options
Definition: onigmo.h:772
unsigned char * p
Definition: onigmo.h:757
unsigned char * exact
Definition: onigmo.h:788
OnigDistance dmax
Definition: onigmo.h:794
OnigDistance anchor_dmin
Definition: onigmo.h:785
OnigPosition * beg
Definition: onigmo.h:719
int num_regs
Definition: onigmo.h:718
int allocated
Definition: onigmo.h:717
OnigPosition * end
Definition: onigmo.h:720
Definition: win32.h:732