34# undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
36# define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
39#ifndef USE_TOKEN_THREADED_VM
41# define USE_TOKEN_THREADED_VM 1
43# define USE_TOKEN_THREADED_VM 0
/* Bit 24. The return expression that follows masks (enc)->ruby_encoding_index
 * with this flag and requires the bit to be clear. */
48# define ENC_DUMMY_FLAG (1<<24)
52 return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG);
/* Replace any earlier ONIGENC_IS_MBC_ASCII_WORD definition.
 * When rb_enc_asciicompat(enc) is true, the byte at s is tested directly
 * (ISALNUM or '_'); otherwise the character at s is decoded with
 * ONIGENC_MBC_TO_CODE and classified by onigenc_ascii_is_code_ctype with
 * ONIGENC_CTYPE_WORD.
 * NOTE(review): s is expanded unparenthesized and more than once, so the
 * argument should be a simple side-effect-free pointer expression. */
54# undef ONIGENC_IS_MBC_ASCII_WORD
55# define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
56 (rb_enc_asciicompat(enc) ? (ISALNUM(*s) || *s=='_') : \
57 onigenc_ascii_is_code_ctype( \
58 ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
61#ifdef USE_CRNL_AS_LINE_TERMINATOR
/* True when the character at p decodes to 13 (CR) and the character that
 * starts enclen(enc,p,end) bytes later decodes to 10 (LF).
 * NOTE(review): p is used unparenthesized inside (p+enclen(...)) and is
 * evaluated several times; no check that the second character lies before
 * end is visible in this macro. */
62# define ONIGENC_IS_MBC_CRNL(enc,p,end) \
63 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
64 ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10)
/* Newline test that forwards all six arguments, each parenthesized, to the
 * is_mbc_newline_ex() function defined right after this macro. */
65# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
66 is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))
68is_mbc_newline_ex(
OnigEncoding enc,
const UChar *p,
const UChar *start,
69 const UChar *end, OnigOptionType option,
int check_prev)
71 if (IS_NEWLINE_CRLF(option)) {
72 if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) {
74 const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end);
75 if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d)
84 const UChar *pnext = p + enclen(enc, p, end);
86 ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d &&
87 ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a)
89 if (ONIGENC_IS_MBC_NEWLINE(enc, p, end))
95 return ONIGENC_IS_MBC_NEWLINE(enc, p, end);
/* Reduced form of ONIGENC_IS_MBC_NEWLINE_EX: start, option and check_prev are
 * accepted but ignored, and the test is delegated to ONIGENC_IS_MBC_NEWLINE.
 * Presumably the alternative branch of the conditional that selects the
 * function-backed form -- TODO confirm against the enclosing #if/#else. */
99# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
100 ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
103#ifdef USE_CAPTURE_HISTORY
104static void history_tree_free(OnigCaptureTreeNode* node);
107history_tree_clear(OnigCaptureTreeNode* node)
111 if (IS_NOT_NULL(node)) {
112 for (i = 0; i < node->num_childs; i++) {
113 if (IS_NOT_NULL(node->childs[i])) {
114 history_tree_free(node->childs[i]);
117 for (i = 0; i < node->allocated; i++) {
118 node->childs[i] = (OnigCaptureTreeNode* )0;
120 node->num_childs = 0;
121 node->beg = ONIG_REGION_NOTPOS;
122 node->end = ONIG_REGION_NOTPOS;
125 node->childs = (OnigCaptureTreeNode** )0;
130history_tree_free(OnigCaptureTreeNode* node)
132 history_tree_clear(node);
139 if (IS_NOT_NULL(r->history_root)) {
140 history_tree_free(r->history_root);
141 r->history_root = (OnigCaptureTreeNode* )0;
145static OnigCaptureTreeNode*
146history_node_new(
void)
148 OnigCaptureTreeNode* node;
150 node = (OnigCaptureTreeNode* )
xmalloc(
sizeof(OnigCaptureTreeNode));
151 CHECK_NULL_RETURN(node);
152 node->childs = (OnigCaptureTreeNode** )0;
154 node->num_childs = 0;
156 node->beg = ONIG_REGION_NOTPOS;
157 node->end = ONIG_REGION_NOTPOS;
163history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
165# define HISTORY_TREE_INIT_ALLOC_SIZE 8
167 if (parent->num_childs >= parent->allocated) {
170 if (IS_NULL(parent->childs)) {
171 n = HISTORY_TREE_INIT_ALLOC_SIZE;
173 (OnigCaptureTreeNode** )
xmalloc(
sizeof(OnigCaptureTreeNode*) * n);
174 CHECK_NULL_RETURN_MEMERR(parent->childs);
177 OnigCaptureTreeNode** tmp;
178 n = parent->allocated * 2;
180 (OnigCaptureTreeNode** )
xrealloc(parent->childs,
181 sizeof(OnigCaptureTreeNode*) * n);
183 history_tree_clear(parent);
184 return ONIGERR_MEMORY;
186 parent->childs = tmp;
188 for (i = parent->allocated; i < n; i++) {
189 parent->childs[i] = (OnigCaptureTreeNode* )0;
191 parent->allocated = n;
194 parent->childs[parent->num_childs] = child;
195 parent->num_childs++;
199static OnigCaptureTreeNode*
200history_tree_clone(OnigCaptureTreeNode* node)
203 OnigCaptureTreeNode *clone, *child;
205 clone = history_node_new();
206 CHECK_NULL_RETURN(clone);
208 clone->beg = node->beg;
209 clone->end = node->end;
210 for (i = 0; i < node->num_childs; i++) {
211 child = history_tree_clone(node->childs[i]);
212 if (IS_NULL(child)) {
213 history_tree_free(clone);
214 return (OnigCaptureTreeNode* )0;
216 r = history_tree_add_child(clone, child);
218 history_tree_free(child);
219 history_tree_free(clone);
220 return (OnigCaptureTreeNode* )0;
227extern OnigCaptureTreeNode*
230 return region->history_root;
234#ifdef USE_CACHE_MATCH_OPT
238count_num_cache_opcode(
regex_t* reg,
long* num,
long* table_size)
241 UChar* pend = p + reg->used;
244 MemNumType current_mem = -1;
245 long current_mem_num = 0;
257 case OP_EXACT1: p++;
break;
258 case OP_EXACT2: p += 2;
break;
259 case OP_EXACT3: p += 3;
break;
260 case OP_EXACT4: p += 4;
break;
261 case OP_EXACT5: p += 5;
break;
263 GET_LENGTH_INC(len, p); p += len;
break;
264 case OP_EXACTMB2N1: p += 2;
break;
265 case OP_EXACTMB2N2: p += 4;
break;
266 case OP_EXACTMB2N3: p += 6;
break;
268 GET_LENGTH_INC(len, p); p += len * 2;
break;
270 GET_LENGTH_INC(len, p); p += len * 3;
break;
274 GET_LENGTH_INC(mb_len, p);
275 GET_LENGTH_INC(len, p);
281 len = enclen(enc, p, pend); p += len;
break;
283 GET_LENGTH_INC(len, p); p += len;
break;
287 p += SIZE_BITSET;
break;
289 case OP_CCLASS_MB_NOT:
290 GET_LENGTH_INC(len, p); p += len;
break;
292 case OP_CCLASS_MIX_NOT:
294 GET_LENGTH_INC(len, p);
301 case OP_ANYCHAR_STAR:
302 case OP_ANYCHAR_ML_STAR:
303 *num += 1; *table_size += 1;
break;
304 case OP_ANYCHAR_STAR_PEEK_NEXT:
305 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
306 p++; *num += 1; *table_size += 1;
break;
311 case OP_NOT_WORD_BOUND:
317 case OP_NOT_ASCII_WORD:
318 case OP_ASCII_WORD_BOUND:
319 case OP_NOT_ASCII_WORD_BOUND:
320 case OP_ASCII_WORD_BEGIN:
321 case OP_ASCII_WORD_END:
328 case OP_SEMI_END_BUF:
329 case OP_BEGIN_POSITION:
336 case OP_BACKREF_MULTI:
337 case OP_BACKREF_MULTI_IC:
338 case OP_BACKREF_WITH_LEVEL:
341 case OP_MEMORY_START:
342 case OP_MEMORY_START_PUSH:
343 case OP_MEMORY_END_PUSH:
344 case OP_MEMORY_END_PUSH_REC:
346 case OP_MEMORY_END_REC:
347 p += SIZE_MEMNUM;
break;
364 case OP_PUSH_OR_JUMP_EXACT1:
365 case OP_PUSH_IF_PEEK_NEXT:
366 p += SIZE_RELADDR + 1; *num += 1; *table_size += 1;
break;
369 if (current_mem != -1) {
373 GET_MEMNUM_INC(mem, p);
375 if (reg->repeat_range[mem].lower == 0) {
379 reg->repeat_range[mem].base_num = *num;
381 current_mem_num = *num;
384 case OP_REPEAT_INC_NG:
385 GET_MEMNUM_INC(mem, p);
386 if (mem != current_mem) {
391 long inner_num = *num - current_mem_num;
393 repeat_range->inner_num = inner_num;
395 *num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower);
396 if (repeat_range->lower < repeat_range->upper) {
403 case OP_REPEAT_INC_SG:
404 case OP_REPEAT_INC_NG_SG:
407 case OP_NULL_CHECK_START:
408 case OP_NULL_CHECK_END:
409 case OP_NULL_CHECK_END_MEMST:
410 case OP_NULL_CHECK_END_MEMST_PUSH:
411 p += SIZE_MEMNUM;
break;
415 case OP_PUSH_POS_NOT:
417 case OP_PUSH_STOP_BT:
420 case OP_PUSH_LOOK_BEHIND_NOT:
421 case OP_FAIL_LOOK_BEHIND_NOT:
422 case OP_PUSH_ABSENT_POS:
434 case OP_STATE_CHECK_PUSH:
435 case OP_STATE_CHECK_PUSH_OR_JUMP:
437 case OP_STATE_CHECK_ANYCHAR_STAR:
438 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
441 case OP_SET_OPTION_PUSH:
454 *num = NUM_CACHE_OPCODE_FAIL;
458 return ONIGERR_UNDEFINED_BYTECODE;
466 UChar* pend = p + reg->used;
469 MemNumType current_mem = -1;
471 long current_mem_num = 0;
481 case OP_EXACT1: p++;
break;
482 case OP_EXACT2: p += 2;
break;
483 case OP_EXACT3: p += 3;
break;
484 case OP_EXACT4: p += 4;
break;
485 case OP_EXACT5: p += 5;
break;
487 GET_LENGTH_INC(len, p); p += len;
break;
488 case OP_EXACTMB2N1: p += 2;
break;
489 case OP_EXACTMB2N2: p += 4;
break;
490 case OP_EXACTMB2N3: p += 6;
break;
492 GET_LENGTH_INC(len, p); p += len * 2;
break;
494 GET_LENGTH_INC(len, p); p += len * 3;
break;
498 GET_LENGTH_INC(mb_len, p);
499 GET_LENGTH_INC(len, p);
505 len = enclen(enc, p, pend); p += len;
break;
507 GET_LENGTH_INC(len, p); p += len;
break;
511 p += SIZE_BITSET;
break;
513 case OP_CCLASS_MB_NOT:
514 GET_LENGTH_INC(len, p); p += len;
break;
516 case OP_CCLASS_MIX_NOT:
518 GET_LENGTH_INC(len, p);
525 case OP_ANYCHAR_STAR:
526 case OP_ANYCHAR_ML_STAR:
527 table->addr = pbegin;
528 table->num = num - current_mem_num;
529 table->outer_repeat = current_mem;
533 case OP_ANYCHAR_STAR_PEEK_NEXT:
534 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
536 table->addr = pbegin;
537 table->num = num - current_mem_num;
538 table->outer_repeat = current_mem;
546 case OP_NOT_WORD_BOUND:
552 case OP_NOT_ASCII_WORD:
553 case OP_ASCII_WORD_BOUND:
554 case OP_NOT_ASCII_WORD_BOUND:
555 case OP_ASCII_WORD_BEGIN:
556 case OP_ASCII_WORD_END:
563 case OP_SEMI_END_BUF:
564 case OP_BEGIN_POSITION:
571 case OP_BACKREF_MULTI:
572 case OP_BACKREF_MULTI_IC:
573 case OP_BACKREF_WITH_LEVEL:
574 goto unexpected_bytecode_error;
576 case OP_MEMORY_START:
577 case OP_MEMORY_START_PUSH:
578 case OP_MEMORY_END_PUSH:
579 case OP_MEMORY_END_PUSH_REC:
581 case OP_MEMORY_END_REC:
582 p += SIZE_MEMNUM;
break;
594 table->addr = pbegin;
595 table->num = num - current_mem_num;
596 table->outer_repeat = current_mem;
602 case OP_PUSH_OR_JUMP_EXACT1:
603 case OP_PUSH_IF_PEEK_NEXT:
604 p += SIZE_RELADDR + 1;
605 table->addr = pbegin;
606 table->num = num - current_mem_num;
607 table->outer_repeat = current_mem;
613 GET_MEMNUM_INC(mem, p);
615 if (reg->repeat_range[mem].lower == 0) {
616 table->addr = pbegin;
617 table->num = num - current_mem_num;
618 table->outer_repeat = -1;
623 current_mem_num = num;
626 case OP_REPEAT_INC_NG:
627 GET_MEMNUM_INC(mem, p);
629 long inner_num = num - current_mem_num;
631 if (repeat_range->lower < repeat_range->upper) {
632 table->addr = pbegin;
633 table->num = num - current_mem_num;
634 table->outer_repeat = mem;
638 num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower);
643 case OP_REPEAT_INC_SG:
644 case OP_REPEAT_INC_NG_SG:
646 goto unexpected_bytecode_error;
647 case OP_NULL_CHECK_START:
648 case OP_NULL_CHECK_END:
649 case OP_NULL_CHECK_END_MEMST:
650 case OP_NULL_CHECK_END_MEMST_PUSH:
651 p += SIZE_MEMNUM;
break;
655 case OP_PUSH_POS_NOT:
657 case OP_PUSH_STOP_BT:
660 case OP_PUSH_LOOK_BEHIND_NOT:
661 case OP_FAIL_LOOK_BEHIND_NOT:
662 case OP_PUSH_ABSENT_POS:
665 goto unexpected_bytecode_error;
669 goto unexpected_bytecode_error;
672 goto unexpected_bytecode_error;
674 case OP_STATE_CHECK_PUSH:
675 case OP_STATE_CHECK_PUSH_OR_JUMP:
677 case OP_STATE_CHECK_ANYCHAR_STAR:
678 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
679 goto unexpected_bytecode_error;
681 case OP_SET_OPTION_PUSH:
693unexpected_bytecode_error:
694 return ONIGERR_UNEXPECTED_BYTECODE;
697 return ONIGERR_UNDEFINED_BYTECODE;
701count_num_cache_opcode(
regex_t* reg,
long* num,
long* table_size)
703 *num = NUM_CACHE_OPCODE_FAIL;
711 long num = 0, table_size = 0;
712 count_num_cache_opcode(reg, &num, &table_size);
713 return num != NUM_CACHE_OPCODE_FAIL;
721 for (i = 0; i < region->num_regs; i++) {
722 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
724#ifdef USE_CAPTURE_HISTORY
725 history_root_free(region);
732 region->num_regs = n;
734 if (n < ONIG_NREGION)
737 if (region->allocated == 0) {
738 region->beg = (OnigPosition* )
xmalloc(n *
sizeof(OnigPosition));
739 if (region->beg == 0)
740 return ONIGERR_MEMORY;
742 region->end = (OnigPosition* )
xmalloc(n *
sizeof(OnigPosition));
743 if (region->end == 0) {
745 return ONIGERR_MEMORY;
748 region->allocated = n;
750 else if (region->allocated < n) {
753 region->allocated = 0;
754 tmp = (OnigPosition* )
xrealloc(region->beg, n *
sizeof(OnigPosition));
758 return ONIGERR_MEMORY;
761 tmp = (OnigPosition* )
xrealloc(region->end, n *
sizeof(OnigPosition));
765 return ONIGERR_MEMORY;
769 region->allocated = n;
776onig_region_resize_clear(
OnigRegion* region,
int n)
780 r = onig_region_resize(region, n);
781 if (r != 0)
return r;
782 onig_region_clear(region);
787onig_region_set(
OnigRegion* region,
int at,
int beg,
int end)
789 if (at < 0)
return ONIGERR_INVALID_ARGUMENT;
791 if (at >= region->allocated) {
792 int r = onig_region_resize(region, at + 1);
796 region->beg[at] = beg;
797 region->end[at] = end;
804 region->num_regs = 0;
805 region->allocated = 0;
806 region->beg = (OnigPosition* )0;
807 region->end = (OnigPosition* )0;
808#ifdef USE_CAPTURE_HISTORY
809 region->history_root = (OnigCaptureTreeNode* )0;
828 if (r->allocated > 0) {
829 if (r->beg)
xfree(r->beg);
830 if (r->end)
xfree(r->end);
833#ifdef USE_CAPTURE_HISTORY
834 history_root_free(r);
836 if (free_self)
xfree(r);
843#define RREGC_SIZE (sizeof(int) * from->num_regs)
846 if (to == from)
return;
848 r = onig_region_resize(to, from->num_regs);
851 for (i = 0; i < from->num_regs; i++) {
852 to->beg[i] = from->beg[i];
853 to->end[i] = from->end[i];
855 to->num_regs = from->num_regs;
857#ifdef USE_CAPTURE_HISTORY
858 history_root_free(to);
860 if (IS_NOT_NULL(from->history_root)) {
861 to->history_root = history_tree_clone(from->history_root);
/* Marker for "no stack entry". STACK_PUSH_MEM_START stores it in
 * mem_end_stk[mnum], and the NULL_CHECK_MEMST macros compare u.mem.end
 * against it.
 * NOTE(review): the replacement text is an unparenthesized -1. */
868#define INVALID_STACK_INDEX -1
/* Stack entry type codes (stored in stk->type).
 * The three values below are the only ones with a non-zero low byte, so they
 * are exactly the types selected by STK_MASK_POP_USED. */
872#define STK_ALT 0x0001
873#define STK_LOOK_BEHIND_NOT 0x0002
874#define STK_POS_NOT 0x0003
/* Types with a zero low byte. STK_MEM_END and STK_MEM_END_MARK additionally
 * carry bit 0x8000 (see STK_MASK_MEM_END_OR_MARK); STK_STATE_CHECK_MARK,
 * STK_NULL_CHECK_START and STK_NULL_CHECK_END carry bit 0x1000 (see
 * STK_MASK_TO_VOID_TARGET). */
876#define STK_MEM_START 0x0100
877#define STK_MEM_END 0x8200
878#define STK_REPEAT_INC 0x0300
879#define STK_STATE_CHECK_MARK 0x1000
881#define STK_NULL_CHECK_START 0x3000
882#define STK_NULL_CHECK_END 0x5000
883#define STK_MEM_END_MARK 0x8400
884#define STK_POS 0x0500
885#define STK_STOP_BT 0x0600
886#define STK_REPEAT 0x0700
887#define STK_CALL_FRAME 0x0800
888#define STK_RETURN 0x0900
889#define STK_VOID 0x0a00
890#define STK_ABSENT_POS 0x0b00
891#define STK_ABSENT 0x0c00
/* Low-byte mask: STACK_POP tests each entry against it and breaks on a
 * non-zero result (STK_ALT, STK_LOOK_BEHIND_NOT, STK_POS_NOT). */
894#define STK_MASK_POP_USED 0x00ff
/* Used by IS_TO_VOID_TARGET: non-zero for any type with a non-zero low byte
 * or with bit 0x1000 set. */
895#define STK_MASK_TO_VOID_TARGET 0x10ff
/* Bit 15: set only in STK_MEM_END and STK_MEM_END_MARK among the types above;
 * STACK_GET_MEM_START tests entries against it. */
896#define STK_MASK_MEM_END_OR_MARK 0x8000
898#ifdef USE_CACHE_MATCH_OPT
899#define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa) do {\
900 (msa).enable_cache_match_opt = 0;\
902 (msa).num_cache_opcode = NUM_CACHE_OPCODE_UNINIT;\
903 (msa).num_cache_table = 0;\
904 (msa).cache_index_table = (OnigCacheIndex *)0;\
905 (msa).match_cache = (uint8_t *)0;\
907#define MATCH_ARG_FREE_CACHE_MATCH_OPT(msa) do {\
908 if ((msa).cache_index_table) xfree((msa).cache_index_table);\
909 if ((msa).match_cache) xfree((msa).match_cache);\
/* Empty replacements: with these definitions the cache-match-optimization
 * init/free hooks expand to nothing. Presumably the branch taken when
 * USE_CACHE_MATCH_OPT is not defined -- TODO confirm against the enclosing
 * conditional. */
912#define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa)
913#define MATCH_ARG_FREE_CACHE_MATCH_OPT(msa)
916#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
917# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
918 (msa).stack_p = (void* )0;\
919 (msa).options = (arg_option);\
920 (msa).region = (arg_region);\
921 (msa).start = (arg_start);\
922 (msa).gpos = (arg_gpos);\
923 (msa).best_len = ONIG_MISMATCH;\
926 MATCH_ARG_INIT_CACHE_MATCH_OPT(msa);\
929# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
930 (msa).stack_p = (void* )0;\
931 (msa).options = (arg_option);\
932 (msa).region = (arg_region);\
933 (msa).start = (arg_start);\
934 (msa).gpos = (arg_gpos);\
937 MATCH_ARG_INIT_CACHE_MATCH_OPT(msa);\
941#ifdef USE_COMBINATION_EXPLOSION_CHECK
/* Size threshold for the state-check buffer: STATE_CHECK_BUFF_INIT obtains
 * the buffer with xmalloc when size >= this value and with xalloca otherwise;
 * MATCH_ARG_FREE calls xfree on the buffer only when the recorded size is
 * >= this value. */
943# define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
945# define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
946 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
947 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
948 offset = ((offset) * (state_num)) >> 3;\
949 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
950 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
951 (msa).state_check_buff = (void* )xmalloc(size);\
952 CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
955 (msa).state_check_buff = (void* )xalloca(size);\
956 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
957 (size_t )(size - (offset))); \
958 (msa).state_check_buff_size = size;\
961 (msa).state_check_buff = (void* )0;\
962 (msa).state_check_buff_size = 0;\
966 (msa).state_check_buff = (void* )0;\
967 (msa).state_check_buff_size = 0;\
971# define MATCH_ARG_FREE(msa) do {\
972 if ((msa).stack_p) xfree((msa).stack_p);\
973 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
974 if ((msa).state_check_buff) xfree((msa).state_check_buff);\
976 MATCH_ARG_FREE_CACHE_MATCH_OPT(msa);\
979# define MATCH_ARG_FREE(msa) do {\
980 if ((msa).stack_p) xfree((msa).stack_p);\
981 MATCH_ARG_FREE_CACHE_MATCH_OPT(msa);\
/* Threshold on ptr_num in STACK_INIT: above it the OnigStackIndex array is
 * allocated with xmalloc; otherwise xalloca is used. */
987#define MAX_PTR_NUM 100
989#define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num) do {\
990 if (ptr_num > MAX_PTR_NUM) {\
991 alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
992 heap_addr = alloc_addr;\
994 stk_alloc = (OnigStackType* )(msa->stack_p);\
995 stk_base = stk_alloc;\
997 stk_end = stk_base + msa->stack_n;\
999 stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
1000 stk_base = stk_alloc;\
1002 stk_end = stk_base + (stack_num);\
1004 } else if (msa->stack_p) {\
1005 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
1007 stk_alloc = (OnigStackType* )(msa->stack_p);\
1008 stk_base = stk_alloc;\
1010 stk_end = stk_base + msa->stack_n;\
1013 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
1014 + sizeof(OnigStackType) * (stack_num));\
1016 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
1017 stk_base = stk_alloc;\
1019 stk_end = stk_base + (stack_num);\
1023#define STACK_SAVE do{\
1024 if (stk_base != stk_alloc) {\
1025 msa->stack_p = stk_base;\
1026 msa->stack_n = stk_end - stk_base; \
/* File-scope limit for match-stack growth, initialised to
 * DEFAULT_MATCH_STACK_LIMIT_SIZE. Returned by
 * onig_get_match_stack_limit_size() and overwritten by
 * onig_set_match_stack_limit_size(). The stack-growth code copies it into
 * limit_size and applies it only when non-zero
 * (`limit_size != 0 && n > limit_size`), so 0 means no limit there. */
1030static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
1033onig_get_match_stack_limit_size(
void)
1035 return MatchStackLimitSize;
1039onig_set_match_stack_limit_size(
unsigned int size)
1041 MatchStackLimitSize = size;
1052 stk_base = *arg_stk_base;
1053 stk_end = *arg_stk_end;
1056 n = stk_end - stk_base;
1057 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
1061 return ONIGERR_MEMORY;
1067 unsigned int limit_size = MatchStackLimitSize;
1069 if (limit_size != 0 && n > limit_size) {
1070 if ((
unsigned int )(stk_end - stk_base) == limit_size)
1071 return ONIGERR_MATCH_STACK_LIMIT_OVER;
1078 return ONIGERR_MEMORY;
1081 *arg_stk = x + (stk - stk_base);
1083 *arg_stk_end = x + n;
1087#define STACK_ENSURE(n) do {\
1088 if (stk_end - stk < (n)) {\
1089 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
1092 if (xmalloc_base) xfree(xmalloc_base);\
/* Convert between a stack index and an entry pointer, relative to stk_base.
 * stk_base is not a parameter; it is taken from the scope where the macro is
 * expanded. GET_STACK_INDEX is the inverse of STACK_AT. */
1098#define STACK_AT(index) (stk_base + (index))
1099#define GET_STACK_INDEX(stk) ((stk) - stk_base)
1101#define STACK_PUSH_TYPE(stack_type) do {\
1103 stk->type = (stack_type);\
1104 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
/* Non-zero when the entry's type has any bit of STK_MASK_TO_VOID_TARGET set.
 * STACK_POS_END and STACK_STOP_BT_END rewrite such entries to STK_VOID. */
1108#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
1110#ifdef USE_COMBINATION_EXPLOSION_CHECK
/* Bit position for the pair (string position s, state number snum):
 * (offset of s from str) * num_comb_exp_check + (snum - 1).
 * str and num_comb_exp_check come from the expansion site. The users below
 * split the result x into a byte index (x/8) and a bit (x%8) of
 * state_check_buff. */
1111# define STATE_CHECK_POS(s,snum) \
1112 (((s) - str) * num_comb_exp_check + ((snum) - 1))
1113# define STATE_CHECK_VAL(v,snum) do {\
1114 if (state_check_buff != NULL) {\
1115 ptrdiff_t x = STATE_CHECK_POS(s,snum);\
1116 (v) = state_check_buff[x/8] & (1<<(x%8));\
1122# define ELSE_IF_STATE_CHECK_MARK(stk) \
1123 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
1124 ptrdiff_t x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
1125 state_check_buff[x/8] |= (1<<(x%8)); \
1128# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
1130 stk->type = (stack_type);\
1131 stk->u.state.pcode = (pat);\
1132 stk->u.state.pstr = (s);\
1133 stk->u.state.pstr_prev = (sprev);\
1134 stk->u.state.state_check = 0;\
1135 stk->u.state.pkeep = (keep);\
1139# define STACK_PUSH_ENSURED(stack_type,pat) do {\
1140 stk->type = (stack_type);\
1141 stk->u.state.pcode = (pat);\
1142 stk->u.state.state_check = 0;\
1146# define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
1148 stk->type = STK_ALT;\
1149 stk->u.state.pcode = (pat);\
1150 stk->u.state.pstr = (s);\
1151 stk->u.state.pstr_prev = (sprev);\
1152 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
1153 stk->u.state.pkeep = (keep);\
1157# define STACK_PUSH_STATE_CHECK(s,snum) do {\
1158 if (state_check_buff != NULL) {\
1160 stk->type = STK_STATE_CHECK_MARK;\
1161 stk->u.state.pstr = (s);\
1162 stk->u.state.state_check = (snum);\
1169# define ELSE_IF_STATE_CHECK_MARK(stk)
1171# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
1173 stk->type = (stack_type);\
1174 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1175 stk->u.state.pcode = (pat);\
1176 stk->u.state.pstr = (s);\
1177 stk->u.state.pstr_prev = (sprev);\
1178 stk->u.state.pkeep = (keep);\
1182# define STACK_PUSH_ENSURED(stack_type,pat) do {\
1183 stk->type = (stack_type);\
1184 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1185 stk->u.state.pcode = (pat);\
/* Convenience wrappers that fix the stack entry type.
 * The ALT, POS, POS_NOT and LOOK_BEHIND_NOT forms expand to STACK_PUSH with
 * the corresponding STK_* type; STACK_PUSH_POS passes NULL_UCHARP as the
 * pattern argument. STACK_PUSH_ABSENT and STACK_PUSH_STOP_BT take no
 * arguments and expand to STACK_PUSH_TYPE. */
1190#define STACK_PUSH_ALT(pat,s,sprev,keep) STACK_PUSH(STK_ALT,pat,s,sprev,keep)
1191#define STACK_PUSH_POS(s,sprev,keep) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
1192#define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
1193#define STACK_PUSH_ABSENT STACK_PUSH_TYPE(STK_ABSENT)
1194#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
1195#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
1196 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)
1198#ifdef USE_CACHE_MATCH_OPT
1200#define DO_CACHE_MATCH_OPT(reg,stk,repeat_stk,enable,p,num_cache_table,num_cache_size,table,pos,match_cache) do {\
1202 long cache_index = find_cache_index_table((reg), (stk), (repeat_stk), (table), (num_cache_table), (p));\
1203 if (cache_index >= 0) {\
1204 long key = (num_cache_size) * (long)(pos) + cache_index;\
1205 long index = key >> 3;\
1206 long mask = 1 << (key & 7);\
1207 if ((match_cache)[index] & mask) {\
1210 (match_cache)[index] |= mask;\
1218 long l = 0, r = num_cache_table - 1, m = 0;
1223 int is_inc = *p == OP_REPEAT_INC || *p == OP_REPEAT_INC_NG;
1227 if (table[m].addr == p)
break;
1228 if (table[m].addr < p) l = m + 1;
1232 if (!(0 <= m && m < num_cache_table && table[m].addr == p)) {
1237 if (item->outer_repeat == -1) {
1241 range = ®->repeat_range[item->outer_repeat];
1243 stkp = &stk[repeat_stk[item->outer_repeat]];
1244 count = is_inc ? stkp->u.repeat.count - 1 : stkp->u.repeat.count;
1246 if (count < range->lower) {
1247 return range->base_num + range->inner_num * count + item->num;
1250 if (range->upper == 0x7fffffff) {
1251 return range->base_num + range->inner_num * range->lower + (is_inc ? 0 : 1) + item->num;
1254 return range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (count - range->lower) + item->num;
1258reset_match_cache(
regex_t* reg, UChar* pbegin, UChar* pend,
long pos, uint8_t* match_cache,
OnigCacheIndex *table,
long num_cache_size,
long num_cache_table)
1260 long l = 0, r = num_cache_table - 1, m1 = 0, m2 = 0;
1261 int is_inc = *pend == OP_REPEAT_INC || *pend == OP_REPEAT_INC_NG;
1267 if (table[m1].addr == pbegin)
break;
1268 if (table[m1].addr < pbegin) l = m1 + 1;
1272 l = 0, r = num_cache_table - 1;
1275 if (table[m2].addr == pend)
break;
1276 if (table[m2].addr < pend) l = m2 + 1;
1280 if (table[m1].addr < pbegin && m1 + 1 < num_cache_table) m1++;
1281 if (table[m2].addr > pend && m2 - 1 > 0) m2--;
1286 if (item1->outer_repeat < 0) k1 = item1->num;
1287 else k1 = reg->repeat_range[item1->outer_repeat].base_num + item1->num;
1289 if (item2->outer_repeat < 0) k2 = item2->num;
1292 if (range->upper == 0x7fffffff) k2 = range->base_num + range->inner_num * range->lower + (is_inc ? 0 : 1) + item2->num;
1293 else k2 = range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (range->upper - range->lower - (is_inc ? 1 : 0)) + item2->num;
1296 base = pos * num_cache_size;
1300 if ((k1 >> 3) == (k2 >> 3)) {
1301 match_cache[k1 >> 3] &= (((1 << (8 - (k2 & 7) - 1)) - 1) << ((k2 & 7) + 1)) | ((1 << (k1 & 7)) - 1);
1305 match_cache[k1 >> 3] &= (1 << ((k1 & 7) - 1)) - 1;
1308 if (i < (k2 >> 3)) {
1309 xmemset(&match_cache[i], 0, (k2 >> 3) - i);
1311 match_cache[k2 >> 3] &= (((1 << (8 - (k2 & 7) - 1)) - 1) << ((k2 & 7) + 1));
1318#define DO_CACHE_MATCH_OPT(reg,stk,repeat_stk,enable,p,num_cache_table,num_cache_size,table,pos,match_cache)
1321#define STACK_PUSH_REPEAT(id, pat) do {\
1323 stk->type = STK_REPEAT;\
1324 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1325 stk->u.repeat.num = (id);\
1326 stk->u.repeat.pcode = (pat);\
1327 stk->u.repeat.count = 0;\
1331#define STACK_PUSH_REPEAT_INC(sindex) do {\
1333 stk->type = STK_REPEAT_INC;\
1334 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1335 stk->u.repeat_inc.si = (sindex);\
1339#define STACK_PUSH_MEM_START(mnum, s) do {\
1341 stk->type = STK_MEM_START;\
1342 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1343 stk->u.mem.num = (mnum);\
1344 stk->u.mem.pstr = (s);\
1345 stk->u.mem.start = mem_start_stk[mnum];\
1346 stk->u.mem.end = mem_end_stk[mnum];\
1347 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
1348 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
1352#define STACK_PUSH_MEM_END(mnum, s) do {\
1354 stk->type = STK_MEM_END;\
1355 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1356 stk->u.mem.num = (mnum);\
1357 stk->u.mem.pstr = (s);\
1358 stk->u.mem.start = mem_start_stk[mnum];\
1359 stk->u.mem.end = mem_end_stk[mnum];\
1360 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
1364#define STACK_PUSH_MEM_END_MARK(mnum) do {\
1366 stk->type = STK_MEM_END_MARK;\
1367 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1368 stk->u.mem.num = (mnum);\
1372#define STACK_GET_MEM_START(mnum, k) do {\
1375 while (k > stk_base) {\
1377 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
1378 && k->u.mem.num == (mnum)) {\
1381 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
1382 if (level == 0) break;\
1388#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
1391 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
1392 if (level == 0) (start) = k->u.mem.pstr;\
1395 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
1398 (end) = k->u.mem.pstr;\
1406#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
1408 stk->type = STK_NULL_CHECK_START;\
1409 stk->null_check = (OnigStackIndex)(stk - stk_base);\
1410 stk->u.null_check.num = (cnum);\
1411 stk->u.null_check.pstr = (s);\
1415#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
1417 stk->type = STK_NULL_CHECK_END;\
1418 stk->null_check = (OnigStackIndex)(stk - stk_base);\
1419 stk->u.null_check.num = (cnum);\
1423#define STACK_PUSH_CALL_FRAME(pat) do {\
1425 stk->type = STK_CALL_FRAME;\
1426 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1427 stk->u.call_frame.ret_addr = (pat);\
1431#define STACK_PUSH_RETURN do {\
1433 stk->type = STK_RETURN;\
1434 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1438#define STACK_PUSH_ABSENT_POS(start, end) do {\
1440 stk->type = STK_ABSENT_POS;\
1441 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1442 stk->u.absent_pos.abs_pstr = (start);\
1443 stk->u.absent_pos.end_pstr = (end);\
1449# define STACK_BASE_CHECK(p, at) \
1450 if ((p) < stk_base) {\
1451 fprintf(stderr, "at %s\n", at);\
1455# define STACK_BASE_CHECK(p, at)
1458#define STACK_POP_ONE do {\
1460 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
1463#define STACK_POP do {\
1464 switch (pop_level) {\
1465 case STACK_POP_LEVEL_FREE:\
1468 STACK_BASE_CHECK(stk, "STACK_POP"); \
1469 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1470 ELSE_IF_STATE_CHECK_MARK(stk);\
1473 case STACK_POP_LEVEL_MEM_START:\
1476 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
1477 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1478 else if (stk->type == STK_MEM_START) {\
1479 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1480 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1482 ELSE_IF_STATE_CHECK_MARK(stk);\
1488 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
1489 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1490 else if (stk->type == STK_MEM_START) {\
1491 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1492 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1494 else if (stk->type == STK_REPEAT_INC) {\
1495 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1497 else if (stk->type == STK_MEM_END) {\
1498 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1499 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1501 ELSE_IF_STATE_CHECK_MARK(stk);\
1507#define STACK_POP_TIL_POS_NOT do {\
1510 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
1511 if (stk->type == STK_POS_NOT) break;\
1512 else if (stk->type == STK_MEM_START) {\
1513 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1514 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1516 else if (stk->type == STK_REPEAT_INC) {\
1517 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1519 else if (stk->type == STK_MEM_END) {\
1520 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1521 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1523 ELSE_IF_STATE_CHECK_MARK(stk);\
1527#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
1530 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
1531 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
1532 else if (stk->type == STK_MEM_START) {\
1533 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1534 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1536 else if (stk->type == STK_REPEAT_INC) {\
1537 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1539 else if (stk->type == STK_MEM_END) {\
1540 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1541 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1543 ELSE_IF_STATE_CHECK_MARK(stk);\
1547#define STACK_POP_TIL_ABSENT do {\
1550 STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
1551 if (stk->type == STK_ABSENT) break;\
1552 else if (stk->type == STK_MEM_START) {\
1553 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1554 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1556 else if (stk->type == STK_REPEAT_INC) {\
1557 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1559 else if (stk->type == STK_MEM_END) {\
1560 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1561 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1563 ELSE_IF_STATE_CHECK_MARK(stk);\
1567#define STACK_POP_ABSENT_POS(start, end) do {\
1569 STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
1570 (start) = stk->u.absent_pos.abs_pstr;\
1571 (end) = stk->u.absent_pos.end_pstr;\
1574#define STACK_POS_END(k) do {\
1578 STACK_BASE_CHECK(k, "STACK_POS_END"); \
1579 if (IS_TO_VOID_TARGET(k)) {\
1580 k->type = STK_VOID;\
1582 else if (k->type == STK_POS) {\
1583 k->type = STK_VOID;\
1589#define STACK_STOP_BT_END do {\
1590 OnigStackType *k = stk;\
1593 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
1594 if (IS_TO_VOID_TARGET(k)) {\
1595 k->type = STK_VOID;\
1597 else if (k->type == STK_STOP_BT) {\
1598 k->type = STK_VOID;\
1604#define STACK_NULL_CHECK(isnull,id,s) do {\
1605 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1608 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
1609 if (k->type == STK_NULL_CHECK_START) {\
1610 if (k->u.null_check.num == (id)) {\
1611 (isnull) = (k->u.null_check.pstr == (s));\
1618#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
1620 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1623 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
1624 if (k->type == STK_NULL_CHECK_START) {\
1625 if (k->u.null_check.num == (id)) {\
1627 (isnull) = (k->u.null_check.pstr == (s));\
1633 else if (k->type == STK_NULL_CHECK_END) {\
1639#define STACK_NULL_CHECK_MEMST(isnull,ischange,id,s,reg) do {\
1640 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1643 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
1644 if (k->type == STK_NULL_CHECK_START) {\
1645 if (k->u.null_check.num == (id)) {\
1646 if (k->u.null_check.pstr != (s)) {\
1654 if (k->type == STK_MEM_START) {\
1655 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1656 (isnull) = 0; (ischange) = 1; break;\
1658 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1659 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1661 endp = (UChar* )k->u.mem.end;\
1662 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1663 (isnull) = 0; (ischange) = 1; break;\
1665 else if (endp != s) {\
1678#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
1680 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1683 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
1684 if (k->type == STK_NULL_CHECK_START) {\
1685 if (k->u.null_check.num == (id)) {\
1687 if (k->u.null_check.pstr != (s)) {\
1695 if (k->type == STK_MEM_START) {\
1696 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1697 (isnull) = 0; break;\
1699 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1700 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1702 endp = (UChar* )k->u.mem.end;\
1703 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1704 (isnull) = 0; break;\
1706 else if (endp != s) {\
1720 else if (k->type == STK_NULL_CHECK_END) {\
1721 if (k->u.null_check.num == (id)) level++;\
1726#define STACK_GET_REPEAT(id, k) do {\
1731 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
1732 if (k->type == STK_REPEAT) {\
1734 if (k->u.repeat.num == (id)) {\
1739 else if (k->type == STK_CALL_FRAME) level--;\
1740 else if (k->type == STK_RETURN) level++;\
1744#define STACK_RETURN(addr) do {\
1746 OnigStackType* k = stk;\
1749 STACK_BASE_CHECK(k, "STACK_RETURN"); \
1750 if (k->type == STK_CALL_FRAME) {\
1752 (addr) = k->u.call_frame.ret_addr;\
1757 else if (k->type == STK_RETURN)\
1763#define STRING_CMP(s1,s2,len) do {\
1764 while (len-- > 0) {\
1765 if (*s1++ != *s2++) goto fail;\
1769#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
1770 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
/*
 * Case-insensitive comparison helper.
 *
 *   enc            encoding used for case folding
 *   case_fold_flag folding options forwarded to ONIGENC_MBC_CASE_FOLD
 *   s1             first operand
 *   ps2            address of the second operand pointer
 *   mblen          length of the compared region (OnigDistance)
 *   text_end       end limit handed to the case-fold macro for both operands
 *
 * Returns 0 as soon as the folded forms differ in length or in any byte.
 * Callers in this file test the result against 0 to detect a mismatch.
 * NOTE(review): presumably a non-zero value is returned and *ps2 is
 * advanced on success -- confirm against the full function body.
 */
1774static int string_cmp_ic(
OnigEncoding enc,
int case_fold_flag,
1775 UChar* s1, UChar** ps2, OnigDistance mblen,
const UChar* text_end)
/* Scratch buffers that receive the case-folded bytes of each operand. */
1777 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1778 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1779 UChar *p1, *p2, *end1, *s2;
/* s1/s2 are passed by address, so the fold macro may advance them. */
1785 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1);
1786 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2);
/* Folded forms of different length can never be equal. */
1787 if (len1 != len2)
return 0;
/* Compare the folded bytes one by one. */
1790 while (len1-- > 0) {
1791 if (*p1 != *p2)
return 0;
/*
 * STRING_CMP_VALUE(s1, s2, len, is_fail)
 *
 * Like STRING_CMP, but reports a mismatch through `is_fail` (set to 1 on
 * the first differing byte) instead of jumping to `fail`.
 * s1, s2 and len are modified by the expansion.
 */
1801#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1803 while (len-- > 0) {\
1804 if (*s1++ != *s2++) {\
1805 is_fail = 1; break;\
/*
 * STRING_CMP_VALUE_IC(case_fold_flag, s1, ps2, len, text_end, is_fail)
 *
 * Case-insensitive counterpart of STRING_CMP_VALUE: delegates to
 * string_cmp_ic() with the `encode` variable of the expanding scope and
 * treats a result of 0 as the mismatch case.
 */
1810#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
1811 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
/*
 * Position predicates and bounds checks.  They all rely on variables of the
 * expanding scope: `str`/`end` (subject bounds), `s` (current position)
 * and, in the first variant below, `right_range`.
 */
1818#define IS_EMPTY_STR (str == end)
1819#define ON_STR_BEGIN(s) ((s) == str)
1820#define ON_STR_END(s) ((s) == end)
/*
 * DATA_ENSURE_CHECK1       at least one byte is available at s
 * DATA_ENSURE_CHECK(n)     n bytes are available at s
 * DATA_ENSURE(n)           goto fail unless n bytes are available
 * DATA_ENSURE_CONTINUE(n)  `continue` unless n bytes are available
 * ABSENT_END_POS           the limit used by the checks above
 *
 * With USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE the limit is
 * right_range; the second set of definitions uses end instead.
 * NOTE(review): DATA_ENSURE and DATA_ENSURE_CONTINUE expand to a bare `if`
 * statement, so an `else` written directly after them would bind to it.
 */
1821#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1822# define DATA_ENSURE_CHECK1 (s < right_range)
1823# define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1824# define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1825# define DATA_ENSURE_CONTINUE(n) if (s + (n) > right_range) continue
1826# define ABSENT_END_POS right_range
1828# define DATA_ENSURE_CHECK1 (s < end)
1829# define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1830# define DATA_ENSURE(n) if (s + (n) > end) goto fail
1831# define DATA_ENSURE_CONTINUE(n) if (s + (n) > end) continue
1832# define ABSENT_END_POS end
1836#ifdef USE_CAPTURE_HISTORY
/*
 * Builds the capture-history tree below `node` from the match stack.
 *
 * Stack entries are examined while k < stk_top.  For a STK_MEM_START entry
 * whose group number n is at most ONIG_MAX_CAPTURE_HISTORY_GROUP and has
 * its bit set in reg->capture_history, a child node is allocated, its
 * `beg` offset (relative to str) is recorded, the child is attached to
 * `node`, and the function recurses with the child as the new parent.
 * A STK_MEM_END entry whose group number equals node->group records
 * node->end.
 *
 * A non-zero result of the recursive call is returned to the caller
 * unchanged; an allocation failure is reported through
 * CHECK_NULL_RETURN_MEMERR.
 */
1838make_capture_history_tree(OnigCaptureTreeNode* node,
OnigStackType** kp,
1842 OnigCaptureTreeNode* child;
1845 while (k < stk_top) {
1846 if (k->type == STK_MEM_START) {
1848 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1849 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1850 child = history_node_new();
1851 CHECK_NULL_RETURN_MEMERR(child);
/* Offsets stored in the tree are relative to the subject start. */
1853 child->beg = k->u.mem.pstr - str;
1854 r = history_tree_add_child(node, child);
1856 history_tree_free(child);
/* Descend: captures nested in this group become children of `child`. */
1860 r = make_capture_history_tree(child, kp, stk_top, str, reg);
1861 if (r != 0)
return r;
1864 child->end = k->u.mem.pstr - str;
1867 else if (k->type == STK_MEM_END) {
1868 if (k->u.mem.num == node->group) {
1869 node->end = k->u.mem.pstr - str;
1881#ifdef USE_BACKREF_WITH_LEVEL
/*
 * Membership test: reads `num` group numbers from the encoded list at
 * `memp` (one per GET_MEMNUM_INC) and returns 1 as soon as one of them
 * equals `mem`.
 * NOTE(review): presumably returns 0 when no entry matches -- confirm
 * against the full function body.
 */
1883mem_is_in_memp(
int mem,
int num, UChar* memp)
1888 for (i = 0; i < num; i++) {
1889 GET_MEMNUM_INC(m, memp);
1890 if (mem == (
int )m)
return 1;
/*
 * Back-reference match restricted to a given call-nesting level.
 *
 *   reg             compiled pattern (supplies the encoding for the
 *                   case-insensitive comparison)
 *   ignore_case     non-zero selects comparison through string_cmp_ic()
 *   case_fold_flag  folding options for the case-insensitive comparison
 *   nest            level at which capture entries are considered
 *   mem_num, memp   number of candidate groups and their encoded list
 *   s               in/out: current position in the subject
 *   send            end of the subject
 *
 * Stack entries are examined while k >= stk_base.  STK_CALL_FRAME and
 * STK_RETURN entries are handled separately from capture entries, which
 * are only looked at when `level == nest`.  A STK_MEM_END entry of a
 * listed group records `pend`; a STK_MEM_START entry of a listed group
 * then supplies `pstart`, and the captured text [pstart, pend) is compared
 * with the subject at *s.
 *
 * Returns 0 when the captured text is longer than the remaining subject or
 * when a byte differs.  The caller in match_at treats a non-zero result as
 * a successful match.
 */
1895static int backref_match_at_nested_level(
regex_t* reg,
1897 int ignore_case,
int case_fold_flag,
1898 int nest,
int mem_num, UChar* memp, UChar** s,
const UChar* send)
1900 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1907 while (k >= stk_base) {
1908 if (k->type == STK_CALL_FRAME) {
1911 else if (k->type == STK_RETURN) {
1914 else if (level == nest) {
1915 if (k->type == STK_MEM_START) {
1916 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1917 pstart = k->u.mem.pstr;
/* Only usable once the matching end position has been seen. */
1918 if (pend != NULL_UCHARP) {
/* Captured text longer than what is left of the subject: no match. */
1919 if (pend - pstart > send - *s)
return 0;
1923 if (ignore_case != 0) {
1924 if (string_cmp_ic(reg->enc, case_fold_flag,
1925 pstart, &ss, pend - pstart, send) == 0)
1930 if (*p++ != *ss++)
return 0;
1939 else if (k->type == STK_MEM_END) {
1940 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1941 pend = k->u.mem.pstr;
/*
 * Opcode statistics support (ONIG_DEBUG_STATISTICS builds only).
 * GETTIME(t) samples a timestamp into t and TIMEDIFF(te, ts) yields the
 * elapsed amount between two samples.  Three backends are provided.
 */
1953#ifdef ONIG_DEBUG_STATISTICS
/* Windows: performance counter; ticks are scaled by 1000000 / freq. */
1956# include <windows.h>
1957static LARGE_INTEGER ts, te, freq;
1958# define GETTIME(t) QueryPerformanceCounter(&(t))
1959# define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \
1960 * 1000000 / freq.QuadPart)
/* gettimeofday(): difference expressed in microseconds. */
1963# define USE_TIMEOFDAY
1965# ifdef USE_TIMEOFDAY
1966# ifdef HAVE_SYS_TIME_H
1967# include <sys/time.h>
1969# ifdef HAVE_UNISTD_H
1973# define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
1974# define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1975 (((te).tv_sec - (ts).tv_sec)*1000000))
/* times(): difference of tms_utime, i.e. in the unit times() reports. */
1977# ifdef HAVE_SYS_TIMES_H
1978# include <sys/times.h>
1980static struct tms ts, te;
1981# define GETTIME(t) times(&(t))
1982# define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
/*
 * Statistics state; the three arrays are indexed by opcode in MOP_IN and
 * MOP_OUT:
 *   OpCounter      number of times each opcode was entered
 *   OpPrevCounter  for each opcode, how often it was the current opcode
 *                  (OpCurr) when OpPrevTarget was entered
 *   OpTime         accumulated TIMEDIFF per opcode
 *   OpCurr         opcode currently being accounted
 *   OpPrevTarget   opcode whose predecessors are being counted
 *   MaxStackDepth  largest stk - stk_base seen by STACK_INC
 */
1987static int OpCounter[256];
1988static int OpPrevCounter[256];
1989static unsigned long OpTime[256];
1990static int OpCurr = OP_FINISH;
1991static int OpPrevTarget = OP_FAIL;
1992static int MaxStackDepth = 0;
/*
 * MOP_IN(opcode): accounting on entry to an opcode handler.  When `opcode`
 * equals OpPrevTarget, the counter of the opcode recorded in OpCurr is
 * bumped in OpPrevCounter; OpCounter[opcode] is always incremented.
 * MOP_OUT: adds TIMEDIFF(te, ts) to OpTime[OpCurr].
 * NOTE(review): `opcode` is used unparenthesized in the expansion; pass a
 * simple expression.
 */
1994# define MOP_IN(opcode) do {\
1995 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1997 OpCounter[opcode]++;\
2001# define MOP_OUT do {\
2003 OpTime[OpCurr] += TIMEDIFF(te, ts);\
/*
 * Resets all 256 slots of OpCounter, OpPrevCounter and OpTime to zero.
 * Also calls QueryPerformanceFrequency() to fill `freq`, which the
 * Windows TIMEDIFF definition divides by.
 */
2007onig_statistics_init(
void)
2010 for (i = 0; i < 256; i++) {
2011 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
2015 QueryPerformanceFrequency(&freq);
/*
 * Writes the collected statistics to `f`: a header line, one line per
 * OnigOpInfo entry (the table is read until an entry with a negative
 * opcode), and finally the maximum stack depth.
 * NOTE(review): the counters are indexed by the table position i rather
 * than by OnigOpInfo[i].opcode; this assumes OnigOpInfo is laid out in
 * opcode order -- confirm.
 */
2020onig_print_statistics(
FILE* f)
2023 fprintf(f,
" count prev time\n");
2024 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
2025 fprintf(f,
"%8d: %8d: %10lu: %s\n",
2026 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
2028 fprintf(f,
"\nmax stack depth: %d\n", MaxStackDepth);
/*
 * STACK_INC has two definitions.  The first, multi-line one records the
 * high-water mark of stk - stk_base in MaxStackDepth; the second is a plain
 * stk++.  MOP_IN(opcode) is likewise given an empty definition here.
 * Which one applies is decided by conditionals that are not repeated in
 * this comment.
 */
2031# define STACK_INC do {\
2033 if (stk - stk_base > MaxStackDepth) \
2034 MaxStackDepth = stk - stk_base;\
2038# define STACK_INC stk++
2040# define MOP_IN(opcode)
2045#ifdef ONIG_DEBUG_MATCH
/*
 * Debug helper (ONIG_DEBUG_MATCH builds only): maps a match-stack entry
 * type (STK_*) to a short label.  The OPCODE_EXEC_HOOK trace in match_at
 * prints the label for the entry just below the stack top.  Types without
 * a dedicated label fall through to the `default` case and get a blank
 * label.
 */
2047stack_type_str(
int stack_type)
2049 switch (stack_type) {
2050 case STK_ALT:
return "Alt ";
2051 case STK_LOOK_BEHIND_NOT:
return "LBNot ";
2052 case STK_POS_NOT:
return "PosNot";
2053 case STK_MEM_START:
return "MemS ";
2054 case STK_MEM_END:
return "MemE ";
2055 case STK_REPEAT_INC:
return "RepInc";
2056 case STK_STATE_CHECK_MARK:
return "StChMk";
2057 case STK_NULL_CHECK_START:
return "NulChS";
2058 case STK_NULL_CHECK_END:
return "NulChE";
2059 case STK_MEM_END_MARK:
return "MemEMk";
2060 case STK_POS:
return "Pos ";
2061 case STK_STOP_BT:
return "StopBt";
2062 case STK_REPEAT:
return "Rep ";
2063 case STK_CALL_FRAME:
return "Call ";
2064 case STK_RETURN:
return "Ret ";
2065 case STK_VOID:
return "Void ";
2066 case STK_ABSENT_POS:
return "AbsPos";
2067 case STK_ABSENT:
return "Absent";
/* Any other type gets a blank label. */
2068 default:
return " ";
2076match_at(
regex_t* reg,
const UChar* str,
const UChar* end,
2077#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
2078 const UChar* right_range,
2082 static const UChar FinishCode[] = { OP_FINISH };
2084 int i, num_mem, pop_level;
2085 ptrdiff_t n, best_len;
2086 LengthType tlen, tlen2;
2089 OnigOptionType option = reg->options;
2091 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
2092 UChar *s, *q, *sbegin;
2097 char *xmalloc_base = NULL;
2101 OnigStackIndex *repeat_stk;
2102 OnigStackIndex *mem_start_stk, *mem_end_stk;
2103#ifdef USE_COMBINATION_EXPLOSION_CHECK
2105 unsigned char* state_check_buff = msa->state_check_buff;
2106 int num_comb_exp_check = reg->num_comb_exp_check;
2109#if USE_TOKEN_THREADED_VM
2111# define VM_LOOP JUMP;
2113# define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
2114# define DEFAULT L_DEFAULT:
2115# define NEXT sprev = sbegin; JUMP
2116# define JUMP pbegin = p; RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
2142 &&L_OP_CCLASS_MB_NOT,
2143 &&L_OP_CCLASS_MIX_NOT,
2147 &&L_OP_ANYCHAR_STAR,
2148 &&L_OP_ANYCHAR_ML_STAR,
2149 &&L_OP_ANYCHAR_STAR_PEEK_NEXT,
2150 &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT,
2155 &&L_OP_NOT_WORD_BOUND,
2156# ifdef USE_WORD_BEGIN_END
2164 &&L_OP_NOT_ASCII_WORD,
2165 &&L_OP_ASCII_WORD_BOUND,
2166 &&L_OP_NOT_ASCII_WORD_BOUND,
2167# ifdef USE_WORD_BEGIN_END
2168 &&L_OP_ASCII_WORD_BEGIN,
2169 &&L_OP_ASCII_WORD_END,
2179 &&L_OP_SEMI_END_BUF,
2180 &&L_OP_BEGIN_POSITION,
2186 &&L_OP_BACKREF_MULTI,
2187 &&L_OP_BACKREF_MULTI_IC,
2188# ifdef USE_BACKREF_WITH_LEVEL
2189 &&L_OP_BACKREF_WITH_LEVEL,
2193 &&L_OP_MEMORY_START,
2194 &&L_OP_MEMORY_START_PUSH,
2195 &&L_OP_MEMORY_END_PUSH,
2196# ifdef USE_SUBEXP_CALL
2197 &&L_OP_MEMORY_END_PUSH_REC,
2202# ifdef USE_SUBEXP_CALL
2203 &&L_OP_MEMORY_END_REC,
2214# ifdef USE_OP_PUSH_OR_JUMP_EXACT
2215 &&L_OP_PUSH_OR_JUMP_EXACT1,
2219 &&L_OP_PUSH_IF_PEEK_NEXT,
2223 &&L_OP_REPEAT_INC_NG,
2224 &&L_OP_REPEAT_INC_SG,
2225 &&L_OP_REPEAT_INC_NG_SG,
2226 &&L_OP_NULL_CHECK_START,
2227 &&L_OP_NULL_CHECK_END,
2228# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2229 &&L_OP_NULL_CHECK_END_MEMST,
2233# ifdef USE_SUBEXP_CALL
2234 &&L_OP_NULL_CHECK_END_MEMST_PUSH,
2241 &&L_OP_PUSH_POS_NOT,
2243 &&L_OP_PUSH_STOP_BT,
2246 &&L_OP_PUSH_LOOK_BEHIND_NOT,
2247 &&L_OP_FAIL_LOOK_BEHIND_NOT,
2248 &&L_OP_PUSH_ABSENT_POS,
2252# ifdef USE_SUBEXP_CALL
2261# ifdef USE_COMBINATION_EXPLOSION_CHECK
2262 &&L_OP_STATE_CHECK_PUSH,
2263 &&L_OP_STATE_CHECK_PUSH_OR_JUMP,
2270# ifdef USE_COMBINATION_EXPLOSION_CHECK
2271 &&L_OP_STATE_CHECK_ANYCHAR_STAR,
2272 &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR,
2279 &&L_OP_SET_OPTION_PUSH,
2295# define VM_LOOP_END } sprev = sbegin; }
2296# define CASE(x) case x:
2297# define DEFAULT default:
2299# define JUMP continue; break
2303#ifdef USE_SUBEXP_CALL
2306# define ADD_NUMMEM 1
2309# define ADD_NUMMEM 0
2312 n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2;
2314 STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
2315 pop_level = reg->stack_pop_level;
2316 num_mem = reg->num_mem;
2317 repeat_stk = (OnigStackIndex* )alloca_base;
2319 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
2320 mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM);
2322 OnigStackIndex *pp = mem_start_stk;
2323 for (; pp < repeat_stk + n; pp += 2) {
2324 pp[0] = INVALID_STACK_INDEX;
2325 pp[1] = INVALID_STACK_INDEX;
2328#ifndef USE_SUBEXP_CALL
2335#ifdef ONIG_DEBUG_MATCH
2336 fprintf(stderr,
"match_at: str: %"PRIuPTR
" (%p), end: %"PRIuPTR
" (%p), start: %"PRIuPTR
" (%p), sprev: %"PRIuPTR
" (%p)\n",
2337 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev);
2338 fprintf(stderr,
"size: %d, start offset: %d\n",
2339 (
int )(end - str), (
int )(sstart - str));
2340 fprintf(stderr,
"\n ofs> str stk:type addr:opcode\n");
2343 STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode);
2344 best_len = ONIG_MISMATCH;
2345 s = (UChar* )sstart;
2346 pkeep = (UChar* )sstart;
2349#ifdef ONIG_DEBUG_MATCH
2350# define OPCODE_EXEC_HOOK \
2352 UChar *op, *q, *bp, buf[50]; \
2354 op = p - OP_OFFSET; \
2355 fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \
2358 if (*op != OP_FINISH) {
\
2359 for (i = 0; i < 7 && q < end; i++) { \
2360 len = enclen(encode, q, end); \
2361 while (len-- > 0) *bp++ = *q++; \
2363 if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \
2365 xmemcpy(bp, "\"", 1); bp += 1; \
2367 fputs((char* )buf, stderr); \
2368 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \
2369 fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \
2370 stk - stk_base - 1, \
2371 (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
2372 (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
2373 onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
2374 fprintf(stderr, "\n"); \
2377# define OPCODE_EXEC_HOOK ((void) 0)
2382 CASE(OP_END) MOP_IN(OP_END);
2386#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2387 if (IS_FIND_LONGEST(option)) {
2388 if (n > msa->best_len) {
2390 msa->best_s = (UChar* )sstart;
2397 region = msa->region;
2399 region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
2400 region->end[0] = s - str;
2401 for (i = 1; i <= num_mem; i++) {
2402 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2403 if (BIT_STATUS_AT(reg->bt_mem_start, i))
2404 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
2406 region->beg[i] = (UChar* )((
void* )mem_start_stk[i]) - str;
2408 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
2409 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
2410 : (UChar* )((
void* )mem_end_stk[i])) - str;
2413 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
2417#ifdef USE_CAPTURE_HISTORY
2418 if (reg->capture_history != 0) {
2420 OnigCaptureTreeNode* node;
2422 if (IS_NULL(region->history_root)) {
2423 region->history_root = node = history_node_new();
2424 CHECK_NULL_RETURN_MEMERR(node);
2427 node = region->history_root;
2428 history_tree_clear(node);
2432 node->beg = ((pkeep > s) ? s : pkeep) - str;
2433 node->end = s - str;
2436 r = make_capture_history_tree(region->history_root, &stkp,
2437 stk, (UChar* )str, reg);
2447#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2452 if (IS_FIND_CONDITION(option)) {
2453 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
2454 best_len = ONIG_MISMATCH;
2457 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
2466 CASE(OP_EXACT1) MOP_IN(OP_EXACT1);
2468 if (*p != *s)
goto fail;
2473 CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC);
2476 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2479 len = ONIGENC_MBC_CASE_FOLD(encode,
2495 CASE(OP_EXACT2) MOP_IN(OP_EXACT2);
2497 if (*p != *s)
goto fail;
2499 if (*p != *s)
goto fail;
2505 CASE(OP_EXACT3) MOP_IN(OP_EXACT3);
2507 if (*p != *s)
goto fail;
2509 if (*p != *s)
goto fail;
2511 if (*p != *s)
goto fail;
2517 CASE(OP_EXACT4) MOP_IN(OP_EXACT4);
2519 if (*p != *s)
goto fail;
2521 if (*p != *s)
goto fail;
2523 if (*p != *s)
goto fail;
2525 if (*p != *s)
goto fail;
2531 CASE(OP_EXACT5) MOP_IN(OP_EXACT5);
2533 if (*p != *s)
goto fail;
2535 if (*p != *s)
goto fail;
2537 if (*p != *s)
goto fail;
2539 if (*p != *s)
goto fail;
2541 if (*p != *s)
goto fail;
2547 CASE(OP_EXACTN) MOP_IN(OP_EXACTN);
2548 GET_LENGTH_INC(tlen, p);
2550 while (tlen-- > 0) {
2551 if (*p++ != *s++)
goto fail;
2557 CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC);
2560 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2562 GET_LENGTH_INC(tlen, p);
2568 len = ONIGENC_MBC_CASE_FOLD(encode,
2575 if (*p != *q)
goto fail;
2584 CASE(OP_EXACTMB2N1) MOP_IN(OP_EXACTMB2N1);
2586 if (*p != *s)
goto fail;
2588 if (*p != *s)
goto fail;
2593 CASE(OP_EXACTMB2N2) MOP_IN(OP_EXACTMB2N2);
2595 if (*p != *s)
goto fail;
2597 if (*p != *s)
goto fail;
2600 if (*p != *s)
goto fail;
2602 if (*p != *s)
goto fail;
2607 CASE(OP_EXACTMB2N3) MOP_IN(OP_EXACTMB2N3);
2609 if (*p != *s)
goto fail;
2611 if (*p != *s)
goto fail;
2613 if (*p != *s)
goto fail;
2615 if (*p != *s)
goto fail;
2618 if (*p != *s)
goto fail;
2620 if (*p != *s)
goto fail;
2625 CASE(OP_EXACTMB2N) MOP_IN(OP_EXACTMB2N);
2626 GET_LENGTH_INC(tlen, p);
2627 DATA_ENSURE(tlen * 2);
2628 while (tlen-- > 0) {
2629 if (*p != *s)
goto fail;
2631 if (*p != *s)
goto fail;
2638 CASE(OP_EXACTMB3N) MOP_IN(OP_EXACTMB3N);
2639 GET_LENGTH_INC(tlen, p);
2640 DATA_ENSURE(tlen * 3);
2641 while (tlen-- > 0) {
2642 if (*p != *s)
goto fail;
2644 if (*p != *s)
goto fail;
2646 if (*p != *s)
goto fail;
2653 CASE(OP_EXACTMBN) MOP_IN(OP_EXACTMBN);
2654 GET_LENGTH_INC(tlen, p);
2655 GET_LENGTH_INC(tlen2, p);
2658 while (tlen2-- > 0) {
2659 if (*p != *s)
goto fail;
2666 CASE(OP_CCLASS) MOP_IN(OP_CCLASS);
2668 if (BITSET_AT(((BitSetRef )p), *s) == 0)
goto fail;
2670 s += enclen(encode, s, end);
2674 CASE(OP_CCLASS_MB) MOP_IN(OP_CCLASS_MB);
2675 if (! ONIGENC_IS_MBC_HEAD(encode, s, end))
goto fail;
2678 GET_LENGTH_INC(tlen, p);
2685 mb_len = enclen(encode, s, end);
2686 DATA_ENSURE(mb_len);
2689 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2691#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2692 if (! onig_is_in_code_range(p, code))
goto fail;
2696 if (! onig_is_in_code_range(q, code))
goto fail;
2703 CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX);
2705 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2710 if (BITSET_AT(((BitSetRef )p), *s) == 0)
2714 GET_LENGTH_INC(tlen, p);
2721 CASE(OP_CCLASS_NOT) MOP_IN(OP_CCLASS_NOT);
2723 if (BITSET_AT(((BitSetRef )p), *s) != 0)
goto fail;
2725 s += enclen(encode, s, end);
2729 CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT);
2731 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2733 GET_LENGTH_INC(tlen, p);
2735 goto cc_mb_not_success;
2739 GET_LENGTH_INC(tlen, p);
2743 int mb_len = enclen(encode, s, end);
2745 if (! DATA_ENSURE_CHECK(mb_len)) {
2749 goto cc_mb_not_success;
2754 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2756#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2757 if (onig_is_in_code_range(p, code))
goto fail;
2761 if (onig_is_in_code_range(q, code))
goto fail;
2770 CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT);
2772 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2777 if (BITSET_AT(((BitSetRef )p), *s) != 0)
2781 GET_LENGTH_INC(tlen, p);
2788 CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
2790 n = enclen(encode, s, end);
2792 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0))
goto fail;
2797 CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
2799 n = enclen(encode, s, end);
2805 CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR);
2806 while (DATA_ENSURE_CHECK1) {
2807 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
2808 STACK_PUSH_ALT(p, s, sprev, pkeep);
2809 n = enclen(encode, s, end);
2811 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0))
goto fail;
2818 CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR);
2819 while (DATA_ENSURE_CHECK1) {
2820 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
2821 STACK_PUSH_ALT(p, s, sprev, pkeep);
2822 n = enclen(encode, s, end);
2836 CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
2837 while (DATA_ENSURE_CHECK1) {
2839 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache);
2840 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2842 n = enclen(encode, s, end);
2844 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0))
goto fail;
2852 CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
2853 while (DATA_ENSURE_CHECK1) {
2855 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
2856 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2858 n = enclen(encode, s, end);
2873#ifdef USE_COMBINATION_EXPLOSION_CHECK
2874 CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
2875 GET_STATE_CHECK_NUM_INC(mem, p);
2876 while (DATA_ENSURE_CHECK1) {
2877 STATE_CHECK_VAL(scv, mem);
2880 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2881 n = enclen(encode, s, end);
2883 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0))
goto fail;
2890 CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR)
2891 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
2893 GET_STATE_CHECK_NUM_INC(mem, p);
2894 while (DATA_ENSURE_CHECK1) {
2895 STATE_CHECK_VAL(scv, mem);
2898 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2899 n = enclen(encode, s, end);
2914 CASE(OP_WORD) MOP_IN(OP_WORD);
2916 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2919 s += enclen(encode, s, end);
2923 CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD);
2925 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2928 s += enclen(encode, s, end);
2932 CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD);
2934 if (ONIGENC_IS_MBC_WORD(encode, s, end))
2937 s += enclen(encode, s, end);
2941 CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD);
2943 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2946 s += enclen(encode, s, end);
2950 CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND);
2951 if (ON_STR_BEGIN(s)) {
2953 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2956 else if (ON_STR_END(s)) {
2957 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
2961 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2962 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
2968 CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND);
2969 if (ON_STR_BEGIN(s)) {
2971 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2974 else if (ON_STR_END(s)) {
2975 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2979 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
2980 == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2986 CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND);
2987 if (ON_STR_BEGIN(s)) {
2988 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
2991 else if (ON_STR_END(s)) {
2992 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
2996 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2997 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
3003 CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND);
3004 if (ON_STR_BEGIN(s)) {
3005 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3008 else if (ON_STR_END(s)) {
3009 if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3013 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
3014 != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3020#ifdef USE_WORD_BEGIN_END
3021 CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN);
3022 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
3023 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3031 CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN);
3032 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3033 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3041 CASE(OP_WORD_END) MOP_IN(OP_WORD_END);
3042 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3043 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
3051 CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END);
3052 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3053 if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3062 CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF);
3063 if (! ON_STR_BEGIN(s))
goto fail;
3064 if (IS_NOTBOS(msa->options))
goto fail;
3069 CASE(OP_END_BUF) MOP_IN(OP_END_BUF);
3070 if (! ON_STR_END(s))
goto fail;
3071 if (IS_NOTEOS(msa->options))
goto fail;
3076 CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE);
3077 if (ON_STR_BEGIN(s)) {
3078 if (IS_NOTBOL(msa->options))
goto fail;
3082 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
3083#ifdef USE_CRNL_AS_LINE_TERMINATOR
3084 && !(IS_NEWLINE_CRLF(option)
3085 && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
3087 && !ON_STR_END(s)) {
3094 CASE(OP_END_LINE) MOP_IN(OP_END_LINE);
3095 if (ON_STR_END(s)) {
3096#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3097 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3099 if (IS_NOTEOL(msa->options))
goto fail;
3102#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3106 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3113 CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF);
3114 if (ON_STR_END(s)) {
3115#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3116 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3118 if (IS_NOTEOL(msa->options))
goto fail;
3121#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3125 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3126 UChar* ss = s + enclen(encode, s, end);
3127 if (ON_STR_END(ss)) {
3131#ifdef USE_CRNL_AS_LINE_TERMINATOR
3132 else if (IS_NEWLINE_CRLF(option)
3133 && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3134 ss += enclen(encode, ss, end);
3135 if (ON_STR_END(ss)) {
3145 CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION);
3152 CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH);
3153 GET_MEMNUM_INC(mem, p);
3154 STACK_PUSH_MEM_START(mem, s);
3158 CASE(OP_MEMORY_START) MOP_IN(OP_MEMORY_START);
3159 GET_MEMNUM_INC(mem, p);
3160 mem_start_stk[mem] = (OnigStackIndex )((
void* )s);
3161 mem_end_stk[mem] = INVALID_STACK_INDEX;
3165 CASE(OP_MEMORY_END_PUSH) MOP_IN(OP_MEMORY_END_PUSH);
3166 GET_MEMNUM_INC(mem, p);
3167 STACK_PUSH_MEM_END(mem, s);
3171 CASE(OP_MEMORY_END) MOP_IN(OP_MEMORY_END);
3172 GET_MEMNUM_INC(mem, p);
3173 mem_end_stk[mem] = (OnigStackIndex )((
void* )s);
3177 CASE(OP_KEEP) MOP_IN(OP_KEEP);
3182#ifdef USE_SUBEXP_CALL
3183 CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC);
3184 GET_MEMNUM_INC(mem, p);
3185 STACK_GET_MEM_START(mem, stkp);
3186 STACK_PUSH_MEM_END(mem, s);
3187 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3191 CASE(OP_MEMORY_END_REC) MOP_IN(OP_MEMORY_END_REC);
3192 GET_MEMNUM_INC(mem, p);
3193 mem_end_stk[mem] = (OnigStackIndex )((
void* )s);
3194 STACK_GET_MEM_START(mem, stkp);
3196 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3197 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3199 mem_start_stk[mem] = (OnigStackIndex )((
void* )stkp->u.mem.pstr);
3201 STACK_PUSH_MEM_END_MARK(mem);
3206 CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1);
3211 CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2);
3216 CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN);
3217 GET_MEMNUM_INC(mem, p);
3221 UChar *pstart, *pend;
3225 if (mem > num_mem)
goto fail;
3226 if (mem_end_stk[mem] == INVALID_STACK_INDEX)
goto fail;
3227 if (mem_start_stk[mem] == INVALID_STACK_INDEX)
goto fail;
3229 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3230 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3232 pstart = (UChar* )((
void* )mem_start_stk[mem]);
3234 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3235 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3236 : (UChar* )((
void* )mem_end_stk[mem]));
3240 STRING_CMP(pstart, s, n);
3241 while (sprev + (len = enclen(encode, sprev, end)) < s)
3248 CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC);
3249 GET_MEMNUM_INC(mem, p);
3252 UChar *pstart, *pend;
3256 if (mem > num_mem)
goto fail;
3257 if (mem_end_stk[mem] == INVALID_STACK_INDEX)
goto fail;
3258 if (mem_start_stk[mem] == INVALID_STACK_INDEX)
goto fail;
3260 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3261 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3263 pstart = (UChar* )((
void* )mem_start_stk[mem]);
3265 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3266 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3267 : (UChar* )((
void* )mem_end_stk[mem]));
3271 STRING_CMP_IC(case_fold_flag, pstart, &s, (
int)n, end);
3272 while (sprev + (len = enclen(encode, sprev, end)) < s)
3280 CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI);
3283 UChar *pstart, *pend, *swork;
3285 GET_LENGTH_INC(tlen, p);
3286 for (i = 0; i < tlen; i++) {
3287 GET_MEMNUM_INC(mem, p);
3289 if (mem_end_stk[mem] == INVALID_STACK_INDEX)
continue;
3290 if (mem_start_stk[mem] == INVALID_STACK_INDEX)
continue;
3292 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3293 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3295 pstart = (UChar* )((
void* )mem_start_stk[mem]);
3297 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3298 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3299 : (UChar* )((
void* )mem_end_stk[mem]));
3301 DATA_ENSURE_CONTINUE(n);
3304 STRING_CMP_VALUE(pstart, swork, n, is_fail);
3305 if (is_fail)
continue;
3307 while (sprev + (len = enclen(encode, sprev, end)) < s)
3310 p += (SIZE_MEMNUM * (tlen - i - 1));
3313 if (i == tlen)
goto fail;
3319 CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC);
3322 UChar *pstart, *pend, *swork;
3324 GET_LENGTH_INC(tlen, p);
3325 for (i = 0; i < tlen; i++) {
3326 GET_MEMNUM_INC(mem, p);
3328 if (mem_end_stk[mem] == INVALID_STACK_INDEX)
continue;
3329 if (mem_start_stk[mem] == INVALID_STACK_INDEX)
continue;
3331 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3332 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3334 pstart = (UChar* )((
void* )mem_start_stk[mem]);
3336 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3337 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3338 : (UChar* )((
void* )mem_end_stk[mem]));
3340 DATA_ENSURE_CONTINUE(n);
3343 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
3344 if (is_fail)
continue;
3346 while (sprev + (len = enclen(encode, sprev, end)) < s)
3349 p += (SIZE_MEMNUM * (tlen - i - 1));
3352 if (i == tlen)
goto fail;
3357#ifdef USE_BACKREF_WITH_LEVEL
3358 CASE(OP_BACKREF_WITH_LEVEL)
3364 GET_OPTION_INC(ic, p);
3365 GET_LENGTH_INC(level, p);
3366 GET_LENGTH_INC(tlen, p);
3369 if (backref_match_at_nested_level(reg, stk, stk_base, ic,
3370 case_fold_flag, (
int )level, (
int )tlen, p, &s, end)) {
3371 while (sprev + (len = enclen(encode, sprev, end)) < s)
3374 p += (SIZE_MEMNUM * tlen);
3386 CASE(OP_SET_OPTION_PUSH) MOP_IN(OP_SET_OPTION_PUSH);
3387 GET_OPTION_INC(option, p);
3388 STACK_PUSH_ALT(p, s, sprev, pkeep);
3389 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
3393 CASE(OP_SET_OPTION) MOP_IN(OP_SET_OPTION);
3394 GET_OPTION_INC(option, p);
3399 CASE(OP_NULL_CHECK_START) MOP_IN(OP_NULL_CHECK_START);
3400 GET_MEMNUM_INC(mem, p);
3401 STACK_PUSH_NULL_CHECK_START(mem, s);
3405 CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END);
3409 GET_MEMNUM_INC(mem, p);
3410 STACK_NULL_CHECK(isnull, mem, s);
3412#ifdef ONIG_DEBUG_MATCH
3413 fprintf(stderr,
"NULL_CHECK_END: skip id:%d, s:%"PRIuPTR
" (%p)\n",
3414 (
int )mem, (uintptr_t )s, s);
3424 case OP_REPEAT_INC_NG:
3425 case OP_REPEAT_INC_SG:
3426 case OP_REPEAT_INC_NG_SG:
3430 goto unexpected_bytecode_error;
3438#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3439 CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST);
3444 GET_MEMNUM_INC(mem, p);
3445 STACK_NULL_CHECK_MEMST(isnull, ischanged, mem, s, reg);
3447# ifdef ONIG_DEBUG_MATCH
3448 fprintf(stderr,
"NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR
" (%p)\n",
3449 (
int )mem, (uintptr_t )s, s);
3451 if (isnull == -1)
goto fail;
3452 goto null_check_found;
3454# ifdef USE_CACHE_MATCH_OPT
3455 if (ischanged && msa->enable_cache_match_opt) {
3463 GET_RELADDR_INC(rel, tmp);
3467 case OP_REPEAT_INC_NG:
3468 GET_MEMNUM_INC(mem, tmp);
3469 addr = STACK_AT(repeat_stk[mem])->u.repeat.pcode;
3472 goto unexpected_bytecode_error;
3474 reset_match_cache(reg, addr, pbegin, (
long)(s - str), msa->match_cache, msa->cache_index_table, msa->num_cache_table ,msa->num_cache_opcode);
3482#ifdef USE_SUBEXP_CALL
3483 CASE(OP_NULL_CHECK_END_MEMST_PUSH)
3484 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
3488 GET_MEMNUM_INC(mem, p);
3489# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3490 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
3492 STACK_NULL_CHECK_REC(isnull, mem, s);
3495# ifdef ONIG_DEBUG_MATCH
3496 fprintf(stderr,
"NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR
" (%p)\n",
3497 (
int )mem, (uintptr_t )s, s);
3499 if (isnull == -1)
goto fail;
3500 goto null_check_found;
3503 STACK_PUSH_NULL_CHECK_END(mem);
3510 CASE(OP_JUMP) MOP_IN(OP_JUMP);
3511 GET_RELADDR_INC(addr, p);
3514 CHECK_INTERRUPT_IN_MATCH_AT;
3517 CASE(OP_PUSH) MOP_IN(OP_PUSH);
3518 GET_RELADDR_INC(addr, p);
3519 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3520 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3524#ifdef USE_COMBINATION_EXPLOSION_CHECK
3525 CASE(OP_STATE_CHECK_PUSH) MOP_IN(OP_STATE_CHECK_PUSH);
3526 GET_STATE_CHECK_NUM_INC(mem, p);
3527 STATE_CHECK_VAL(scv, mem);
3530 GET_RELADDR_INC(addr, p);
3531 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3535 CASE(OP_STATE_CHECK_PUSH_OR_JUMP) MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
3536 GET_STATE_CHECK_NUM_INC(mem, p);
3537 GET_RELADDR_INC(addr, p);
3538 STATE_CHECK_VAL(scv, mem);
3543 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3548 CASE(OP_STATE_CHECK) MOP_IN(OP_STATE_CHECK);
3549 GET_STATE_CHECK_NUM_INC(mem, p);
3550 STATE_CHECK_VAL(scv, mem);
3553 STACK_PUSH_STATE_CHECK(s, mem);
3558 CASE(OP_POP) MOP_IN(OP_POP);
3562#ifdef USE_CACHE_MATCH_OPT
3568#ifdef USE_OP_PUSH_OR_JUMP_EXACT
3569 CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
3570 GET_RELADDR_INC(addr, p);
3571 if (*p == *s && DATA_ENSURE_CHECK1) {
3573 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3574 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3583 CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT);
3584 GET_RELADDR_INC(addr, p);
3587 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3588 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3596 CASE(OP_REPEAT) MOP_IN(OP_REPEAT);
3598 GET_MEMNUM_INC(mem, p);
3599 GET_RELADDR_INC(addr, p);
3602 repeat_stk[mem] = GET_STACK_INDEX(stk);
3603 STACK_PUSH_REPEAT(mem, p);
3605 if (reg->repeat_range[mem].lower == 0) {
3606 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache);
3607 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3613 CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG);
3615 GET_MEMNUM_INC(mem, p);
3616 GET_RELADDR_INC(addr, p);
3619 repeat_stk[mem] = GET_STACK_INDEX(stk);
3620 STACK_PUSH_REPEAT(mem, p);
3622 if (reg->repeat_range[mem].lower == 0) {
3623 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3624 STACK_PUSH_ALT(p, s, sprev, pkeep);
3631 CASE(OP_REPEAT_INC) MOP_IN(OP_REPEAT_INC);
3632 GET_MEMNUM_INC(mem, p);
3633 si = repeat_stk[mem];
3634 stkp = STACK_AT(si);
3637 stkp->u.repeat.count++;
3638 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
3641 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3642 if (*pbegin == OP_REPEAT_INC) {
3643 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3645 STACK_PUSH_ALT(p, s, sprev, pkeep);
3646 p = STACK_AT(si)->u.repeat.pcode;
3649 p = stkp->u.repeat.pcode;
3651 STACK_PUSH_REPEAT_INC(si);
3653 CHECK_INTERRUPT_IN_MATCH_AT;
3656 CASE(OP_REPEAT_INC_SG) MOP_IN(OP_REPEAT_INC_SG);
3657 GET_MEMNUM_INC(mem, p);
3658 STACK_GET_REPEAT(mem, stkp);
3659 si = GET_STACK_INDEX(stkp);
3663 CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG);
3664 GET_MEMNUM_INC(mem, p);
3665 si = repeat_stk[mem];
3666 stkp = STACK_AT(si);
3669 stkp->u.repeat.count++;
3670 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
3671 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3672 UChar* pcode = stkp->u.repeat.pcode;
3674 STACK_PUSH_REPEAT_INC(si);
3675 if (*pbegin == OP_REPEAT_INC_NG) {
3676 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3678 STACK_PUSH_ALT(pcode, s, sprev, pkeep);
3681 p = stkp->u.repeat.pcode;
3682 STACK_PUSH_REPEAT_INC(si);
3685 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
3686 STACK_PUSH_REPEAT_INC(si);
3689 CHECK_INTERRUPT_IN_MATCH_AT;
3692 CASE(OP_REPEAT_INC_NG_SG) MOP_IN(OP_REPEAT_INC_NG_SG);
3693 GET_MEMNUM_INC(mem, p);
3694 STACK_GET_REPEAT(mem, stkp);
3695 si = GET_STACK_INDEX(stkp);
3699 CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS);
3700 STACK_PUSH_POS(s, sprev, pkeep);
3704 CASE(OP_POP_POS) MOP_IN(OP_POP_POS);
3706 STACK_POS_END(stkp);
3707 s = stkp->u.state.pstr;
3708 sprev = stkp->u.state.pstr_prev;
3713 CASE(OP_PUSH_POS_NOT) MOP_IN(OP_PUSH_POS_NOT);
3714 GET_RELADDR_INC(addr, p);
3715 STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
3719 CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS);
3720 STACK_POP_TIL_POS_NOT;
3724 CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT);
3729 CASE(OP_POP_STOP_BT) MOP_IN(OP_POP_STOP_BT);
3734 CASE(OP_LOOK_BEHIND) MOP_IN(OP_LOOK_BEHIND);
3735 GET_LENGTH_INC(tlen, p);
3736 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (
int )tlen);
3737 if (IS_NULL(s))
goto fail;
3738 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3742 CASE(OP_PUSH_LOOK_BEHIND_NOT) MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
3743 GET_RELADDR_INC(addr, p);
3744 GET_LENGTH_INC(tlen, p);
3745 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (
int )tlen);
3753 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
3755 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3760 CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
3761 STACK_POP_TIL_LOOK_BEHIND_NOT;
3765 CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
3767 STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS);
3771 CASE(OP_ABSENT) MOP_IN(OP_ABSENT);
3773 const UChar* aend = ABSENT_END_POS;
3775 UChar* selfp = p - 1;
3777 STACK_POP_ABSENT_POS(absent, ABSENT_END_POS);
3778 GET_RELADDR_INC(addr, p);
3779#ifdef ONIG_DEBUG_MATCH
3780 fprintf(stderr,
"ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
3782 if ((absent > aend) && (s > absent)) {
3788 else if ((s >= aend) && (s > absent)) {
3798 else if (s == end) {
3804 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3805 n = enclen(encode, s, end);
3806 STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS);
3807 STACK_PUSH_ALT(selfp, s + n, s, pkeep);
3809 ABSENT_END_POS = aend;
3815 CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END);
3818 if (sprev < ABSENT_END_POS)
3819 ABSENT_END_POS = sprev;
3820#ifdef ONIG_DEBUG_MATCH
3821 fprintf(stderr,
"ABSENT_END: end:%p\n", ABSENT_END_POS);
3823 STACK_POP_TIL_ABSENT;
3827#ifdef USE_SUBEXP_CALL
3828 CASE(OP_CALL) MOP_IN(OP_CALL);
3829 GET_ABSADDR_INC(addr, p);
3830 STACK_PUSH_CALL_FRAME(p);
3835 CASE(OP_RETURN) MOP_IN(OP_RETURN);
3842 CASE(OP_CONDITION) MOP_IN(OP_CONDITION);
3843 GET_MEMNUM_INC(mem, p);
3844 GET_RELADDR_INC(addr, p);
3845 if ((mem > num_mem) ||
3846 (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
3847 (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
3865 p = stk->u.state.pcode;
3866 s = stk->u.state.pstr;
3867 sprev = stk->u.state.pstr_prev;
3868 pkeep = stk->u.state.pkeep;
3870#ifdef USE_CACHE_MATCH_OPT
3871 if (++msa->num_fail >= (
long)(end - str) + 1 && msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) {
3872 msa->enable_cache_match_opt = 1;
3873 if (msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) {
3874 OnigPosition r = count_num_cache_opcode(reg, &msa->num_cache_opcode, &msa->num_cache_table);
3875 if (r < 0)
goto bytecode_error;
3877 if (msa->num_cache_opcode == NUM_CACHE_OPCODE_FAIL || msa->num_cache_opcode == 0) {
3878 msa->enable_cache_match_opt = 0;
3879 goto fail_match_cache_opt;
3881 if (msa->cache_index_table == NULL) {
3883 if (table == NULL) {
3884 return ONIGERR_MEMORY;
3886 OnigPosition r = init_cache_index_table(reg, table);
3888 if (r == ONIGERR_UNEXPECTED_BYTECODE)
goto unexpected_bytecode_error;
3889 else goto bytecode_error;
3891 msa->cache_index_table = table;
3893 size_t len = (end - str) + 1;
3894 size_t match_cache_size8 = (size_t)msa->num_cache_opcode * len;
3896 if (match_cache_size8 / len != (
size_t)msa->num_cache_opcode) {
3897 return ONIGERR_MEMORY;
3900 if (match_cache_size8 >= LONG_MAX_LIMIT) {
3901 return ONIGERR_MEMORY;
3903 size_t match_cache_size = (match_cache_size8 >> 3) + (match_cache_size8 & 7 ? 1 : 0);
3904 msa->match_cache = (uint8_t*)
xmalloc(match_cache_size *
sizeof(uint8_t));
3905 if (msa->match_cache == NULL) {
3906 return ONIGERR_MEMORY;
3908 xmemset(msa->match_cache, 0, match_cache_size *
sizeof(uint8_t));
3910 fail_match_cache_opt:
3913#ifdef USE_COMBINATION_EXPLOSION_CHECK
3914 if (stk->u.state.state_check != 0) {
3915 stk->type = STK_STATE_CHECK_MARK;
3921 CHECK_INTERRUPT_IN_MATCH_AT;
3925 goto bytecode_error;
3930 if (xmalloc_base)
xfree(xmalloc_base);
3936 if (xmalloc_base)
xfree(xmalloc_base);
3937 return ONIGERR_STACK_BUG;
3942 if (xmalloc_base)
xfree(xmalloc_base);
3943 return ONIGERR_UNDEFINED_BYTECODE;
3945 unexpected_bytecode_error:
3947 if (xmalloc_base)
xfree(xmalloc_base);
3948 return ONIGERR_UNEXPECTED_BYTECODE;
/* slow_search: forward scan of the text for the byte sequence
 * [target, target_end).  Both visible failure exits return NULL.
 * NOTE(review): the embedded line numbers skip values, so parts of this
 * definition (return type, braces, loop headers) are absent from this view. */
3953slow_search(
OnigEncoding enc, UChar* target, UChar* target_end,
3954 const UChar* text,
const UChar* text_end, UChar* text_range)
3956 UChar *t, *p, *s, *end;
/* Pull the scan limit back from text_end by (target length - 1). */
3958 end = (UChar* )text_end;
3959 end -= target_end - target - 1;
/* The limit is then compared with text_range (branch body not visible). */
3960 if (end > text_range)
/* Encodings whose minimum and maximum character lengths agree take their
 * own branch, with that length held in n. */
3965 if (enc->max_enc_len == enc->min_enc_len) {
3966 int n = enc->max_enc_len;
/* Candidate test: first byte, then memcmp over whatever remains of target
 * (an empty remainder, target_end == t, also passes). */
3969 if (*s == *target) {
3972 if (target_end == t || memcmp(t, p, target_end - t) == 0)
3977 return (UChar* )NULL;
/* Same candidate test again; this copy advances s by enclen(). */
3980 if (*s == *target) {
3983 if (target_end == t || memcmp(t, p, target_end - t) == 0)
3986 s += enclen(enc, s, text_end);
3989 return (UChar* )NULL;
/* str_lower_case_match: compares the bytes at t against case-folded output
 * produced from the text at p; returns 0 at the first differing byte.
 * NOTE(review): the loop tail and the success return are not in this view. */
3993str_lower_case_match(
OnigEncoding enc,
int case_fold_flag,
3994 const UChar* t,
const UChar* tend,
3995 const UChar* p,
const UChar* end)
/* lowbuf receives the folded bytes; q walks over them. */
3998 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
/* &p is passed, so the macro presumably advances p past what it folded --
 * TODO confirm against the encoding API. */
4001 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
/* Byte-by-byte comparison of the folded output with the target. */
4003 while (lowlen > 0) {
4004 if (*t++ != *q++)
return 0;
/* NOTE(review): the line carrying this function's name is absent from this
 * view; presumably slow_search_ic, judging by its caller's argument list --
 * confirm.  The visible part mirrors slow_search but tests each position
 * with str_lower_case_match(). */
4014 UChar* target, UChar* target_end,
4015 const UChar* text,
const UChar* text_end, UChar* text_range)
/* Pull the scan limit back from text_end by (target length - 1). */
4019 end = (UChar* )text_end;
4020 end -= target_end - target - 1;
/* The limit is then compared with text_range (branch body not visible). */
4021 if (end > text_range)
4027 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
/* Step to the next character head. */
4031 s += enclen(enc, s, text_end);
/* No position matched. */
4034 return (UChar* )NULL;
/* slow_search_backward: scans toward the start of the text for the byte
 * sequence [target, target_end), stepping with
 * onigenc_get_prev_char_head(); returns NULL when nothing is found.
 * NOTE(review): numbering gaps show that lines of this definition
 * (conditions, braces, the success return) are missing from this view. */
4038slow_search_backward(
OnigEncoding enc, UChar* target, UChar* target_end,
4039 const UChar* text,
const UChar* adjust_text,
4040 const UChar* text_end,
const UChar* text_start)
/* Latest position at which the whole target still fits before text_end. */
4044 s = (UChar* )text_end;
4045 s -= (target_end - target);
/* Alternative starting point taken from text_start (guard not visible). */
4047 s = (UChar* )text_start;
/* Snap s to a character head, using adjust_text as the left bound. */
4049 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
/* First byte gate, then a loop over the target bytes. */
4052 if (*s == *target) {
4055 while (t < target_end) {
/* t reaching target_end means every target byte was consumed. */
4060 if (t == target_end)
/* Move to the previous character head. */
4063 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4066 return (UChar* )NULL;
/* slow_search_backward_ic: like slow_search_backward, but each position is
 * tested with str_lower_case_match() using case_fold_flag; returns NULL
 * when nothing is found.
 * NOTE(review): numbering gaps show that lines of this definition are
 * missing from this view. */
4070slow_search_backward_ic(
OnigEncoding enc,
int case_fold_flag,
4071 UChar* target, UChar* target_end,
4072 const UChar* text,
const UChar* adjust_text,
4073 const UChar* text_end,
const UChar* text_start)
/* Latest position at which the whole target still fits before text_end. */
4077 s = (UChar* )text_end;
4078 s -= (target_end - target);
/* Alternative starting point taken from text_start (guard not visible). */
4080 s = (UChar* )text_start;
/* Snap s to a character head, using adjust_text as the left bound. */
4082 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4085 if (str_lower_case_match(enc, case_fold_flag,
4086 target, target_end, s, text_end))
/* Move to the previous character head. */
4089 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4092 return (UChar* )NULL;
4095#ifndef USE_SUNDAY_QUICK_SEARCH
/* bm_search_notrev (definition under `#ifndef USE_SUNDAY_QUICK_SEARCH`):
 * skip-table search for [target, target_end) that advances s one whole
 * character at a time via enclen().  Returns the match position s, or NULL.
 * NOTE(review): numbering gaps show that lines of this definition are
 * missing from this view. */
4098bm_search_notrev(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4099 const UChar* text,
const UChar* text_end,
4100 const UChar* text_range)
4102 const UChar *s, *se, *t, *p, *end;
4104 ptrdiff_t skip, tlen1;
4106# ifdef ONIG_DEBUG_SEARCH
4107 fprintf(stderr,
"bm_search_notrev: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
4108 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
/* tail is the last target byte; tlen1 is the target length minus one. */
4111 tail = target_end - 1;
4112 tlen1 = tail - target;
/* Clamp end so that end + tlen1 never passes text_end. */
4114 if (end + tlen1 > text_end)
4115 end = text_end - tlen1;
/* Without an int_map the skip distances come from reg->map. */
4119 if (IS_NULL(reg->int_map)) {
/* t having walked back to target signals a match at s. */
4124 if (t == target)
return (UChar* )s;
/* Skip distance is keyed by the byte at se. */
4127 skip = reg->map[*se];
/* do-while: keep stepping by characters until (s - t) reaches skip or s
 * reaches end. */
4130 s += enclen(reg->enc, s, end);
4131 }
while ((s - t) < skip && s < end);
/* Same search driven by reg->int_map, compiled only under this condition. */
4135# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4140 if (t == target)
return (UChar* )s;
4143 skip = reg->int_map[*se];
4146 s += enclen(reg->enc, s, end);
4147 }
while ((s - t) < skip && s < end);
4152 return (UChar* )NULL;
/* bm_search (definition under `#ifndef USE_SUNDAY_QUICK_SEARCH`):
 * skip-table search for [target, target_end) in which s is advanced by the
 * table entry itself.  Returns the match position p, or NULL.
 * NOTE(review): numbering gaps show that lines of this definition are
 * missing from this view. */
4157bm_search(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4158 const UChar* text,
const UChar* text_end,
const UChar* text_range)
4160 const UChar *s, *t, *p, *end;
4163# ifdef ONIG_DEBUG_SEARCH
4164 fprintf(stderr,
"bm_search: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
4165 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
/* end is text_range shifted by (target length - 1). */
4168 end = text_range + (target_end - target) - 1;
/* tail is the last target byte; s starts (target length - 1) bytes into
 * text, i.e. where the last target byte would sit for a match at text. */
4172 tail = target_end - 1;
4173 s = text + (target_end - target) - 1;
/* Branch taken when no int_map is present. */
4174 if (IS_NULL(reg->int_map)) {
4178# ifdef ONIG_DEBUG_SEARCH
4179 fprintf(stderr,
"bm_search_loop: pos: %"PRIdPTR
" %s\n",
4180 (intptr_t )(s - text), s);
/* t having walked back to target signals a match starting at p. */
4183 if (t == target)
return (UChar* )p;
/* Same search driven by reg->int_map, compiled only under this condition. */
4190# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4195 if (t == target)
return (UChar* )p;
/* Advance by the skip distance keyed by the byte at s. */
4198 s += reg->int_map[*s];
4202 return (UChar* )NULL;
/* bm_search_notrev_ic (definition under `#ifndef USE_SUNDAY_QUICK_SEARCH`):
 * case-insensitive skip-table search; candidates are tested with
 * str_lower_case_match() and s advances one character at a time via
 * enclen().  Returns NULL when nothing is found.
 * NOTE(review): numbering gaps show that lines of this definition are
 * missing from this view. */
4207bm_search_notrev_ic(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4208 const UChar* text,
const UChar* text_end,
4209 const UChar* text_range)
4211 const UChar *s, *se, *t, *end;
4213 ptrdiff_t skip, tlen1;
4215 int case_fold_flag = reg->case_fold_flag;
/* NOTE(review): this debug print casts pointers to int for %d, which
 * truncates where pointers are wider than int; other debug prints in this
 * file use PRIuPTR with uintptr_t -- consider aligning. */
4217# ifdef ONIG_DEBUG_SEARCH
4218 fprintf(stderr,
"bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
4219 (
int )text, text, (
int )text_end, text_end, (
int )text_range, text_range);
/* tail is the last target byte; tlen1 is the target length minus one. */
4222 tail = target_end - 1;
4223 tlen1 = tail - target;
/* Clamp end so that end + tlen1 never passes text_end. */
4225 if (end + tlen1 > text_end)
4226 end = text_end - tlen1;
/* Without an int_map the skip distances come from reg->map. */
4230 if (IS_NULL(reg->int_map)) {
4233 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
/* Skip distance is keyed by the byte at se. */
4236 skip = reg->map[*se];
/* do-while: keep stepping by characters until (s - t) reaches skip or s
 * reaches end. */
4239 s += enclen(reg->enc, s, end);
4240 }
while ((s - t) < skip && s < end);
/* Same search driven by reg->int_map, compiled only under this condition. */
4244# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4247 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4250 skip = reg->int_map[*se];
4253 s += enclen(reg->enc, s, end);
4254 }
while ((s - t) < skip && s < end);
4259 return (UChar* )NULL;
/* bm_search_ic (definition under `#ifndef USE_SUNDAY_QUICK_SEARCH`):
 * case-insensitive skip-table search; the candidate start p is derived from
 * s and tested with str_lower_case_match().  Returns NULL when nothing is
 * found.
 * NOTE(review): numbering gaps show that lines of this definition are
 * missing from this view. */
4264bm_search_ic(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4265 const UChar* text,
const UChar* text_end,
const UChar* text_range)
4267 const UChar *s, *p, *end;
4270 int case_fold_flag = reg->case_fold_flag;
/* NOTE(review): this debug print casts pointers to int for %d, which
 * truncates where pointers are wider than int; other debug prints in this
 * file use PRIuPTR with uintptr_t -- consider aligning. */
4272# ifdef ONIG_DEBUG_SEARCH
4273 fprintf(stderr,
"bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
4274 (
int )text, text, (
int )text_end, text_end, (
int )text_range, text_range);
/* end is text_range shifted by (target length - 1). */
4277 end = text_range + (target_end - target) - 1;
/* tail is the last target byte; s starts (target length - 1) bytes into text. */
4281 tail = target_end - 1;
4282 s = text + (target_end - target) - 1;
/* Branch taken when no int_map is present. */
4283 if (IS_NULL(reg->int_map)) {
/* p is the candidate start: s moved back by (target length - 1). */
4285 p = s - (target_end - target) + 1;
4286 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
/* Same search driven by reg->int_map, compiled only under this condition. */
4293# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4295 p = s - (target_end - target) + 1;
4296 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
/* Advance by the skip distance keyed by the byte at s. */
4299 s += reg->int_map[*s];
4303 return (UChar* )NULL;
/* bm_search_notrev (second definition in this file; presumably the
 * alternative arm of the USE_SUNDAY_QUICK_SEARCH conditional -- confirm):
 * the skip distance is keyed by the byte after se (se[1]) rather than the
 * byte at se.  Returns the match position s, or NULL.
 * NOTE(review): numbering gaps show that lines of this definition are
 * missing from this view. */
4310bm_search_notrev(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4311 const UChar* text,
const UChar* text_end,
4312 const UChar* text_range)
4314 const UChar *s, *se, *t, *p, *end;
4316 ptrdiff_t skip, tlen1;
4319# ifdef ONIG_DEBUG_SEARCH
4320 fprintf(stderr,
"bm_search_notrev: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
4321 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
/* tail is the last target byte; tlen1 is the target length minus one. */
4324 tail = target_end - 1;
4325 tlen1 = tail - target;
/* Clamp end so that end + tlen1 never passes text_end. */
4327 if (end + tlen1 > text_end)
4328 end = text_end - tlen1;
/* Without an int_map the skip distances come from reg->map. */
4332 if (IS_NULL(reg->int_map)) {
/* t having walked back to target signals a match at s. */
4337 if (t == target)
return (UChar* )s;
/* Leave the loop when s + 1 has reached end, before se[1] is read. */
4340 if (s + 1 >= end)
break;
4341 skip = reg->map[se[1]];
/* do-while: keep stepping by characters until (s - t) reaches skip or s
 * reaches end. */
4344 s += enclen(enc, s, end);
4345 }
while ((s - t) < skip && s < end);
/* Same search driven by reg->int_map, compiled only under this condition. */
4349# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4354 if (t == target)
return (UChar* )s;
4357 if (s + 1 >= end)
break;
4358 skip = reg->int_map[se[1]];
4361 s += enclen(enc, s, end);
4362 }
while ((s - t) < skip && s < end);
4367 return (UChar* )NULL;
/* bm_search (second definition in this file; presumably the alternative
 * arm of the USE_SUNDAY_QUICK_SEARCH conditional -- confirm): s is advanced
 * by the table entry keyed by the byte after s (s[1]).  Returns the match
 * position p, or NULL.
 * NOTE(review): numbering gaps show that lines of this definition are
 * missing from this view. */
4372bm_search(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4373 const UChar* text,
const UChar* text_end,
const UChar* text_range)
4375 const UChar *s, *t, *p, *end;
4379# ifdef ONIG_DEBUG_SEARCH
4380 fprintf(stderr,
"bm_search: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
4381 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
/* tail is the last target byte; tlen1 is the target length minus one;
 * end is text_range shifted by tlen1. */
4384 tail = target_end - 1;
4385 tlen1 = tail - target;
4386 end = text_range + tlen1;
/* Without an int_map the skip distances come from reg->map. */
4391 if (IS_NULL(reg->int_map)) {
/* t having walked back to target signals a match starting at p. */
4396 if (t == target)
return (UChar* )p;
/* Leave the loop when s + 1 has reached end, before s[1] is read. */
4399 if (s + 1 >= end)
break;
4400 s += reg->map[s[1]];
/* Same search driven by reg->int_map, compiled only under this condition. */
4404# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4409 if (t == target)
return (UChar* )p;
4412 if (s + 1 >= end)
break;
4413 s += reg->int_map[s[1]];
4417 return (UChar* )NULL;
/* bm_search_notrev_ic (second definition in this file; presumably the
 * alternative arm of the USE_SUNDAY_QUICK_SEARCH conditional -- confirm):
 * case-insensitive search whose skip distance is keyed by se[1]; candidates
 * are tested with str_lower_case_match().  Returns NULL when nothing is
 * found.
 * NOTE(review): numbering gaps show that lines of this definition are
 * missing from this view. */
4422bm_search_notrev_ic(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4423 const UChar* text,
const UChar* text_end,
4424 const UChar* text_range)
4426 const UChar *s, *se, *t, *end;
4428 ptrdiff_t skip, tlen1;
4430 int case_fold_flag = reg->case_fold_flag;
4432# ifdef ONIG_DEBUG_SEARCH
4433 fprintf(stderr,
"bm_search_notrev_ic: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
4434 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
/* tail is the last target byte; tlen1 is the target length minus one. */
4437 tail = target_end - 1;
4438 tlen1 = tail - target;
/* Clamp end so that end + tlen1 never passes text_end. */
4440 if (end + tlen1 > text_end)
4441 end = text_end - tlen1;
/* Without an int_map the skip distances come from reg->map. */
4445 if (IS_NULL(reg->int_map)) {
4448 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
/* Leave the loop when s + 1 has reached end, before se[1] is read. */
4451 if (s + 1 >= end)
break;
4452 skip = reg->map[se[1]];
/* do-while: keep stepping by characters until (s - t) reaches skip or s
 * reaches end. */
4455 s += enclen(enc, s, end);
4456 }
while ((s - t) < skip && s < end);
/* Same search driven by reg->int_map, compiled only under this condition. */
4460# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4463 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4466 if (s + 1 >= end)
break;
4467 skip = reg->int_map[se[1]];
4470 s += enclen(enc, s, end);
4471 }
while ((s - t) < skip && s < end);
4476 return (UChar* )NULL;
/* bm_search_ic (second definition in this file; presumably the alternative
 * arm of the USE_SUNDAY_QUICK_SEARCH conditional -- confirm):
 * case-insensitive search in which s is advanced by the table entry keyed
 * by s[1]; candidates are tested with str_lower_case_match().  Returns NULL
 * when nothing is found.
 * NOTE(review): numbering gaps show that lines of this definition are
 * missing from this view. */
4481bm_search_ic(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4482 const UChar* text,
const UChar* text_end,
const UChar* text_range)
4484 const UChar *s, *p, *end;
4488 int case_fold_flag = reg->case_fold_flag;
4490# ifdef ONIG_DEBUG_SEARCH
4491 fprintf(stderr,
"bm_search_ic: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
4492 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
/* tail is the last target byte; tlen1 is the target length minus one;
 * end is text_range shifted by tlen1. */
4495 tail = target_end - 1;
4496 tlen1 = tail - target;
4497 end = text_range + tlen1;
/* Without an int_map the skip distances come from reg->map. */
4502 if (IS_NULL(reg->int_map)) {
4505 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
/* Leave the loop when s + 1 has reached end, before s[1] is read. */
4508 if (s + 1 >= end)
break;
4509 s += reg->map[s[1]];
/* Same search driven by reg->int_map, compiled only under this condition. */
4513# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4516 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4519 if (s + 1 >= end)
break;
4520 s += reg->int_map[s[1]];
4524 return (UChar* )NULL;
4528#ifdef USE_INT_MAP_BACKWARD
/* set_bm_backward_skip: allocates (when *skip is still NULL) and fills an
 * int table of ONIG_CHAR_TABLE_SIZE entries for the pattern [s, end).
 * Returns ONIGERR_MEMORY if the allocation fails.
 * NOTE(review): the remaining parameters, both loop bodies and the success
 * return are not in this view. */
4530set_bm_backward_skip(UChar* s, UChar* end,
OnigEncoding enc ARG_UNUSED,
/* Allocate the table on first use only. */
4535 if (IS_NULL(*skip)) {
4536 *skip = (
int* )
xmalloc(
sizeof(
int) * ONIG_CHAR_TABLE_SIZE);
4537 if (IS_NULL(*skip))
return ONIGERR_MEMORY;
/* Pattern length in bytes. */
4540 len = (int )(end - s);
/* First pass visits every table slot; second pass walks the pattern
 * indices from len - 1 down to 1. */
4541 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4544 for (i = len - 1; i > 0; i--)
/* bm_search_backward: scans toward the start of the text for
 * [target, target_end), moving s back by reg->int_map_backward[*s] and
 * re-aligning it to a character head after each move.  Returns NULL when
 * nothing is found.
 * NOTE(review): numbering gaps show that lines of this definition (loop
 * header, success return) are missing from this view. */
4551bm_search_backward(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4552 const UChar* text,
const UChar* adjust_text,
4553 const UChar* text_end,
const UChar* text_start)
4555 const UChar *s, *t, *p;
/* Latest position at which the whole target still fits before text_end. */
4557 s = text_end - (target_end - target);
/* Snap s to a character head, using adjust_text as the left bound. */
4561 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
/* Compare text bytes (p) with target bytes (t) while they agree. */
4566 while (t < target_end && *p == *t) {
/* t reaching target_end means every target byte matched. */
4569 if (t == target_end)
/* Jump back by the table entry for the byte at s, then re-align. */
4572 s -= reg->int_map_backward[*s];
4573 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4576 return (UChar* )NULL;
/* NOTE(review): the line carrying this function's name is absent from this
 * view; presumably map_search, judging by the forward-search call site --
 * confirm.  Walks text character by character up to text_range and returns
 * the first position whose leading byte has a non-zero entry in map;
 * returns NULL otherwise. */
4582 const UChar* text,
const UChar* text_range,
const UChar* text_end)
4584 const UChar *s = text;
4586 while (s < text_range) {
/* Only the first byte of the character at s is looked up. */
4587 if (map[*s])
return (UChar* )s;
4589 s += enclen(enc, s, text_end);
4591 return (UChar* )NULL;
/* NOTE(review): the line carrying this function's name is absent from this
 * view; presumably map_search_backward, judging by the backward-search call
 * site -- confirm.  Starts at text_start and steps to previous character
 * heads, returning the first position whose leading byte has a non-zero
 * entry in map; returns NULL otherwise.  The loop header is not visible. */
4596 const UChar* text,
const UChar* adjust_text,
4597 const UChar* text_start,
const UChar* text_end)
4599 const UChar *s = text_start;
/* Only the first byte of the character at s is looked up. */
4602 if (map[*s])
return (UChar* )s;
/* adjust_text is the left bound handed to the previous-head lookup. */
4604 s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4606 return (UChar* )NULL;
/* onig_match: runs match_at() once at position `at` within [str, end),
 * after sizing and clearing `region` for reg->num_mem + 1 entries.
 * NOTE(review): error handling, the remaining match_at() arguments and the
 * return statement are not in this view. */
4610onig_match(
regex_t* reg,
const UChar* str,
const UChar* end,
const UChar* at,
OnigRegion* region,
4611 OnigOptionType option)
/* `at` is supplied for both of the last two MATCH_ARG_INIT arguments. */
4617 MATCH_ARG_INIT(msa, option, region, at, at);
4618#ifdef USE_COMBINATION_EXPLOSION_CHECK
/* The state-check buffer is set up from the text length and the byte
 * offset of `at`. */
4620 ptrdiff_t offset = at - str;
4621 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
4626 r = onig_region_resize_clear(region, reg->num_mem + 1);
/* Head of the character preceding `at`, computed just before the call. */
4632 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
4633 r = match_at(reg, str, end,
4634#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
/* Release the per-match working storage. */
4640 MATCH_ARG_FREE(msa);
/* forward_search_range: uses the regex's optimization info (reg->optimize,
 * dmin/dmax, sub_anchor) to locate a candidate p at or after s and before
 * `range`, then reports the window through *low / *high (and *low_prev).
 * The visible early exit returns 0.
 * NOTE(review): numbering gaps show that lines of this definition (labels,
 * braces, several guards, the final returns) are missing from this view. */
4645forward_search_range(
regex_t* reg,
const UChar* str,
const UChar* end, UChar* s,
4646 UChar* range, UChar** low, UChar** high, UChar** low_prev)
4648 UChar *p, *pprev = (UChar* )NULL;
4650#ifdef ONIG_DEBUG_SEARCH
4651 fprintf(stderr,
"forward_search_range: str: %"PRIuPTR
" (%p), end: %"PRIuPTR
" (%p), s: %"PRIuPTR
" (%p), range: %"PRIuPTR
" (%p)\n",
4652 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
/* With a positive dmin the scan start is moved forward first. */
4656 if (reg->dmin > 0) {
4657 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
/* Visible path: q is dmin bytes ahead of p; give up if that reaches end,
 * otherwise walk p forward by whole characters until it is at or past q. */
4661 UChar *q = p + reg->dmin;
4663 if (q >= end)
return 0;
4664 while (p < q) p += enclen(reg->enc, p, end);
/* Dispatch to the search routine selected by reg->optimize. */
4669 switch (reg->optimize) {
4670 case ONIG_OPTIMIZE_EXACT:
4671 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
4673 case ONIG_OPTIMIZE_EXACT_IC:
4674 p = slow_search_ic(reg->enc, reg->case_fold_flag,
4675 reg->exact, reg->exact_end, p, end, range);
4678 case ONIG_OPTIMIZE_EXACT_BM:
4679 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
4682 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4683 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
4686 case ONIG_OPTIMIZE_EXACT_BM_IC:
4687 p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
4690 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4691 p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
4694 case ONIG_OPTIMIZE_MAP:
4695 p = map_search(reg->enc, reg->map, p, range, end);
/* A hit only counts when it lies before `range`. */
4699 if (p && p < range) {
/* Hit closer than dmin to s: p is stepped one character forward
 * (presumably to retry -- the jump itself is not visible). */
4700 if (p - reg->dmin < s) {
4703 p += enclen(reg->enc, p, end);
/* Line-anchor checks on the candidate position. */
4707 if (reg->sub_anchor) {
4710 switch (reg->sub_anchor) {
4711 case ANCHOR_BEGIN_LINE:
/* Unless p is at the string start, the preceding character is tested for
 * being a newline. */
4712 if (!ON_STR_BEGIN(p)) {
4713 prev = onigenc_get_prev_char_head(reg->enc,
4714 (pprev ? pprev : str), p, end);
4715 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
4720 case ANCHOR_END_LINE:
4721 if (ON_STR_END(p)) {
/* NOTE(review): the call below passes three arguments, while the other
 * visible calls to onigenc_get_prev_char_head pass four (enc, start, s,
 * end).  It is only compiled when
 * USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE is undefined -- confirm
 * whether that configuration still builds. */
4722#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4723 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
4724 (pprev ? pprev : str), p);
4725 if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
4729 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
/* dmax == 0 branch: *low_prev is the character head before p, looked up
 * from s or from pprev/str. */
4735 if (reg->dmax == 0) {
4739 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
4741 *low_prev = onigenc_get_prev_char_head(reg->enc,
4742 (pprev ? pprev : str), p, end);
/* Finite dmax: *low is p - dmax, clamped to str and adjusted to a
 * character head. */
4746 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4747 if (p < str + reg->dmax) {
4748 *low = (UChar* )str;
4750 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
4753 *low = p - reg->dmax;
4755 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
4756 *low, end, (
const UChar** )low_prev);
/* Fall back to an explicit lookup when the adjust call left *low_prev NULL. */
4757 if (low_prev && IS_NULL(*low_prev))
4758 *low_prev = onigenc_get_prev_char_head(reg->enc,
4759 (pprev ? pprev : s), *low, end);
4763 *low_prev = onigenc_get_prev_char_head(reg->enc,
4764 (pprev ? pprev : str), *low, end);
/* Upper end of the window: the hit position minus dmin. */
4770 *high = p - reg->dmin;
4772#ifdef ONIG_DEBUG_SEARCH
4774 "forward_search_range success: low: %"PRIdPTR
", high: %"PRIdPTR
", dmin: %"PRIdPTR
", dmax: %"PRIdPTR
"\n",
4775 *low - str, *high - str, reg->dmin, reg->dmax);
4783#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
/* backward_search_range: backward counterpart of forward_search_range.
 * Picks a backward search routine from reg->optimize, applies line-anchor
 * checks to the hit p, and for a finite dmax reports *low / *high.
 * NOTE(review): numbering gaps show that lines of this definition (labels,
 * braces, the returns) are missing from this view. */
4786backward_search_range(
regex_t* reg,
const UChar* str,
const UChar* end,
4787 UChar* s,
const UChar* range, UChar* adjrange,
4788 UChar** low, UChar** high)
4796 switch (reg->optimize) {
4797 case ONIG_OPTIMIZE_EXACT:
4799 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
4800 range, adjrange, end, p);
/* All three case-insensitive kinds share slow_search_backward_ic(). */
4803 case ONIG_OPTIMIZE_EXACT_IC:
4804 case ONIG_OPTIMIZE_EXACT_BM_IC:
4805 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4806 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
4807 reg->exact, reg->exact_end,
4808 range, adjrange, end, p);
4811 case ONIG_OPTIMIZE_EXACT_BM:
4812 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
/* With USE_INT_MAP_BACKWARD the backward table is created on demand; the
 * distance s - range is compared with the threshold first (branch body not
 * visible). */
4813#ifdef USE_INT_MAP_BACKWARD
4814 if (IS_NULL(reg->int_map_backward)) {
4816 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
4819 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
4820 &(reg->int_map_backward));
4823 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
4830 case ONIG_OPTIMIZE_MAP:
4831 p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
/* Line-anchor checks on the candidate position. */
4836 if (reg->sub_anchor) {
4839 switch (reg->sub_anchor) {
4840 case ANCHOR_BEGIN_LINE:
/* Unless p is at the string start, the preceding character is tested for
 * being a newline. */
4841 if (!ON_STR_BEGIN(p)) {
4842 prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
4843 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
4850 case ANCHOR_END_LINE:
4851 if (ON_STR_END(p)) {
/* NOTE(review): the call below passes three arguments, while the other
 * visible calls to onigenc_get_prev_char_head pass four (enc, start, s,
 * end).  It is only compiled when
 * USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE is undefined -- confirm
 * whether that configuration still builds. */
4852#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4853 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
4854 if (IS_NULL(prev))
goto fail;
4855 if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
/* p is not at a newline: step back one character, failing if there is none. */
4861 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
4862 p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
4863 if (IS_NULL(p))
goto fail;
/* Finite dmax: window is [p - dmax, p - dmin], with *high right-adjusted
 * to a character head. */
4871 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4872 *low = p - reg->dmax;
4873 *high = p - reg->dmin;
4874 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
4877#ifdef ONIG_DEBUG_SEARCH
4878 fprintf(stderr,
"backward_search_range: low: %d, high: %d\n",
4879 (
int )(*low - str), (
int )(*high - str));
4885#ifdef ONIG_DEBUG_SEARCH
4886 fprintf(stderr,
"backward_search_range: fail.\n");
/* onig_search: thin wrapper over onig_search_gpos() that passes `start`
 * both as the global position and as the search start; every other
 * argument is forwarded unchanged and the callee's result is returned. */
4893onig_search(
regex_t* reg,
const UChar* str,
const UChar* end,
4894 const UChar* start,
const UChar* range,
OnigRegion* region, OnigOptionType option)
4896 return onig_search_gpos(reg, str, end, start, start, range, region, option);
4900onig_search_gpos(
regex_t* reg,
const UChar* str,
const UChar* end,
4901 const UChar* global_pos,
4902 const UChar* start,
const UChar* range,
OnigRegion* region, OnigOptionType option)
4907#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4908 const UChar *orig_start = start;
4909 const UChar *orig_range = range;
4912#ifdef ONIG_DEBUG_SEARCH
4914 "onig_search (entry point): str: %"PRIuPTR
" (%p), end: %"PRIuPTR
", start: %"PRIuPTR
", range: %"PRIuPTR
"\n",
4915 (uintptr_t )str, str, end - str, start - str, range - str);
4919 r = onig_region_resize_clear(region, reg->num_mem + 1);
4920 if (r)
goto finish_no_msa;
4923 if (start > end || start < str)
goto mismatch_no_msa;
4926#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4927# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4928# define MATCH_AND_RETURN_CHECK(upper_range) \
4929 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4930 if (r != ONIG_MISMATCH) {\
4932 if (! IS_FIND_LONGEST(reg->options)) {\
4939# define MATCH_AND_RETURN_CHECK(upper_range) \
4940 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4941 if (r != ONIG_MISMATCH) {\
4949# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4950# define MATCH_AND_RETURN_CHECK(none) \
4951 r = match_at(reg, str, end, s, prev, &msa);\
4952 if (r != ONIG_MISMATCH) {\
4954 if (! IS_FIND_LONGEST(reg->options)) {\
4961# define MATCH_AND_RETURN_CHECK(none) \
4962 r = match_at(reg, str, end, s, prev, &msa);\
4963 if (r != ONIG_MISMATCH) {\
4974 if (reg->anchor != 0 && str < end) {
4975 UChar *min_semi_end, *max_semi_end;
4977 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
4982 if (global_pos > start)
4984 if (global_pos < range)
4985 range = global_pos + 1;
4993 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
4995 if (range > start) {
4996 if (start != str)
goto mismatch_no_msa;
5005 goto mismatch_no_msa;
5008 else if (reg->anchor & ANCHOR_END_BUF) {
5009 min_semi_end = max_semi_end = (UChar* )end;
5012 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
5013 goto mismatch_no_msa;
5015 if (range > start) {
5016 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
5017 start = min_semi_end - reg->anchor_dmax;
5019 start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
5021 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
5022 range = max_semi_end - reg->anchor_dmin + 1;
5025 if (start > range)
goto mismatch_no_msa;
5030 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
5031 range = min_semi_end - reg->anchor_dmax;
5033 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
5034 start = max_semi_end - reg->anchor_dmin;
5035 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
5037 if (range > start)
goto mismatch_no_msa;
5040 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
5041 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
5043 max_semi_end = (UChar* )end;
5044 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
5045 min_semi_end = pre_end;
5047#ifdef USE_CRNL_AS_LINE_TERMINATOR
5048 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
5049 if (IS_NOT_NULL(pre_end) &&
5050 IS_NEWLINE_CRLF(reg->options) &&
5051 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
5052 min_semi_end = pre_end;
5055 if (min_semi_end > str && start <= min_semi_end) {
5060 min_semi_end = (UChar* )end;
5064 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
5065 goto begin_position;
5068 else if (str == end) {
5069 static const UChar address_for_empty_string[] =
"";
5071#ifdef ONIG_DEBUG_SEARCH
5072 fprintf(stderr,
"onig_search: empty string.\n");
5075 if (reg->threshold_len == 0) {
5076 start = end = str = address_for_empty_string;
5078 prev = (UChar* )NULL;
5080 MATCH_ARG_INIT(msa, option, region, start, start);
5081#ifdef USE_COMBINATION_EXPLOSION_CHECK
5082 msa.state_check_buff = (
void* )0;
5083 msa.state_check_buff_size = 0;
5085 MATCH_AND_RETURN_CHECK(end);
5088 goto mismatch_no_msa;
5091#ifdef ONIG_DEBUG_SEARCH
5092 fprintf(stderr,
"onig_search(apply anchor): end: %d, start: %d, range: %d\n",
5093 (
int )(end - str), (
int )(start - str), (
int )(range - str));
5096 MATCH_ARG_INIT(msa, option, region, start, global_pos);
5097#ifdef USE_COMBINATION_EXPLOSION_CHECK
5099 ptrdiff_t offset = (MIN(start, range) - str);
5100 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
5105 if (range > start) {
5107 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5109 prev = (UChar* )NULL;
5111 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5112 UChar *sch_range, *low, *high, *low_prev;
5114 sch_range = (UChar* )range;
5115 if (reg->dmax != 0) {
5116 if (reg->dmax == ONIG_INFINITE_DISTANCE)
5117 sch_range = (UChar* )end;
5119 sch_range += reg->dmax;
5120 if (sch_range > end) sch_range = (UChar* )end;
5124 if ((end - start) < reg->threshold_len)
5127 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
5129 if (! forward_search_range(reg, str, end, s, sch_range,
5130 &low, &high, &low_prev))
goto mismatch;
5136 MATCH_AND_RETURN_CHECK(orig_range);
5138 s += enclen(reg->enc, s, end);
5140 }
while (s < range);
5144 if (! forward_search_range(reg, str, end, s, sch_range,
5145 &low, &high, (UChar** )NULL))
goto mismatch;
5147 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
5149 MATCH_AND_RETURN_CHECK(orig_range);
5151 s += enclen(reg->enc, s, end);
5153 if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
5154 while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
5157 s += enclen(reg->enc, s, end);
5160 }
while (s < range);
5167 MATCH_AND_RETURN_CHECK(orig_range);
5169 s += enclen(reg->enc, s, end);
5170 }
while (s < range);
5173 MATCH_AND_RETURN_CHECK(orig_range);
5177 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5178 UChar *low, *high, *adjrange, *sch_start;
5181 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
5183 adjrange = (UChar* )end;
5185 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
5186 (end - range) >= reg->threshold_len) {
5188 sch_start = s + reg->dmax;
5189 if (sch_start > end) sch_start = (UChar* )end;
5190 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
5198 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5199 MATCH_AND_RETURN_CHECK(orig_start);
5202 }
while (s >= range);
5206 if ((end - range) < reg->threshold_len)
goto mismatch;
5209 if (reg->dmax != 0) {
5210 if (reg->dmax == ONIG_INFINITE_DISTANCE)
5211 sch_start = (UChar* )end;
5213 sch_start += reg->dmax;
5214 if (sch_start > end) sch_start = (UChar* )end;
5216 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
5217 start, sch_start, end);
5220 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
5221 &low, &high) <= 0)
goto mismatch;
5226 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5227 MATCH_AND_RETURN_CHECK(orig_start);
5229 }
while (s >= range);
5233#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5234 if (IS_FIND_LONGEST(reg->options)) {
5235 if (msa.best_len >= 0) {
5244 MATCH_ARG_FREE(msa);
5248 if (IS_FIND_NOT_EMPTY(reg->options) && region) {
5249 onig_region_clear(region);
5253 if (r != ONIG_MISMATCH)
5254 fprintf(stderr,
"onig_search: error %"PRIdPTRDIFF
"\n", r);
5262 if (r != ONIG_MISMATCH)
5263 fprintf(stderr,
"onig_search: error %"PRIdPTRDIFF
"\n", r);
5268 MATCH_ARG_FREE(msa);
/*
 * onig_scan: calls onig_search() on the subject str..end from a moving
 * `start` position and hands each search result to scan_callback.
 *
 *   reg           - regex passed through to onig_search(); its `enc` field
 *                   is used to step over one character.
 *   str, end      - bounds of the subject; `end` is passed to onig_search()
 *                   both as the string end and as the search range.
 *   scan_callback - invoked as scan_callback(n, r, region, callback_arg),
 *                   where r is the value onig_search() returned.
 *
 * NOTE(review): this listing is lossy. The return type, the remaining
 * parameters (region, option, callback_arg), the local declarations, the
 * loop header and the tail of the function are not shown; confirm anything
 * beyond the comments below against the full source.
 */
5273onig_scan(
regex_t* reg,
const UChar* str,
const UChar* end,
5275 int (*scan_callback)(OnigPosition, OnigPosition,
OnigRegion*,
void*),
/* Search from the current `start`; the range limit is always `end`. */
5286 r = onig_search(reg, str, end, start, end, region, option);
/* Hand the result to the caller's callback and keep its return value in rs.
   NOTE(review): n is presumably the running match count; its update and the
   check of rs are elided from this listing. */
5288 rs = scan_callback(n, r, region, callback_arg);
/* region->end[0] is an offset from str. If it equals the offset of `start`,
   the match ended where the search began and made no progress: stop when
   `start` has reached `end`, otherwise step over one character so the next
   search begins further on. */
5293 if (region->end[0] == start - str) {
5294 if (start >= end)
break;
5295 start += enclen(reg->enc, start, end);
/* Resume at the end offset of the match just found.
   NOTE(review): the `else` that presumably joins this to the branch above
   is elided from this listing. */
5298 start = str + region->end[0];
/* onig_search() reported ONIG_MISMATCH; the handling is elided here. */
5303 else if (r == ONIG_MISMATCH) {
/*
 * onig_get_encoding: accessor taking a read-only regex.
 * NOTE(review): the return type and the body are elided from this listing;
 * presumably returns the regex's encoding -- confirm against the full source.
 */
5315onig_get_encoding(
const regex_t* reg)
/*
 * onig_get_options: returns the `options` field of the given regex as an
 * OnigOptionType. The regex is only read (const pointer).
 */
5320extern OnigOptionType
5321onig_get_options(
const regex_t* reg)
5323 return reg->options;
/*
 * onig_get_case_fold_flag: returns the `case_fold_flag` field of the given
 * regex as an OnigCaseFoldType. The regex is only read (const pointer).
 */
5326extern OnigCaseFoldType
5327onig_get_case_fold_flag(
const regex_t* reg)
5329 return reg->case_fold_flag;
/*
 * onig_get_syntax: accessor taking a read-only regex.
 * NOTE(review): the return type and the body are elided from this listing;
 * presumably returns the regex's syntax definition -- confirm against the
 * full source.
 */
5333onig_get_syntax(
const regex_t* reg)
/*
 * onig_number_of_captures: returns the `num_mem` field of the given regex,
 * which is where the capture count is kept. The regex is only read.
 * NOTE(review): the return type line is elided from this listing.
 */
5339onig_number_of_captures(
const regex_t* reg)
5341 return reg->num_mem;
/*
 * onig_number_of_capture_histories: when USE_CAPTURE_HISTORY is defined,
 * walks bit positions 0..ONIG_MAX_CAPTURE_HISTORY_GROUP (inclusive) and
 * tests each one in reg->capture_history with BIT_STATUS_AT.
 * NOTE(review): the action taken for a set bit, the return statement and
 * the branch used when USE_CAPTURE_HISTORY is undefined are elided from
 * this listing; presumably the set bits are counted -- confirm against the
 * full source.
 */
5345onig_number_of_capture_histories(
const regex_t* reg)
5347#ifdef USE_CAPTURE_HISTORY
/* The bound is inclusive: index ONIG_MAX_CAPTURE_HISTORY_GROUP is tested too. */
5351 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
5352 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
#define RB_GNUC_EXTENSION
This is expanded to nothing for non-GCC compilers.
#define xfree
Old name of ruby_xfree.
#define xrealloc
Old name of ruby_xrealloc.
#define xmalloc
Old name of ruby_xmalloc.