Skip to content

Commit a22aa0e

Browse files
k-takata authored and nobu committed
[Bug ruby#13671] Fix that "ss" in look-behind causes syntax error
Fixes k-takata/Onigmo#92. This fix was ported from oniguruma: kkos/oniguruma@257082d. See also k-takata/Onigmo@b1a5445.
1 parent a637903 commit a22aa0e

File tree

3 files changed: +46 -16 lines changed

regcomp.c

Lines changed: 22 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -3301,6 +3301,14 @@ setup_subexp_call(Node* node, ScanEnv* env)
33013301
}
33023302
#endif
33033303

3304+
#define IN_ALT (1<<0)
3305+
#define IN_NOT (1<<1)
3306+
#define IN_REPEAT (1<<2)
3307+
#define IN_VAR_REPEAT (1<<3)
3308+
#define IN_CALL (1<<4)
3309+
#define IN_RECCALL (1<<5)
3310+
#define IN_LOOK_BEHIND (1<<6)
3311+
33043312
/* divide different length alternatives in look-behind.
33053313
(?<=A|B) ==> (?<=A)|(?<=B)
33063314
(?<!A|B) ==> (?<!A)(?<!B)
@@ -3597,24 +3605,29 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
35973605
return ONIGERR_MEMORY;
35983606
}
35993607

3600-
static int
3601-
expand_case_fold_string(Node* node, regex_t* reg)
3602-
{
36033608
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
36043609

3610+
static int
3611+
expand_case_fold_string(Node* node, regex_t* reg, int state)
3612+
{
36053613
int r, n, len, alt_num;
36063614
int varlen = 0;
3615+
int is_in_look_behind;
36073616
UChar *start, *end, *p;
36083617
Node *top_root, *root, *snode, *prev_node;
36093618
OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3610-
StrNode* sn = NSTR(node);
3619+
StrNode* sn;
36113620

36123621
if (NSTRING_IS_AMBIG(node)) return 0;
36133622

3623+
sn = NSTR(node);
3624+
36143625
start = sn->s;
36153626
end = sn->end;
36163627
if (start >= end) return 0;
36173628

3629+
is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
3630+
36183631
r = 0;
36193632
top_root = root = prev_node = snode = NULL_NODE;
36203633
alt_num = 1;
@@ -3630,7 +3643,7 @@ expand_case_fold_string(Node* node, regex_t* reg)
36303643
len = enclen(reg->enc, p, end);
36313644

36323645
varlen = is_case_fold_variable_len(n, items, len);
3633-
if (n == 0 || varlen == 0) {
3646+
if (n == 0 || varlen == 0 || is_in_look_behind) {
36343647
if (IS_NULL(snode)) {
36353648
if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
36363649
onig_node_free(top_root);
@@ -3889,13 +3902,6 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
38893902
}
38903903
#endif
38913904

3892-
#define IN_ALT (1<<0)
3893-
#define IN_NOT (1<<1)
3894-
#define IN_REPEAT (1<<2)
3895-
#define IN_VAR_REPEAT (1<<3)
3896-
#define IN_CALL (1<<4)
3897-
#define IN_RECCALL (1<<5)
3898-
38993905
/* setup_tree does the following work.
39003906
1. check empty loop. (set qn->target_empty_info)
39013907
2. expand ignore-case in char class.
@@ -3937,7 +3943,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
39373943

39383944
case NT_STR:
39393945
if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3940-
r = expand_case_fold_string(node, reg);
3946+
r = expand_case_fold_string(node, reg, state);
39413947
}
39423948
break;
39433949

@@ -4180,7 +4186,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
41804186
if (r < 0) return r;
41814187
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
41824188
if (NTYPE(node) != NT_ANCHOR) goto restart;
4183-
r = setup_tree(an->target, reg, state, env);
4189+
r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env);
41844190
if (r != 0) return r;
41854191
r = setup_look_behind(node, reg, env);
41864192
}
@@ -4193,7 +4199,8 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
41934199
if (r < 0) return r;
41944200
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
41954201
if (NTYPE(node) != NT_ANCHOR) goto restart;
4196-
r = setup_tree(an->target, reg, (state | IN_NOT), env);
4202+
r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND),
4203+
env);
41974204
if (r != 0) return r;
41984205
r = setup_look_behind(node, reg, env);
41994206
}

spec/ruby/language/regexp_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -112,7 +112,7 @@
112112
/foo.(?<=\d)/.match("fooA foo1").to_a.should == ["foo1"]
113113
end
114114

115-
ruby_bug "#13671", ""..."3.6" do # https://bugs.ruby-lang.org/issues/13671
115+
ruby_bug "#13671", ""..."3.5" do # https://bugs.ruby-lang.org/issues/13671
116116
it "handles a lookbehind with ss characters" do
117117
r = Regexp.new("(?<!dss)", Regexp::IGNORECASE)
118118
r.should =~ "✨"

test/ruby/test_regexp.rb

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1743,6 +1743,29 @@ def test_conditional_expression
17431743
assert_raise(RegexpError, bug12418){ Regexp.new('(0?0|(?(5)||)|(?(5)||))?') }
17441744
end
17451745

1746+
def test_ss_in_look_behind
1747+
assert_match_at("(?i:ss)", "ss", [[0, 2]])
1748+
assert_match_at("(?i:ss)", "Ss", [[0, 2]])
1749+
assert_match_at("(?i:ss)", "SS", [[0, 2]])
1750+
assert_match_at("(?i:ss)", "\u017fS", [[0, 2]]) # LATIN SMALL LETTER LONG S
1751+
assert_match_at("(?i:ss)", "s\u017f", [[0, 2]])
1752+
assert_match_at("(?i:ss)", "\u00df", [[0, 1]]) # LATIN SMALL LETTER SHARP S
1753+
assert_match_at("(?i:ss)", "\u1e9e", [[0, 1]]) # LATIN CAPITAL LETTER SHARP S
1754+
assert_match_at("(?i:xssy)", "xssy", [[0, 4]])
1755+
assert_match_at("(?i:xssy)", "xSsy", [[0, 4]])
1756+
assert_match_at("(?i:xssy)", "xSSy", [[0, 4]])
1757+
assert_match_at("(?i:xssy)", "x\u017fSy", [[0, 4]])
1758+
assert_match_at("(?i:xssy)", "xs\u017fy", [[0, 4]])
1759+
assert_match_at("(?i:xssy)", "x\u00dfy", [[0, 3]])
1760+
assert_match_at("(?i:xssy)", "x\u1e9ey", [[0, 3]])
1761+
assert_match_at("(?i:\u00df)", "ss", [[0, 2]])
1762+
assert_match_at("(?i:\u00df)", "SS", [[0, 2]])
1763+
assert_match_at("(?i:[\u00df])", "ss", [[0, 2]])
1764+
assert_match_at("(?i:[\u00df])", "SS", [[0, 2]])
1765+
assert_match_at("(?i)(?<!ss)\u2728", "qq\u2728", [[2, 3]]) # Issue #92
1766+
assert_match_at("(?i)(?<!xss)\u2728", "qq\u2728", [[2, 3]])
1767+
end
1768+
17461769
def test_options_in_look_behind
17471770
assert_nothing_raised {
17481771
assert_match_at("(?<=(?i)ab)cd", "ABcd", [[2,4]])

0 commit comments

Comments (0)