Skip to content

Commit 2d7420f

Browse files
committed
Fixed comparison between strings in wchar-based encodings [Bug #17594]
1 parent 4e516a4 commit 2d7420f

File tree

2 files changed

+34
-0
lines changed

2 files changed

+34
-0
lines changed

string.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4073,6 +4073,34 @@ rb_str_cmp(VALUE str1, VALUE str2)
40734073
if (str1 == str2) return 0;
40744074
RSTRING_GETMEM(str1, ptr1, len1);
40754075
RSTRING_GETMEM(str2, ptr2, len2);
4076+
4077+
rb_encoding *enc1 = rb_enc_get(str1);
4078+
rb_encoding *enc2 = rb_enc_get(str2);
4079+
if (rb_enc_mbminlen(enc1) > 1 && rb_enc_mbminlen(enc2) > 1 &&
4080+
rb_str_comparable(str1, str2)) {
4081+
/* wchar-base encoding */
4082+
const char *end1 = ptr1 + len1, *end2 = ptr2 + len2;
4083+
unsigned int c1, c2;
4084+
int r1, r2;
4085+
4086+
while (len1 > 0 && len2 > 0) {
4087+
if (!MBCLEN_CHARFOUND_P(r1 = rb_enc_precise_mbclen(ptr1, end1, enc1)))
4088+
break;
4089+
if (!MBCLEN_CHARFOUND_P(r2 = rb_enc_precise_mbclen(ptr2, end2, enc2)))
4090+
break;
4091+
c1 = rb_enc_mbc_to_codepoint(ptr1, end1, enc1);
4092+
c2 = rb_enc_mbc_to_codepoint(ptr2, end2, enc2);
4093+
len1 = end1 - (ptr1 += MBCLEN_CHARFOUND_LEN(r1));
4094+
len2 = end2 - (ptr2 += MBCLEN_CHARFOUND_LEN(r2));
4095+
if (c1 != c2) {
4096+
return c1 < c2 ? -1 : 1;
4097+
}
4098+
}
4099+
if (len1 == 0 && len2 == 0) return 0;
4100+
if (len1 == 0) return -1;
4101+
if (len2 == 0) return 1;
4102+
}
4103+
40764104
if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
40774105
if (len1 == len2) {
40784106
if (!rb_str_comparable(str1, str2)) {

test/ruby/test_m17n.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1171,6 +1171,12 @@ def test_upto
11711171
assert_raise(Encoding::CompatibilityError){s1.upto(s2) {|x| break }}
11721172
end
11731173

1174+
# Regression test added with the [Bug #17594] fix: for every pair yielded by
# each_encoding, asserts that `a` compares less than `a_macron` using `<`.
# The two inputs are the literals "a" and "\u{0101}".
# NOTE(review): each_encoding is defined outside this view; presumably it
# yields the given strings converted to each encoding under test -- confirm.
def test_CMP
1175+
each_encoding(*%W"a \u{0101}") do |a, a_macron|
1176+
assert_operator(a, :<, a_macron)
1177+
end
1178+
end
1179+
11741180
def test_casecmp
11751181
s1 = s("\x81\x41")
11761182
s2 = s("\x81\x61")

0 commit comments

Comments
 (0)