@@ -4073,6 +4073,34 @@ rb_str_cmp(VALUE str1, VALUE str2)
40734073 if (str1 == str2 ) return 0 ;
40744074 RSTRING_GETMEM (str1 , ptr1 , len1 );
40754075 RSTRING_GETMEM (str2 , ptr2 , len2 );
4076+
4077+ rb_encoding * enc1 = rb_enc_get (str1 );
4078+ rb_encoding * enc2 = rb_enc_get (str2 );
4079+ if (rb_enc_mbminlen (enc1 ) > 1 && rb_enc_mbminlen (enc2 ) > 1 &&
4080+ rb_str_comparable (str1 , str2 )) {
4081+ /* wchar-base encoding */
4082+ const char * end1 = ptr1 + len1 , * end2 = ptr2 + len2 ;
4083+ unsigned int c1 , c2 ;
4084+ int r1 , r2 ;
4085+
4086+ while (len1 > 0 && len2 > 0 ) {
4087+ if (!MBCLEN_CHARFOUND_P (r1 = rb_enc_precise_mbclen (ptr1 , end1 , enc1 )))
4088+ break ;
4089+ if (!MBCLEN_CHARFOUND_P (r2 = rb_enc_precise_mbclen (ptr2 , end2 , enc2 )))
4090+ break ;
4091+ c1 = rb_enc_mbc_to_codepoint (ptr1 , end1 , enc1 );
4092+ c2 = rb_enc_mbc_to_codepoint (ptr2 , end2 , enc2 );
4093+ len1 = end1 - (ptr1 += MBCLEN_CHARFOUND_LEN (r1 ));
4094+ len2 = end2 - (ptr2 += MBCLEN_CHARFOUND_LEN (r2 ));
4095+ if (c1 != c2 ) {
4096+ return c1 < c2 ? -1 : 1 ;
4097+ }
4098+ }
4099+ if (len1 == 0 && len2 == 0 ) return 0 ;
4100+ if (len1 == 0 ) return -1 ;
4101+ if (len2 == 0 ) return 1 ;
4102+ }
4103+
40764104 if (ptr1 == ptr2 || (retval = memcmp (ptr1 , ptr2 , lesser (len1 , len2 ))) == 0 ) {
40774105 if (len1 == len2 ) {
40784106 if (!rb_str_comparable (str1 , str2 )) {
0 commit comments