@@ -4074,35 +4074,57 @@ rb_str_cmp(VALUE str1, VALUE str2)
40744074 RSTRING_GETMEM (str1 , ptr1 , len1 );
40754075 RSTRING_GETMEM (str2 , ptr2 , len2 );
40764076
4077- rb_encoding * enc1 = rb_enc_get (str1 );
4078- rb_encoding * enc2 = rb_enc_get (str2 );
4079- if (rb_enc_mbminlen (enc1 ) > 1 && rb_enc_mbminlen (enc2 ) > 1 &&
4080- rb_str_comparable (str1 , str2 )) {
4081- /* wchar-base encoding */
4082- const char * end1 = ptr1 + len1 , * end2 = ptr2 + len2 ;
4083- unsigned int c1 , c2 ;
4084- int r1 , r2 ;
4085-
4086- while (len1 > 0 && len2 > 0 ) {
4087- if (!MBCLEN_CHARFOUND_P (r1 = rb_enc_precise_mbclen (ptr1 , end1 , enc1 )))
4088- break ;
4089- if (!MBCLEN_CHARFOUND_P (r2 = rb_enc_precise_mbclen (ptr2 , end2 , enc2 )))
4090- break ;
4091- c1 = rb_enc_mbc_to_codepoint (ptr1 , end1 , enc1 );
4092- c2 = rb_enc_mbc_to_codepoint (ptr2 , end2 , enc2 );
4093- len1 = end1 - (ptr1 += MBCLEN_CHARFOUND_LEN (r1 ));
4094- len2 = end2 - (ptr2 += MBCLEN_CHARFOUND_LEN (r2 ));
4095- if (c1 != c2 ) {
4096- return c1 < c2 ? -1 : 1 ;
4097- }
4098- }
4099- if (len1 == 0 && len2 == 0 ) return 0 ;
4100- if (len1 == 0 ) return -1 ;
4101- if (len2 == 0 ) return 1 ;
4077+ if (ptr1 == ptr2 ) {
4078+ long len = lesser (len1 , len2 );
4079+ ptr1 += len ; len1 -= len ;
4080+ ptr2 += len ; len2 -= len ;
4081+ }
4082+ if (len1 == 0 && len2 == 0 ) return 0 ;
4083+ if (single_byte_optimizable (str1 ) && single_byte_optimizable (str2 )) {
4084+ if ((retval = memcmp (ptr1 , ptr2 , lesser (len1 , len2 ))) == 0 ) {
4085+ if (len1 == len2 ) {
4086+ if (!rb_str_comparable (str1 , str2 )) {
4087+ if (ENCODING_GET (str1 ) > ENCODING_GET (str2 ))
4088+ return 1 ;
4089+ return -1 ;
4090+ }
4091+ return 0 ;
4092+ }
4093+ return (len1 > len2 ) ? 1 : -1 ;
4094+ }
4095+ return (retval > 0 ) ? 1 : -1 ;
4096+ }
4097+ else {
4098+ rb_encoding * enc1 = rb_enc_get (str1 );
4099+ rb_encoding * enc2 = rb_enc_get (str2 );
4100+ const char * p1end = ptr1 + len1 , * p2end = ptr2 + len2 ;
4101+ const char * p1 = ptr1 , * p2 = ptr2 ;
4102+ unsigned int c1 , c2 ;
4103+ int r1 , r2 ;
4104+
4105+ while (p1 < p1end && p2 < p2end ) {
4106+ if ((r1 = rb_enc_precise_mbclen (p1 , p1end , enc1 )) <= 0 )
4107+ break ;
4108+ if ((r2 = rb_enc_precise_mbclen (p2 , p2end , enc2 )) <= 0 )
4109+ break ;
4110+ c1 = rb_enc_mbc_to_codepoint (p1 , p1end , enc1 );
4111+ c2 = rb_enc_mbc_to_codepoint (p2 , p2end , enc2 );
4112+ p1 += MBCLEN_CHARFOUND_LEN (r1 );
4113+ p2 += MBCLEN_CHARFOUND_LEN (r2 );
4114+ if (c1 != c2 ) {
4115+ return c1 < c2 ? -1 : 1 ;
4116+ }
4117+ }
4118+ len1 = p1end - (ptr1 = p1 );
4119+ len2 = p2end - (ptr2 = p2 );
4120+ if (len1 == 0 && len2 == 0 ) goto same_binary ;
4121+ if (len1 == 0 ) return -1 ;
4122+ if (len2 == 0 ) return 1 ;
41024123 }
41034124
41044125 if (ptr1 == ptr2 || (retval = memcmp (ptr1 , ptr2 , lesser (len1 , len2 ))) == 0 ) {
41054126 if (len1 == len2 ) {
4127+ same_binary :
41064128 if (!rb_str_comparable (str1 , str2 )) {
41074129 if (ENCODING_GET (str1 ) > ENCODING_GET (str2 ))
41084130 return 1 ;
@@ -4263,6 +4285,12 @@ str_casecmp(VALUE str1, VALUE str2)
42634285
42644286 p1 = RSTRING_PTR (str1 ); p1end = RSTRING_END (str1 );
42654287 p2 = RSTRING_PTR (str2 ); p2end = RSTRING_END (str2 );
4288+ if (p1 == p2 ) {
4289+ len = lesser (p1end - p1 , p2end - p2 );
4290+ p1 += len ;
4291+ p2 += len ;
4292+ }
4293+ if (p1 == p1end && p2 == p2end ) return INT2FIX (0 );
42664294 if (single_byte_optimizable (str1 ) && single_byte_optimizable (str2 )) {
42674295 while (p1 < p1end && p2 < p2end ) {
42684296 if (* p1 != * p2 ) {
@@ -4276,34 +4304,38 @@ str_casecmp(VALUE str1, VALUE str2)
42764304 }
42774305 }
42784306 else {
4279- while (p1 < p1end && p2 < p2end ) {
4280- int l1 , c1 = rb_enc_ascget (p1 , p1end , & l1 , enc );
4281- int l2 , c2 = rb_enc_ascget (p2 , p2end , & l2 , enc );
4307+ rb_encoding * enc1 = rb_enc_get (str1 );
4308+ rb_encoding * enc2 = rb_enc_get (str2 );
4309+ unsigned int c1 , c2 ;
4310+ int r1 , r2 ;
42824311
4283- if (0 <= c1 && 0 <= c2 ) {
4284- c1 = TOLOWER (c1 );
4285- c2 = TOLOWER (c2 );
4286- if (c1 != c2 )
4287- return INT2FIX (c1 < c2 ? -1 : 1 );
4312+ while (p1 < p1end && p2 < p2end ) {
4313+ if ((r1 = rb_enc_precise_mbclen (p1 , p1end , enc1 )) <= 0 )
4314+ break ;
4315+ if ((r2 = rb_enc_precise_mbclen (p2 , p2end , enc2 )) <= 0 )
4316+ break ;
4317+ c1 = rb_enc_mbc_to_codepoint (p1 , p1end , enc1 );
4318+ c2 = rb_enc_mbc_to_codepoint (p2 , p2end , enc2 );
4319+ p1 += MBCLEN_CHARFOUND_LEN (r1 );
4320+ p2 += MBCLEN_CHARFOUND_LEN (r2 );
4321+ if (ISASCII (c1 )) c1 = TOLOWER (c1 );
4322+ if (ISASCII (c2 )) c2 = TOLOWER (c2 );
4323+ if (c1 != c2 ) {
4324+ return INT2FIX (c1 < c2 ? -1 : 1 );
42884325 }
4289- else {
4290- int r ;
4291- l1 = rb_enc_mbclen (p1 , p1end , enc );
4292- l2 = rb_enc_mbclen (p2 , p2end , enc );
4293- len = l1 < l2 ? l1 : l2 ;
4294- r = memcmp (p1 , p2 , len );
4295- if (r != 0 )
4296- return INT2FIX (r < 0 ? -1 : 1 );
4297- if (l1 != l2 )
4298- return INT2FIX (l1 < l2 ? -1 : 1 );
4299- }
4300- p1 += l1 ;
4301- p2 += l2 ;
4302- }
4303- }
4304- if (RSTRING_LEN (str1 ) == RSTRING_LEN (str2 )) return INT2FIX (0 );
4305- if (RSTRING_LEN (str1 ) > RSTRING_LEN (str2 )) return INT2FIX (1 );
4306- return INT2FIX (-1 );
4326+ }
4327+ if (p1 < p1end && p2 < p2end ) {
4328+ len = lesser (p1end - p1 , p2end - p2 );
4329+ if ((r1 = memcmp (p1 , p2 , len )) != 0 )
4330+ return INT2FIX (r1 < 0 ? -1 : 1 );
4331+ p1 += len ;
4332+ p2 += len ;
4333+ }
4334+ }
4335+
4336+ if (p1 < p1end ) return INT2FIX (1 );
4337+ if (p2 < p2end ) return INT2FIX (-1 );
4338+ return INT2FIX (0 );
43074339}
43084340
43094341/*
0 commit comments