From jhi@cc.hut.fi Sun Sep 29 01:56:47 1996 Date: Sun, 29 Sep 1996 00:53:09 +0300 (EET DST) From: Jarkko Hietaniemi Reply-To: Jarkko.Hietaniemi@iki.fi To: Oleg Bartunov Subject: Re: Does perl 5.003 uses LC_COLLATE information ? (fwd) [The following text is in the "ISO-8859-1" character set] [Your display is set for the "koi8-r" character set] [Some characters may be displayed incorrectly] > It's 01:45 in Moscow and I'm an astronomer but we here in Russia have to Oops, yes, only one hour difference. > do our moonlight work to earn money :-( Sorry to hear that. > I'm waiting for your patch, this one seems broooken, at least miniperl Yes, quite broken. I am quite amazed how it did manage to compile (in Digital UNIX). Here is a better one: diff -rwu perl5.003_05:dist/embed.h perl5.003_05+lc_collate/embed.h --- perl5.003_05:dist/embed.h Sun Aug 25 20:33:47 1996 +++ perl5.003_05+lc_collate/embed.h Sun Sep 29 00:04:52 1996 @@ -93,6 +93,7 @@ #define last_lop Perl_last_lop #define last_lop_op Perl_last_lop_op #define last_uni Perl_last_uni +#define lc_collate_done Perl_lc_collate_done #define le_amg Perl_le_amg #define lex_state Perl_lex_state #define lex_defer Perl_lex_defer @@ -507,6 +508,7 @@ #define magic_wipepack Perl_magic_wipepack #define magicname Perl_magicname #define markstack_grow Perl_markstack_grow +#define mem_collxfrm Perl_mem_collxfrm #define mess Perl_mess #define mg_clear Perl_mg_clear #define mg_copy Perl_mg_copy diff -rwu perl5.003_05:dist/global.sym perl5.003_05+lc_collate/global.sym --- perl5.003_05:dist/global.sym Sun Aug 25 20:11:47 1996 +++ perl5.003_05+lc_collate/global.sym Sun Sep 29 00:04:53 1996 @@ -77,6 +77,7 @@ last_lop last_lop_op last_uni +lc_collate_active le_amg lex_state lex_defer @@ -494,6 +495,7 @@ magic_wipepack magicname markstack_grow +mem_collxfrm mess mg_clear mg_copy diff -rwu perl5.003_05:dist/lib/Benchmark.pm perl5.003_05+lc_collate/lib/Benchmark.pm --- perl5.003_05:dist/lib/Benchmark.pm Thu Jun 8 02:48:13 1995 +++ perl5.003_05+lc_collate/lib/Benchmark.pm Sun Sep 29 00:04:53 1996 @@ -40,7 +40,7 @@ # ... your code here ... $t1 = new Benchmark; $td = timediff($t1, $t0); - print "the code took:",timestr($dt),"\n"; + print "the code took:",timestr($td),"\n"; =item debug diff -rwu perl5.003_05:dist/perl.h perl5.003_05+lc_collate/perl.h --- perl5.003_05:dist/perl.h Tue Sep 10 17:26:18 1996 +++ perl5.003_05+lc_collate/perl.h Sun Sep 29 00:04:53 1996 @@ -179,6 +179,8 @@ #include #endif +EXT int lc_collate_active; + #ifdef METHOD /* Defined by OSF/1 v3.0 by ctype.h */ #undef METHOD #endif diff -rwu perl5.003_05:dist/pp_ctl.c perl5.003_05+lc_collate/pp_ctl.c --- perl5.003_05:dist/pp_ctl.c Mon Sep 2 18:00:42 1996 +++ perl5.003_05+lc_collate/pp_ctl.c Sun Sep 29 00:13:57 1996 @@ -1202,6 +1202,33 @@ if (!SvPOKp(str2)) return 1; + if (lc_collate_active) { + register char * pv1, * pv2, * pvx; + STRLEN cur1, cur2, curx; + + pv1 = SvPV(str1, cur1); + pvx = mem_collxfrm(pv1, cur1, &curx); + pv1 = pvx; + cur1 = curx; + + pv2 = SvPV(str2, cur2); + pvx = mem_collxfrm(pv2, cur2, &curx); + pv2 = pvx; + cur2 = curx; + + retval = memcmp((void *)pv1, (void *)pv2, cur1 < cur2 ? cur1 : cur2); + + Safefree(pv1); + Safefree(pv2); + + if (retval) + return retval < 0 ? -1 : 1; + + if (cur1 == cur2) + return 0; + else + return cur1 < cur2 ? -1 : 1; + } else { if (SvCUR(str1) < SvCUR(str2)) { /*SUPPRESS 560*/ if (retval = memcmp(SvPVX(str1), SvPVX(str2), SvCUR(str1))) @@ -1216,6 +1243,7 @@ return 0; else return 1; + } } PP(pp_reset) diff -rwu perl5.003_05:dist/proto.h perl5.003_05+lc_collate/proto.h --- perl5.003_05:dist/proto.h Mon Sep 2 18:00:41 1996 +++ perl5.003_05+lc_collate/proto.h Sun Sep 29 00:04:53 1996 @@ -215,6 +215,7 @@ extern Malloc_t calloc _((MEM_SIZE, MEM_SIZE)); #endif void markstack_grow _((void)); +char* mem_collxfrm _((const char *m, const Size_t n, Size_t * nx)); char* mess _((char* pat, va_list* args)); int mg_clear _((SV* sv)); int mg_copy _((SV *, SV *, char *, I32)); diff -rwu perl5.003_05:dist/sv.c perl5.003_05+lc_collate/sv.c --- perl5.003_05:dist/sv.c Sun Sep 8 21:21:34 1996 +++ perl5.003_05+lc_collate/sv.c Sun Sep 29 00:04:53 1996 @@ -2615,35 +2615,55 @@ pv1 = ""; cur1 = 0; } - else + else { pv1 = SvPV(str1, cur1); + if (lc_collate_active) { + STRLEN cur1x; + char * pv1x = mem_collxfrm(pv1, cur1, &cur1x); + pv1 = pv1x; + cur1 = cur1x; + } + } if (!str2) { pv2 = ""; cur2 = 0; } - else + else { pv2 = SvPV(str2, cur2); + if (lc_collate_active) { + STRLEN cur2x; + char * pv2x = mem_collxfrm(pv2, cur2, &cur2x); + pv2 = pv2x; + cur2 = cur2x; + } + } - if (!cur1) + if (!cur1) { + if (lc_collate_active) + Safefree(pv2); return cur2 ? -1 : 0; - if (!cur2) + } + if (!cur2) { + if (lc_collate_active) + Safefree(pv1); return 1; + } - if (cur1 < cur2) { - /*SUPPRESS 560*/ - if (retval = memcmp((void*)pv1, (void*)pv2, cur1)) - return retval < 0 ? -1 : 1; - else - return -1; + retval = memcmp((void*)pv1, (void*)pv2, cur1 < cur2 ? cur1 : cur2); + + if (lc_collate_active) { + Safefree(pv1); + Safefree(pv2); } - /*SUPPRESS 560*/ - else if (retval = memcmp((void*)pv1, (void*)pv2, cur2)) + + if (retval) return retval < 0 ? -1 : 1; - else if (cur1 == cur2) + + if (cur1 == cur2) return 0; else - return 1; + return cur1 < cur2 ? -1 : 1; } char * diff -rwu perl5.003_05:dist/t/base/term.t perl5.003_05+lc_collate/t/base/term.t --- perl5.003_05:dist/t/base/term.t Tue Oct 18 19:43:06 1994 +++ perl5.003_05+lc_collate/t/base/term.t Sun Sep 29 00:04:53 1996 @@ -7,7 +7,7 @@ # check "" interpretation $x = "\n"; -if ($x lt ' ') {print "ok 1\n";} else {print "not ok 1\n";} +if ($x eq chr(10)) {print "ok 1\n";} else {print "not ok 1\n";} # check `` processing diff -rwu perl5.003_05:dist/t/comp/package.t perl5.003_05+lc_collate/t/comp/package.t --- perl5.003_05:dist/t/comp/package.t Tue Oct 18 19:43:35 1994 +++ perl5.003_05+lc_collate/t/comp/package.t Sun Sep 29 00:04:53 1996 @@ -5,7 +5,7 @@ $blurfl = 123; $foo = 3; -package XYZ; +package xyz; $bar = 4; @@ -20,10 +20,10 @@ $xyz = 2; $main = join(':', sort(keys %main::)); -$XYZ = join(':', sort(keys %XYZ::)); +$xyz = join(':', sort(keys %xyz::)); $ABC = join(':', sort(keys %ABC::)); -print $XYZ eq 'ABC:XYZ:bar:main:xyz' ? "ok 1\n" : "not ok 1 '$XYZ'\n"; +print $xyz eq 'ABC:bar:main:xyz' ? "ok 1\n" : "not ok 1 '$xyz'\n"; print $ABC eq 'blurfl:dyick' ? "ok 2\n" : "not ok 2 '$ABC'\n"; print $main'blurfl == 123 ? "ok 3\n" : "not ok 3\n"; diff -rwu perl5.003_05:dist/t/lib/anydbm.t perl5.003_05+lc_collate/t/lib/anydbm.t --- perl5.003_05:dist/t/lib/anydbm.t Tue Oct 18 19:43:59 1994 +++ perl5.003_05+lc_collate/t/lib/anydbm.t Sun Sep 29 00:04:53 1996 @@ -80,7 +80,7 @@ if ($#keys == 29 && $#values == 29) {print "ok 5\n";} else {print "not ok 5\n";} while (($key,$value) = each(h)) { - if ($key eq $keys[$i] && $value eq $values[$i] && $key gt $value) { + if ($key eq $keys[$i] && $value eq $values[$i] && $key eq lc($value)) { $key =~ y/a-z/A-Z/; $i++ if $key eq $value; } diff -rwu perl5.003_05:dist/t/lib/db-btree.t perl5.003_05+lc_collate/t/lib/db-btree.t --- perl5.003_05:dist/t/lib/db-btree.t Sat Sep 7 23:18:27 1996 +++ perl5.003_05+lc_collate/t/lib/db-btree.t Sun Sep 29 00:04:53 1996 @@ -158,7 +158,7 @@ $i = 0 ; while (($key,$value) = each(%h)) { - if ($key eq $keys[$i] && $value eq $values[$i] && $key gt $value) { + if ($key eq $keys[$i] && $value eq $values[$i] && $key eq lc($value)) { $key =~ y/a-z/A-Z/; $i++ if $key eq $value; } diff -rwu perl5.003_05:dist/t/lib/db-hash.t perl5.003_05+lc_collate/t/lib/db-hash.t --- perl5.003_05:dist/t/lib/db-hash.t Sat Sep 7 23:18:27 1996 +++ perl5.003_05+lc_collate/t/lib/db-hash.t Sun Sep 29 00:04:53 1996 @@ -153,7 +153,7 @@ $i = 0 ; while (($key,$value) = each(%h)) { - if ($key eq $keys[$i] && $value eq $values[$i] && $key gt $value) { + if ($key eq $keys[$i] && $value eq $values[$i] && $key eq lc($value)) { $key =~ y/a-z/A-Z/; $i++ if $key eq $value; } diff -rwu perl5.003_05:dist/t/lib/gdbm.t perl5.003_05+lc_collate/t/lib/gdbm.t --- perl5.003_05:dist/t/lib/gdbm.t Tue Oct 18 19:44:13 1994 +++ perl5.003_05+lc_collate/t/lib/gdbm.t Sun Sep 29 00:04:53 1996 @@ -83,7 +83,7 @@ if ($#keys == 29 && $#values == 29) {print "ok 5\n";} else {print "not ok 5\n";} while (($key,$value) = each(h)) { - if ($key eq $keys[$i] && $value eq $values[$i] && $key gt $value) { + if ($key eq $keys[$i] && $value eq $values[$i] && $key eq lc($value)) { $key =~ y/a-z/A-Z/; $i++ if $key eq $value; } diff -rwu perl5.003_05:dist/t/lib/ndbm.t perl5.003_05+lc_collate/t/lib/ndbm.t --- perl5.003_05:dist/t/lib/ndbm.t Tue Oct 18 19:44:15 1994 +++ perl5.003_05+lc_collate/t/lib/ndbm.t Sun Sep 29 00:04:53 1996 @@ -86,7 +86,7 @@ if ($#keys == 29 && $#values == 29) {print "ok 5\n";} else {print "not ok 5\n";} while (($key,$value) = each(h)) { - if ($key eq $keys[$i] && $value eq $values[$i] && $key gt $value) { + if ($key eq $keys[$i] && $value eq $values[$i] && $key eq lc($value)) { $key =~ y/a-z/A-Z/; $i++ if $key eq $value; } diff -rwu perl5.003_05:dist/t/lib/odbm.t perl5.003_05+lc_collate/t/lib/odbm.t --- perl5.003_05:dist/t/lib/odbm.t Tue Oct 18 19:44:18 1994 +++ perl5.003_05+lc_collate/t/lib/odbm.t Sun Sep 29 00:04:53 1996 @@ -86,7 +86,7 @@ if ($#keys == 29 && $#values == 29) {print "ok 5\n";} else {print "not ok 5\n";} while (($key,$value) = each(h)) { - if ($key eq $keys[$i] && $value eq $values[$i] && $key gt $value) { + if ($key eq $keys[$i] && $value eq $values[$i] && $key eq lc($value)) { $key =~ y/a-z/A-Z/; $i++ if $key eq $value; } diff -rwu perl5.003_05:dist/t/lib/sdbm.t perl5.003_05+lc_collate/t/lib/sdbm.t --- perl5.003_05:dist/t/lib/sdbm.t Tue Oct 18 19:44:22 1994 +++ perl5.003_05+lc_collate/t/lib/sdbm.t Sun Sep 29 00:04:53 1996 @@ -85,7 +85,7 @@ if ($#keys == 29 && $#values == 29) {print "ok 5\n";} else {print "not ok 5\n";} while (($key,$value) = each(h)) { - if ($key eq $keys[$i] && $value eq $values[$i] && $key gt $value) { + if ($key eq $keys[$i] && $value eq $values[$i] && $key eq lc($value)) { $key =~ y/a-z/A-Z/; $i++ if $key eq $value; } diff -rwu perl5.003_05:dist/t/op/each.t perl5.003_05+lc_collate/t/op/each.t --- perl5.003_05:dist/t/op/each.t Tue Oct 18 19:44:45 1994 +++ perl5.003_05+lc_collate/t/op/each.t Sun Sep 29 00:04:53 1996 @@ -41,7 +41,7 @@ if ($#keys == 29 && $#values == 29) {print "ok 1\n";} else {print "not ok 1\n";} while (($key,$value) = each(h)) { - if ($key eq $keys[$i] && $value eq $values[$i] && $key gt $value) { + if ($key eq $keys[$i] && $value eq $values[$i] && $key eq lc($value)) { $key =~ y/a-z/A-Z/; $i++ if $key eq $value; } diff -rwu perl5.003_05:dist/t/op/glob.t perl5.003_05+lc_collate/t/op/glob.t --- perl5.003_05:dist/t/op/glob.t Tue Oct 18 19:45:03 1994 +++ perl5.003_05+lc_collate/t/op/glob.t Sun Sep 29 00:22:14 1996 @@ -5,9 +5,10 @@ print "1..4\n"; @ops = ; -$list = join(' ',@ops); +$list = join(' ',sort @ops); chop($otherway = `echo op/*`); +$otherway = join(' ',sort split(' ', $otherway)); print $list eq $otherway ? "ok 1\n" : "not ok 1\n$list\n$otherway\n"; diff -rwu perl5.003_05:dist/t/op/magic.t perl5.003_05+lc_collate/t/op/magic.t --- perl5.003_05:dist/t/op/magic.t Tue Oct 18 19:45:26 1994 +++ perl5.003_05+lc_collate/t/op/magic.t Sun Sep 29 00:04:53 1996 @@ -30,7 +30,7 @@ print "ok 3\n"; } else { - print "not ok 3 $a\n"; + print "not ok 3 ($x @_)\n"; } } diff -rwu perl5.003_05:dist/t/op/readdir.t perl5.003_05+lc_collate/t/op/readdir.t --- perl5.003_05:dist/t/op/readdir.t Tue Oct 18 19:46:03 1994 +++ perl5.003_05+lc_collate/t/op/readdir.t Sun Sep 29 00:24:07 1996 @@ -12,7 +12,7 @@ if (@D > 20 && @D < 100) { print "ok 2\n"; } else { print "not ok 2\n"; } @R = sort @D; -@G = ; +@G = sort ; if ($G[0] =~ m#.*\](\w+\.t)#i) { # grep is to convert filespecs returned from glob under VMS to format # identical to that returned by readdir diff -rwu perl5.003_05:dist/t/op/sort.t perl5.003_05+lc_collate/t/op/sort.t --- perl5.003_05:dist/t/op/sort.t Tue Oct 18 19:46:16 1994 +++ perl5.003_05+lc_collate/t/op/sort.t Sun Sep 29 00:04:54 1996 @@ -4,19 +4,22 @@ print "1..10\n"; -sub backwards { $a lt $b ? 1 : $a gt $b ? -1 : 0; } +sub backwards { $a lt $b ? 1 : $a gt $b ? -1 : 0 } @harry = ('dog','cat','x','Cain','Abel'); -@george = ('gone','chased','yz','Punished','Axed'); +@george = ('gone','chased','yz','punished','Axed'); $x = join('', sort @harry); print ($x eq 'AbelCaincatdogx' ? "ok 1\n" : "not ok 1\n"); +print "# x = '$x'\n"; $x = join('', sort( backwards @harry)); print ($x eq 'xdogcatCainAbel' ? "ok 2\n" : "not ok 2\n"); +print "# x = '$x'\n"; $x = join('', sort @george, 'to', @harry); -print ($x eq 'AbelAxedCainPunishedcatchaseddoggonetoxyz'?"ok 3\n":"not ok 3\n"); +print ($x eq 'AbelAxedCaincatchaseddoggonepunishedtoxyz'?"ok 3\n":"not ok 3\n"); +print "# x = '$x'\n"; @a = (); @b = reverse @a; diff -rwu perl5.003_05:dist/util.c perl5.003_05+lc_collate/util.c --- perl5.003_05:dist/util.c Tue Sep 3 19:00:14 1996 +++ perl5.003_05+lc_collate/util.c Sun Sep 29 00:12:02 1996 @@ -415,38 +415,150 @@ * 0 = fallback to C locale, * -1 = fallback to C locale failed */ -#if defined(HAS_SETLOCALE) && defined(LC_CTYPE) - char * lang = getenv("LANG"); +#if defined(HAS_SETLOCALE) char * lc_all = getenv("LC_ALL"); char * lc_ctype = getenv("LC_CTYPE"); - int i; + char * lc_collate = getenv("LC_COLLATE"); + char * lang = getenv("LANG"); + int setlocale_failure = 0; + +#define SETLOCALE_LC_CTYPE 0x01 +#define SETLOCALE_LC_COLLATE 0x02 + +#ifdef LC_CTYPE + if (setlocale(LC_CTYPE, "") == 0) + setlocale_failure |= SETLOCALE_LC_CTYPE; +#endif - if (setlocale(LC_CTYPE, "") == NULL && (lc_all || lc_ctype || lang)) { +#ifdef LC_COLLATE + if (setlocale(LC_COLLATE, "") == 0) + setlocale_failure |= SETLOCALE_LC_COLLATE; + else + lc_collate_active = 1; +#endif + + if (setlocale_failure && (lc_all || lang)) { char *doit; if (printwarn > 1 || printwarn && (!(doit = getenv("PERL_BADLANG")) || atoi(doit))) { - PerlIO_printf(PerlIO_stderr(), "warning: setlocale(LC_CTYPE, \"\") failed.\n"); + if (setlocale_failure) { +#ifdef LC_CTYPE + if (setlocale_failure && SETLOCALE_LC_CTYPE) + PerlIO_printf(PerlIO_stderr(), + "warning: setlocale(LC_CTYPE, \"\") failed.\n"); +#endif +#ifdef LC_COLLATE + if (setlocale_failure && SETLOCALE_LC_COLLATE) + PerlIO_printf(PerlIO_stderr(), + "warning: setlocale(LC_COLLATE, \"\") failed.\n"); +#endif + PerlIO_printf(PerlIO_stderr(), + "warning: LC_ALL = \"%s\",\n", + lc_all ? lc_all : "(null)"); +#ifdef LC_CTYPE + if (setlocale_failure && SETLOCALE_LC_CTYPE) + PerlIO_printf(PerlIO_stderr(), + "\tLC_CTYPE = \"%s\",\n", + lc_ctype ? lc_ctype : "(null)"); +#endif +#ifdef LC_COLLATE + if (setlocale_failure && SETLOCALE_LC_COLLATE) + PerlIO_printf(PerlIO_stderr(), + "\tLC_COLLATE = \"%s\",\n", + lc_collate ? lc_collate : "(null)"); +#endif PerlIO_printf(PerlIO_stderr(), - "warning: LC_ALL = \"%s\", LC_CTYPE = \"%s\", LANG = \"%s\",\n", - lc_all ? lc_all : "(null)", - lc_ctype ? lc_ctype : "(null)", - lang ? lang : "(null)" - ); - PerlIO_printf(PerlIO_stderr(), "warning: falling back to the \"C\" locale.\n"); + "\tLANG = \"%s\"\n", + lang ? lang : "(null)"); + PerlIO_printf(PerlIO_stderr(), + "warning: falling back to the \"C\" locale.\n"); } + ok = 0; + +#ifdef LC_CTYPE + if (setlocale_failure && SETLOCALE_LC_CTYPE) if (setlocale(LC_CTYPE, "C") == NULL) ok = -1; +#endif +#ifdef LC_COLLATE + if (setlocale_failure && SETLOCALE_LC_COLLATE) + if (setlocale(LC_COLLATE, "C") == NULL) + ok = -1; +#endif + } } + if (setlocale_failure & SETLOCALE_LC_CTYPE == 0) { + int i; + for (i = 0; i < 256; i++) { if (isUPPER(i)) fold[i] = toLOWER(i); else if (isLOWER(i)) fold[i] = toUPPER(i); else fold[i] = i; } -#endif + } + +#endif /* #if defined(HAS_SETLOCALE) */ + return ok; +} + +char * +mem_collxfrm(m, n, nx) + const char *m; + const Size_t n; + Size_t * nx; +{ + char * mx = 0; + +#ifdef HAS_STRXFRM + Size_t ma; + + ma = (lc_collate_active ? 16 : 1) * n + 1; + +#define RENEW_mx() \ + do { \ + ma = 2 * ma + 1; \ + Renew(mx, ma, char); \ + if (mx == 0) \ + goto out; \ + } while (0) + + New(171, mx, ma, char); + + if (mx) { + Size_t xc, dx; + int xok; + + for (*nx = 0, xc = 0; xc < n; ) { + if (m[xc] == 0) + do { + if (*nx == ma) + RENEW_mx(); + mx[*nx++] = m[xc++]; + } while (xc < n && m[xc] == 0); + else { + do { + dx = strxfrm(mx + *nx, m + xc, ma - *nx); + if (dx + *nx > ma) { + RENEW_mx(); + xok = 0; + } else + xok = 1; + } while (!xok); + xc += strlen(mx + *nx); + *nx += dx; + } + } + } + +out: + +#endif /* HAS_STRXFRM */ + + return mx; } void