Gentoo Archives: gentoo-commits

From: "Mike Frysinger (vapier)" <vapier@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] gentoo commit in src/patchsets/coreutils/8.3: 000_all_coreutils-i18n.patch 003_all_coreutils-gentoo-uname.patch 010_all_coreutils-tests.patch 030_all_coreutils-more-dir-colors.patch README.history
Date: Fri, 08 Jan 2010 03:34:32
Message-Id: E1NT5cG-0001AV-OK@stork.gentoo.org
1 vapier 10/01/08 03:34:24
2
3 Added: 000_all_coreutils-i18n.patch
4 003_all_coreutils-gentoo-uname.patch
5 010_all_coreutils-tests.patch
6 030_all_coreutils-more-dir-colors.patch
7 README.history
8 Log:
9 initial 8.3 patchset based on last 8.2 patchset
10
11 Revision Changes Path
12 1.1 src/patchsets/coreutils/8.3/000_all_coreutils-i18n.patch
13
14 file : http://sources.gentoo.org/viewcvs.py/gentoo/src/patchsets/coreutils/8.3/000_all_coreutils-i18n.patch?rev=1.1&view=markup
15 plain: http://sources.gentoo.org/viewcvs.py/gentoo/src/patchsets/coreutils/8.3/000_all_coreutils-i18n.patch?rev=1.1&content-type=text/plain
16
17 Index: 000_all_coreutils-i18n.patch
18 ===================================================================
19 ripped from Fedora
20
21 diff -urNp coreutils-8.0-orig/lib/linebuffer.h coreutils-8.0/lib/linebuffer.h
22 --- coreutils-8.0-orig/lib/linebuffer.h 2009-10-06 10:59:48.000000000 +0200
23 +++ coreutils-8.0/lib/linebuffer.h 2009-10-07 10:07:16.000000000 +0200
24 @@ -21,6 +21,11 @@
25
26 # include <stdio.h>
27
28 +/* Get mbstate_t. */
29 +# if HAVE_WCHAR_H
30 +# include <wchar.h>
31 +# endif
32 +
33 /* A `struct linebuffer' holds a line of text. */
34
35 struct linebuffer
36 @@ -28,6 +33,9 @@ struct linebuffer
37 size_t size; /* Allocated. */
38 size_t length; /* Used. */
39 char *buffer;
40 +# if HAVE_WCHAR_H
41 + mbstate_t state;
42 +# endif
43 };
44
45 /* Initialize linebuffer LINEBUFFER for use. */
46 diff -urNp coreutils-8.0-orig/src/cut.c coreutils-8.0/src/cut.c
47 --- coreutils-8.0-orig/src/cut.c 2009-09-23 10:25:44.000000000 +0200
48 +++ coreutils-8.0/src/cut.c 2009-10-07 10:07:16.000000000 +0200
49 @@ -28,6 +28,11 @@
50 #include <assert.h>
51 #include <getopt.h>
52 #include <sys/types.h>
53 +
54 +/* Get mbstate_t, mbrtowc(). */
55 +#if HAVE_WCHAR_H
56 +# include <wchar.h>
57 +#endif
58 #include "system.h"
59
60 #include "error.h"
61 @@ -36,6 +41,18 @@
62 #include "quote.h"
63 #include "xstrndup.h"
64
65 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
66 + installation; work around this configuration error. */
67 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
68 +# undef MB_LEN_MAX
69 +# define MB_LEN_MAX 16
70 +#endif
71 +
72 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
73 +#if HAVE_MBRTOWC && defined mbstate_t
74 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
75 +#endif
76 +
77 /* The official name of this program (e.g., no `g' prefix). */
78 #define PROGRAM_NAME "cut"
79
80 @@ -71,6 +88,52 @@
81 } \
82 while (0)
83
84 +/* Refill the buffer BUF to get a multibyte character. */
85 +#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \
86 + do \
87 + { \
88 + if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \
89 + { \
90 + memmove (BUF, BUFPOS, BUFLEN); \
91 + BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \
92 + BUFPOS = BUF; \
93 + } \
94 + } \
95 + while (0)
96 +
97 +/* Decode the multibyte character at BUFPOS into WC (WEOF if BUFLEN < 1) and set MBLENGTH to its byte length; BUFPOS itself is not advanced.
98 + CONVFAIL is 1 if the byte sequence is not a valid character, otherwise 0. */
99 +#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \
100 + do \
101 + { \
102 + mbstate_t state_bak; \
103 + \
104 + if (BUFLEN < 1) \
105 + { \
106 + WC = WEOF; \
107 + break; \
108 + } \
109 + \
110 + /* Get a wide character. */ \
111 + CONVFAIL = 0; \
112 + state_bak = STATE; \
113 + MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \
114 + \
115 + switch (MBLENGTH) \
116 + { \
117 + case (size_t)-1: \
118 + case (size_t)-2: \
119 + CONVFAIL++; \
120 + STATE = state_bak; \
121 + /* Fall through. */ \
122 + \
123 + case 0: \
124 + MBLENGTH = 1; \
125 + break; \
126 + } \
127 + } \
128 + while (0)
129 +
130 struct range_pair
131 {
132 size_t lo;
133 @@ -89,7 +152,7 @@ static char *field_1_buffer;
134 /* The number of bytes allocated for FIELD_1_BUFFER. */
135 static size_t field_1_bufsize;
136
137 -/* The largest field or byte index used as an endpoint of a closed
138 +/* The largest byte, character or field index used as an endpoint of a closed
139 or degenerate range specification; this doesn't include the starting
140 index of right-open-ended ranges. For example, with either range spec
141 `2-5,9-', `2-3,5,9-' this variable would be set to 5. */
142 @@ -101,10 +164,11 @@ static size_t eol_range_start;
143
144 /* This is a bit vector.
145 In byte mode, which bytes to output.
146 + In character mode, which characters to output.
147 In field mode, which DELIM-separated fields to output.
148 - Both bytes and fields are numbered starting with 1,
149 + Bytes, characters and fields are numbered starting with 1,
150 so the zeroth bit of this array is unused.
151 - A field or byte K has been selected if
152 + A byte, character or field K has been selected if
153 (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
154 || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
155 static unsigned char *printable_field;
156 @@ -113,15 +177,25 @@ enum operating_mode
157 {
158 undefined_mode,
159
160 - /* Output characters that are in the given bytes. */
161 + /* Output bytes that are at the given positions. */
162 byte_mode,
163
164 + /* Output characters that are at the given positions. */
165 + character_mode,
166 +
167 /* Output the given delimeter-separated fields. */
168 field_mode
169 };
170
171 static enum operating_mode operating_mode;
172
173 +/* If nonzero, when in byte mode, don't split multibyte characters. */
174 +static int byte_mode_character_aware;
175 +
176 +/* If nonzero, use the single-byte code paths even when
177 + this program runs in a multibyte locale. */
178 +static int force_singlebyte_mode;
179 +
180 /* If true do not output lines containing no delimeter characters.
181 Otherwise, all such lines are printed. This option is valid only
182 with field mode. */
183 @@ -133,6 +207,9 @@ static bool complement;
184
185 /* The delimeter character for field mode. */
186 static unsigned char delim;
187 +#if HAVE_WCHAR_H
188 +static wchar_t wcdelim;
189 +#endif
190
191 /* True if the --output-delimiter=STRING option was specified. */
192 static bool output_delimiter_specified;
193 @@ -206,7 +283,7 @@ Mandatory arguments to long options are
194 -f, --fields=LIST select only these fields; also print any line\n\
195 that contains no delimiter character, unless\n\
196 the -s option is specified\n\
197 - -n (ignored)\n\
198 + -n with -b: don't split multibyte characters\n\
199 "), stdout);
200 fputs (_("\
201 --complement complement the set of selected bytes, characters\n\
202 @@ -365,7 +442,7 @@ set_fields (const char *fieldstr)
203 in_digits = false;
204 /* Starting a range. */
205 if (dash_found)
206 - FATAL_ERROR (_("invalid byte or field list"));
207 + FATAL_ERROR (_("invalid byte, character or field list"));
208 dash_found = true;
209 fieldstr++;
210
211 @@ -389,14 +466,16 @@ set_fields (const char *fieldstr)
212 if (!rhs_specified)
213 {
214 /* `n-'. From `initial' to end of line. */
215 - eol_range_start = initial;
216 + if (eol_range_start == 0 ||
217 + (eol_range_start != 0 && eol_range_start > initial))
218 + eol_range_start = initial;
219 field_found = true;
220 }
221 else
222 {
223 /* `m-n' or `-n' (1-n). */
224 if (value < initial)
225 - FATAL_ERROR (_("invalid decreasing range"));
226 + FATAL_ERROR (_("invalid byte, character or field list"));
227
228 /* Is there already a range going to end of line? */
229 if (eol_range_start != 0)
230 @@ -476,6 +555,9 @@ set_fields (const char *fieldstr)
231 if (operating_mode == byte_mode)
232 error (0, 0,
233 _("byte offset %s is too large"), quote (bad_num));
234 + else if (operating_mode == character_mode)
235 + error (0, 0,
236 + _("character offset %s is too large"), quote (bad_num));
237 else
238 error (0, 0,
239 _("field number %s is too large"), quote (bad_num));
240 @@ -486,7 +568,7 @@ set_fields (const char *fieldstr)
241 fieldstr++;
242 }
243 else
244 - FATAL_ERROR (_("invalid byte or field list"));
245 + FATAL_ERROR (_("invalid byte, character or field list"));
246 }
247
248 max_range_endpoint = 0;
249 @@ -579,6 +661,63 @@ cut_bytes (FILE *stream)
250 }
251 }
252
253 +#if HAVE_MBRTOWC
254 +/* This function is used in the following two cases.
255 +
256 + 1. Read from the stream STREAM, printing to standard output any selected
257 + characters.
258 +
259 + 2. Read from stream STREAM, printing to standard output any selected bytes,
260 + without splitting multibyte characters. */
261 +
262 +static void
263 +cut_characters_or_cut_bytes_no_split (FILE *stream)
264 +{
265 + int idx; /* number of bytes or characters in the line so far. */
266 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
267 + char *bufpos; /* Next read position of BUF. */
268 + size_t buflen; /* The length of the byte sequence in buf. */
269 + wint_t wc; /* The wide character just decoded. */
270 + size_t mblength; /* The byte length of the multibyte character
271 + that was decoded into WC. */
272 + mbstate_t state; /* Conversion state of the stream. */
273 + int convfail; /* 1 when the conversion failed, otherwise 0. */
274 +
275 + idx = 0;
276 + buflen = 0;
277 + bufpos = buf;
278 + memset (&state, '\0', sizeof(mbstate_t));
279 +
280 + while (1)
281 + {
282 + REFILL_BUFFER (buf, bufpos, buflen, stream);
283 +
284 + GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail);
285 +
286 + if (wc == WEOF)
287 + {
288 + if (idx > 0)
289 + putchar ('\n');
290 + break;
291 + }
292 + else if (wc == L'\n')
293 + {
294 + putchar ('\n');
295 + idx = 0;
296 + }
297 + else
298 + {
299 + idx += (operating_mode == byte_mode) ? mblength : 1;
300 + if (print_kth (idx, NULL))
301 + fwrite (bufpos, mblength, sizeof(char), stdout);
302 + }
303 +
304 + buflen -= mblength;
305 + bufpos += mblength;
306 + }
307 +}
308 +#endif
309 +
310 /* Read from stream STREAM, printing to standard output any selected fields. */
311
312 static void
313 @@ -701,13 +840,192 @@ cut_fields (FILE *stream)
314 }
315 }
316
317 +#if HAVE_MBRTOWC
318 +static void
319 +cut_fields_mb (FILE *stream)
320 +{
321 + int c;
322 + unsigned int field_idx;
323 + int found_any_selected_field;
324 + int buffer_first_field;
325 + int empty_input;
326 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
327 + char *bufpos; /* Next read position of BUF. */
328 + size_t buflen; /* The length of the byte sequence in buf. */
329 + wint_t wc = 0; /* The wide character just decoded. */
330 + size_t mblength; /* The byte length of the multibyte character
331 + that was decoded into WC. */
332 + mbstate_t state; /* Conversion state of the stream. */
333 + int convfail; /* 1 when the conversion failed, otherwise 0. */
334 +
335 + found_any_selected_field = 0;
336 + field_idx = 1;
337 + bufpos = buf;
338 + buflen = 0;
339 + memset (&state, '\0', sizeof(mbstate_t));
340 +
341 + c = getc (stream);
342 + empty_input = (c == EOF);
343 + if (c != EOF)
344 + ungetc (c, stream);
345 + else
346 + wc = WEOF;
347 +
348 + /* To support the semantics of the -s flag, we may have to buffer
349 + all of the first field to determine whether it is `delimited.'
350 + But that is unnecessary if all non-delimited lines must be printed
351 + and the first field has been selected, or if non-delimited lines
352 + must be suppressed and the first field has *not* been selected.
353 + That is because a non-delimited line has exactly one field. */
354 + buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
355 +
356 + while (1)
357 + {
358 + if (field_idx == 1 && buffer_first_field)
359 + {
360 + int len = 0;
361 +
362 + while (1)
363 + {
364 + REFILL_BUFFER (buf, bufpos, buflen, stream);
365 +
366 + GET_NEXT_WC_FROM_BUFFER
367 + (wc, bufpos, buflen, mblength, state, convfail);
368 +
369 + if (wc == WEOF)
370 + break;
371 +
372 + field_1_buffer = xrealloc (field_1_buffer, len + mblength);
373 + memcpy (field_1_buffer + len, bufpos, mblength);
374 + len += mblength;
375 + buflen -= mblength;
376 + bufpos += mblength;
377 +
378 + if (!convfail && (wc == L'\n' || wc == wcdelim))
379 + break;
380 + }
381 +
382 + if (wc == WEOF)
383 + break;
384 +
385 + /* If the first field extends to the end of line (it is not
386 + delimited) and we are printing all non-delimited lines,
387 + print this one. */
388 + if (convfail || (!convfail && wc != wcdelim))
389 + {
390 + if (suppress_non_delimited)
391 + {
392 + /* Empty. */
393 + }
394 + else
395 + {
396 + fwrite (field_1_buffer, sizeof (char), len, stdout);
397 + /* Make sure the output line is newline terminated. */
398 + if (convfail || (!convfail && wc != L'\n'))
399 + putchar ('\n');
400 + }
401 + continue;
402 + }
403 +
404 + if (print_kth (1, NULL))
405 + {
406 + /* Print the field, but not the trailing delimiter. */
407 + fwrite (field_1_buffer, sizeof (char), len - 1, stdout);
408 + found_any_selected_field = 1;
409 + }
410 + ++field_idx;
411 + }
412 +
413 + if (wc != WEOF)
414 + {
415 + if (print_kth (field_idx, NULL))
416 + {
417 + if (found_any_selected_field)
418 + {
419 + fwrite (output_delimiter_string, sizeof (char),
420 + output_delimiter_length, stdout);
421 + }
422 + found_any_selected_field = 1;
423 + }
424 +
425 + while (1)
426 + {
427 + REFILL_BUFFER (buf, bufpos, buflen, stream);
428 +
429 + GET_NEXT_WC_FROM_BUFFER
430 + (wc, bufpos, buflen, mblength, state, convfail);
431 +
432 + if (wc == WEOF)
433 + break;
434 + else if (!convfail && (wc == wcdelim || wc == L'\n'))
435 + {
436 + buflen -= mblength;
437 + bufpos += mblength;
438 + break;
439 + }
440 +
441 + if (print_kth (field_idx, NULL))
442 + fwrite (bufpos, mblength, sizeof(char), stdout);
443 +
444 + buflen -= mblength;
445 + bufpos += mblength;
446 + }
447 + }
448 +
449 + if ((!convfail || wc == L'\n') && buflen < 1)
450 + wc = WEOF;
451 +
452 + if (!convfail && wc == wcdelim)
453 + ++field_idx;
454 + else if (wc == WEOF || (!convfail && wc == L'\n'))
455 + {
456 + if (found_any_selected_field
457 + || (!empty_input && !(suppress_non_delimited && field_idx == 1)))
458 + putchar ('\n');
459 + if (wc == WEOF)
460 + break;
461 + field_idx = 1;
462 + found_any_selected_field = 0;
463 + }
464 + }
465 +}
466 +#endif
467 +
468 static void
469 cut_stream (FILE *stream)
470 {
471 - if (operating_mode == byte_mode)
472 - cut_bytes (stream);
473 +#if HAVE_MBRTOWC
474 + if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
475 + {
476 + switch (operating_mode)
477 + {
478 + case byte_mode:
479 + if (byte_mode_character_aware)
480 + cut_characters_or_cut_bytes_no_split (stream);
481 + else
482 + cut_bytes (stream);
483 + break;
484 +
485 + case character_mode:
486 + cut_characters_or_cut_bytes_no_split (stream);
487 + break;
488 +
489 + case field_mode:
490 + cut_fields_mb (stream);
491 + break;
492 +
493 + default:
494 + abort ();
495 + }
496 + }
497 else
498 - cut_fields (stream);
499 +#endif
500 + {
501 + if (operating_mode == field_mode)
502 + cut_fields (stream);
503 + else
504 + cut_bytes (stream);
505 + }
506 }
507
508 /* Process file FILE to standard output.
509 @@ -757,6 +1075,8 @@ main (int argc, char **argv)
510 bool ok;
511 bool delim_specified = false;
512 char *spec_list_string IF_LINT(= NULL);
513 + char mbdelim[MB_LEN_MAX + 1];
514 + size_t delimlen = 0;
515
516 initialize_main (&argc, &argv);
517 set_program_name (argv[0]);
518 @@ -779,7 +1099,6 @@ main (int argc, char **argv)
519 switch (optc)
520 {
521 case 'b':
522 - case 'c':
523 /* Build the byte list. */
524 if (operating_mode != undefined_mode)
525 FATAL_ERROR (_("only one type of list may be specified"));
526 @@ -787,6 +1106,14 @@ main (int argc, char **argv)
527 spec_list_string = optarg;
528 break;
529
530 + case 'c':
531 + /* Build the character list. */
532 + if (operating_mode != undefined_mode)
533 + FATAL_ERROR (_("only one type of list may be specified"));
534 + operating_mode = character_mode;
535 + spec_list_string = optarg;
536 + break;
537 +
538 case 'f':
539 /* Build the field list. */
540 if (operating_mode != undefined_mode)
541 @@ -798,10 +1125,35 @@ main (int argc, char **argv)
542 case 'd':
543 /* New delimiter. */
544 /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
545 - if (optarg[0] != '\0' && optarg[1] != '\0')
546 - FATAL_ERROR (_("the delimiter must be a single character"));
547 - delim = optarg[0];
548 - delim_specified = true;
549 + {
550 +#if HAVE_MBRTOWC
551 + if(MB_CUR_MAX > 1)
552 + {
553 + mbstate_t state;
554 +
555 + memset (&state, '\0', sizeof(mbstate_t));
556 + delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state);
557 +
558 + if (delimlen == (size_t)-1 || delimlen == (size_t)-2)
559 + ++force_singlebyte_mode;
560 + else
561 + {
562 + delimlen = (delimlen < 1) ? 1 : delimlen;
563 + if (wcdelim != L'\0' && *(optarg + delimlen) != '\0')
564 + FATAL_ERROR (_("the delimiter must be a single character"));
565 + memcpy (mbdelim, optarg, delimlen);
566 + }
567 + }
568 +
569 + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
570 +#endif
571 + {
572 + if (optarg[0] != '\0' && optarg[1] != '\0')
573 + FATAL_ERROR (_("the delimiter must be a single character"));
574 + delim = (unsigned char) optarg[0];
575 + }
576 + delim_specified = true;
577 + }
578 break;
579
580 case OUTPUT_DELIMITER_OPTION:
581 @@ -814,6 +1166,7 @@ main (int argc, char **argv)
582 break;
583
584 case 'n':
585 + byte_mode_character_aware = 1;
586 break;
587
588 case 's':
589 @@ -836,7 +1189,7 @@ main (int argc, char **argv)
590 if (operating_mode == undefined_mode)
591 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
592
593 - if (delim != '\0' && operating_mode != field_mode)
594 + if (delim_specified && operating_mode != field_mode)
595 FATAL_ERROR (_("an input delimiter may be specified only\
596 when operating on fields"));
597
598 @@ -863,15 +1216,34 @@ main (int argc, char **argv)
599 }
600
601 if (!delim_specified)
602 - delim = '\t';
603 + {
604 + delim = '\t';
605 +#ifdef HAVE_MBRTOWC
606 + wcdelim = L'\t';
607 + mbdelim[0] = '\t';
608 + mbdelim[1] = '\0';
609 + delimlen = 1;
610 +#endif
611 + }
612
613 if (output_delimiter_string == NULL)
614 {
615 - static char dummy[2];
616 - dummy[0] = delim;
617 - dummy[1] = '\0';
618 - output_delimiter_string = dummy;
619 - output_delimiter_length = 1;
620 +#ifdef HAVE_MBRTOWC
621 + if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
622 + {
623 + output_delimiter_string = xstrdup(mbdelim);
624 + output_delimiter_length = delimlen;
625 + }
626 +
627 + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
628 +#endif
629 + {
630 + static char dummy[2];
631 + dummy[0] = delim;
632 + dummy[1] = '\0';
633 + output_delimiter_string = dummy;
634 + output_delimiter_length = 1;
635 + }
636 }
637
638 if (optind == argc)
639 diff -urNp coreutils-8.0-orig/src/expand.c coreutils-8.0/src/expand.c
640 --- coreutils-8.0-orig/src/expand.c 2009-09-29 15:27:54.000000000 +0200
641 +++ coreutils-8.0/src/expand.c 2009-10-07 10:07:16.000000000 +0200
642 @@ -37,11 +37,28 @@
643 #include <stdio.h>
644 #include <getopt.h>
645 #include <sys/types.h>
646 +
647 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
648 +#if HAVE_WCHAR_H
649 +# include <wchar.h>
650 +#endif
651 +
652 #include "system.h"
653 #include "error.h"
654 #include "quote.h"
655 #include "xstrndup.h"
656
657 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
658 + installation; work around this configuration error. */
659 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
660 +# define MB_LEN_MAX 16
661 +#endif
662 +
663 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
664 +#if HAVE_MBRTOWC && defined mbstate_t
665 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
666 +#endif
667 +
668 /* The official name of this program (e.g., no `g' prefix). */
669 #define PROGRAM_NAME "expand"
670
671 @@ -357,6 +374,142 @@ expand (void)
672 }
673 }
674
675 +#if HAVE_MBRTOWC
676 +static void
677 +expand_multibyte (void)
678 +{
679 + FILE *fp; /* Input stream. */
680 + mbstate_t i_state; /* Current shift state of the input stream. */
681 + mbstate_t i_state_bak; /* Back up the I_STATE. */
682 + mbstate_t o_state; /* Current shift state of the output stream. */
683 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
684 + char *bufpos; /* Next read position of BUF. */
685 + size_t buflen = 0; /* The length of the byte sequence in buf. */
686 + wchar_t wc; /* The wide character just decoded. */
687 + size_t mblength; /* The byte length of the multibyte character
688 + that was decoded into WC. */
689 + int tab_index = 0; /* Index in `tab_list' of next tabstop. */
690 + int column = 0; /* Column on screen of the next char. */
691 + int next_tab_column; /* Column the next tab stop is on. */
692 + int convert = 1; /* If nonzero, perform translations. */
693 +
694 + fp = next_file ((FILE *) NULL);
695 + if (fp == NULL)
696 + return;
697 +
698 + memset (&o_state, '\0', sizeof(mbstate_t));
699 + memset (&i_state, '\0', sizeof(mbstate_t));
700 +
701 + for (;;)
702 + {
703 + /* Refill the buffer BUF. */
704 + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
705 + {
706 + memmove (buf, bufpos, buflen);
707 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
708 + bufpos = buf;
709 + }
710 +
711 + /* No character is left in BUF. */
712 + if (buflen < 1)
713 + {
714 + fp = next_file (fp);
715 +
716 + if (fp == NULL)
717 + break; /* No more files. */
718 + else
719 + {
720 + memset (&i_state, '\0', sizeof(mbstate_t));
721 + continue;
722 + }
723 + }
724 +
725 + /* Get a wide character. */
726 + i_state_bak = i_state;
727 + mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
728 +
729 + switch (mblength)
730 + {
731 + case (size_t)-1: /* illegal byte sequence. */
732 + case (size_t)-2:
733 + mblength = 1;
734 + i_state = i_state_bak;
735 + if (convert)
736 + {
737 + ++column;
738 + if (convert_entire_line == 0)
739 + convert = 0;
740 + }
741 + putchar (*bufpos);
742 + break;
743 +
744 + case 0: /* null. */
745 + mblength = 1;
746 + if (convert && convert_entire_line == 0)
747 + convert = 0;
748 + putchar ('\0');
749 + break;
750 +
751 + default:
752 + if (wc == L'\n') /* LF. */
753 + {
754 + tab_index = 0;
755 + column = 0;
756 + convert = 1;
757 + putchar ('\n');
758 + }
759 + else if (wc == L'\t' && convert) /* Tab. */
760 + {
761 + if (tab_size == 0)
762 + {
763 + /* Do not let tab_index == first_free_tab;
764 + stop when it is 1 less. */
765 + while (tab_index < first_free_tab - 1
766 + && column >= tab_list[tab_index])
767 + tab_index++;
768 + next_tab_column = tab_list[tab_index];
769 + if (tab_index < first_free_tab - 1)
770 + tab_index++;
771 + if (column >= next_tab_column)
772 + next_tab_column = column + 1;
773 + }
774 + else
775 + next_tab_column = column + tab_size - column % tab_size;
776 +
777 + while (column < next_tab_column)
778 + {
779 + putchar (' ');
780 + ++column;
781 + }
782 + }
783 + else /* Others. */
784 + {
785 + if (convert)
786 + {
787 + if (wc == L'\b')
788 + {
789 + if (column > 0)
790 + --column;
791 + }
792 + else
793 + {
794 + int width; /* The width of WC. */
795 +
796 + width = wcwidth (wc);
797 + column += (width > 0) ? width : 0;
798 + if (convert_entire_line == 0)
799 + convert = 0;
800 + }
801 + }
802 + fwrite (bufpos, sizeof(char), mblength, stdout);
803 + }
804 + }
805 + buflen -= mblength;
806 + bufpos += mblength;
807 + }
808 +}
809 +#endif
810 +
811 int
812 main (int argc, char **argv)
813 {
814 @@ -421,7 +574,12 @@ main (int argc, char **argv)
815
816 file_list = (optind < argc ? &argv[optind] : stdin_argv);
817
818 - expand ();
819 +#if HAVE_MBRTOWC
820 + if (MB_CUR_MAX > 1)
821 + expand_multibyte ();
822 + else
823 +#endif
824 + expand ();
825
826 if (have_read_stdin && fclose (stdin) != 0)
827 error (EXIT_FAILURE, errno, "-");
828 diff -urNp coreutils-8.0-orig/src/fold.c coreutils-8.0/src/fold.c
829 --- coreutils-8.0-orig/src/fold.c 2009-09-23 10:25:44.000000000 +0200
830 +++ coreutils-8.0/src/fold.c 2009-10-07 10:07:16.000000000 +0200
831 @@ -22,11 +22,33 @@
832 #include <getopt.h>
833 #include <sys/types.h>
834
835 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
836 +#if HAVE_WCHAR_H
837 +# include <wchar.h>
838 +#endif
839 +
840 +/* Get iswprint(), iswblank(), wcwidth(). */
841 +#if HAVE_WCTYPE_H
842 +# include <wctype.h>
843 +#endif
844 +
845 #include "system.h"
846 #include "error.h"
847 #include "quote.h"
848 #include "xstrtol.h"
849
850 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
851 + installation; work around this configuration error. */
852 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
853 +# undef MB_LEN_MAX
854 +# define MB_LEN_MAX 16
855 +#endif
856 +
857 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
858 +#if HAVE_MBRTOWC && defined mbstate_t
859 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
860 +#endif
861 +
862 #define TAB_WIDTH 8
863
864 /* The official name of this program (e.g., no `g' prefix). */
865 @@ -34,20 +56,41 @@
866
867 #define AUTHORS proper_name ("David MacKenzie")
868
869 +#define FATAL_ERROR(Message) \
870 + do \
871 + { \
872 + error (0, 0, (Message)); \
873 + usage (2); \
874 + } \
875 + while (0)
876 +
877 +enum operating_mode
878 +{
879 + /* Fold lines by counting screen columns. */
880 + column_mode,
881 +
882 + /* Fold lines by counting bytes. */
883 + byte_mode,
884 +
885 + /* Fold lines by counting characters. */
886 + character_mode,
886 + character_mode,
887 +};
888 +
889 +/* The current operating mode. (Default: column_mode) */
890 +static enum operating_mode operating_mode;
891 +
892 /* If nonzero, try to break on whitespace. */
893 static bool break_spaces;
894
895 -/* If nonzero, count bytes, not column positions. */
896 -static bool count_bytes;
897 -
898 /* If nonzero, at least one of the files we read was standard input. */
899 static bool have_read_stdin;
900
901 -static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
902 +static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
903
904 static struct option const longopts[] =
905 {
906 {"bytes", no_argument, NULL, 'b'},
907 + {"characters", no_argument, NULL, 'c'},
908 {"spaces", no_argument, NULL, 's'},
909 {"width", required_argument, NULL, 'w'},
910 {GETOPT_HELP_OPTION_DECL},
911 @@ -77,6 +120,7 @@ Mandatory arguments to long options are
912 "), stdout);
913 fputs (_("\
914 -b, --bytes count bytes rather than columns\n\
915 + -c, --characters count characters rather than columns\n\
916 -s, --spaces break at spaces\n\
917 -w, --width=WIDTH use WIDTH columns instead of 80\n\
918 "), stdout);
919 @@ -94,7 +138,7 @@ Mandatory arguments to long options are
920 static size_t
921 adjust_column (size_t column, char c)
922 {
923 - if (!count_bytes)
924 + if (operating_mode != byte_mode)
925 {
926 if (c == '\b')
927 {
928 @@ -117,30 +161,14 @@ adjust_column (size_t column, char c)
929 to stdout, with maximum line length WIDTH.
930 Return true if successful. */
931
932 -static bool
933 -fold_file (char const *filename, size_t width)
934 +static void
935 +fold_text (FILE *istream, size_t width, int *saved_errno)
936 {
937 - FILE *istream;
938 int c;
939 size_t column = 0; /* Screen column where next char will go. */
940 size_t offset_out = 0; /* Index in `line_out' for next char. */
941 static char *line_out = NULL;
942 static size_t allocated_out = 0;
943 - int saved_errno;
944 -
945 - if (STREQ (filename, "-"))
946 - {
947 - istream = stdin;
948 - have_read_stdin = true;
949 - }
950 - else
951 - istream = fopen (filename, "r");
952 -
953 - if (istream == NULL)
954 - {
955 - error (0, errno, "%s", filename);
956 - return false;
957 - }
958
959 while ((c = getc (istream)) != EOF)
960 {
961 @@ -168,6 +196,15 @@ fold_file (char const *filename, size_t
962 bool found_blank = false;
963 size_t logical_end = offset_out;
964
965 + /* If LINE_OUT is empty, put the new character
966 + in LINE_OUT anyway, even though column is
967 + bigger than width. */
968 + if (offset_out == 0)
969 + {
970 + line_out[offset_out++] = c;
971 + continue;
972 + }
973 +
974 /* Look for the last blank. */
975 while (logical_end)
976 {
977 @@ -214,11 +251,222 @@ fold_file (char const *filename, size_t
978 line_out[offset_out++] = c;
979 }
980
981 - saved_errno = errno;
982 + *saved_errno = errno;
983
984 if (offset_out)
985 fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
986
987 +}
988 +
989 +#if HAVE_MBRTOWC
990 +static void
991 +fold_multibyte_text (FILE *istream, size_t width, int *saved_errno)
992 +{
993 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
994 + size_t buflen = 0; /* The length of the byte sequence in buf. */
995 + char *bufpos = NULL; /* Next read position of BUF. */
996 + wint_t wc; /* The wide character just decoded. */
997 + size_t mblength; /* The byte length of the multibyte character
998 + that was decoded into WC. */
999 + mbstate_t state, state_bak; /* Conversion state of the stream, and its backup. */
1000 + int convfail; /* 1 when the conversion failed, otherwise 0. */
1001 +
1002 + static char *line_out = NULL;
1003 + size_t offset_out = 0; /* Index in `line_out' for next char. */
1004 + static size_t allocated_out = 0;
1005 +
1006 + int increment;
1007 + size_t column = 0;
1008 +
1009 + size_t last_blank_pos;
1010 + size_t last_blank_column;
1011 + int is_blank_seen;
1012 + int last_blank_increment = 0;
1013 + int is_bs_following_last_blank;
1014 + size_t bs_following_last_blank_num;
1015 + int is_cr_after_last_blank;
1016 +
1017 +#define CLEAR_FLAGS \
1018 + do \
1019 + { \
1020 + last_blank_pos = 0; \
1021 + last_blank_column = 0; \
1022 + is_blank_seen = 0; \
1023 + is_bs_following_last_blank = 0; \
1024 + bs_following_last_blank_num = 0; \
1025 + is_cr_after_last_blank = 0; \
1026 + } \
1027 + while (0)
1028 +
1029 +#define START_NEW_LINE \
1030 + do \
1031 + { \
1032 + putchar ('\n'); \
1033 + column = 0; \
1034 + offset_out = 0; \
1035 + CLEAR_FLAGS; \
1036 + } \
1037 + while (0)
1038 +
1039 + CLEAR_FLAGS;
1040 + memset (&state, '\0', sizeof(mbstate_t));
1041 +
1042 + for (;; bufpos += mblength, buflen -= mblength)
1043 + {
1044 + if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream))
1045 + {
1046 + memmove (buf, bufpos, buflen);
1047 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream);
1048 + bufpos = buf;
1049 + }
1050 +
1051 + if (buflen < 1)
1052 + break;
1053 +
1054 + /* Get a wide character. */
1055 + convfail = 0;
1056 + state_bak = state;
1057 + mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state);
1058 +
1059 + switch (mblength)
1060 + {
1061 + case (size_t)-1:
1062 + case (size_t)-2:
1063 + convfail++;
1064 + state = state_bak;
1065 + /* Fall through. */
1066 +
1067 + case 0:
1068 + mblength = 1;
1069 + break;
1070 + }
1071 +
1072 +rescan:
1073 + if (operating_mode == byte_mode) /* byte mode */
1074 + increment = mblength;
1075 + else if (operating_mode == character_mode) /* character mode */
1076 + increment = 1;
1077 + else /* column mode */
1078 + {
1079 + if (convfail)
1080 + increment = 1;
1081 + else
1082 + {
1083 + switch (wc)
1084 + {
1085 + case L'\n':
1086 + fwrite (line_out, sizeof(char), offset_out, stdout);
1087 + START_NEW_LINE;
1088 + continue;
1089 +
1090 + case L'\b':
1091 + increment = (column > 0) ? -1 : 0;
1092 + break;
1093 +
1094 + case L'\r':
1095 + increment = -1 * column;
1096 + break;
1097 +
1098 + case L'\t':
1099 + increment = 8 - column % 8;
1100 + break;
1101 +
1102 + default:
1103 + increment = wcwidth (wc);
1104 + increment = (increment < 0) ? 0 : increment;
1105 + }
1106 + }
1107 + }
1108 +
1109 + if (column + increment > width && break_spaces && last_blank_pos)
1110 + {
1111 + fwrite (line_out, sizeof(char), last_blank_pos, stdout);
1112 + putchar ('\n');
1113 +
1114 + offset_out = offset_out - last_blank_pos;
1115 + column = column - last_blank_column + ((is_cr_after_last_blank)
1116 + ? last_blank_increment : bs_following_last_blank_num);
1117 + memmove (line_out, line_out + last_blank_pos, offset_out);
1118 + CLEAR_FLAGS;
1119 + goto rescan;
1120 + }
1121 +
1122 + if (column + increment > width && column != 0)
1123 + {
1124 + fwrite (line_out, sizeof(char), offset_out, stdout);
1125 + START_NEW_LINE;
1126 + goto rescan;
1127 + }
1128 +
1129 + if (allocated_out < offset_out + mblength)
1130 + {
1131 + line_out = X2REALLOC (line_out, &allocated_out);
1132 + }
1133 +
1134 + memcpy (line_out + offset_out, bufpos, mblength);
1135 + offset_out += mblength;
1136 + column += increment;
1137 +
1138 + if (is_blank_seen && !convfail && wc == L'\r')
1139 + is_cr_after_last_blank = 1;
1140 +
1141 + if (is_bs_following_last_blank && !convfail && wc == L'\b')
1142 + ++bs_following_last_blank_num;
1143 + else
1144 + is_bs_following_last_blank = 0;
1145 +
1146 + if (break_spaces && !convfail && iswblank (wc))
1147 + {
1148 + last_blank_pos = offset_out;
1149 + last_blank_column = column;
1150 + is_blank_seen = 1;
1151 + last_blank_increment = increment;
1152 + is_bs_following_last_blank = 1;
1153 + bs_following_last_blank_num = 0;
1154 + is_cr_after_last_blank = 0;
1155 + }
1156 + }
1157 +
1158 + *saved_errno = errno;
1159 +
1160 + if (offset_out)
1161 + fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
1162 +
1163 +}
1164 +#endif
1165 +
1166 +/* Fold file FILENAME, or standard input if FILENAME is "-",
1167 + to stdout, with maximum line length WIDTH.
1168 + Return 0 if successful, 1 if an error occurs. */
1169 +
1170 +static bool
1171 +fold_file (char *filename, size_t width)
1172 +{
1173 + FILE *istream;
1174 + int saved_errno;
1175 +
1176 + if (STREQ (filename, "-"))
1177 + {
1178 + istream = stdin;
1179 + have_read_stdin = 1;
1180 + }
1181 + else
1182 + istream = fopen (filename, "r");
1183 +
1184 + if (istream == NULL)
1185 + {
1186 + error (0, errno, "%s", filename);
1187 + return 1;
1188 + }
1189 +
1190 + /* Define how ISTREAM is being folded. */
1191 +#if HAVE_MBRTOWC
1192 + if (MB_CUR_MAX > 1)
1193 + fold_multibyte_text (istream, width, &saved_errno);
1194 + else
1195 +#endif
1196 + fold_text (istream, width, &saved_errno);
1197 +
1198 if (ferror (istream))
1199 {
1200 error (0, saved_errno, "%s", filename);
1201 @@ -251,7 +499,8 @@ main (int argc, char **argv)
1202
1203 atexit (close_stdout);
1204
1205 - break_spaces = count_bytes = have_read_stdin = false;
1206 + operating_mode = column_mode;
1207 + break_spaces = have_read_stdin = false;
1208
1209 while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
1210 {
1211 @@ -260,7 +509,15 @@ main (int argc, char **argv)
1212 switch (optc)
1213 {
1214 case 'b': /* Count bytes rather than columns. */
1215 - count_bytes = true;
1216 + if (operating_mode != column_mode)
1217 + FATAL_ERROR (_("only one way of folding may be specified"));
1218 + operating_mode = byte_mode;
1219 + break;
1220 +
1221 + case 'c':
1222 + if (operating_mode != column_mode)
1223 + FATAL_ERROR (_("only one way of folding may be specified"));
1224 + operating_mode = character_mode;
1225 break;
1226
1227 case 's': /* Break at word boundaries. */
1228 diff -urNp coreutils-8.0-orig/src/join.c coreutils-8.0/src/join.c
1229 --- coreutils-8.0-orig/src/join.c 2009-09-23 10:25:44.000000000 +0200
1230 +++ coreutils-8.0/src/join.c 2009-10-07 10:07:16.000000000 +0200
1231 @@ -22,17 +22,31 @@
1232 #include <sys/types.h>
1233 #include <getopt.h>
1234
1235 +/* Get mbstate_t, mbrtowc(), wcrtomb(), wcwidth(). */
1236 +#if HAVE_WCHAR_H
1237 +# include <wchar.h>
1238 +#endif
1239 +
1240 +/* Get iswblank(), towupper(). */
1241 +#if HAVE_WCTYPE_H
1242 +# include <wctype.h>
1243 +#endif
1244 +
1245 #include "system.h"
1246 #include "error.h"
1247 #include "hard-locale.h"
1248 #include "linebuffer.h"
1249 -#include "memcasecmp.h"
1250 #include "quote.h"
1251 #include "stdio--.h"
1252 #include "xmemcoll.h"
1253 #include "xstrtol.h"
1254 #include "argmatch.h"
1255
1256 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1257 +#if HAVE_MBRTOWC && defined mbstate_t
1258 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1259 +#endif
1260 +
1261 /* The official name of this program (e.g., no `g' prefix). */
1262 #define PROGRAM_NAME "join"
1263
1264 @@ -121,10 +135,12 @@ static struct outlist outlist_head;
1265 /* Last element in `outlist', where a new element can be added. */
1266 static struct outlist *outlist_end = &outlist_head;
1267
1268 -/* Tab character separating fields. If negative, fields are separated
1269 - by any nonempty string of blanks, otherwise by exactly one
1270 - tab character whose value (when cast to unsigned char) equals TAB. */
1271 -static int tab = -1;
1272 +/* Tab character separating fields. If NULL, fields are separated
1273 + by any nonempty string of blanks. */
1274 +static char *tab = NULL;
1275 +
1276 +/* The number of bytes used for tab. */
1277 +static size_t tablen = 0;
1278
1279 /* If nonzero, check that the input is correctly ordered. */
1280 static enum
1281 @@ -239,10 +255,11 @@ xfields (struct line *line)
1282 if (ptr == lim)
1283 return;
1284
1285 - if (0 <= tab)
1286 + if (tab != NULL)
1287 {
1288 + unsigned char t = tab[0];
1289 char *sep;
1290 - for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
1291 + for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
1292 extract_field (line, ptr, sep - ptr);
1293 }
1294 else
1295 @@ -269,6 +286,148 @@ xfields (struct line *line)
1296 extract_field (line, ptr, lim - ptr);
1297 }
1298
1299 +#if HAVE_MBRTOWC
1300 +static void
1301 +xfields_multibyte (struct line *line)
1302 +{
1303 + char *ptr = line->buf.buffer;
1304 + char const *lim = ptr + line->buf.length - 1;
1305 + wchar_t wc = 0;
1306 + size_t mblength = 1;
1307 + mbstate_t state, state_bak;
1308 +
1309 + memset (&state, 0, sizeof (mbstate_t));
1310 +
1311 + if (ptr >= lim)
1312 + return;
1313 +
1314 + if (tab != NULL)
1315 + {
1316 + unsigned char t = tab[0];
1317 + char *sep = ptr;
1318 + for (; ptr < lim; ptr = sep + mblength)
1319 + {
1320 + sep = ptr;
1321 + while (sep < lim)
1322 + {
1323 + state_bak = state;
1324 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
1325 +
1326 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1327 + {
1328 + mblength = 1;
1329 + state = state_bak;
1330 + }
1331 + mblength = (mblength < 1) ? 1 : mblength;
1332 +
1333 + if (mblength == tablen && !memcmp (sep, tab, mblength))
1334 + break;
1335 + else
1336 + {
1337 + sep += mblength;
1338 + continue;
1339 + }
1340 + }
1341 +
1342 + if (sep >= lim)
1343 + break;
1344 +
1345 + extract_field (line, ptr, sep - ptr);
1346 + }
1347 + }
1348 + else
1349 + {
1350 + /* Skip leading blanks before the first field. */
1351 + while(ptr < lim)
1352 + {
1353 + state_bak = state;
1354 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
1355 +
1356 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1357 + {
1358 + mblength = 1;
1359 + state = state_bak;
1360 + break;
1361 + }
1362 + mblength = (mblength < 1) ? 1 : mblength;
1363 +
1364 + if (!iswblank(wc))
1365 + break;
1366 + ptr += mblength;
1367 + }
1368 +
1369 + do
1370 + {
1371 + char *sep;
1372 + state_bak = state;
1373 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
1374 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1375 + {
1376 + mblength = 1;
1377 + state = state_bak;
1378 + break;
1379 + }
1380 + mblength = (mblength < 1) ? 1 : mblength;
1381 +
1382 + sep = ptr + mblength;
1383 + while (sep < lim)
1384 + {
1385 + state_bak = state;
1386 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
1387 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1388 + {
1389 + mblength = 1;
1390 + state = state_bak;
1391 + break;
1392 + }
1393 + mblength = (mblength < 1) ? 1 : mblength;
1394 +
1395 + if (iswblank (wc))
1396 + break;
1397 +
1398 + sep += mblength;
1399 + }
1400 +
1401 + extract_field (line, ptr, sep - ptr);
1402 + if (sep >= lim)
1403 + return;
1404 +
1405 + state_bak = state;
1406 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
1407 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1408 + {
1409 + mblength = 1;
1410 + state = state_bak;
1411 + break;
1412 + }
1413 + mblength = (mblength < 1) ? 1 : mblength;
1414 +
1415 + ptr = sep + mblength;
1416 + while (ptr < lim)
1417 + {
1418 + state_bak = state;
1419 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
1420 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1421 + {
1422 + mblength = 1;
1423 + state = state_bak;
1424 + break;
1425 + }
1426 + mblength = (mblength < 1) ? 1 : mblength;
1427 +
1428 + if (!iswblank (wc))
1429 + break;
1430 +
1431 + ptr += mblength;
1432 + }
1433 + }
1434 + while (ptr < lim);
1435 + }
1436 +
1437 + extract_field (line, ptr, lim - ptr);
1438 +}
1439 +#endif
1440 +
1441 static void
1442 freeline (struct line *line)
1443 {
1444 @@ -287,56 +446,115 @@ keycmp (struct line const *line1, struct
1445 size_t jf_1, size_t jf_2)
1446 {
1447 /* Start of field to compare in each file. */
1448 - char *beg1;
1449 - char *beg2;
1450 -
1451 - size_t len1;
1452 - size_t len2; /* Length of fields to compare. */
1453 + char *beg[2];
1454 + char *copy[2];
1455 + size_t len[2]; /* Length of fields to compare. */
1456 int diff;
1457 + int i, j;
1458
1459 if (jf_1 < line1->nfields)
1460 {
1461 - beg1 = line1->fields[jf_1].beg;
1462 - len1 = line1->fields[jf_1].len;
1463 + beg[0] = line1->fields[jf_1].beg;
1464 + len[0] = line1->fields[jf_1].len;
1465 }
1466 else
1467 {
1468 - beg1 = NULL;
1469 - len1 = 0;
1470 + beg[0] = NULL;
1471 + len[0] = 0;
1472 }
1473
1474 if (jf_2 < line2->nfields)
1475 {
1476 - beg2 = line2->fields[jf_2].beg;
1477 - len2 = line2->fields[jf_2].len;
1478 + beg[1] = line2->fields[jf_2].beg;
1479 + len[1] = line2->fields[jf_2].len;
1480 }
1481 else
1482 {
1483 - beg2 = NULL;
1484 - len2 = 0;
1485 + beg[1] = NULL;
1486 + len[1] = 0;
1487 }
1488
1489 - if (len1 == 0)
1490 - return len2 == 0 ? 0 : -1;
1491 - if (len2 == 0)
1492 + if (len[0] == 0)
1493 + return len[1] == 0 ? 0 : -1;
1494 + if (len[1] == 0)
1495 return 1;
1496
1497 if (ignore_case)
1498 {
1499 - /* FIXME: ignore_case does not work with NLS (in particular,
1500 - with multibyte chars). */
1501 - diff = memcasecmp (beg1, beg2, MIN (len1, len2));
1502 +#ifdef HAVE_MBRTOWC
1503 + if (MB_CUR_MAX > 1)
1504 + {
1505 + size_t mblength;
1506 + wchar_t wc, uwc;
1507 + mbstate_t state, state_bak;
1508 +
1509 + memset (&state, '\0', sizeof (mbstate_t));
1510 +
1511 + for (i = 0; i < 2; i++)
1512 + {
1513 + copy[i] = alloca (len[i] + 1);
1514 +
1515 + for (j = 0; j < MIN (len[0], len[1]);)
1516 + {
1517 + state_bak = state;
1518 + mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state);
1519 +
1520 + switch (mblength)
1521 + {
1522 + case (size_t) -1:
1523 + case (size_t) -2:
1524 + state = state_bak;
1525 + /* Fall through */
1526 + case 0:
1527 + mblength = 1;
1528 + break;
1529 +
1530 + default:
1531 + uwc = towupper (wc);
1532 +
1533 + if (uwc != wc)
1534 + {
1535 + mbstate_t state_wc;
1536 +
1537 + memset (&state_wc, '\0', sizeof (mbstate_t));
1538 + wcrtomb (copy[i] + j, uwc, &state_wc);
1539 + }
1540 + else
1541 + memcpy (copy[i] + j, beg[i] + j, mblength);
1542 + }
1543 + j += mblength;
1544 + }
1545 + copy[i][j] = '\0';
1546 + }
1547 + }
1548 + else
1549 +#endif
1550 + {
1551 + for (i = 0; i < 2; i++)
1552 + {
1553 + copy[i] = alloca (len[i] + 1);
1554 +
1555 + for (j = 0; j < MIN (len[0], len[1]); j++)
1556 + copy[i][j] = toupper (beg[i][j]);
1557 +
1558 + copy[i][j] = '\0';
1559 + }
1560 + }
1561 }
1562 else
1563 {
1564 - if (hard_LC_COLLATE)
1565 - return xmemcoll (beg1, len1, beg2, len2);
1566 - diff = memcmp (beg1, beg2, MIN (len1, len2));
1567 + copy[0] = (unsigned char *) beg[0];
1568 + copy[1] = (unsigned char *) beg[1];
1569 }
1570
1571 + if (hard_LC_COLLATE)
1572 + return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]);
1573 + diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));
1574 +
1575 +
1576 if (diff)
1577 return diff;
1578 - return len1 < len2 ? -1 : len1 != len2;
1579 + return len[0] - len[1];
1580 }
1581
1582 /* Check that successive input lines PREV and CURRENT from input file
1583 @@ -417,6 +635,11 @@ get_line (FILE *fp, struct line **linep,
1584 return false;
1585 }
1586
1587 +#if HAVE_MBRTOWC
1588 + if (MB_CUR_MAX > 1)
1589 + xfields_multibyte (line);
1590 + else
1591 +#endif
1592 xfields (line);
1593
1594 if (prevline[which - 1])
1595 @@ -518,11 +741,18 @@ prfield (size_t n, struct line const *li
1596
1597 /* Print the join of LINE1 and LINE2. */
1598
1599 +#define PUT_TAB_CHAR \
1600 + do \
1601 + { \
1602 + (tab != NULL) ? \
1603 + fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \
1604 + } \
1605 + while (0)
1606 +
1607 static void
1608 prjoin (struct line const *line1, struct line const *line2)
1609 {
1610 const struct outlist *outlist;
1611 - char output_separator = tab < 0 ? ' ' : tab;
1612
1613 outlist = outlist_head.next;
1614 if (outlist)
1615 @@ -557,7 +787,7 @@ prjoin (struct line const *line1, struct
1616 o = o->next;
1617 if (o == NULL)
1618 break;
1619 - putchar (output_separator);
1620 + PUT_TAB_CHAR;
1621 }
1622 putchar ('\n');
1623 }
1624 @@ -575,23 +805,23 @@ prjoin (struct line const *line1, struct
1625 prfield (join_field_1, line1);
1626 for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
1627 {
1628 - putchar (output_separator);
1629 + PUT_TAB_CHAR;
1630 prfield (i, line1);
1631 }
1632 for (i = join_field_1 + 1; i < line1->nfields; ++i)
1633 {
1634 - putchar (output_separator);
1635 + PUT_TAB_CHAR;
1636 prfield (i, line1);
1637 }
1638
1639 for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
1640 {
1641 - putchar (output_separator);
1642 + PUT_TAB_CHAR;
1643 prfield (i, line2);
1644 }
1645 for (i = join_field_2 + 1; i < line2->nfields; ++i)
1646 {
1647 - putchar (output_separator);
1648 + PUT_TAB_CHAR;
1649 prfield (i, line2);
1650 }
1651 putchar ('\n');
1652 @@ -1022,20 +1252,41 @@ main (int argc, char **argv)
1653
1654 case 't':
1655 {
1656 - unsigned char newtab = optarg[0];
1657 - if (! newtab)
1658 + char *newtab;
1659 + size_t newtablen;
1660 + if (! optarg[0])
1661 error (EXIT_FAILURE, 0, _("empty tab"));
1662 - if (optarg[1])
1663 + newtab = xstrdup (optarg);
1664 +#if HAVE_MBRTOWC
1665 + if (MB_CUR_MAX > 1)
1666 + {
1667 + mbstate_t state;
1668 +
1669 + memset (&state, 0, sizeof (mbstate_t));
1670 + newtablen = mbrtowc (NULL, newtab,
1671 + strnlen (newtab, MB_LEN_MAX),
1672 + &state);
1673 + if (newtablen == (size_t) 0
1674 + || newtablen == (size_t) -1
1675 + || newtablen == (size_t) -2)
1676 + newtablen = 1;
1677 + }
1678 + else
1679 +#endif
1680 + newtablen = 1;
1681 +
1682 + if (newtablen == 1 && newtab[1])
1683 + {
1684 + if (STREQ (newtab, "\\0"))
1685 + newtab[0] = '\0';
1686 + }
1687 + if (tab != NULL && strcmp (tab, newtab))
1688 {
1689 - if (STREQ (optarg, "\\0"))
1690 - newtab = '\0';
1691 - else
1692 - error (EXIT_FAILURE, 0, _("multi-character tab %s"),
1693 - quote (optarg));
1694 + free (newtab);
1695 + error (EXIT_FAILURE, 0, _("incompatible tabs"));
1696 }
1697 - if (0 <= tab && tab != newtab)
1698 - error (EXIT_FAILURE, 0, _("incompatible tabs"));
1699 tab = newtab;
1700 + tablen = newtablen;
1701 }
1702 break;
1703
1704 diff -urNp coreutils-8.0-orig/src/pr.c coreutils-8.0/src/pr.c
1705 --- coreutils-8.0-orig/src/pr.c 2009-09-29 15:27:54.000000000 +0200
1706 +++ coreutils-8.0/src/pr.c 2009-10-07 10:07:16.000000000 +0200
1707 @@ -312,6 +312,32 @@
1708
1709 #include <getopt.h>
1710 #include <sys/types.h>
1711 +
1712 +/* Get MB_LEN_MAX. */
1713 +#include <limits.h>
1714 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1715 + installation; work around this configuration error. */
1716 +#if !defined MB_LEN_MAX || MB_LEN_MAX == 1
1717 +# define MB_LEN_MAX 16
1718 +#endif
1719 +
1720 +/* Get MB_CUR_MAX. */
1721 +#include <stdlib.h>
1722 +
1723 +/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
1724 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
1725 +#if HAVE_WCHAR_H
1726 +# include <wchar.h>
1727 +#endif
1728 +
1729 +/* Get iswprint(). -- for wcwidth(). */
1730 +#if HAVE_WCTYPE_H
1731 +# include <wctype.h>
1732 +#endif
1733 +#if !defined iswprint && !HAVE_ISWPRINT
1734 +# define iswprint(wc) 1
1735 +#endif
1736 +
1737 #include "system.h"
1738 #include "error.h"
1739 #include "hard-locale.h"
1740 @@ -322,6 +348,18 @@
1741 #include "strftime.h"
1742 #include "xstrtol.h"
1743
1744 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1745 +#if HAVE_MBRTOWC && defined mbstate_t
1746 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1747 +#endif
1748 +
1749 +#ifndef HAVE_DECL_WCWIDTH
1750 +"this configure-time declaration test was not run"
1751 +#endif
1752 +#if !HAVE_DECL_WCWIDTH
1753 +extern int wcwidth ();
1754 +#endif
1755 +
1756 /* The official name of this program (e.g., no `g' prefix). */
1757 #define PROGRAM_NAME "pr"
1758
1759 @@ -414,7 +452,20 @@ struct COLUMN
1760
1761 typedef struct COLUMN COLUMN;
1762
1763 -static int char_to_clump (char c);
1764 +/* Function pointers to switch functions for single byte locale or for
1765 + multibyte locale. If multibyte functions do not exist in your system,
1766 + these pointers always point to the function for single byte locale. */
1767 +static void (*print_char) (char c);
1768 +static int (*char_to_clump) (char c);
1769 +
1770 +/* Functions for single byte locale. */
1771 +static void print_char_single (char c);
1772 +static int char_to_clump_single (char c);
1773 +
1774 +/* Functions for multibyte locale. */
1775 +static void print_char_multi (char c);
1776 +static int char_to_clump_multi (char c);
1777 +
1778 static bool read_line (COLUMN *p);
1779 static bool print_page (void);
1780 static bool print_stored (COLUMN *p);
1781 @@ -424,6 +475,7 @@ static void print_header (void);
1782 static void pad_across_to (int position);
1783 static void add_line_number (COLUMN *p);
1784 static void getoptarg (char *arg, char switch_char, char *character,
1785 + int *character_length, int *character_width,
1786 int *number);
1787 void usage (int status);
1788 static void print_files (int number_of_files, char **av);
1789 @@ -438,7 +490,6 @@ static void store_char (char c);
1790 static void pad_down (int lines);
1791 static void read_rest_of_line (COLUMN *p);
1792 static void skip_read (COLUMN *p, int column_number);
1793 -static void print_char (char c);
1794 static void cleanup (void);
1795 static void print_sep_string (void);
1796 static void separator_string (const char *optarg_S);
1797 @@ -450,7 +501,7 @@ static COLUMN *column_vector;
1798 we store the leftmost columns contiguously in buff.
1799 To print a line from buff, get the index of the first character
1800 from line_vector[i], and print up to line_vector[i + 1]. */
1801 -static char *buff;
1802 +static unsigned char *buff;
1803
1804 /* Index of the position in buff where the next character
1805 will be stored. */
1806 @@ -554,7 +605,7 @@ static int chars_per_column;
1807 static bool untabify_input = false;
1808
1809 /* (-e) The input tab character. */
1810 -static char input_tab_char = '\t';
1811 +static char input_tab_char[MB_LEN_MAX] = "\t";
1812
1813 /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ...
1814 where the leftmost column is 1. */
1815 @@ -564,7 +615,10 @@ static int chars_per_input_tab = 8;
1816 static bool tabify_output = false;
1817
1818 /* (-i) The output tab character. */
1819 -static char output_tab_char = '\t';
1820 +static char output_tab_char[MB_LEN_MAX] = "\t";
1821 +
1822 +/* (-i) The byte length of output tab character. */
1823 +static int output_tab_char_length = 1;
1824
1825 /* (-i) The width of the output tab. */
1826 static int chars_per_output_tab = 8;
1827 @@ -638,7 +692,13 @@ static int power_10;
1828 static bool numbered_lines = false;
1829
1830 /* (-n) Character which follows each line number. */
1831 -static char number_separator = '\t';
1832 +static char number_separator[MB_LEN_MAX] = "\t";
1833 +
1834 +/* (-n) The byte length of the character which follows each line number. */
1835 +static int number_separator_length = 1;
1836 +
1837 +/* (-n) The character width of the character which follows each line number. */
1838 +static int number_separator_width = 0;
1839
1840 /* (-n) line counting starts with 1st line of input file (not with 1st
1841 line of 1st page printed). */
1842 @@ -691,6 +751,7 @@ static bool use_col_separator = false;
1843 -a|COLUMN|-m is a `space' and with the -J option a `tab'. */
1844 static char *col_sep_string = (char *) "";
1845 static int col_sep_length = 0;
1846 +static int col_sep_width = 0;
1847 static char *column_separator = (char *) " ";
1848 static char *line_separator = (char *) "\t";
1849
1850 @@ -847,6 +908,13 @@ separator_string (const char *optarg_S)
1851 col_sep_length = (int) strlen (optarg_S);
1852 col_sep_string = xmalloc (col_sep_length + 1);
1853 strcpy (col_sep_string, optarg_S);
1854 +
1855 +#if HAVE_MBRTOWC
1856 + if (MB_CUR_MAX > 1)
1857 + col_sep_width = mbswidth (col_sep_string, 0);
1858 + else
1859 +#endif
1860 + col_sep_width = col_sep_length;
1861 }
1862
1863 int
1864 @@ -871,6 +939,21 @@ main (int argc, char **argv)
1865
1866 atexit (close_stdout);
1867
1868 +/* Define which functions are used, the ones for single byte locale or the ones
1869 + for multibyte locale. */
1870 +#if HAVE_MBRTOWC
1871 + if (MB_CUR_MAX > 1)
1872 + {
1873 + print_char = print_char_multi;
1874 + char_to_clump = char_to_clump_multi;
1875 + }
1876 + else
1877 +#endif
1878 + {
1879 + print_char = print_char_single;
1880 + char_to_clump = char_to_clump_single;
1881 + }
1882 +
1883 n_files = 0;
1884 file_names = (argc > 1
1885 ? xmalloc ((argc - 1) * sizeof (char *))
1886 @@ -947,8 +1030,12 @@ main (int argc, char **argv)
1887 break;
1888 case 'e':
1889 if (optarg)
1890 - getoptarg (optarg, 'e', &input_tab_char,
1891 - &chars_per_input_tab);
1892 + {
1893 + int dummy_length, dummy_width;
1894 +
1895 + getoptarg (optarg, 'e', input_tab_char, &dummy_length,
1896 + &dummy_width, &chars_per_input_tab);
1897 + }
1898 /* Could check tab width > 0. */
1899 untabify_input = true;
1900 break;
1901 @@ -961,8 +1048,12 @@ main (int argc, char **argv)
1902 break;
1903 case 'i':
1904 if (optarg)
1905 - getoptarg (optarg, 'i', &output_tab_char,
1906 - &chars_per_output_tab);
1907 + {
1908 + int dummy_width;
1909 +
1910 + getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length,
1911 + &dummy_width, &chars_per_output_tab);
1912 + }
1913 /* Could check tab width > 0. */
1914 tabify_output = true;
1915 break;
1916 @@ -989,8 +1080,8 @@ main (int argc, char **argv)
1917 case 'n':
1918 numbered_lines = true;
1919 if (optarg)
1920 - getoptarg (optarg, 'n', &number_separator,
1921 - &chars_per_number);
1922 + getoptarg (optarg, 'n', number_separator, &number_separator_length,
1923 + &number_separator_width, &chars_per_number);
1924 break;
1925 case 'N':
1926 skip_count = false;
1927 @@ -1029,7 +1120,7 @@ main (int argc, char **argv)
1928 old_s = false;
1929 /* Reset an additional input of -s, -S dominates -s */
1930 col_sep_string = bad_cast ("");
1931 - col_sep_length = 0;
1932 + col_sep_length = col_sep_width = 0;
1933 use_col_separator = true;
1934 if (optarg)
1935 separator_string (optarg);
1936 @@ -1186,10 +1277,45 @@ main (int argc, char **argv)
1937 a number. */
1938
1939 static void
1940 -getoptarg (char *arg, char switch_char, char *character, int *number)
1941 +getoptarg (char *arg, char switch_char, char *character, int *character_length,
1942 + int *character_width, int *number)
1943 {
1944 if (!ISDIGIT (*arg))
1945 - *character = *arg++;
1946 + {
1947 +#ifdef HAVE_MBRTOWC
1948 + if (MB_CUR_MAX > 1) /* for multibyte locale. */
1949 + {
1950 + wchar_t wc;
1951 + size_t mblength;
1952 + int width;
1953 + mbstate_t state = {'\0'};
1954 +
1955 + mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state);
1956 +
1957 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1958 + {
1959 + *character_length = 1;
1960 + *character_width = 1;
1961 + }
1962 + else
1963 + {
1964 + *character_length = (mblength < 1) ? 1 : mblength;
1965 + width = wcwidth (wc);
1966 + *character_width = (width < 0) ? 0 : width;
1967 + }
1968 +
1969 + strncpy (character, arg, *character_length);
1970 + arg += *character_length;
1971 + }
1972 + else /* for single byte locale. */
1973 +#endif
1974 + {
1975 + *character = *arg++;
1976 + *character_length = 1;
1977 + *character_width = 1;
1978 + }
1979 + }
1980 +
1981 if (*arg)
1982 {
1983 long int tmp_long;
1984 @@ -1248,7 +1374,7 @@ init_parameters (int number_of_files)
1985 else
1986 col_sep_string = column_separator;
1987
1988 - col_sep_length = 1;
1989 + col_sep_length = col_sep_width = 1;
1990 use_col_separator = true;
1991 }
1992 /* It's rather pointless to define a TAB separator with column
1993 @@ -1279,11 +1405,11 @@ init_parameters (int number_of_files)
1994 TAB_WIDTH (chars_per_input_tab, chars_per_number); */
1995
1996 /* Estimate chars_per_text without any margin and keep it constant. */
1997 - if (number_separator == '\t')
1998 + if (number_separator[0] == '\t')
1999 number_width = chars_per_number +
2000 TAB_WIDTH (chars_per_default_tab, chars_per_number);
2001 else
2002 - number_width = chars_per_number + 1;
2003 + number_width = chars_per_number + number_separator_width;
2004
2005 /* The number is part of the column width unless we are
2006 printing files in parallel. */
2007 @@ -1298,7 +1424,7 @@ init_parameters (int number_of_files)
2008 }
2009
2010 chars_per_column = (chars_per_line - chars_used_by_number -
2011 - (columns - 1) * col_sep_length) / columns;
2012 + (columns - 1) * col_sep_width) / columns;
2013
2014 if (chars_per_column < 1)
2015 error (EXIT_FAILURE, 0, _("page width too narrow"));
2016 @@ -1423,7 +1549,7 @@ init_funcs (void)
2017
2018 /* Enlarge p->start_position of first column to use the same form of
2019 padding_not_printed with all columns. */
2020 - h = h + col_sep_length;
2021 + h = h + col_sep_width;
2022
2023 /* This loop takes care of all but the rightmost column. */
2024
2025 @@ -1457,7 +1583,7 @@ init_funcs (void)
2026 }
2027 else
2028 {
2029 - h = h_next + col_sep_length;
2030 + h = h_next + col_sep_width;
2031 h_next = h + chars_per_column;
2032 }
2033 }
2034 @@ -1747,9 +1873,9 @@ static void
2035 align_column (COLUMN *p)
2036 {
2037 padding_not_printed = p->start_position;
2038 - if (padding_not_printed - col_sep_length > 0)
2039 + if (padding_not_printed - col_sep_width > 0)
2040 {
2041 - pad_across_to (padding_not_printed - col_sep_length);
2042 + pad_across_to (padding_not_printed - col_sep_width);
2043 padding_not_printed = ANYWHERE;
2044 }
2045
2046 @@ -2020,13 +2146,13 @@ store_char (char c)
2047 /* May be too generous. */
2048 buff = X2REALLOC (buff, &buff_allocated);
2049 }
2050 - buff[buff_current++] = c;
2051 + buff[buff_current++] = (unsigned char) c;
2052 }
2053
2054 static void
2055 add_line_number (COLUMN *p)
2056 {
2057 - int i;
2058 + int i, j;
2059 char *s;
2060 int left_cut;
2061
2062 @@ -2049,22 +2175,24 @@ add_line_number (COLUMN *p)
2063 /* Tabification is assumed for multiple columns, also for n-separators,
2064 but `default n-separator = TAB' hasn't been given priority over
2065 equal column_width also specified by POSIX. */
2066 - if (number_separator == '\t')
2067 + if (number_separator[0] == '\t')
2068 {
2069 i = number_width - chars_per_number;
2070 while (i-- > 0)
2071 (p->char_func) (' ');
2072 }
2073 else
2074 - (p->char_func) (number_separator);
2075 + for (j = 0; j < number_separator_length; j++)
2076 + (p->char_func) (number_separator[j]);
2077 }
2078 else
2079 /* To comply with POSIX, we avoid any expansion of default TAB
2080 separator with a single column output. No column_width requirement
2081 has to be considered. */
2082 {
2083 - (p->char_func) (number_separator);
2084 - if (number_separator == '\t')
2085 + for (j = 0; j < number_separator_length; j++)
2086 + (p->char_func) (number_separator[j]);
2087 + if (number_separator[0] == '\t')
2088 output_position = POS_AFTER_TAB (chars_per_output_tab,
2089 output_position);
2090 }
2091 @@ -2225,7 +2353,7 @@ print_white_space (void)
2092 while (goal - h_old > 1
2093 && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal)
2094 {
2095 - putchar (output_tab_char);
2096 + fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout);
2097 h_old = h_new;
2098 }
2099 while (++h_old <= goal)
2100 @@ -2245,6 +2373,7 @@ print_sep_string (void)
2101 {
2102 char *s;
2103 int l = col_sep_length;
2104 + int not_space_flag;
2105
2106 s = col_sep_string;
2107
2108 @@ -2258,6 +2387,7 @@ print_sep_string (void)
2109 {
2110 for (; separators_not_printed > 0; --separators_not_printed)
2111 {
2112 + not_space_flag = 0;
2113 while (l-- > 0)
2114 {
2115 /* 3 types of sep_strings: spaces only, spaces and chars,
2116 @@ -2271,12 +2401,15 @@ print_sep_string (void)
2117 }
2118 else
2119 {
2120 + not_space_flag = 1;
2121 if (spaces_not_printed > 0)
2122 print_white_space ();
2123 putchar (*s++);
2124 - ++output_position;
2125 }
2126 }
2127 + if (not_space_flag)
2128 + output_position += col_sep_width;
2129 +
2130 /* sep_string ends with some spaces */
2131 if (spaces_not_printed > 0)
2132 print_white_space ();
2133 @@ -2304,7 +2437,7 @@ print_clump (COLUMN *p, int n, char *clu
2134 required number of tabs and spaces. */
2135
2136 static void
2137 -print_char (char c)
2138 +print_char_single (char c)
2139 {
2140 if (tabify_output)
2141 {
2142 @@ -2328,6 +2461,74 @@ print_char (char c)
2143 putchar (c);
2144 }
2145
2146 +#ifdef HAVE_MBRTOWC
2147 +static void
2148 +print_char_multi (char c)
2149 +{
2150 + static size_t mbc_pos = 0;
2151 + static char mbc[MB_LEN_MAX] = {'\0'};
2152 + static mbstate_t state = {'\0'};
2153 + mbstate_t state_bak;
2154 + wchar_t wc;
2155 + size_t mblength;
2156 + int width;
2157 +
2158 + if (tabify_output)
2159 + {
2160 + state_bak = state;
2161 + mbc[mbc_pos++] = c;
2162 + mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
2163 +
2164 + while (mbc_pos > 0)
2165 + {
2166 + switch (mblength)
2167 + {
2168 + case (size_t)-2:
2169 + state = state_bak;
2170 + return;
2171 +
2172 + case (size_t)-1:
2173 + state = state_bak;
2174 + ++output_position;
2175 + putchar (mbc[0]);
2176 + memmove (mbc, mbc + 1, MB_CUR_MAX - 1);
2177 + --mbc_pos;
2178 + break;
2179 +
2180 + case 0:
2181 + mblength = 1;
2182 +
2183 + default:
2184 + if (wc == L' ')
2185 + {
2186 + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
2187 + --mbc_pos;
2188 + ++spaces_not_printed;
2189 + return;
2190 + }
2191 + else if (spaces_not_printed > 0)
2192 + print_white_space ();
2193 +
2194 + /* Nonprintables are assumed to have width 0, except L'\b'. */
2195 + if ((width = wcwidth (wc)) < 1)
2196 + {
2197 + if (wc == L'\b')
2198 + --output_position;
2199 + }
2200 + else
2201 + output_position += width;
2202 +
2203 + fwrite (mbc, sizeof(char), mblength, stdout);
2204 + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
2205 + mbc_pos -= mblength;
2206 + }
2207 + }
2208 + return;
2209 + }
2210 + putchar (c);
2211 +}
2212 +#endif
2213 +
2214 /* Skip to page PAGE before printing.
2215 PAGE may be larger than total number of pages. */
2216
2217 @@ -2507,9 +2708,9 @@ read_line (COLUMN *p)
2218 align_empty_cols = false;
2219 }
2220
2221 - if (padding_not_printed - col_sep_length > 0)
2222 + if (padding_not_printed - col_sep_width > 0)
2223 {
2224 - pad_across_to (padding_not_printed - col_sep_length);
2225 + pad_across_to (padding_not_printed - col_sep_width);
2226 padding_not_printed = ANYWHERE;
2227 }
2228
2229 @@ -2610,9 +2811,9 @@ print_stored (COLUMN *p)
2230 }
2231 }
2232
2233 - if (padding_not_printed - col_sep_length > 0)
2234 + if (padding_not_printed - col_sep_width > 0)
2235 {
2236 - pad_across_to (padding_not_printed - col_sep_length);
2237 + pad_across_to (padding_not_printed - col_sep_width);
2238 padding_not_printed = ANYWHERE;
2239 }
2240
2241 @@ -2625,8 +2826,8 @@ print_stored (COLUMN *p)
2242 if (spaces_not_printed == 0)
2243 {
2244 output_position = p->start_position + end_vector[line];
2245 - if (p->start_position - col_sep_length == chars_per_margin)
2246 - output_position -= col_sep_length;
2247 + if (p->start_position - col_sep_width == chars_per_margin)
2248 + output_position -= col_sep_width;
2249 }
2250
2251 return true;
2252 @@ -2645,7 +2846,7 @@ print_stored (COLUMN *p)
2253 number of characters is 1.) */
2254
2255 static int
2256 -char_to_clump (char c)
2257 +char_to_clump_single (char c)
2258 {
2259 unsigned char uc = c;
2260 char *s = clump_buff;
2261 @@ -2655,10 +2856,10 @@ char_to_clump (char c)
2262 int chars;
2263 int chars_per_c = 8;
2264
2265 - if (c == input_tab_char)
2266 + if (c == input_tab_char[0])
2267 chars_per_c = chars_per_input_tab;
2268
2269 - if (c == input_tab_char || c == '\t')
2270 + if (c == input_tab_char[0] || c == '\t')
2271 {
2272 width = TAB_WIDTH (chars_per_c, input_position);
2273
2274 @@ -2739,6 +2940,154 @@ char_to_clump (char c)
2275 return chars;
2276 }
2277
2278 +#ifdef HAVE_MBRTOWC
2279 +static int
2280 +char_to_clump_multi (char c)
2281 +{
2282 + static size_t mbc_pos = 0; /* Number of bytes currently buffered in MBC. */
2283 + static char mbc[MB_LEN_MAX] = {'\0'}; /* Bytes of the character being assembled. */
2284 + static mbstate_t state = {'\0'};
2285 + mbstate_t state_bak;
2286 + wchar_t wc;
2287 + size_t mblength;
2288 + int wc_width;
2289 + register char *s = clump_buff;
2290 + register int i, j;
2291 + char esc_buff[4];
2292 + int width;
2293 + int chars;
2294 + int chars_per_c = 8;
2295 + /* Multibyte variant of char_to_clump_single: append byte C to MBC; return 0 while the sequence is incomplete, otherwise store the output bytes in clump_buff, add their display width to input_position and return the number of bytes stored. */
2296 + state_bak = state;
2297 + mbc[mbc_pos++] = c;
2298 + mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
2299 +
2300 + width = 0;
2301 + chars = 0;
2302 + while (mbc_pos > 0)
2303 + {
2304 + switch (mblength)
2305 + {
2306 + case (size_t)-2: /* Incomplete sequence: wait for more bytes. */
2307 + state = state_bak;
2308 + return 0;
2309 +
2310 + case (size_t)-1: /* Invalid sequence: emit the first buffered byte on its own. */
2311 + state = state_bak;
2312 + mblength = 1;
2313 + /* NOTE(review): mblength is not recomputed before the next iteration, so bytes still buffered after this case reach `default' with a stale WC -- confirm. */
2314 + if (use_esc_sequence || use_cntrl_prefix)
2315 + {
2316 + width += 4;
2317 + chars += 4;
2318 + *s++ = '\\';
2319 + sprintf (esc_buff, "%03o", (unsigned char) mbc[0]); /* Cast keeps bytes >= 0x80 to three octal digits; a negative char would overflow ESC_BUFF. */
2320 + for (i = 0; i <= 2; ++i)
2321 + *s++ = (int) esc_buff[i];
2322 + }
2323 + else
2324 + {
2325 + width += 1;
2326 + chars += 1;
2327 + *s++ = mbc[0];
2328 + }
2329 + break;
2330 +
2331 + case 0:
2332 + mblength = 1;
2333 + /* Fall through */
2334 +
2335 + default:
2336 + if (memcmp (mbc, input_tab_char, mblength) == 0)
2337 + chars_per_c = chars_per_input_tab;
2338 +
2339 + if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t')
2340 + {
2341 + int width_inc;
2342 +
2343 + width_inc = TAB_WIDTH (chars_per_c, input_position);
2344 + width += width_inc;
2345 +
2346 + if (untabify_input)
2347 + {
2348 + for (i = width_inc; i; --i)
2349 + *s++ = ' ';
2350 + chars += width_inc;
2351 + }
2352 + else
2353 + {
2354 + for (i = 0; i < mblength; i++)
2355 + *s++ = mbc[i];
2356 + chars += mblength;
2357 + }
2358 + }
2359 + else if ((wc_width = wcwidth (wc)) < 1)
2360 + {
2361 + if (use_esc_sequence)
2362 + {
2363 + for (i = 0; i < mblength; i++)
2364 + {
2365 + width += 4;
2366 + chars += 4;
2367 + *s++ = '\\';
2368 + sprintf (esc_buff, "%03o", (unsigned char) mbc[i]); /* Escape each byte of the character, not just the last one read. */
2369 + for (j = 0; j <= 2; ++j)
2370 + *s++ = (int) esc_buff[j];
2371 + }
2372 + }
2373 + else if (use_cntrl_prefix)
2374 + {
2375 + if (wc < 0200)
2376 + {
2377 + width += 2;
2378 + chars += 2;
2379 + *s++ = '^';
2380 + *s++ = wc ^ 0100;
2381 + }
2382 + else
2383 + {
2384 + for (i = 0; i < mblength; i++)
2385 + {
2386 + width += 4;
2387 + chars += 4;
2388 + *s++ = '\\';
2389 + sprintf (esc_buff, "%03o", (unsigned char) mbc[i]); /* Escape each byte of the character, not just the last one read. */
2390 + for (j = 0; j <= 2; ++j)
2391 + *s++ = (int) esc_buff[j];
2392 + }
2393 + }
2394 + }
2395 + else if (wc == L'\b')
2396 + {
2397 + width += -1;
2398 + chars += 1;
2399 + *s++ = c;
2400 + }
2401 + else
2402 + {
2403 + width += 0;
2404 + chars += mblength;
2405 + for (i = 0; i < mblength; i++)
2406 + *s++ = mbc[i];
2407 + }
2408 + }
2409 + else
2410 + {
2411 + width += wc_width;
2412 + chars += mblength;
2413 + for (i = 0; i < mblength; i++)
2414 + *s++ = mbc[i];
2415 + }
2416 + }
2417 + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); /* Drop the bytes just consumed from the front of MBC. */
2418 + mbc_pos -= mblength;
2419 + }
2420 +
2421 + input_position += width;
2422 + return chars;
2423 +}
2424 +#endif
2425 +
2426 /* We've just printed some files and need to clean up things before
2427 looking for more options and printing the next batch of files.
2428
2429 diff -urNp coreutils-8.0-orig/src/sort.c coreutils-8.0/src/sort.c
2430 --- coreutils-8.0-orig/src/sort.c 2009-09-29 15:27:54.000000000 +0200
2431 +++ coreutils-8.0/src/sort.c 2009-10-07 10:07:16.000000000 +0200
2432 @@ -22,10 +22,19 @@
2433
2434 #include <config.h>
2435
2436 +#include <assert.h>
2437 #include <getopt.h>
2438 #include <sys/types.h>
2439 #include <sys/wait.h>
2440 #include <signal.h>
2441 +#if HAVE_WCHAR_H
2442 +# include <wchar.h>
2443 +#endif
2444 +/* Get isw* functions. */
2445 +#if HAVE_WCTYPE_H
2446 +# include <wctype.h>
2447 +#endif
2448 +
2449 #include "system.h"
2450 #include "argmatch.h"
2451 #include "error.h"
2452 @@ -122,14 +131,38 @@ static int decimal_point;
2453 /* Thousands separator; if -1, then there isn't one. */
2454 static int thousands_sep;
2455
2456 +static int force_general_numcompare = 0;
2457 +
2458 /* Nonzero if the corresponding locales are hard. */
2459 static bool hard_LC_COLLATE;
2460 -#if HAVE_NL_LANGINFO
2461 +#if HAVE_LANGINFO_CODESET
2462 static bool hard_LC_TIME;
2463 #endif
2464
2465 #define NONZERO(x) ((x) != 0)
2466
2467 +/* Store in MBLENGTH the byte length of the multibyte character at PTR, examining no bytes at or beyond LIM; an invalid, incomplete or NUL character counts as 1 byte, and STATE is restored after a decoding failure. */
2468 +#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \
2469 + do \
2470 + { \
2471 + wchar_t wc; \
2472 + mbstate_t state_bak; \
2473 + \
2474 + state_bak = (STATE); \
2475 + (MBLENGTH) = mbrtowc (&wc, (PTR), (LIM) - (PTR), &(STATE)); \
2476 + \
2477 + switch (MBLENGTH) \
2478 + { \
2479 + case (size_t)-1: \
2480 + case (size_t)-2: \
2481 + (STATE) = state_bak; \
2482 + /* Fall through. */ \
2483 + case 0: \
2484 + (MBLENGTH) = 1; \
2485 + } \
2486 + } \
2487 + while (0)
2488 +
2489 /* The kind of blanks for '-b' to skip in various options. */
2490 enum blanktype { bl_start, bl_end, bl_both };
2491
2492 @@ -268,13 +301,11 @@ static bool reverse;
2493 they were read if all keys compare equal. */
2494 static bool stable;
2495
2496 -/* If TAB has this value, blanks separate fields. */
2497 -enum { TAB_DEFAULT = CHAR_MAX + 1 };
2498 -
2499 -/* Tab character separating fields. If TAB_DEFAULT, then fields are
2500 +/* Tab character separating fields. If tab_length is 0, then fields are
2501 separated by the empty string between a non-blank character and a blank
2502 character. */
2503 -static int tab = TAB_DEFAULT;
2504 +static char tab[MB_LEN_MAX + 1];
2505 +static size_t tab_length = 0;
2506
2507 /* Flag to remove consecutive duplicate lines from the output.
2508 Only the last of a sequence of equal lines will be output. */
2509 @@ -712,6 +743,44 @@ reap_some (void)
2510 update_proc (pid);
2511 }
2512
2513 +/* Function pointers to the locale-dependent routines; main points each one at its single-byte (_uni) or multibyte (_mb) implementation depending on MB_CUR_MAX. */
2514 +static void
2515 +(*inittables) (void);
2516 +static char *
2517 +(*begfield) (const struct line*, const struct keyfield *);
2518 +static char *
2519 +(*limfield) (const struct line*, const struct keyfield *);
2520 +static int
2521 +(*getmonth) (char const *, size_t);
2522 +static int
2523 +(*keycompare) (const struct line *, const struct line *);
2524 +static int
2525 +(*numcompare) (const char *, const char *);
2526 +
2527 +/* Return nonzero if the multibyte character starting at STR (at most LEN bytes are examined) is a blank according to iswblank.
2528 + Set *LENGTH to the byte length of that character; an invalid, incomplete or NUL character counts as 1 byte. */
2529 +#if HAVE_MBRTOWC
2530 +static int
2531 +ismbblank (const char *str, size_t len, size_t *length)
2532 +{
2533 + size_t mblength;
2534 + wchar_t wc;
2535 + mbstate_t state;
2536 +
2537 + memset (&state, '\0', sizeof(mbstate_t)); /* Always decode from the initial conversion state. */
2538 + mblength = mbrtowc (&wc, str, len, &state);
2539 +
2540 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2541 + {
2542 + *length = 1; /* Let the caller step over one undecodable byte. */
2543 + return 0;
2544 + }
2545 +
2546 + *length = (mblength < 1) ? 1 : mblength; /* mbrtowc returns 0 for a NUL character; report it as 1 byte. */
2547 + return iswblank (wc);
2548 +}
2549 +#endif
2550 +
2551 /* Clean up any remaining temporary files. */
2552
2553 static void
2554 @@ -1093,7 +1162,7 @@ zaptemp (const char *name)
2555 free (node);
2556 }
2557
2558 -#if HAVE_NL_LANGINFO
2559 +#if HAVE_LANGINFO_CODESET
2560
2561 static int
2562 struct_month_cmp (const void *m1, const void *m2)
2563 @@ -1108,7 +1177,7 @@ struct_month_cmp (const void *m1, const
2564 /* Initialize the character class tables. */
2565
2566 static void
2567 -inittables (void)
2568 +inittables_uni (void)
2569 {
2570 size_t i;
2571
2572 @@ -1120,7 +1189,7 @@ inittables (void)
2573 fold_toupper[i] = toupper (i);
2574 }
2575
2576 -#if HAVE_NL_LANGINFO
2577 +#if HAVE_LANGINFO_CODESET
2578 /* If we're not in the "C" locale, read different names for months. */
2579 if (hard_LC_TIME)
2580 {
2581 @@ -1202,6 +1271,64 @@ specify_nmerge (int oi, char c, char con
2582 xstrtol_fatal (e, oi, c, long_options, s);
2583 }
2584
2585 +#if HAVE_MBRTOWC
2586 +static void
2587 +inittables_mb (void)
2588 +{
2589 + int i, j, k, l;
2590 + char *name, *s;
2591 + size_t s_len, mblength;
2592 + char mbc[MB_LEN_MAX];
2593 + wchar_t wc, pwc;
2594 + mbstate_t state_mb, state_wc;
2595 + /* Fill monthtab with the locale's abbreviated month names (nl_langinfo ABMON_*), leading blanks removed and upper-cased per character, then sort it with struct_month_cmp. */
2596 + for (i = 0; i < MONTHS_PER_YEAR; i++)
2597 + {
2598 + s = (char *) nl_langinfo (ABMON_1 + i);
2599 + s_len = strlen (s);
2600 + monthtab[i].name = name = (char *) xmalloc (s_len * MB_LEN_MAX + 1); /* Upper-casing may re-encode a character into more bytes than the original, so size for the worst case. */
2601 + monthtab[i].val = i + 1;
2602 +
2603 + memset (&state_mb, '\0', sizeof (mbstate_t));
2604 + memset (&state_wc, '\0', sizeof (mbstate_t));
2605 +
2606 + for (j = 0; j < s_len;) /* Skip leading blanks. */
2607 + {
2608 + if (!ismbblank (s + j, s_len - j, &mblength))
2609 + break;
2610 + j += mblength;
2611 + }
2612 +
2613 + for (k = 0; j < s_len;) /* Copy the rest, upper-casing one character at a time. */
2614 + {
2615 + mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb);
2616 + assert (mblength != (size_t)-1 && mblength != (size_t)-2);
2617 + if (mblength == 0)
2618 + break;
2619 +
2620 + pwc = towupper (wc);
2621 + if (pwc == wc)
2622 + {
2623 + memcpy (mbc, s + j, mblength); /* Already upper case: keep the original bytes. */
2624 + j += mblength;
2625 + }
2626 + else
2627 + {
2628 + j += mblength;
2629 + mblength = wcrtomb (mbc, pwc, &state_wc);
2630 + assert (mblength != (size_t)0 && mblength != (size_t)-1);
2631 + }
2632 +
2633 + for (l = 0; l < mblength; l++)
2634 + name[k++] = mbc[l];
2635 + }
2636 + name[k] = '\0';
2637 + }
2638 + qsort ((void *) monthtab, MONTHS_PER_YEAR,
2639 + sizeof (struct month), struct_month_cmp);
2640 +}
2641 +#endif
2642 +
2643 /* Specify the amount of main memory to use when sorting. */
2644 static void
2645 specify_sort_size (int oi, char c, char const *s)
2646 @@ -1412,7 +1539,7 @@ buffer_linelim (struct buffer const *buf
2647 by KEY in LINE. */
2648
2649 static char *
2650 -begfield (const struct line *line, const struct keyfield *key)
2651 +begfield_uni (const struct line *line, const struct keyfield *key)
2652 {
2653 char *ptr = line->text, *lim = ptr + line->length - 1;
2654 size_t sword = key->sword;
2655 @@ -1421,10 +1548,10 @@ begfield (const struct line *line, const
2656 /* The leading field separator itself is included in a field when -t
2657 is absent. */
2658
2659 - if (tab != TAB_DEFAULT)
2660 + if (tab_length)
2661 while (ptr < lim && sword--)
2662 {
2663 - while (ptr < lim && *ptr != tab)
2664 + while (ptr < lim && *ptr != tab[0])
2665 ++ptr;
2666 if (ptr < lim)
2667 ++ptr;
2668 @@ -1450,11 +1577,70 @@ begfield (const struct line *line, const
2669 return ptr;
2670 }
2671
2672 +#if HAVE_MBRTOWC
2673 +static char *
2674 +begfield_mb (const struct line *line, const struct keyfield *key)
2675 +{
2676 + int i;
2677 + char *ptr = line->text, *lim = ptr + line->length - 1;
2678 + size_t sword = key->sword;
2679 + size_t schar = key->schar;
2680 + size_t mblength;
2681 + mbstate_t state;
2682 + /* Multibyte variant of begfield_uni: return a pointer into LINE's text after skipping KEY->sword fields and then KEY->schar characters, stepping by whole multibyte characters and never past LIM. */
2683 + memset (&state, '\0', sizeof(mbstate_t));
2684 +
2685 + if (tab_length) /* An explicit separator (the TAB bytes) delimits fields. */
2686 + while (ptr < lim && sword--)
2687 + {
2688 + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
2689 + {
2690 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2691 + ptr += mblength;
2692 + }
2693 + if (ptr < lim) /* Step over the separator itself. */
2694 + {
2695 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2696 + ptr += mblength;
2697 + }
2698 + }
2699 + else /* No separator set: a field is a run of blanks followed by a run of non-blanks. */
2700 + while (ptr < lim && sword--)
2701 + {
2702 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2703 + ptr += mblength;
2704 + if (ptr < lim)
2705 + {
2706 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2707 + ptr += mblength;
2708 + }
2709 + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
2710 + ptr += mblength;
2711 + }
2712 +
2713 + if (key->skipsblanks)
2714 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2715 + ptr += mblength;
2716 +
2717 + for (i = 0; i < schar; i++) /* Advance by SCHAR characters, stopping if the next one would pass LIM. */
2718 + {
2719 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2720 +
2721 + if (ptr + mblength > lim)
2722 + break;
2723 + else
2724 + ptr += mblength;
2725 + }
2726 +
2727 + return ptr;
2728 +}
2729 +#endif
2730 +
2731 /* Return the limit of (a pointer to the first character after) the field
2732 in LINE specified by KEY. */
2733
2734 static char *
2735 -limfield (const struct line *line, const struct keyfield *key)
2736 +limfield_uni (const struct line *line, const struct keyfield *key)
2737 {
2738 char *ptr = line->text, *lim = ptr + line->length - 1;
2739 size_t eword = key->eword, echar = key->echar;
2740 @@ -1469,10 +1655,10 @@ limfield (const struct line *line, const
2741 `beginning' is the first character following the delimiting TAB.
2742 Otherwise, leave PTR pointing at the first `blank' character after
2743 the preceding field. */
2744 - if (tab != TAB_DEFAULT)
2745 + if (tab_length)
2746 while (ptr < lim && eword--)
2747 {
2748 - while (ptr < lim && *ptr != tab)
2749 + while (ptr < lim && *ptr != tab[0])
2750 ++ptr;
2751 if (ptr < lim && (eword || echar))
2752 ++ptr;
2753 @@ -1518,10 +1704,10 @@ limfield (const struct line *line, const
2754 */
2755
2756 /* Make LIM point to the end of (one byte past) the current field. */
2757 - if (tab != TAB_DEFAULT)
2758 + if (tab_length)
2759 {
2760 char *newlim;
2761 - newlim = memchr (ptr, tab, lim - ptr);
2762 + newlim = memchr (ptr, tab[0], lim - ptr);
2763 if (newlim)
2764 lim = newlim;
2765 }
2766 @@ -1552,6 +1738,113 @@ limfield (const struct line *line, const
2767 return ptr;
2768 }
2769
2770 +#if HAVE_MBRTOWC
2771 +static char *
2772 +limfield_mb (const struct line *line, const struct keyfield *key)
2773 +{
2774 + char *ptr = line->text, *lim = ptr + line->length - 1;
2775 + size_t eword = key->eword, echar = key->echar;
2776 + int i;
2777 + size_t mblength;
2778 + mbstate_t state;
2779 + /* Multibyte variant of limfield_uni: return the limit of (a pointer to the first byte after) the field in LINE specified by KEY, stepping by whole multibyte characters. */
2780 + if (echar == 0)
2781 + eword++; /* skip all of end field. */
2782 +
2783 + memset (&state, '\0', sizeof(mbstate_t));
2784 +
2785 + if (tab_length) /* An explicit separator (the TAB bytes) delimits fields. */
2786 + while (ptr < lim && eword--)
2787 + {
2788 + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
2789 + {
2790 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2791 + ptr += mblength;
2792 + }
2793 + if (ptr < lim && (eword | echar)) /* Step over the separator unless this is the last field and no character offset follows. */
2794 + {
2795 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2796 + ptr += mblength;
2797 + }
2798 + }
2799 + else /* No separator set: a field is a run of blanks followed by a run of non-blanks. */
2800 + while (ptr < lim && eword--)
2801 + {
2802 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2803 + ptr += mblength;
2804 + if (ptr < lim)
2805 + {
2806 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2807 + ptr += mblength;
2808 + }
2809 + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
2810 + ptr += mblength;
2811 + }
2812 +
2813 +
2814 +# ifdef POSIX_UNSPECIFIED
2815 + /* Make LIM point to the end of (one byte past) the current field. */
2816 + if (tab_length)
2817 + {
2818 + char *p;
2819 +
2820 + /* Stop LIM at the first separator at or after PTR, as limfield_uni does with memchr. */
2821 + for (p = ptr; p < lim;)
2822 + {
2823 + if (memcmp (p, tab, tab_length) == 0)
2824 + {
2825 + lim = p;
2826 + break;
2827 + }
2828 +
2829 + GET_BYTELEN_OF_CHAR (lim, p, mblength, state); /* Measure the character at the scan position P, not at PTR. */
2830 + p += mblength;
2831 + }
2832 + }
2833 + else
2834 + {
2835 + char *newlim;
2836 + newlim = ptr;
2837 +
2838 + while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength))
2839 + newlim += mblength;
2840 + if (ptr < lim) /* NOTE(review): this advances PTR rather than NEWLIM; it looks like a copy/paste slip -- confirm intent before enabling POSIX_UNSPECIFIED. */
2841 + {
2842 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2843 + ptr += mblength;
2844 + }
2845 + while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength))
2846 + newlim += mblength;
2847 + lim = newlim;
2848 + }
2849 +# endif
2850 +
2851 + if (echar != 0)
2852 + {
2853 + /* If we're skipping leading blanks, don't start counting characters
2854 + * until after skipping past any leading blanks. */
2855 + if (key->skipsblanks)
2856 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2857 + ptr += mblength;
2858 +
2859 + memset (&state, '\0', sizeof(mbstate_t));
2860 +
2861 + /* Advance PTR by ECHAR (if possible), but no further than LIM. */
2862 + for (i = 0; i < echar; i++)
2863 + {
2864 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2865 +
2866 + if (ptr + mblength > lim)
2867 + break;
2868 + else
2869 + ptr += mblength;
2870 + }
2871 + }
2872 +
2873 + return ptr;
2874 +}
2875 +#endif
2876 +
2877 /* Fill BUF reading from FP, moving buf->left bytes from the end
2878 of buf->buf to the beginning first. If EOF is reached and the
2879 file wasn't terminated by a newline, supply one. Set up BUF's line
2880 @@ -1634,8 +1927,24 @@ fillbuf (struct buffer *buf, FILE *fp, c
2881 else
2882 {
2883 if (key->skipsblanks)
2884 - while (blanks[to_uchar (*line_start)])
2885 - line_start++;
2886 + {
2887 +#if HAVE_MBRTOWC
2888 + if (MB_CUR_MAX > 1)
2889 + {
2890 + size_t mblength;
2891 + mbstate_t state;
2892 + memset (&state, '\0', sizeof(mbstate_t));
2893 + while (line_start < line->keylim &&
2894 + ismbblank (line_start,
2895 + line->keylim - line_start,
2896 + &mblength))
2897 + line_start += mblength;
2898 + }
2899 + else
2900 +#endif
2901 + while (blanks[to_uchar (*line_start)])
2902 + line_start++;
2903 + }
2904 line->keybeg = line_start;
2905 }
2906 }
2907 @@ -1673,7 +1982,7 @@ fillbuf (struct buffer *buf, FILE *fp, c
2908 hideously fast. */
2909
2910 static int
2911 -numcompare (const char *a, const char *b)
2912 +numcompare_uni (const char *a, const char *b)
2913 {
2914 while (blanks[to_uchar (*a)])
2915 a++;
2916 @@ -1782,6 +2091,25 @@ human_numcompare (const char *a, const c
2917 : strnumcmp (a, b, decimal_point, thousands_sep));
2918 }
2919
2920 +#if HAVE_MBRTOWC
2921 +static int
2922 +numcompare_mb (const char *a, const char *b)
2923 +{ /* Multibyte variant of numcompare_uni: skip leading blanks in A and B with ismbblank, then compare them with strnumcmp. */
2924 + size_t mblength, len;
2925 + len = strlen (a); /* okay for UTF-8 */
2926 + while (*a && ismbblank (a, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength))
2927 + {
2928 + a += mblength;
2929 + len -= mblength;
2930 + }
2931 + len = strlen (b); /* okay for UTF-8 */
2932 + while (*b && ismbblank (b, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength))
2933 + b += mblength, len -= mblength; /* Keep LEN in step with B, as the loop over A does. */
2934 +
2935 + return strnumcmp (a, b, decimal_point, thousands_sep);
2936 +}
2937 +#endif /* HAVE_MBRTOWC */
2938 +
2939 static int
2940 general_numcompare (const char *sa, const char *sb)
2941 {
2942 @@ -1815,7 +2143,7 @@ general_numcompare (const char *sa, cons
2943 Return 0 if the name in S is not recognized. */
2944
2945 static int
2946 -getmonth (char const *month, size_t len)
2947 +getmonth_uni (char const *month, size_t len)
2948 {
2949 size_t lo = 0;
2950 size_t hi = MONTHS_PER_YEAR;
2951 @@ -1996,11 +2324,79 @@ compare_version (char *restrict texta, s
2952 return diff;
2953 }
2954
2955 +#if HAVE_MBRTOWC
2956 +static int
2957 +getmonth_mb (const char *s, size_t len)
2958 +{
2959 + char *month;
2960 + register size_t i;
2961 + register int lo = 0, hi = MONTHS_PER_YEAR, result;
2962 + char *tmp;
2963 + size_t wclength, mblength;
2964 + const char **pp;
2965 + const wchar_t **wpp;
2966 + wchar_t *month_wcs;
2967 + mbstate_t state;
2968 + /* Multibyte variant of getmonth_uni: return the monthtab value whose name prefixes the upper-cased, blank-stripped text in the LEN bytes at S, or 0 if none matches or the text cannot be converted. */
2969 + while (len > 0 && ismbblank (s, len, &mblength))
2970 + {
2971 + s += mblength;
2972 + len -= mblength;
2973 + }
2974 +
2975 + if (len == 0)
2976 + return 0;
2977 +
2978 + month = (char *) alloca (len + 1);
2979 +
2980 + tmp = (char *) alloca (len + 1); /* NUL-terminated copy of S for mbsrtowcs. */
2981 + memcpy (tmp, s, len);
2982 + tmp[len] = '\0';
2983 + pp = (const char **)&tmp;
2984 + month_wcs = (wchar_t *) alloca ((len + 1) * sizeof (wchar_t));
2985 + memset (&state, '\0', sizeof(mbstate_t));
2986 +
2987 + wclength = mbsrtowcs (month_wcs, pp, len + 1, &state);
2988 + if (wclength == (size_t)-1 || *pp != NULL) return 0; /* Invalid multibyte input is not a month name; do not abort on user data. */
2989 +
2990 + for (i = 0; i < wclength; i++) /* Upper-case the text and cut it at the first blank. */
2991 + {
2992 + month_wcs[i] = towupper(month_wcs[i]);
2993 + if (iswblank (month_wcs[i]))
2994 + {
2995 + month_wcs[i] = L'\0';
2996 + break;
2997 + }
2998 + }
2999 +
3000 + wpp = (const wchar_t **)&month_wcs;
3001 +
3002 + mblength = wcsrtombs (month, wpp, len + 1, &state);
3003 + if (mblength == (size_t)-1 || *wpp != NULL) return 0; /* Upper-cased text is unencodable or does not fit in MONTH: treat as unrecognized. */
3004 +
3005 + do /* Binary search of the sorted monthtab. */
3006 + {
3007 + int ix = (lo + hi) / 2;
3008 +
3009 + if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
3010 + hi = ix;
3011 + else
3012 + lo = ix;
3013 + }
3014 + while (hi - lo > 1);
3015 +
3016 + result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
3017 + ? monthtab[lo].val : 0);
3018 +
3019 + return result;
3020 +}
3021 +#endif
3022 +
3023 /* Compare two lines A and B trying every key in sequence until there
3024 are no more keys or a difference is found. */
3025
3026 static int
3027 -keycompare (const struct line *a, const struct line *b)
3028 +keycompare_uni (const struct line *a, const struct line *b)
3029 {
3030 struct keyfield *key = keylist;
3031
3032 @@ -2180,6 +2576,179 @@ keycompare (const struct line *a, const
3033 return key->reverse ? -diff : diff;
3034 }
3035
3036 +#if HAVE_MBRTOWC
3037 +static int
3038 +keycompare_mb (const struct line *a, const struct line *b)
3039 +{
3040 + struct keyfield *key = keylist;
3041 + /* Multibyte variant of keycompare_uni: compare lines A and B key by key; return the first nonzero difference (negated for a reversed key) or 0 if every key compares equal. */
3042 + /* For the first iteration only, the key positions have been
3043 + precomputed for us. */
3044 + char *texta = a->keybeg;
3045 + char *textb = b->keybeg;
3046 + char *lima = a->keylim;
3047 + char *limb = b->keylim;
3048 +
3049 + size_t mblength_a, mblength_b;
3050 + wchar_t wc_a, wc_b;
3051 + mbstate_t state_a, state_b;
3052 +
3053 + int diff;
3054 +
3055 + memset (&state_a, '\0', sizeof(mbstate_t));
3056 + memset (&state_b, '\0', sizeof(mbstate_t));
3057 +
3058 + for (;;)
3059 + {
3060 + char const *translate = key->translate;
3061 + bool const *ignore = key->ignore;
3062 +
3063 + /* Find the lengths. */
3064 + size_t lena = lima <= texta ? 0 : lima - texta;
3065 + size_t lenb = limb <= textb ? 0 : limb - textb;
3066 +
3067 + /* Actually compare the fields. */
3068 + if (key->random)
3069 + diff = compare_random (texta, lena, textb, lenb);
3070 + else if (key->numeric | key->general_numeric | key->human_numeric)
3071 + {
3072 + char savea = *lima, saveb = *limb;
3073 +
3074 + *lima = *limb = '\0'; /* Temporarily NUL-terminate both keys for the string-based comparers. */
3075 + diff = (key->numeric ? numcompare (texta, textb)
3076 + : key->general_numeric ? general_numcompare (texta, textb)
3077 + : human_numcompare (texta, textb, key));
3078 + *lima = savea, *limb = saveb;
3079 + }
3080 + else if (key->version)
3081 + diff = compare_version (texta, lena, textb, lenb);
3082 + else if (key->month)
3083 + diff = getmonth (texta, lena) - getmonth (textb, lenb);
3084 + else
3085 + {
3086 + if (ignore || translate)
3087 + {
3088 + char *copy_a = (char *) alloca (lena + 1 + lenb + 1);
3089 + char *copy_b = copy_a + lena + 1;
3090 + size_t new_len_a, new_len_b;
3091 + size_t i, j;
3092 +
3093 + /* Ignore and/or translate chars before comparing. */
3094 +# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \
3095 + do \
3096 + { \
3097 + wchar_t uwc; \
3098 + char mbc[MB_LEN_MAX]; \
3099 + mbstate_t state_wc; \
3100 + \
3101 + for (NEW_LEN = i = 0; i < LEN;) \
3102 + { \
3103 + mbstate_t state_bak; \
3104 + \
3105 + state_bak = STATE; \
3106 + MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \
3107 + \
3108 + if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \
3109 + || MBLENGTH == 0) \
3110 + { \
3111 + if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \
3112 + STATE = state_bak; \
3113 + if (!ignore) COPY[NEW_LEN++] = TEXT[i]; \
3114 + i++; /* Always consume the byte, or the loop never ends when IGNORE is set. */ \
3115 + continue; \
3116 + } \
3117 + \
3118 + if (ignore) \
3119 + { \
3120 + if ((ignore == nonprinting && !iswprint (WC)) \
3121 + || (ignore == nondictionary \
3122 + && !iswalnum (WC) && !iswblank (WC))) \
3123 + { \
3124 + i += MBLENGTH; \
3125 + continue; \
3126 + } \
3127 + } \
3128 + \
3129 + if (translate) \
3130 + { \
3131 + \
3132 + uwc = towupper(WC); \
3133 + if (WC == uwc) \
3134 + { \
3135 + memcpy (mbc, TEXT + i, MBLENGTH); \
3136 + i += MBLENGTH; \
3137 + } \
3138 + else \
3139 + { \
3140 + i += MBLENGTH; \
3141 + WC = uwc; \
3142 + memset (&state_wc, '\0', sizeof (mbstate_t)); \
3143 + \
3144 + MBLENGTH = wcrtomb (mbc, WC, &state_wc); \
3145 + assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \
3146 + } \
3147 + \
3148 + for (j = 0; j < MBLENGTH; j++) \
3149 + COPY[NEW_LEN++] = mbc[j]; \
3150 + } \
3151 + else \
3152 + for (j = 0; j < MBLENGTH; j++) \
3153 + COPY[NEW_LEN++] = TEXT[i++]; \
3154 + } \
3155 + COPY[NEW_LEN] = '\0'; \
3156 + } \
3157 + while (0)
3158 + IGNORE_CHARS (new_len_a, lena, texta, copy_a,
3159 + wc_a, mblength_a, state_a);
3160 + IGNORE_CHARS (new_len_b, lenb, textb, copy_b,
3161 + wc_b, mblength_b, state_b);
3162 + diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
3163 + }
3164 + else if (lena == 0)
3165 + diff = - NONZERO (lenb);
3166 + else if (lenb == 0)
3167 + goto greater;
3168 + else
3169 + diff = xmemcoll (texta, lena, textb, lenb);
3170 + }
3171 +
3172 + if (diff)
3173 + goto not_equal;
3174 +
3175 + key = key->next;
3176 + if (! key)
3177 + break;
3178 +
3179 + /* Find the beginning and limit of the next field. */
3180 + if (key->eword != -1)
3181 + lima = limfield (a, key), limb = limfield (b, key);
3182 + else
3183 + lima = a->text + a->length - 1, limb = b->text + b->length - 1;
3184 +
3185 + if (key->sword != -1)
3186 + texta = begfield (a, key), textb = begfield (b, key);
3187 + else
3188 + {
3189 + texta = a->text, textb = b->text;
3190 + if (key->skipsblanks)
3191 + {
3192 + while (texta < lima && ismbblank (texta, lima - texta, &mblength_a))
3193 + texta += mblength_a;
3194 + while (textb < limb && ismbblank (textb, limb - textb, &mblength_b))
3195 + textb += mblength_b;
3196 + }
3197 + }
3198 + }
3199 +
3200 + return 0;
3201 +
3202 +greater:
3203 + diff = 1;
3204 +not_equal:
3205 + return key->reverse ? -diff : diff;
3206 +}
3207 +#endif
3208 +
3209 /* Compare two lines A and B, returning negative, zero, or positive
3210 depending on whether A compares less than, equal to, or greater than B. */
3211
3212 @@ -3178,7 +3747,7 @@ main (int argc, char **argv)
3213 initialize_exit_failure (SORT_FAILURE);
3214
3215 hard_LC_COLLATE = hard_locale (LC_COLLATE);
3216 -#if HAVE_NL_LANGINFO
3217 +#if HAVE_LANGINFO_CODESET
3218 hard_LC_TIME = hard_locale (LC_TIME);
3219 #endif
3220
3221 @@ -3199,6 +3768,27 @@ main (int argc, char **argv)
3222 thousands_sep = -1;
3223 }
3224
3225 +#if HAVE_MBRTOWC
3226 + if (MB_CUR_MAX > 1)
3227 + {
3228 + inittables = inittables_mb;
3229 + begfield = begfield_mb;
3230 + limfield = limfield_mb;
3231 + getmonth = getmonth_mb;
3232 + keycompare = keycompare_mb;
3233 + numcompare = numcompare_mb;
3234 + }
3235 + else
3236 +#endif
3237 + {
3238 + inittables = inittables_uni;
3239 + begfield = begfield_uni;
3240 + limfield = limfield_uni;
3241 + getmonth = getmonth_uni;
3242 + keycompare = keycompare_uni;
3243 + numcompare = numcompare_uni;
3244 + }
3245 +
3246 have_read_stdin = false;
3247 inittables ();
3248
3249 @@ -3459,13 +4049,35 @@ main (int argc, char **argv)
3250
3251 case 't':
3252 {
3253 - char newtab = optarg[0];
3254 - if (! newtab)
3255 + char newtab[MB_LEN_MAX + 1];
3256 + size_t newtab_length = 1;
3257 + strncpy (newtab, optarg, MB_LEN_MAX);
3258 + if (! newtab[0])
3259 error (SORT_FAILURE, 0, _("empty tab"));
3260 - if (optarg[1])
3261 +#if HAVE_MBRTOWC
3262 + if (MB_CUR_MAX > 1)
3263 + {
3264 + wchar_t wc;
3265 + mbstate_t state;
3266 + size_t i;
3267 +
3268 + memset (&state, '\0', sizeof (mbstate_t));
3269 + newtab_length = mbrtowc (&wc, newtab, strnlen (newtab,
3270 + MB_LEN_MAX),
3271 + &state);
3272 + switch (newtab_length)
3273 + {
3274 + case (size_t) -1:
3275 + case (size_t) -2:
3276 + case 0:
3277 + newtab_length = 1;
3278 + }
3279 + }
3280 +#endif
3281 + if (newtab_length == 1 && optarg[1])
3282 {
3283 if (STREQ (optarg, "\\0"))
3284 - newtab = '\0';
3285 + newtab[0] = '\0';
3286 else
3287 {
3288 /* Provoke with `sort -txx'. Complain about
3289 @@ -3476,9 +4088,12 @@ main (int argc, char **argv)
3290 quote (optarg));
3291 }
3292 }
3293 - if (tab != TAB_DEFAULT && tab != newtab)
3294 + if (tab_length
3295 + && (tab_length != newtab_length
3296 + || memcmp (tab, newtab, tab_length) != 0))
3297 error (SORT_FAILURE, 0, _("incompatible tabs"));
3298 - tab = newtab;
3299 + memcpy (tab, newtab, newtab_length);
3300 + tab_length = newtab_length;
3301 }
3302 break;
3303
3304 diff -urNp coreutils-8.0-orig/src/unexpand.c coreutils-8.0/src/unexpand.c
3305 --- coreutils-8.0-orig/src/unexpand.c 2009-09-29 15:27:54.000000000 +0200
3306 +++ coreutils-8.0/src/unexpand.c 2009-10-07 10:07:16.000000000 +0200
3307 @@ -38,11 +38,28 @@
3308 #include <stdio.h>
3309 #include <getopt.h>
3310 #include <sys/types.h>
3311 +
3312 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
3313 +#if HAVE_WCHAR_H
3314 +# include <wchar.h>
3315 +#endif
3316 +
3317 #include "system.h"
3318 #include "error.h"
3319 #include "quote.h"
3320 #include "xstrndup.h"
3321
3322 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
3323 + installation; work around this configuration error. */
3324 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
3325 +# define MB_LEN_MAX 16
3326 +#endif
3327 +
3328 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
3329 +#if HAVE_MBRTOWC && defined mbstate_t
3330 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
3331 +#endif
3332 +
3333 /* The official name of this program (e.g., no `g' prefix). */
3334 #define PROGRAM_NAME "unexpand"
3335
3336 @@ -102,6 +119,208 @@ static struct option const longopts[] =
3337 {NULL, 0, NULL, 0}
3338 };
3339
3340 +static FILE *next_file (FILE *fp);
3341 +
3342 +#if HAVE_MBRTOWC
3343 +static void
3344 +unexpand_multibyte (void)
3345 +{
3346 + FILE *fp; /* Input stream. */
3347 + mbstate_t i_state; /* Current shift state of the input stream. */
3348 + mbstate_t i_state_bak; /* Back up the I_STATE. */
3349 + mbstate_t o_state; /* Current shift state of the output stream. */
3350 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
3351 + char *bufpos; /* Next read position of BUF. */
3352 + size_t buflen = 0; /* The length of the byte sequence in buf. */
3353 + wint_t wc; /* A gotten wide character. */
3354 + size_t mblength; /* The byte size of a multibyte character
3355 + which shows as same character as WC. */
3356 +
3357 + /* Index in `tab_list' of next tabstop: */
3358 + int tab_index = 0; /* For calculating width of pending tabs. */
3359 + int print_tab_index = 0; /* For printing as many tabs as possible. */
3360 + unsigned int column = 0; /* Column on screen of next char. */
3361 + int next_tab_column; /* Column the next tab stop is on. */
3362 + int convert = 1; /* If nonzero, perform translations. */
3363 + unsigned int pending = 0; /* Pending columns of blanks. */
3364 +
3365 + fp = next_file ((FILE *) NULL);
3366 + if (fp == NULL)
3367 + return;
3368 +
3369 + memset (&o_state, '\0', sizeof(mbstate_t));
3370 + memset (&i_state, '\0', sizeof(mbstate_t));
3371 +
3372 + for (;;)
3373 + {
3374 + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
3375 + {
3376 + memmove (buf, bufpos, buflen);
3377 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
3378 + bufpos = buf;
3379 + }
3380 +
3381 + /* Get a wide character. */
3382 + if (buflen < 1)
3383 + {
3384 + mblength = 1;
3385 + wc = WEOF;
3386 + }
3387 + else
3388 + {
3389 + i_state_bak = i_state;
3390 + mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &i_state);
3391 + }
3392 +
3393 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
3394 + {
3395 + i_state = i_state_bak;
3396 + wc = L'\0';
3397 + }
3398 +
3399 + if (wc == L' ' && convert && column < INT_MAX)
3400 + {
3401 + ++pending;
3402 + ++column;
3403 + }
3404 + else if (wc == L'\t' && convert)
3405 + {
3406 + if (tab_size == 0)
3407 + {
3408 + /* Do not let tab_index == first_free_tab;
3409 + stop when it is 1 less. */
3410 + while (tab_index < first_free_tab - 1
3411 + && column >= tab_list[tab_index])
3412 + tab_index++;
3413 + next_tab_column = tab_list[tab_index];
3414 + if (tab_index < first_free_tab - 1)
3415 + tab_index++;
3416 + if (column >= next_tab_column)
3417 + {
3418 + convert = 0; /* Ran out of tab stops. */
3419 + goto flush_pend_mb;
3420 + }
3421 + }
3422 + else
3423 + {
3424 + next_tab_column = column + tab_size - column % tab_size;
3425 + }
3426 + pending += next_tab_column - column;
3427 + column = next_tab_column;
3428 + }
3429 + else
3430 + {
3431 +flush_pend_mb:
3432 + /* Flush pending spaces. Print as many tabs as possible,
3433 + then print the rest as spaces. */
3434 + if (pending == 1)
3435 + {
3436 + putchar (' ');
3437 + pending = 0;
3438 + }
3439 + column -= pending;
3440 + while (pending > 0)
3441 + {
3442 + if (tab_size == 0)
3443 + {
3444 + /* Do not let print_tab_index == first_free_tab;
3445 + stop when it is 1 less. */
3446 + while (print_tab_index < first_free_tab - 1
3447 + && column >= tab_list[print_tab_index])
3448 + print_tab_index++;
3449 + next_tab_column = tab_list[print_tab_index];
3450 + if (print_tab_index < first_free_tab - 1)
3451 + print_tab_index++;
3452 + }
3453 + else
3454 + {
3455 + next_tab_column =
3456 + column + tab_size - column % tab_size;
3457 + }
3458 + if (next_tab_column - column <= pending)
3459 + {
3460 + putchar ('\t');
3461 + pending -= next_tab_column - column;
3462 + column = next_tab_column;
3463 + }
3464 + else
3465 + {
3466 + --print_tab_index;
3467 + column += pending;
3468 + while (pending != 0)
3469 + {
3470 + putchar (' ');
3471 + pending--;
3472 + }
3473 + }
3474 + }
3475 +
3476 + if (wc == WEOF)
3477 + {
3478 + fp = next_file (fp);
3479 + if (fp == NULL)
3480 + break; /* No more files. */
3481 + else
3482 + {
3483 + memset (&i_state, '\0', sizeof(mbstate_t));
3484 + continue;
3485 + }
3486 + }
3487 +
3488 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
3489 + {
3490 + if (convert)
3491 + {
3492 + ++column;
3493 + if (convert_entire_line == 0)
3494 + convert = 0;
3495 + }
3496 + mblength = 1;
3497 + putchar (buf[0]);
3498 + }
3499 + else if (mblength == 0)
3500 + {
3501 + if (convert && convert_entire_line == 0)
3502 + convert = 0;
3503 + mblength = 1;
3504 + putchar ('\0');
3505 + }
3506 + else
3507 + {
3508 + if (convert)
3509 + {
3510 + if (wc == L'\b')
3511 + {
3512 + if (column > 0)
3513 + --column;
3514 + }
3515 + else
3516 + {
3517 + int width; /* The width of WC. */
3518 +
3519 + width = wcwidth (wc);
3520 + column += (width > 0) ? width : 0;
3521 + if (convert_entire_line == 0)
3522 + convert = 0;
3523 + }
3524 + }
3525 +
3526 + if (wc == L'\n')
3527 + {
3528 + tab_index = print_tab_index = 0;
3529 + column = pending = 0;
3530 + convert = 1;
3531 + }
3532 + fwrite (bufpos, sizeof(char), mblength, stdout);
3533 + }
3534 + }
3535 + buflen -= mblength;
3536 + bufpos += mblength;
3537 + }
3538 +}
3539 +#endif
3540 +
3541 +
3542 void
3543 usage (int status)
3544 {
3545 @@ -523,7 +742,12 @@ main (int argc, char **argv)
3546
3547 file_list = (optind < argc ? &argv[optind] : stdin_argv);
3548
3549 - unexpand ();
3550 +#if HAVE_MBRTOWC
3551 + if (MB_CUR_MAX > 1)
3552 + unexpand_multibyte ();
3553 + else
3554 +#endif
3555 + unexpand ();
3556
3557 if (have_read_stdin && fclose (stdin) != 0)
3558 error (EXIT_FAILURE, errno, "-");
3559 diff -urNp coreutils-8.0-orig/src/uniq.c coreutils-8.0/src/uniq.c
3560 --- coreutils-8.0-orig/src/uniq.c 2009-09-23 10:25:44.000000000 +0200
3561 +++ coreutils-8.0/src/uniq.c 2009-10-07 10:07:16.000000000 +0200
3562 @@ -22,6 +22,16 @@
3563 #include <getopt.h>
3564 #include <sys/types.h>
3565
3566 +/* Get mbstate_t, mbrtowc(). */
3567 +#if HAVE_WCHAR_H
3568 +# include <wchar.h>
3569 +#endif
3570 +
3571 +/* Get isw* functions. */
3572 +#if HAVE_WCTYPE_H
3573 +# include <wctype.h>
3574 +#endif
3575 +
3576 #include "system.h"
3577 #include "argmatch.h"
3578 #include "linebuffer.h"
3579 @@ -31,7 +41,19 @@
3580 #include "stdio--.h"
3581 #include "xmemcoll.h"
3582 #include "xstrtol.h"
3583 -#include "memcasecmp.h"
3584 +#include "xmemcoll.h"
3585 +
3586 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
3587 + installation; work around this configuration error. */
3588 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
3589 +# define MB_LEN_MAX 16
3590 +#endif
3591 +
3592 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
3593 +#if HAVE_MBRTOWC && defined mbstate_t
3594 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
3595 +#endif
3596 +
3597
3598 /* The official name of this program (e.g., no `g' prefix). */
3599 #define PROGRAM_NAME "uniq"
3600 @@ -107,6 +129,10 @@ static enum delimit_method const delimit
3601 /* Select whether/how to delimit groups of duplicate lines. */
3602 static enum delimit_method delimit_groups;
3603
3604 +/* Function pointers. */
3605 +static char *
3606 +(*find_field) (struct linebuffer *line);
3607 +
3608 static struct option const longopts[] =
3609 {
3610 {"count", no_argument, NULL, 'c'},
3611 @@ -206,7 +232,7 @@ size_opt (char const *opt, char const *m
3612 return a pointer to the beginning of the line's field to be compared. */
3613
3614 static char *
3615 -find_field (struct linebuffer const *line)
3616 +find_field_uni (struct linebuffer *line)
3617 {
3618 size_t count;
3619 char const *lp = line->buffer;
3620 @@ -227,6 +253,83 @@ find_field (struct linebuffer const *lin
3621 return line->buffer + i;
3622 }
3623
3624 +#if HAVE_MBRTOWC
3625 +
3626 +# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \
3627 + do \
3628 + { \
3629 + mbstate_t state_bak; \
3630 + \
3631 + CONVFAIL = 0; \
3632 + state_bak = *STATEP; \
3633 + \
3634 + MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \
3635 + \
3636 + switch (MBLENGTH) \
3637 + { \
3638 + case (size_t)-2: \
3639 + case (size_t)-1: \
3640 + *STATEP = state_bak; \
3641 + CONVFAIL++; \
3642 + /* Fall through */ \
3643 + case 0: \
3644 + MBLENGTH = 1; \
3645 + } \
3646 + } \
3647 + while (0)
3648 +
3649 +static char *
3650 +find_field_multi (struct linebuffer *line)
3651 +{
3652 + size_t count;
3653 + char *lp = line->buffer;
3654 + size_t size = line->length - 1;
3655 + size_t pos;
3656 + size_t mblength;
3657 + wchar_t wc;
3658 + mbstate_t *statep;
3659 + int convfail;
3660 +
3661 + pos = 0;
3662 + statep = &(line->state);
3663 +
3664 + /* skip fields. */
3665 + for (count = 0; count < skip_fields && pos < size; count++)
3666 + {
3667 + while (pos < size)
3668 + {
3669 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
3670 +
3671 + if (convfail || !iswblank (wc))
3672 + {
3673 + pos += mblength;
3674 + break;
3675 + }
3676 + pos += mblength;
3677 + }
3678 +
3679 + while (pos < size)
3680 + {
3681 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
3682 +
3683 + if (!convfail && iswblank (wc))
3684 + break;
3685 +
3686 + pos += mblength;
3687 + }
3688 + }
3689 +
3690 + /* skip chars. */
3691 + for (count = 0; count < skip_chars && pos < size; count++)
3692 + {
3693 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
3694 + pos += mblength;
3695 + }
3696 +
3697 + return lp + pos;
3698 +}
3699 +#endif
3700 +
3701 /* Return false if two strings OLD and NEW match, true if not.
3702 OLD and NEW point not to the beginnings of the lines
3703 but rather to the beginnings of the fields to compare.
3704 @@ -235,6 +338,8 @@ find_field (struct linebuffer const *lin
3705 static bool
3706 different (char *old, char *new, size_t oldlen, size_t newlen)
3707 {
3708 + char *copy_old, *copy_new;
3709 +
3710 if (check_chars < oldlen)
3711 oldlen = check_chars;
3712 if (check_chars < newlen)
3713 @@ -242,14 +347,92 @@ different (char *old, char *new, size_t
3714
3715 if (ignore_case)
3716 {
3717 - /* FIXME: This should invoke strcoll somehow. */
3718 - return oldlen != newlen || memcasecmp (old, new, oldlen);
3719 + size_t i;
3720 +
3721 + copy_old = alloca (oldlen + 1);
3722 + copy_new = alloca (oldlen + 1);
3723 +
3724 + for (i = 0; i < oldlen; i++)
3725 + {
3726 + copy_old[i] = toupper (old[i]);
3727 + copy_new[i] = toupper (new[i]);
3728 + }
3729 }
3730 - else if (hard_LC_COLLATE)
3731 - return xmemcoll (old, oldlen, new, newlen) != 0;
3732 else
3733 - return oldlen != newlen || memcmp (old, new, oldlen);
3734 + {
3735 + copy_old = (char *)old;
3736 + copy_new = (char *)new;
3737 + }
3738 +
3739 + return xmemcoll (copy_old, oldlen, copy_new, newlen);
3740 +}
3741 +
3742 +#if HAVE_MBRTOWC
3743 +static int
3744 +different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate)
3745 +{
3746 + size_t i, j, chars;
3747 + const char *str[2];
3748 + char *copy[2];
3749 + size_t len[2];
3750 + mbstate_t state[2];
3751 + size_t mblength;
3752 + wchar_t wc, uwc;
3753 + mbstate_t state_bak;
3754 +
3755 + str[0] = old;
3756 + str[1] = new;
3757 + len[0] = oldlen;
3758 + len[1] = newlen;
3759 + state[0] = oldstate;
3760 + state[1] = newstate;
3761 +
3762 + for (i = 0; i < 2; i++)
3763 + {
3764 + copy[i] = alloca (len[i] + 1);
3765 +
3766 + for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++)
3767 + {
3768 + state_bak = state[i];
3769 + mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i]));
3770 +
3771 + switch (mblength)
3772 + {
3773 + case (size_t)-1:
3774 + case (size_t)-2:
3775 + state[i] = state_bak;
3776 + /* Fall through */
3777 + case 0:
3778 + mblength = 1;
3779 + break;
3780 +
3781 + default:
3782 + if (ignore_case)
3783 + {
3784 + uwc = towupper (wc);
3785 +
3786 + if (uwc != wc)
3787 + {
3788 + mbstate_t state_wc;
3789 +
3790 + memset (&state_wc, '\0', sizeof(mbstate_t));
3791 + wcrtomb (copy[i] + j, uwc, &state_wc);
3792 + }
3793 + else
3794 + memcpy (copy[i] + j, str[i] + j, mblength);
3795 + }
3796 + else
3797 + memcpy (copy[i] + j, str[i] + j, mblength);
3798 + }
3799 + j += mblength;
3800 + }
3801 + copy[i][j] = '\0';
3802 + len[i] = j;
3803 + }
3804 +
3805 + return xmemcoll (copy[0], len[0], copy[1], len[1]);
3806 }
3807 +#endif
3808
3809 /* Output the line in linebuffer LINE to standard output
3810 provided that the switches say it should be output.
3811 @@ -303,15 +486,43 @@ check_file (const char *infile, const ch
3812 {
3813 char *prevfield IF_LINT (= NULL);
3814 size_t prevlen IF_LINT (= 0);
3815 +#if HAVE_MBRTOWC
3816 + mbstate_t prevstate;
3817 +
3818 + memset (&prevstate, '\0', sizeof (mbstate_t));
3819 +#endif
3820
3821 while (!feof (stdin))
3822 {
3823 char *thisfield;
3824 size_t thislen;
3825 +#if HAVE_MBRTOWC
3826 + mbstate_t thisstate;
3827 +#endif
3828 +
3829 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
3830 break;
3831 thisfield = find_field (thisline);
3832 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
3833 +#if HAVE_MBRTOWC
3834 + if (MB_CUR_MAX > 1)
3835 + {
3836 + thisstate = thisline->state;
3837 +
3838 + if (prevline->length == 0 || different_multi
3839 + (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
3840 + {
3841 + fwrite (thisline->buffer, sizeof (char),
3842 + thisline->length, stdout);
3843 +
3844 + SWAP_LINES (prevline, thisline);
3845 + prevfield = thisfield;
3846 + prevlen = thislen;
3847 + prevstate = thisstate;
3848 + }
3849 + }
3850 + else
3851 +#endif
3852 if (prevline->length == 0
3853 || different (thisfield, prevfield, thislen, prevlen))
3854 {
3855 @@ -330,17 +541,26 @@ check_file (const char *infile, const ch
3856 size_t prevlen;
3857 uintmax_t match_count = 0;
3858 bool first_delimiter = true;
3859 +#if HAVE_MBRTOWC
3860 + mbstate_t prevstate;
3861 +#endif
3862
3863 if (readlinebuffer_delim (prevline, stdin, delimiter) == 0)
3864 goto closefiles;
3865 prevfield = find_field (prevline);
3866 prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
3867 +#if HAVE_MBRTOWC
3868 + prevstate = prevline->state;
3869 +#endif
3870
3871 while (!feof (stdin))
3872 {
3873 bool match;
3874 char *thisfield;
3875 size_t thislen;
3876 +#if HAVE_MBRTOWC
3877 + mbstate_t thisstate;
3878 +#endif
3879 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
3880 {
3881 if (ferror (stdin))
3882 @@ -349,6 +569,15 @@ check_file (const char *infile, const ch
3883 }
3884 thisfield = find_field (thisline);
3885 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
3886 +#if HAVE_MBRTOWC
3887 + if (MB_CUR_MAX > 1)
3888 + {
3889 + thisstate = thisline->state;
3890 + match = !different_multi (thisfield, prevfield,
3891 + thislen, prevlen, thisstate, prevstate);
3892 + }
3893 + else
3894 +#endif
3895 match = !different (thisfield, prevfield, thislen, prevlen);
3896 match_count += match;
3897
3898 @@ -381,6 +610,9 @@ check_file (const char *infile, const ch
3899 SWAP_LINES (prevline, thisline);
3900 prevfield = thisfield;
3901 prevlen = thislen;
3902 +#if HAVE_MBRTOWC
3903 + prevstate = thisstate;
3904 +#endif
3905 if (!match)
3906 match_count = 0;
3907 }
3908 @@ -426,6 +658,19 @@ main (int argc, char **argv)
3909
3910 atexit (close_stdout);
3911
3912 +#if HAVE_MBRTOWC
3913 + if (MB_CUR_MAX > 1)
3914 + {
3915 + find_field = find_field_multi;
3916 + }
3917 + else
3918 +#endif
3919 + {
3920 + find_field = find_field_uni;
3921 + }
3922 +
3923 +
3924 +
3925 skip_chars = 0;
3926 skip_fields = 0;
3927 check_chars = SIZE_MAX;
3928 diff -urNp coreutils-8.0-orig/tests/Makefile.in coreutils-8.0/tests/Makefile.in
3929 --- coreutils-8.0-orig/tests/Makefile.in 2009-09-29 16:25:44.000000000 +0200
3930 +++ coreutils-8.0/tests/Makefile.in 2009-10-07 10:07:16.000000000 +0200
3931 @@ -1126,6 +1126,7 @@ TESTS = \
3932 misc/sort-compress \
3933 misc/sort-continue \
3934 misc/sort-files0-from \
3935 + misc/sort-mb-tests \
3936 misc/sort-merge \
3937 misc/sort-merge-fdlimit \
3938 misc/sort-rand \
3939 @@ -1582,6 +1582,10 @@ TESTS = \
3940 $(root_tests)
3941
3942 pr_data = \
3943 + misc/mb1.X \
3944 + misc/mb1.I \
3945 + misc/mb2.X \
3946 + misc/mb2.I \
3947 pr/0F \
3948 pr/0FF \
3949 pr/0FFnt \
3950 diff -urNp coreutils-8.0-orig/tests/Makefile.am coreutils-8.0/tests/Makefile.am
3951 --- coreutils-8.0-orig/tests/Makefile.am 2009-09-29 16:25:44.000000000 +0200
3952 +++ coreutils-8.0/tests/Makefile.am 2009-10-07 10:07:16.000000000 +0200
3953 @@ -208,6 +208,7 @@ TESTS = \
3954 misc/sort-compress \
3955 misc/sort-continue \
3956 misc/sort-files0-from \
3957 + misc/sort-mb-tests \
3958 misc/sort-merge \
3959 misc/sort-merge-fdlimit \
3960 misc/sort-rand \
3961 @@ -452,6 +453,10 @@ TESTS = \
3962 $(root_tests)
3963
3964 pr_data = \
3965 + misc/mb1.X \
3966 + misc/mb1.I \
3967 + misc/mb2.X \
3968 + misc/mb2.I \
3969 pr/0F \
3970 pr/0FF \
3971 pr/0FFnt \
3972 diff -urNp coreutils-8.0-orig/tests/misc/cut coreutils-8.0/tests/misc/cut
3973 --- coreutils-8.0-orig/tests/misc/cut 2009-09-21 14:29:33.000000000 +0200
3974 +++ coreutils-8.0/tests/misc/cut 2009-10-07 10:07:16.000000000 +0200
3975 @@ -26,7 +26,7 @@ use strict;
3976 my $prog = 'cut';
3977 my $try = "Try \`$prog --help' for more information.\n";
3978 my $from_1 = "$prog: fields and positions are numbered from 1\n$try";
3979 -my $inval = "$prog: invalid byte or field list\n$try";
3980 +my $inval = "$prog: invalid byte, character or field list\n$try";
3981 my $no_endpoint = "$prog: invalid range with no endpoint: -\n$try";
3982
3983 my @Tests =
3984 @@ -141,7 +141,7 @@ my @Tests =
3985
3986 # None of the following invalid ranges provoked an error up to coreutils-6.9.
3987 ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1},
3988 - {ERR=>"$prog: invalid decreasing range\n$try"}],
3989 + {ERR=>"$prog: invalid byte, character or field list\n$try"}],
3990 ['inval2', qw(-f -), {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
3991 ['inval3', '-f', '4,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
3992 ['inval4', '-f', '1-2,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
3993 diff -urNp coreutils-8.0-orig/tests/misc/mb1.I coreutils-8.0/tests/misc/mb1.I
3994 --- coreutils-8.0-orig/tests/misc/mb1.I 1970-01-01 01:00:00.000000000 +0100
3995 +++ coreutils-8.0/tests/misc/mb1.I 2009-10-07 10:07:16.000000000 +0200
3996 @@ -0,0 +1,4 @@
3997 +Apple＠10
3998 +Banana＠5
3999 +Citrus＠20
4000 +Cherry＠30
4001 diff -urNp coreutils-8.0-orig/tests/misc/mb1.X coreutils-8.0/tests/misc/mb1.X
4002 --- coreutils-8.0-orig/tests/misc/mb1.X 1970-01-01 01:00:00.000000000 +0100
4003 +++ coreutils-8.0/tests/misc/mb1.X 2009-10-07 10:07:16.000000000 +0200
4004 @@ -0,0 +1,4 @@
4005 +Banana＠5
4006 +Apple＠10
4007 +Citrus＠20
4008 +Cherry＠30
4009 diff -urNp coreutils-8.0-orig/tests/misc/mb2.I coreutils-8.0/tests/misc/mb2.I
4010 --- coreutils-8.0-orig/tests/misc/mb2.I 1970-01-01 01:00:00.000000000 +0100
4011 +++ coreutils-8.0/tests/misc/mb2.I 2009-10-07 10:07:16.000000000 +0200
4012 @@ -0,0 +1,4 @@
4013 +Apple＠AA10＠＠20
4014 +Banana＠AA5＠＠30
4015 +Citrus＠AA20＠＠5
4016 +Cherry＠AA30＠＠10
4017 diff -urNp coreutils-8.0-orig/tests/misc/mb2.X coreutils-8.0/tests/misc/mb2.X
4018 --- coreutils-8.0-orig/tests/misc/mb2.X 1970-01-01 01:00:00.000000000 +0100
4019 +++ coreutils-8.0/tests/misc/mb2.X 2009-10-07 10:07:16.000000000 +0200
4020 @@ -0,0 +1,4 @@
4021 +Citrus＠AA20＠＠5
4022 +Cherry＠AA30＠＠10
4023 +Apple＠AA10＠＠20
4024 +Banana＠AA5＠＠30
4025 diff -urNp coreutils-8.0-orig/tests/misc/sort-mb-tests coreutils-8.0/tests/misc/sort-mb-tests
4026 --- coreutils-8.0-orig/tests/misc/sort-mb-tests 1970-01-01 01:00:00.000000000 +0100
4027 +++ coreutils-8.0/tests/misc/sort-mb-tests 2009-10-07 10:07:16.000000000 +0200
4028 @@ -0,0 +1,58 @@
4029 +#! /bin/sh
4030 +case $# in
4031 + 0) xx='../src/sort';;
4032 + *) xx="$1";;
4033 +esac
4034 +test "$VERBOSE" && echo=echo || echo=:
4035 +$echo testing program: $xx
4036 +errors=0
4037 +test "$srcdir" || srcdir=.
4038 +test "$VERBOSE" && $xx --version 2> /dev/null
4039 +
4040 +export LC_ALL=en_US.UTF-8
4041 +locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77
4042 +errors=0
4043 +
4044 +$xx -t ＠ -k2 -n misc/mb1.I > misc/mb1.O
4045 +code=$?
4046 +if test $code != 0; then
4047 + $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2
4048 + errors=`expr $errors + 1`
4049 +else
4050 + cmp misc/mb1.O $srcdir/misc/mb1.X > /dev/null 2>&1
4051 + case $? in
4052 + 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;;
4053 + 1) $echo "Test mb1 failed: files misc/mb1.O and $srcdir/misc/mb1.X differ" 1>&2
4054 + (diff -c misc/mb1.O $srcdir/misc/mb1.X) 2> /dev/null
4055 + errors=`expr $errors + 1`;;
4056 + 2) $echo "Test mb1 may have failed." 1>&2
4057 + $echo The command "cmp misc/mb1.O $srcdir/misc/mb1.X" failed. 1>&2
4058 + errors=`expr $errors + 1`;;
4059 + esac
4060 +fi
4061 +
4062 +$xx -t ＠ -k4 -n misc/mb2.I > misc/mb2.O
4063 +code=$?
4064 +if test $code != 0; then
4065 + $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2
4066 + errors=`expr $errors + 1`
4067 +else
4068 + cmp misc/mb2.O $srcdir/misc/mb2.X > /dev/null 2>&1
4069 + case $? in
4070 + 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;;
4071 + 1) $echo "Test mb2 failed: files misc/mb2.O and $srcdir/misc/mb2.X differ" 1>&2
4072 + (diff -c misc/mb2.O $srcdir/misc/mb2.X) 2> /dev/null
4073 + errors=`expr $errors + 1`;;
4074 + 2) $echo "Test mb2 may have failed." 1>&2
4075 + $echo The command "cmp misc/mb2.O $srcdir/misc/mb2.X" failed. 1>&2
4076 + errors=`expr $errors + 1`;;
4077 + esac
4078 +fi
4079 +
4080 +if test $errors = 0; then
4081 + $echo Passed all 113 tests. 1>&2
4082 +else
4083 + $echo Failed $errors tests. 1>&2
4084 +fi
4085 +test $errors = 0 || errors=1
4086 +exit $errors
4087
4088
4089
4090 1.1 src/patchsets/coreutils/8.3/003_all_coreutils-gentoo-uname.patch
4091
4092 file : http://sources.gentoo.org/viewcvs.py/gentoo/src/patchsets/coreutils/8.3/003_all_coreutils-gentoo-uname.patch?rev=1.1&view=markup
4093 plain: http://sources.gentoo.org/viewcvs.py/gentoo/src/patchsets/coreutils/8.3/003_all_coreutils-gentoo-uname.patch?rev=1.1&content-type=text/plain
4094
4095 Index: 003_all_coreutils-gentoo-uname.patch
4096 ===================================================================
4097 On linux platforms, grok /proc/cpuinfo for the CPU/vendor info.
4098
4099 Probably not suitable for upstream, since it is 100% Linux-specific:
4100 http://lists.gnu.org/archive/html/bug-coreutils/2005-09/msg00063.html
4101
4102 Patch originally by Carlos E. Gorges <carlos@×××××××××××××.br>, but
4103 heavily reworked to suck less.
4104
4105 To add support for additional platforms, check out the show_cpuinfo()
4106 func in the linux/arch/<ARCH>/ source tree of the kernel.
4107
4108 --- coreutils/src/uname.c
4109 +++ coreutils/src/uname.c
4110 @@ -50,6 +50,11 @@
4111 # include <mach-o/arch.h>
4112 #endif
4113
4114 +#if defined(__linux__)
4115 +# define USE_PROCINFO
4116 +# define UNAME_HARDWARE_PLATFORM
4117 +#endif
4118 +
4119 #include "system.h"
4120 #include "error.h"
4121 #include "quote.h"
4122 @@ -138,6 +143,117 @@
4123 exit (status);
4124 }
4125
4126 +#if defined(USE_PROCINFO)
4127 +
4128 +# if defined(__s390__) || defined(__s390x__)
4129 +# define CPUINFO_FILE "/proc/sysinfo"
4130 +# define CPUINFO_FORMAT "%64[^\t :]%*[ :]%256[^\n]%c"
4131 +# else
4132 +# define CPUINFO_FILE "/proc/cpuinfo"
4133 +# define CPUINFO_FORMAT "%64[^\t:]\t:%256[^\n]%c"
4134 +# endif
4135 +
4136 +# define PROCINFO_PROCESSOR 0
4137 +# define PROCINFO_HARDWARE_PLATFORM 1
4138 +
4139 +static void __eat_cpuinfo_space(char *buf)
4140 +{
4141 + /* first eat trailing space */
4142 + char *tmp = buf + strlen(buf) - 1;
4143 + while (tmp > buf && isspace(*tmp))
4144 + *tmp-- = '\0';
4145 + /* then eat leading space */
4146 + tmp = buf;
4147 + while (*tmp && isspace(*tmp))
4148 + tmp++;
4149 + if (tmp != buf)
4150 + memmove(buf, tmp, strlen(tmp)+1);
4151 + /* finally collapse whitespace */
4152 + tmp = buf;
4153 + while (tmp[0] && tmp[1]) {
4154 + if (isspace(tmp[0]) && isspace(tmp[1])) {
4155 + memmove(tmp, tmp+1, strlen(tmp));
4156 + continue;
4157 + }
4158 + ++tmp;
4159 + }
4160 +}
4161 +
4162 +static int __linux_procinfo(int x, char *fstr, size_t s)
4163 +{
4164 + FILE *fp;
4165 +
4166 + char *procinfo_keys[] = {
4167 + /* --processor --hardware-platform */
4168 + #if defined(__alpha__)
4169 + "cpu model", "system type"
4170 + #elif defined(__arm__)
4171 + "Processor", "Hardware"
4172 + #elif defined(__avr32__)
4173 + "processor", "cpu family"
4174 + #elif defined(__bfin__)
4175 + "CPU", "BOARD Name"
4176 + #elif defined(__cris__)
4177 + "cpu", "cpu model"
4178 + #elif defined(__frv__)
4179 + "CPU-Core", "System"
4180 + #elif defined(__i386__) || defined(__x86_64__)
4181 + "model name", "vendor_id"
4182 + #elif defined(__ia64__)
4183 + "family", "vendor"
4184 + #elif defined(__hppa__)
4185 + "cpu", "model"
4186 + #elif defined(__m68k__)
4187 + "CPU", "MMU"
4188 + #elif defined(__mips__)
4189 + "cpu model", "system type"
4190 + #elif defined(__powerpc__) || defined(__powerpc64__)
4191 + "cpu", "machine"
4192 + #elif defined(__s390__) || defined(__s390x__)
4193 + "Type", "Manufacturer"
4194 + #elif defined(__sh__)
4195 + "cpu type", "machine"
4196 + #elif defined(sparc) || defined(__sparc__)
4197 + "type", "cpu"
4198 + #elif defined(__vax__)
4199 + "cpu type", "cpu"
4200 + #else
4201 + "unknown", "unknown"
4202 + #endif
4203 + };
4204 +
4205 + if ((fp = fopen(CPUINFO_FILE, "r")) != NULL) {
4206 + char key[65], value[257], eol, *ret = NULL;
4207 +
4208 + while (fscanf(fp, CPUINFO_FORMAT, key, value, &eol) != EOF) {
4209 + __eat_cpuinfo_space(key);
4210 + if (!strcmp(key, procinfo_keys[x])) {
4211 + __eat_cpuinfo_space(value);
4212 + ret = value;
4213 + break;
4214 + }
4215 + if (eol != '\n') {
4216 + /* we need two fscanf's here in case the previous
4217 + * length limit caused us to read right up to the
4218 + * newline ... doing "%*[^\n]\n" wont eat the newline
4219 + */
4220 + fscanf(fp, "%*[^\n]");
4221 + fscanf(fp, "\n");
4222 + }
4223 + }
4224 + fclose(fp);
4225 +
4226 + if (ret) {
4227 + strncpy(fstr, ret, s);
4228 + return 0;
4229 + }
4230 + }
4231 +
4232 + return -1;
4233 +}
4234 +
4235 +#endif
4236 +
4237 /* Print ELEMENT, preceded by a space if something has already been
4238 printed. */
4239
4240 @@ -250,10 +344,14 @@ main (int argc, char **argv)
4241 if (toprint & PRINT_PROCESSOR)
4242 {
4243 char const *element = unknown;
4244 -#if HAVE_SYSINFO && defined SI_ARCHITECTURE
4245 +#if ( HAVE_SYSINFO && defined SI_ARCHITECTURE ) || defined(USE_PROCINFO)
4246 {
4247 static char processor[257];
4248 +#if defined(USE_PROCINFO)
4249 + if (0 <= __linux_procinfo (PROCINFO_PROCESSOR, processor, sizeof processor))
4250 +#else
4251 if (0 <= sysinfo (SI_ARCHITECTURE, processor, sizeof processor))
4252 +#endif
4253 element = processor;
4254 }
4255 #endif
4256 @@ -306,9 +404,13 @@ main (int argc, char **argv)
4257 if (element == unknown)
4258 {
4259 static char hardware_platform[257];
4260 +#if defined(USE_PROCINFO)
4261 + if (0 <= __linux_procinfo (PROCINFO_HARDWARE_PLATFORM, hardware_platform, sizeof hardware_platform))
4262 +#else
4263 size_t s = sizeof hardware_platform;
4264 static int mib[] = { CTL_HW, UNAME_HARDWARE_PLATFORM };
4265 if (sysctl (mib, 2, hardware_platform, &s, 0, 0) >= 0)
4266 +#endif
4267 element = hardware_platform;
4268 }
4269 #endif
4270
4271
4272
4273 1.1 src/patchsets/coreutils/8.3/010_all_coreutils-tests.patch
4274
4275 file : http://sources.gentoo.org/viewcvs.py/gentoo/src/patchsets/coreutils/8.3/010_all_coreutils-tests.patch?rev=1.1&view=markup
4276 plain: http://sources.gentoo.org/viewcvs.py/gentoo/src/patchsets/coreutils/8.3/010_all_coreutils-tests.patch?rev=1.1&content-type=text/plain
4277
4278 Index: 010_all_coreutils-tests.patch
4279 ===================================================================
4280 This test only runs as non-root, so giving it temporary write access to the
4281 root dir is safe: normal Unix permissions will still deny the write (bug #259876).
4282 --- a/tests/touch/not-owner
4283 +++ b/tests/touch/not-owner
4284 @@ -39,6 +39,7 @@
4285 # Before fileutils-4.1, we'd get the following misleading
4286 # diagnostic instead of `...: Permission denied'.
4287 # touch: creating `/': Is a directory
4288 +env SANDBOX_WRITE=${SANDBOX_WRITE}:/ \
4289 touch / > out 2>&1 && fail=1
4290
4291 # On SunOS4, EPERM is `Not owner'.
4292
4293 the dd test looks up a device and tries to test seeking on it. it shouldn't
4294 cause any corruption because it uses a count of 0 and seeks past the end of
4295 the device.
4296 --- a/tests/dd/skip-seek-past-dev
4297 +++ b/tests/dd/skip-seek-past-dev
4298 @@ -53,6 +53,7 @@
4299 0+0 records out" > err_ok || framework_failure
4300 compare err_ok err || fail=1
4301
4302 +env SANDBOX_WRITE=${SANDBOX_WRITE}:$device \
4303 timeout 1 dd bs=1 seek=$DEV_OFLOW count=0 status=noxfer > "$device" 2> err
4304 test "$?" = "1" || fail=1
4305 echo "dd: \`standard output': cannot seek: Invalid argument
4306
4307
4308
4309 1.1 src/patchsets/coreutils/8.3/030_all_coreutils-more-dir-colors.patch
4310
4311 file : http://sources.gentoo.org/viewcvs.py/gentoo/src/patchsets/coreutils/8.3/030_all_coreutils-more-dir-colors.patch?rev=1.1&view=markup
4312 plain: http://sources.gentoo.org/viewcvs.py/gentoo/src/patchsets/coreutils/8.3/030_all_coreutils-more-dir-colors.patch?rev=1.1&content-type=text/plain
4313
4314 Index: 030_all_coreutils-more-dir-colors.patch
4315 ===================================================================
4316 --- coreutils-7.5/src/dircolors.hin
4317 +++ coreutils-7.5/src/dircolors.hin
4318 @@ -5,6 +5,9 @@
4319
4320 # The keywords COLOR, OPTIONS, and EIGHTBIT (honored by the
4321 # slackware version of dircolors) are recognized but ignored.
4322 +
4323 +# You can copy this file to .dir_colors in your $HOME directory to override
4324 +# the system defaults.
4325
4326 # Below, there should be one TERM entry for each termtype that is colorizable
4327 TERM Eterm
4328 @@ -66,7 +66,8 @@
4329 DOOR 01;35 # door
4330 BLK 40;33;01 # block device driver
4331 CHR 40;33;01 # character device driver
4332 -ORPHAN 40;31;01 # symlink to nonexistent file, or non-stat'able file
4333 +ORPHAN 01;05;37;41 # orphaned symlinks
4334 +MISSING 01;05;37;41 # ... and the files they point to
4335 SETUID 37;41 # file that is setuid (u+s)
4336 SETGID 30;43 # file that is setgid (g+s)
4337 STICKY_OTHER_WRITABLE 30;42 # dir that is sticky and other-writable (+t,o+w)
4338 @@ -125,6 +154,16 @@
4339 .xwd 01;35
4340 .yuv 01;35
4341
4342 +# Document files
4343 +.pdf 00;32
4344 +.ps 00;32
4345 +.txt 00;32
4346 +.patch 00;32
4347 +.diff 00;32
4348 +.log 00;32
4349 +.tex 00;32
4350 +.doc 00;32
4351 +
4352 # audio formats
4353 .aac 00;36
4354 .au 00;36
4355
4356
4357
4358 1.1 src/patchsets/coreutils/8.3/README.history
4359
4360 file : http://sources.gentoo.org/viewcvs.py/gentoo/src/patchsets/coreutils/8.3/README.history?rev=1.1&view=markup
4361 plain: http://sources.gentoo.org/viewcvs.py/gentoo/src/patchsets/coreutils/8.3/README.history?rev=1.1&content-type=text/plain
4362
4363 Index: README.history
4364 ===================================================================
4365 1 07.01.2010
4366 + 000_all_coreutils-i18n.patch
4367 + 003_all_coreutils-gentoo-uname.patch
4368 + 010_all_coreutils-tests.patch
4369 + 030_all_coreutils-more-dir-colors.patch