Ticket #2643: coreutils-8.5-i18n-1.patch
File coreutils-8.5-i18n-1.patch, 117.9 KB (added by , 15 years ago) |
---|
-
lib/linebuffer.h
Submitted by: William Immendorf <will.immendorf@gmail.com> Date: 2010-05-08 Initial Package Version: 8.5 Origin: Fedora CVS, where we usually get the patch. Upstream Status: Rejected by upstream. Description: Fixes some Coreutils issues with UTF-8. diff -Naur coreutils-8.5.orig/lib/linebuffer.h coreutils-8.5/lib/linebuffer.h
old new 21 21 22 22 # include <stdio.h> 23 23 24 /* Get mbstate_t. */ 25 # if HAVE_WCHAR_H 26 # include <wchar.h> 27 # endif 28 24 29 /* A `struct linebuffer' holds a line of text. */ 25 30 26 31 struct linebuffer … … 28 33 size_t size; /* Allocated. */ 29 34 size_t length; /* Used. */ 30 35 char *buffer; 36 # if HAVE_WCHAR_H 37 mbstate_t state; 38 # endif 31 39 }; 32 40 33 41 /* Initialize linebuffer LINEBUFFER for use. */ -
coreutils-8.5
diff -Naur coreutils-8.5.orig/src/cut.c coreutils-8.5/src/cut.c
old new 28 28 #include <assert.h> 29 29 #include <getopt.h> 30 30 #include <sys/types.h> 31 32 /* Get mbstate_t, mbrtowc(). */ 33 #if HAVE_WCHAR_H 34 # include <wchar.h> 35 #endif 31 36 #include "system.h" 32 37 33 38 #include "error.h" … … 36 41 #include "quote.h" 37 42 #include "xstrndup.h" 38 43 44 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 45 installation; work around this configuration error. */ 46 #if !defined MB_LEN_MAX || MB_LEN_MAX < 2 47 # undef MB_LEN_MAX 48 # define MB_LEN_MAX 16 49 #endif 50 51 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 52 #if HAVE_MBRTOWC && defined mbstate_t 53 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 54 #endif 55 39 56 /* The official name of this program (e.g., no `g' prefix). */ 40 57 #define PROGRAM_NAME "cut" 41 58 … … 71 88 } \ 72 89 while (0) 73 90 91 /* Refill the buffer BUF to get a multibyte character. */ 92 #define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \ 93 do \ 94 { \ 95 if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \ 96 { \ 97 memmove (BUF, BUFPOS, BUFLEN); \ 98 BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \ 99 BUFPOS = BUF; \ 100 } \ 101 } \ 102 while (0) 103 104 /* Get wide character on BUFPOS. BUFPOS is not included after that. 105 If byte sequence is not valid as a character, CONVFAIL is 1. Otherwise 0. */ 106 #define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \ 107 do \ 108 { \ 109 mbstate_t state_bak; \ 110 \ 111 if (BUFLEN < 1) \ 112 { \ 113 WC = WEOF; \ 114 break; \ 115 } \ 116 \ 117 /* Get a wide character. */ \ 118 CONVFAIL = 0; \ 119 state_bak = STATE; \ 120 MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \ 121 \ 122 switch (MBLENGTH) \ 123 { \ 124 case (size_t)-1: \ 125 case (size_t)-2: \ 126 CONVFAIL++; \ 127 STATE = state_bak; \ 128 /* Fall througn. 
*/ \ 129 \ 130 case 0: \ 131 MBLENGTH = 1; \ 132 break; \ 133 } \ 134 } \ 135 while (0) 136 74 137 struct range_pair 75 138 { 76 139 size_t lo; … … 89 152 /* The number of bytes allocated for FIELD_1_BUFFER. */ 90 153 static size_t field_1_bufsize; 91 154 92 /* The largest field or byteindex used as an endpoint of a closed155 /* The largest byte, character or field index used as an endpoint of a closed 93 156 or degenerate range specification; this doesn't include the starting 94 157 index of right-open-ended ranges. For example, with either range spec 95 158 `2-5,9-', `2-3,5,9-' this variable would be set to 5. */ … … 101 164 102 165 /* This is a bit vector. 103 166 In byte mode, which bytes to output. 167 In character mode, which characters to output. 104 168 In field mode, which DELIM-separated fields to output. 105 B oth bytes and fields are numbered starting with 1,169 Bytes, characters and fields are numbered starting with 1, 106 170 so the zeroth bit of this array is unused. 107 A field or byteK has been selected if171 A byte, character or field K has been selected if 108 172 (K <= MAX_RANGE_ENDPOINT and is_printable_field(K)) 109 173 || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */ 110 174 static unsigned char *printable_field; … … 113 177 { 114 178 undefined_mode, 115 179 116 /* Output characters that are in the given bytes. */180 /* Output bytes that are at the given positions. */ 117 181 byte_mode, 118 182 183 /* Output characters that are at the given positions. */ 184 character_mode, 185 119 186 /* Output the given delimeter-separated fields. */ 120 187 field_mode 121 188 }; 122 189 123 190 static enum operating_mode operating_mode; 124 191 192 /* If nonzero, when in byte mode, don't split multibyte characters. */ 193 static int byte_mode_character_aware; 194 195 /* If nonzero, the function for single byte locale is work 196 if this program runs on multibyte locale. 
*/ 197 static int force_singlebyte_mode; 198 125 199 /* If true do not output lines containing no delimeter characters. 126 200 Otherwise, all such lines are printed. This option is valid only 127 201 with field mode. */ … … 133 207 134 208 /* The delimeter character for field mode. */ 135 209 static unsigned char delim; 210 #if HAVE_WCHAR_H 211 static wchar_t wcdelim; 212 #endif 136 213 137 214 /* True if the --output-delimiter=STRING option was specified. */ 138 215 static bool output_delimiter_specified; … … 206 283 -f, --fields=LIST select only these fields; also print any line\n\ 207 284 that contains no delimiter character, unless\n\ 208 285 the -s option is specified\n\ 209 -n (ignored)\n\286 -n with -b: don't split multibyte characters\n\ 210 287 "), stdout); 211 288 fputs (_("\ 212 289 --complement complement the set of selected bytes, characters\n\ … … 365 442 in_digits = false; 366 443 /* Starting a range. */ 367 444 if (dash_found) 368 FATAL_ERROR (_("invalid byte or field list"));445 FATAL_ERROR (_("invalid byte, character or field list")); 369 446 dash_found = true; 370 447 fieldstr++; 371 448 … … 389 466 if (!rhs_specified) 390 467 { 391 468 /* `n-'. From `initial' to end of line. */ 392 eol_range_start = initial; 469 if (eol_range_start == 0 || 470 (eol_range_start != 0 && eol_range_start > initial)) 471 eol_range_start = initial; 393 472 field_found = true; 394 473 } 395 474 else 396 475 { 397 476 /* `m-n' or `-n' (1-n). */ 398 477 if (value < initial) 399 FATAL_ERROR (_("invalid decreasing range"));478 FATAL_ERROR (_("invalid byte, character or field list")); 400 479 401 480 /* Is there already a range going to end of line? 
*/ 402 481 if (eol_range_start != 0) … … 476 555 if (operating_mode == byte_mode) 477 556 error (0, 0, 478 557 _("byte offset %s is too large"), quote (bad_num)); 558 else if (operating_mode == character_mode) 559 error (0, 0, 560 _("character offset %s is too large"), quote (bad_num)); 479 561 else 480 562 error (0, 0, 481 563 _("field number %s is too large"), quote (bad_num)); … … 486 568 fieldstr++; 487 569 } 488 570 else 489 FATAL_ERROR (_("invalid byte or field list"));571 FATAL_ERROR (_("invalid byte, character or field list")); 490 572 } 491 573 492 574 max_range_endpoint = 0; … … 579 661 } 580 662 } 581 663 664 #if HAVE_MBRTOWC 665 /* This function is in use for the following case. 666 667 1. Read from the stream STREAM, printing to standard output any selected 668 characters. 669 670 2. Read from stream STREAM, printing to standard output any selected bytes, 671 without splitting multibyte characters. */ 672 673 static void 674 cut_characters_or_cut_bytes_no_split (FILE *stream) 675 { 676 int idx; /* number of bytes or characters in the line so far. */ 677 char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 678 char *bufpos; /* Next read position of BUF. */ 679 size_t buflen; /* The length of the byte sequence in buf. */ 680 wint_t wc; /* A gotten wide character. */ 681 size_t mblength; /* The byte size of a multibyte character which shows 682 as same character as WC. */ 683 mbstate_t state; /* State of the stream. */ 684 int convfail; /* 1, when conversion is failed. Otherwise 0. 
*/ 685 686 idx = 0; 687 buflen = 0; 688 bufpos = buf; 689 memset (&state, '\0', sizeof(mbstate_t)); 690 691 while (1) 692 { 693 REFILL_BUFFER (buf, bufpos, buflen, stream); 694 695 GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail); 696 697 if (wc == WEOF) 698 { 699 if (idx > 0) 700 putchar ('\n'); 701 break; 702 } 703 else if (wc == L'\n') 704 { 705 putchar ('\n'); 706 idx = 0; 707 } 708 else 709 { 710 idx += (operating_mode == byte_mode) ? mblength : 1; 711 if (print_kth (idx, NULL)) 712 fwrite (bufpos, mblength, sizeof(char), stdout); 713 } 714 715 buflen -= mblength; 716 bufpos += mblength; 717 } 718 } 719 #endif 720 582 721 /* Read from stream STREAM, printing to standard output any selected fields. */ 583 722 584 723 static void … … 701 840 } 702 841 } 703 842 843 #if HAVE_MBRTOWC 844 static void 845 cut_fields_mb (FILE *stream) 846 { 847 int c; 848 unsigned int field_idx; 849 int found_any_selected_field; 850 int buffer_first_field; 851 int empty_input; 852 char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 853 char *bufpos; /* Next read position of BUF. */ 854 size_t buflen; /* The length of the byte sequence in buf. */ 855 wint_t wc = 0; /* A gotten wide character. */ 856 size_t mblength; /* The byte size of a multibyte character which shows 857 as same character as WC. */ 858 mbstate_t state; /* State of the stream. */ 859 int convfail; /* 1, when conversion is failed. Otherwise 0. */ 860 861 found_any_selected_field = 0; 862 field_idx = 1; 863 bufpos = buf; 864 buflen = 0; 865 memset (&state, '\0', sizeof(mbstate_t)); 866 867 c = getc (stream); 868 empty_input = (c == EOF); 869 if (c != EOF) 870 ungetc (c, stream); 871 else 872 wc = WEOF; 873 874 /* To support the semantics of the -s flag, we may have to buffer 875 all of the first field to determine whether it is `delimited.' 
876 But that is unnecessary if all non-delimited lines must be printed 877 and the first field has been selected, or if non-delimited lines 878 must be suppressed and the first field has *not* been selected. 879 That is because a non-delimited line has exactly one field. */ 880 buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL)); 881 882 while (1) 883 { 884 if (field_idx == 1 && buffer_first_field) 885 { 886 int len = 0; 887 888 while (1) 889 { 890 REFILL_BUFFER (buf, bufpos, buflen, stream); 891 892 GET_NEXT_WC_FROM_BUFFER 893 (wc, bufpos, buflen, mblength, state, convfail); 894 895 if (wc == WEOF) 896 break; 897 898 field_1_buffer = xrealloc (field_1_buffer, len + mblength); 899 memcpy (field_1_buffer + len, bufpos, mblength); 900 len += mblength; 901 buflen -= mblength; 902 bufpos += mblength; 903 904 if (!convfail && (wc == L'\n' || wc == wcdelim)) 905 break; 906 } 907 908 if (wc == WEOF) 909 break; 910 911 /* If the first field extends to the end of line (it is not 912 delimited) and we are printing all non-delimited lines, 913 print this one. */ 914 if (convfail || (!convfail && wc != wcdelim)) 915 { 916 if (suppress_non_delimited) 917 { 918 /* Empty. */ 919 } 920 else 921 { 922 fwrite (field_1_buffer, sizeof (char), len, stdout); 923 /* Make sure the output line is newline terminated. */ 924 if (convfail || (!convfail && wc != L'\n')) 925 putchar ('\n'); 926 } 927 continue; 928 } 929 930 if (print_kth (1, NULL)) 931 { 932 /* Print the field, but not the trailing delimiter. 
*/ 933 fwrite (field_1_buffer, sizeof (char), len - 1, stdout); 934 found_any_selected_field = 1; 935 } 936 ++field_idx; 937 } 938 939 if (wc != WEOF) 940 { 941 if (print_kth (field_idx, NULL)) 942 { 943 if (found_any_selected_field) 944 { 945 fwrite (output_delimiter_string, sizeof (char), 946 output_delimiter_length, stdout); 947 } 948 found_any_selected_field = 1; 949 } 950 951 while (1) 952 { 953 REFILL_BUFFER (buf, bufpos, buflen, stream); 954 955 GET_NEXT_WC_FROM_BUFFER 956 (wc, bufpos, buflen, mblength, state, convfail); 957 958 if (wc == WEOF) 959 break; 960 else if (!convfail && (wc == wcdelim || wc == L'\n')) 961 { 962 buflen -= mblength; 963 bufpos += mblength; 964 break; 965 } 966 967 if (print_kth (field_idx, NULL)) 968 fwrite (bufpos, mblength, sizeof(char), stdout); 969 970 buflen -= mblength; 971 bufpos += mblength; 972 } 973 } 974 975 if ((!convfail || wc == L'\n') && buflen < 1) 976 wc = WEOF; 977 978 if (!convfail && wc == wcdelim) 979 ++field_idx; 980 else if (wc == WEOF || (!convfail && wc == L'\n')) 981 { 982 if (found_any_selected_field 983 || (!empty_input && !(suppress_non_delimited && field_idx == 1))) 984 putchar ('\n'); 985 if (wc == WEOF) 986 break; 987 field_idx = 1; 988 found_any_selected_field = 0; 989 } 990 } 991 } 992 #endif 993 704 994 static void 705 995 cut_stream (FILE *stream) 706 996 { 707 if (operating_mode == byte_mode) 708 cut_bytes (stream); 997 #if HAVE_MBRTOWC 998 if (MB_CUR_MAX > 1 && !force_singlebyte_mode) 999 { 1000 switch (operating_mode) 1001 { 1002 case byte_mode: 1003 if (byte_mode_character_aware) 1004 cut_characters_or_cut_bytes_no_split (stream); 1005 else 1006 cut_bytes (stream); 1007 break; 1008 1009 case character_mode: 1010 cut_characters_or_cut_bytes_no_split (stream); 1011 break; 1012 1013 case field_mode: 1014 cut_fields_mb (stream); 1015 break; 1016 1017 default: 1018 abort (); 1019 } 1020 } 709 1021 else 710 cut_fields (stream); 1022 #endif 1023 { 1024 if (operating_mode == field_mode) 1025 
cut_fields (stream); 1026 else 1027 cut_bytes (stream); 1028 } 711 1029 } 712 1030 713 1031 /* Process file FILE to standard output. … … 757 1075 bool ok; 758 1076 bool delim_specified = false; 759 1077 char *spec_list_string IF_LINT (= NULL); 1078 char mbdelim[MB_LEN_MAX + 1]; 1079 size_t delimlen = 0; 760 1080 761 1081 initialize_main (&argc, &argv); 762 1082 set_program_name (argv[0]); … … 779 1099 switch (optc) 780 1100 { 781 1101 case 'b': 782 case 'c':783 1102 /* Build the byte list. */ 784 1103 if (operating_mode != undefined_mode) 785 1104 FATAL_ERROR (_("only one type of list may be specified")); … … 787 1106 spec_list_string = optarg; 788 1107 break; 789 1108 1109 case 'c': 1110 /* Build the character list. */ 1111 if (operating_mode != undefined_mode) 1112 FATAL_ERROR (_("only one type of list may be specified")); 1113 operating_mode = character_mode; 1114 spec_list_string = optarg; 1115 break; 1116 790 1117 case 'f': 791 1118 /* Build the field list. */ 792 1119 if (operating_mode != undefined_mode) … … 798 1125 case 'd': 799 1126 /* New delimiter. */ 800 1127 /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */ 801 if (optarg[0] != '\0' && optarg[1] != '\0') 802 FATAL_ERROR (_("the delimiter must be a single character")); 803 delim = optarg[0]; 804 delim_specified = true; 1128 { 1129 #if HAVE_MBRTOWC 1130 if(MB_CUR_MAX > 1) 1131 { 1132 mbstate_t state; 1133 1134 memset (&state, '\0', sizeof(mbstate_t)); 1135 delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state); 1136 1137 if (delimlen == (size_t)-1 || delimlen == (size_t)-2) 1138 ++force_singlebyte_mode; 1139 else 1140 { 1141 delimlen = (delimlen < 1) ? 
1 : delimlen; 1142 if (wcdelim != L'\0' && *(optarg + delimlen) != '\0') 1143 FATAL_ERROR (_("the delimiter must be a single character")); 1144 memcpy (mbdelim, optarg, delimlen); 1145 } 1146 } 1147 1148 if (MB_CUR_MAX <= 1 || force_singlebyte_mode) 1149 #endif 1150 { 1151 if (optarg[0] != '\0' && optarg[1] != '\0') 1152 FATAL_ERROR (_("the delimiter must be a single character")); 1153 delim = (unsigned char) optarg[0]; 1154 } 1155 delim_specified = true; 1156 } 805 1157 break; 806 1158 807 1159 case OUTPUT_DELIMITER_OPTION: … … 814 1166 break; 815 1167 816 1168 case 'n': 1169 byte_mode_character_aware = 1; 817 1170 break; 818 1171 819 1172 case 's': … … 836 1189 if (operating_mode == undefined_mode) 837 1190 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields")); 838 1191 839 if (delim != '\0'&& operating_mode != field_mode)1192 if (delim_specified && operating_mode != field_mode) 840 1193 FATAL_ERROR (_("an input delimiter may be specified only\ 841 1194 when operating on fields")); 842 1195 … … 863 1216 } 864 1217 865 1218 if (!delim_specified) 866 delim = '\t'; 1219 { 1220 delim = '\t'; 1221 #ifdef HAVE_MBRTOWC 1222 wcdelim = L'\t'; 1223 mbdelim[0] = '\t'; 1224 mbdelim[1] = '\0'; 1225 delimlen = 1; 1226 #endif 1227 } 867 1228 868 1229 if (output_delimiter_string == NULL) 869 1230 { 870 static char dummy[2]; 871 dummy[0] = delim; 872 dummy[1] = '\0'; 873 output_delimiter_string = dummy; 874 output_delimiter_length = 1; 1231 #ifdef HAVE_MBRTOWC 1232 if (MB_CUR_MAX > 1 && !force_singlebyte_mode) 1233 { 1234 output_delimiter_string = xstrdup(mbdelim); 1235 output_delimiter_length = delimlen; 1236 } 1237 1238 if (MB_CUR_MAX <= 1 || force_singlebyte_mode) 1239 #endif 1240 { 1241 static char dummy[2]; 1242 dummy[0] = delim; 1243 dummy[1] = '\0'; 1244 output_delimiter_string = dummy; 1245 output_delimiter_length = 1; 1246 } 875 1247 } 876 1248 877 1249 if (optind == argc) -
src/expand.c
diff -Naur coreutils-8.5.orig/src/expand.c coreutils-8.5/src/expand.c
old new 38 38 #include <stdio.h> 39 39 #include <getopt.h> 40 40 #include <sys/types.h> 41 42 /* Get mbstate_t, mbrtowc(), wcwidth(). */ 43 #if HAVE_WCHAR_H 44 # include <wchar.h> 45 #endif 46 41 47 #include "system.h" 42 48 #include "error.h" 43 49 #include "quote.h" 44 50 #include "xstrndup.h" 45 51 52 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 53 installation; work around this configuration error. */ 54 #if !defined MB_LEN_MAX || MB_LEN_MAX < 2 55 # define MB_LEN_MAX 16 56 #endif 57 58 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 59 #if HAVE_MBRTOWC && defined mbstate_t 60 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 61 #endif 62 46 63 /* The official name of this program (e.g., no `g' prefix). */ 47 64 #define PROGRAM_NAME "expand" 48 65 … … 358 375 } 359 376 } 360 377 378 #if HAVE_MBRTOWC 379 static void 380 expand_multibyte (void) 381 { 382 FILE *fp; /* Input strem. */ 383 mbstate_t i_state; /* Current shift state of the input stream. */ 384 mbstate_t i_state_bak; /* Back up the I_STATE. */ 385 mbstate_t o_state; /* Current shift state of the output stream. */ 386 char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 387 char *bufpos; /* Next read position of BUF. */ 388 size_t buflen = 0; /* The length of the byte sequence in buf. */ 389 wchar_t wc; /* A gotten wide character. */ 390 size_t mblength; /* The byte size of a multibyte character 391 which shows as same character as WC. */ 392 int tab_index = 0; /* Index in `tab_list' of next tabstop. */ 393 int column = 0; /* Column on screen of the next char. */ 394 int next_tab_column; /* Column the next tab stop is on. */ 395 int convert = 1; /* If nonzero, perform translations. */ 396 397 fp = next_file ((FILE *) NULL); 398 if (fp == NULL) 399 return; 400 401 memset (&o_state, '\0', sizeof(mbstate_t)); 402 memset (&i_state, '\0', sizeof(mbstate_t)); 403 404 for (;;) 405 { 406 /* Refill the buffer BUF. 
*/ 407 if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp)) 408 { 409 memmove (buf, bufpos, buflen); 410 buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp); 411 bufpos = buf; 412 } 413 414 /* No character is left in BUF. */ 415 if (buflen < 1) 416 { 417 fp = next_file (fp); 418 419 if (fp == NULL) 420 break; /* No more files. */ 421 else 422 { 423 memset (&i_state, '\0', sizeof(mbstate_t)); 424 continue; 425 } 426 } 427 428 /* Get a wide character. */ 429 i_state_bak = i_state; 430 mblength = mbrtowc (&wc, bufpos, buflen, &i_state); 431 432 switch (mblength) 433 { 434 case (size_t)-1: /* illegal byte sequence. */ 435 case (size_t)-2: 436 mblength = 1; 437 i_state = i_state_bak; 438 if (convert) 439 { 440 ++column; 441 if (convert_entire_line == 0) 442 convert = 0; 443 } 444 putchar (*bufpos); 445 break; 446 447 case 0: /* null. */ 448 mblength = 1; 449 if (convert && convert_entire_line == 0) 450 convert = 0; 451 putchar ('\0'); 452 break; 453 454 default: 455 if (wc == L'\n') /* LF. */ 456 { 457 tab_index = 0; 458 column = 0; 459 convert = 1; 460 putchar ('\n'); 461 } 462 else if (wc == L'\t' && convert) /* Tab. */ 463 { 464 if (tab_size == 0) 465 { 466 /* Do not let tab_index == first_free_tab; 467 stop when it is 1 less. */ 468 while (tab_index < first_free_tab - 1 469 && column >= tab_list[tab_index]) 470 tab_index++; 471 next_tab_column = tab_list[tab_index]; 472 if (tab_index < first_free_tab - 1) 473 tab_index++; 474 if (column >= next_tab_column) 475 next_tab_column = column + 1; 476 } 477 else 478 next_tab_column = column + tab_size - column % tab_size; 479 480 while (column < next_tab_column) 481 { 482 putchar (' '); 483 ++column; 484 } 485 } 486 else /* Others. */ 487 { 488 if (convert) 489 { 490 if (wc == L'\b') 491 { 492 if (column > 0) 493 --column; 494 } 495 else 496 { 497 int width; /* The width of WC. */ 498 499 width = wcwidth (wc); 500 column += (width > 0) ? 
width : 0; 501 if (convert_entire_line == 0) 502 convert = 0; 503 } 504 } 505 fwrite (bufpos, sizeof(char), mblength, stdout); 506 } 507 } 508 buflen -= mblength; 509 bufpos += mblength; 510 } 511 } 512 #endif 513 361 514 int 362 515 main (int argc, char **argv) 363 516 { … … 422 575 423 576 file_list = (optind < argc ? &argv[optind] : stdin_argv); 424 577 425 expand (); 578 #if HAVE_MBRTOWC 579 if (MB_CUR_MAX > 1) 580 expand_multibyte (); 581 else 582 #endif 583 expand (); 426 584 427 585 if (have_read_stdin && fclose (stdin) != 0) 428 586 error (EXIT_FAILURE, errno, "-"); -
coreutils-8.5
diff -Naur coreutils-8.5.orig/src/fold.c coreutils-8.5/src/fold.c
old new 22 22 #include <getopt.h> 23 23 #include <sys/types.h> 24 24 25 /* Get mbstate_t, mbrtowc(), wcwidth(). */ 26 #if HAVE_WCHAR_H 27 # include <wchar.h> 28 #endif 29 30 /* Get iswprint(), iswblank(), wcwidth(). */ 31 #if HAVE_WCTYPE_H 32 # include <wctype.h> 33 #endif 34 25 35 #include "system.h" 26 36 #include "error.h" 27 37 #include "quote.h" 28 38 #include "xstrtol.h" 29 39 40 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 41 installation; work around this configuration error. */ 42 #if !defined MB_LEN_MAX || MB_LEN_MAX < 2 43 # undef MB_LEN_MAX 44 # define MB_LEN_MAX 16 45 #endif 46 47 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 48 #if HAVE_MBRTOWC && defined mbstate_t 49 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 50 #endif 51 30 52 #define TAB_WIDTH 8 31 53 32 54 /* The official name of this program (e.g., no `g' prefix). */ … … 34 56 35 57 #define AUTHORS proper_name ("David MacKenzie") 36 58 59 #define FATAL_ERROR(Message) \ 60 do \ 61 { \ 62 error (0, 0, (Message)); \ 63 usage (2); \ 64 } \ 65 while (0) 66 67 enum operating_mode 68 { 69 /* Fold texts by columns that are at the given positions. */ 70 column_mode, 71 72 /* Fold texts by bytes that are at the given positions. */ 73 byte_mode, 74 75 /* Fold texts by characters that are at the given positions. */ 76 character_mode, 77 }; 78 79 /* The argument shows current mode. (Default: column_mode) */ 80 static enum operating_mode operating_mode; 81 37 82 /* If nonzero, try to break on whitespace. */ 38 83 static bool break_spaces; 39 84 40 /* If nonzero, count bytes, not column positions. */41 static bool count_bytes;42 43 85 /* If nonzero, at least one of the files we read was standard input. 
*/ 44 86 static bool have_read_stdin; 45 87 46 static char const shortopts[] = "b sw:0::1::2::3::4::5::6::7::8::9::";88 static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::"; 47 89 48 90 static struct option const longopts[] = 49 91 { 50 92 {"bytes", no_argument, NULL, 'b'}, 93 {"characters", no_argument, NULL, 'c'}, 51 94 {"spaces", no_argument, NULL, 's'}, 52 95 {"width", required_argument, NULL, 'w'}, 53 96 {GETOPT_HELP_OPTION_DECL}, … … 77 120 "), stdout); 78 121 fputs (_("\ 79 122 -b, --bytes count bytes rather than columns\n\ 123 -c, --characters count characters rather than columns\n\ 80 124 -s, --spaces break at spaces\n\ 81 125 -w, --width=WIDTH use WIDTH columns instead of 80\n\ 82 126 "), stdout); … … 94 138 static size_t 95 139 adjust_column (size_t column, char c) 96 140 { 97 if ( !count_bytes)141 if (operating_mode != byte_mode) 98 142 { 99 143 if (c == '\b') 100 144 { … … 117 161 to stdout, with maximum line length WIDTH. 118 162 Return true if successful. */ 119 163 120 static bool121 fold_ file (char const *filename, size_t width)164 static void 165 fold_text (FILE *istream, size_t width, int *saved_errno) 122 166 { 123 FILE *istream;124 167 int c; 125 168 size_t column = 0; /* Screen column where next char will go. */ 126 169 size_t offset_out = 0; /* Index in `line_out' for next char. */ 127 170 static char *line_out = NULL; 128 171 static size_t allocated_out = 0; 129 int saved_errno;130 131 if (STREQ (filename, "-"))132 {133 istream = stdin;134 have_read_stdin = true;135 }136 else137 istream = fopen (filename, "r");138 139 if (istream == NULL)140 {141 error (0, errno, "%s", filename);142 return false;143 }144 172 145 173 while ((c = getc (istream)) != EOF) 146 174 { … … 168 196 bool found_blank = false; 169 197 size_t logical_end = offset_out; 170 198 199 /* If LINE_OUT has no wide character, 200 put a new wide character in LINE_OUT 201 if column is bigger than width. 
*/ 202 if (offset_out == 0) 203 { 204 line_out[offset_out++] = c; 205 continue; 206 } 207 171 208 /* Look for the last blank. */ 172 209 while (logical_end) 173 210 { … … 214 251 line_out[offset_out++] = c; 215 252 } 216 253 217 saved_errno = errno;254 *saved_errno = errno; 218 255 219 256 if (offset_out) 220 257 fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); 221 258 259 } 260 261 #if HAVE_MBRTOWC 262 static void 263 fold_multibyte_text (FILE *istream, size_t width, int *saved_errno) 264 { 265 char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 266 size_t buflen = 0; /* The length of the byte sequence in buf. */ 267 char *bufpos = NULL; /* Next read position of BUF. */ 268 wint_t wc; /* A gotten wide character. */ 269 size_t mblength; /* The byte size of a multibyte character which shows 270 as same character as WC. */ 271 mbstate_t state, state_bak; /* State of the stream. */ 272 int convfail; /* 1, when conversion is failed. Otherwise 0. */ 273 274 static char *line_out = NULL; 275 size_t offset_out = 0; /* Index in `line_out' for next char. 
*/ 276 static size_t allocated_out = 0; 277 278 int increment; 279 size_t column = 0; 280 281 size_t last_blank_pos; 282 size_t last_blank_column; 283 int is_blank_seen; 284 int last_blank_increment = 0; 285 int is_bs_following_last_blank; 286 size_t bs_following_last_blank_num; 287 int is_cr_after_last_blank; 288 289 #define CLEAR_FLAGS \ 290 do \ 291 { \ 292 last_blank_pos = 0; \ 293 last_blank_column = 0; \ 294 is_blank_seen = 0; \ 295 is_bs_following_last_blank = 0; \ 296 bs_following_last_blank_num = 0; \ 297 is_cr_after_last_blank = 0; \ 298 } \ 299 while (0) 300 301 #define START_NEW_LINE \ 302 do \ 303 { \ 304 putchar ('\n'); \ 305 column = 0; \ 306 offset_out = 0; \ 307 CLEAR_FLAGS; \ 308 } \ 309 while (0) 310 311 CLEAR_FLAGS; 312 memset (&state, '\0', sizeof(mbstate_t)); 313 314 for (;; bufpos += mblength, buflen -= mblength) 315 { 316 if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream)) 317 { 318 memmove (buf, bufpos, buflen); 319 buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream); 320 bufpos = buf; 321 } 322 323 if (buflen < 1) 324 break; 325 326 /* Get a wide character. */ 327 convfail = 0; 328 state_bak = state; 329 mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state); 330 331 switch (mblength) 332 { 333 case (size_t)-1: 334 case (size_t)-2: 335 convfail++; 336 state = state_bak; 337 /* Fall through. */ 338 339 case 0: 340 mblength = 1; 341 break; 342 } 343 344 rescan: 345 if (operating_mode == byte_mode) /* byte mode */ 346 increment = mblength; 347 else if (operating_mode == character_mode) /* character mode */ 348 increment = 1; 349 else /* column mode */ 350 { 351 if (convfail) 352 increment = 1; 353 else 354 { 355 switch (wc) 356 { 357 case L'\n': 358 fwrite (line_out, sizeof(char), offset_out, stdout); 359 START_NEW_LINE; 360 continue; 361 362 case L'\b': 363 increment = (column > 0) ? 
-1 : 0; 364 break; 365 366 case L'\r': 367 increment = -1 * column; 368 break; 369 370 case L'\t': 371 increment = 8 - column % 8; 372 break; 373 374 default: 375 increment = wcwidth (wc); 376 increment = (increment < 0) ? 0 : increment; 377 } 378 } 379 } 380 381 if (column + increment > width && break_spaces && last_blank_pos) 382 { 383 fwrite (line_out, sizeof(char), last_blank_pos, stdout); 384 putchar ('\n'); 385 386 offset_out = offset_out - last_blank_pos; 387 column = column - last_blank_column + ((is_cr_after_last_blank) 388 ? last_blank_increment : bs_following_last_blank_num); 389 memmove (line_out, line_out + last_blank_pos, offset_out); 390 CLEAR_FLAGS; 391 goto rescan; 392 } 393 394 if (column + increment > width && column != 0) 395 { 396 fwrite (line_out, sizeof(char), offset_out, stdout); 397 START_NEW_LINE; 398 goto rescan; 399 } 400 401 if (allocated_out < offset_out + mblength) 402 { 403 line_out = X2REALLOC (line_out, &allocated_out); 404 } 405 406 memcpy (line_out + offset_out, bufpos, mblength); 407 offset_out += mblength; 408 column += increment; 409 410 if (is_blank_seen && !convfail && wc == L'\r') 411 is_cr_after_last_blank = 1; 412 413 if (is_bs_following_last_blank && !convfail && wc == L'\b') 414 ++bs_following_last_blank_num; 415 else 416 is_bs_following_last_blank = 0; 417 418 if (break_spaces && !convfail && iswblank (wc)) 419 { 420 last_blank_pos = offset_out; 421 last_blank_column = column; 422 is_blank_seen = 1; 423 last_blank_increment = increment; 424 is_bs_following_last_blank = 1; 425 bs_following_last_blank_num = 0; 426 is_cr_after_last_blank = 0; 427 } 428 } 429 430 *saved_errno = errno; 431 432 if (offset_out) 433 fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); 434 435 } 436 #endif 437 438 /* Fold file FILENAME, or standard input if FILENAME is "-", 439 to stdout, with maximum line length WIDTH. 440 Return 0 if successful, 1 if an error occurs. 
*/ 441 442 static bool 443 fold_file (char *filename, size_t width) 444 { 445 FILE *istream; 446 int saved_errno; 447 448 if (STREQ (filename, "-")) 449 { 450 istream = stdin; 451 have_read_stdin = 1; 452 } 453 else 454 istream = fopen (filename, "r"); 455 456 if (istream == NULL) 457 { 458 error (0, errno, "%s", filename); 459 return 1; 460 } 461 462 /* Define how ISTREAM is being folded. */ 463 #if HAVE_MBRTOWC 464 if (MB_CUR_MAX > 1) 465 fold_multibyte_text (istream, width, &saved_errno); 466 else 467 #endif 468 fold_text (istream, width, &saved_errno); 469 222 470 if (ferror (istream)) 223 471 { 224 472 error (0, saved_errno, "%s", filename); … … 251 499 252 500 atexit (close_stdout); 253 501 254 break_spaces = count_bytes = have_read_stdin = false; 502 operating_mode = column_mode; 503 break_spaces = have_read_stdin = false; 255 504 256 505 while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) 257 506 { … … 260 509 switch (optc) 261 510 { 262 511 case 'b': /* Count bytes rather than columns. */ 263 count_bytes = true; 512 if (operating_mode != column_mode) 513 FATAL_ERROR (_("only one way of folding may be specified")); 514 operating_mode = byte_mode; 515 break; 516 517 case 'c': 518 if (operating_mode != column_mode) 519 FATAL_ERROR (_("only one way of folding may be specified")); 520 operating_mode = character_mode; 264 521 break; 265 522 266 523 case 's': /* Break at word boundaries. */ -
coreutils-8.5
diff -Naur coreutils-8.5.orig/src/join.c coreutils-8.5/src/join.c
old new 22 22 #include <sys/types.h> 23 23 #include <getopt.h> 24 24 25 /* Get mbstate_t, mbrtowc(), mbrtowc(), wcwidth(). */ 26 #if HAVE_WCHAR_H 27 # include <wchar.h> 28 #endif 29 30 /* Get iswblank(), towupper. */ 31 #if HAVE_WCTYPE_H 32 # include <wctype.h> 33 #endif 34 25 35 #include "system.h" 26 36 #include "error.h" 27 37 #include "hard-locale.h" 28 38 #include "linebuffer.h" 29 #include "memcasecmp.h"30 39 #include "quote.h" 31 40 #include "stdio--.h" 32 41 #include "xmemcoll.h" 33 42 #include "xstrtol.h" 34 43 #include "argmatch.h" 35 44 45 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 46 #if HAVE_MBRTOWC && defined mbstate_t 47 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 48 #endif 49 36 50 /* The official name of this program (e.g., no `g' prefix). */ 37 51 #define PROGRAM_NAME "join" 38 52 … … 121 135 /* Last element in `outlist', where a new element can be added. */ 122 136 static struct outlist *outlist_end = &outlist_head; 123 137 124 /* Tab character separating fields. If negative, fields are separated 125 by any nonempty string of blanks, otherwise by exactly one 126 tab character whose value (when cast to unsigned char) equals TAB. */ 127 static int tab = -1; 138 /* Tab character separating fields. If NULL, fields are separated 139 by any nonempty string of blanks. */ 140 static char *tab = NULL; 141 142 /* The number of bytes used for tab. */ 143 static size_t tablen = 0; 128 144 129 145 /* If nonzero, check that the input is correctly ordered. 
*/ 130 146 static enum … … 248 264 if (ptr == lim) 249 265 return; 250 266 251 if ( 0 <= tab)267 if (tab != NULL) 252 268 { 269 unsigned char t = tab[0]; 253 270 char *sep; 254 for (; (sep = memchr (ptr, t ab, lim - ptr)) != NULL; ptr = sep + 1)271 for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1) 255 272 extract_field (line, ptr, sep - ptr); 256 273 } 257 274 else … … 278 295 extract_field (line, ptr, lim - ptr); 279 296 } 280 297 298 #if HAVE_MBRTOWC 299 static void 300 xfields_multibyte (struct line *line) 301 { 302 char *ptr = line->buf.buffer; 303 char const *lim = ptr + line->buf.length - 1; 304 wchar_t wc = 0; 305 size_t mblength = 1; 306 mbstate_t state, state_bak; 307 308 memset (&state, 0, sizeof (mbstate_t)); 309 310 if (ptr >= lim) 311 return; 312 313 if (tab != NULL) 314 { 315 unsigned char t = tab[0]; 316 char *sep = ptr; 317 for (; ptr < lim; ptr = sep + mblength) 318 { 319 sep = ptr; 320 while (sep < lim) 321 { 322 state_bak = state; 323 mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); 324 325 if (mblength == (size_t)-1 || mblength == (size_t)-2) 326 { 327 mblength = 1; 328 state = state_bak; 329 } 330 mblength = (mblength < 1) ? 1 : mblength; 331 332 if (mblength == tablen && !memcmp (sep, tab, mblength)) 333 break; 334 else 335 { 336 sep += mblength; 337 continue; 338 } 339 } 340 341 if (sep >= lim) 342 break; 343 344 extract_field (line, ptr, sep - ptr); 345 } 346 } 347 else 348 { 349 /* Skip leading blanks before the first field. */ 350 while(ptr < lim) 351 { 352 state_bak = state; 353 mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); 354 355 if (mblength == (size_t)-1 || mblength == (size_t)-2) 356 { 357 mblength = 1; 358 state = state_bak; 359 break; 360 } 361 mblength = (mblength < 1) ? 
1 : mblength; 362 363 if (!iswblank(wc)) 364 break; 365 ptr += mblength; 366 } 367 368 do 369 { 370 char *sep; 371 state_bak = state; 372 mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); 373 if (mblength == (size_t)-1 || mblength == (size_t)-2) 374 { 375 mblength = 1; 376 state = state_bak; 377 break; 378 } 379 mblength = (mblength < 1) ? 1 : mblength; 380 381 sep = ptr + mblength; 382 while (sep < lim) 383 { 384 state_bak = state; 385 mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); 386 if (mblength == (size_t)-1 || mblength == (size_t)-2) 387 { 388 mblength = 1; 389 state = state_bak; 390 break; 391 } 392 mblength = (mblength < 1) ? 1 : mblength; 393 394 if (iswblank (wc)) 395 break; 396 397 sep += mblength; 398 } 399 400 extract_field (line, ptr, sep - ptr); 401 if (sep >= lim) 402 return; 403 404 state_bak = state; 405 mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); 406 if (mblength == (size_t)-1 || mblength == (size_t)-2) 407 { 408 mblength = 1; 409 state = state_bak; 410 break; 411 } 412 mblength = (mblength < 1) ? 1 : mblength; 413 414 ptr = sep + mblength; 415 while (ptr < lim) 416 { 417 state_bak = state; 418 mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); 419 if (mblength == (size_t)-1 || mblength == (size_t)-2) 420 { 421 mblength = 1; 422 state = state_bak; 423 break; 424 } 425 mblength = (mblength < 1) ? 1 : mblength; 426 427 if (!iswblank (wc)) 428 break; 429 430 ptr += mblength; 431 } 432 } 433 while (ptr < lim); 434 } 435 436 extract_field (line, ptr, lim - ptr); 437 } 438 #endif 439 281 440 static void 282 441 freeline (struct line *line) 283 442 { … … 299 458 size_t jf_1, size_t jf_2) 300 459 { 301 460 /* Start of field to compare in each file. */ 302 char *beg1; 303 char *beg2; 304 305 size_t len1; 306 size_t len2; /* Length of fields to compare. */ 461 char *beg[2]; 462 char *copy[2]; 463 size_t len[2]; /* Length of fields to compare. 
*/ 307 464 int diff; 465 int i, j; 308 466 309 467 if (jf_1 < line1->nfields) 310 468 { 311 beg 1= line1->fields[jf_1].beg;312 len 1= line1->fields[jf_1].len;469 beg[0] = line1->fields[jf_1].beg; 470 len[0] = line1->fields[jf_1].len; 313 471 } 314 472 else 315 473 { 316 beg 1= NULL;317 len 1= 0;474 beg[0] = NULL; 475 len[0] = 0; 318 476 } 319 477 320 478 if (jf_2 < line2->nfields) 321 479 { 322 beg 2= line2->fields[jf_2].beg;323 len 2= line2->fields[jf_2].len;480 beg[1] = line2->fields[jf_2].beg; 481 len[1] = line2->fields[jf_2].len; 324 482 } 325 483 else 326 484 { 327 beg 2= NULL;328 len 2= 0;485 beg[1] = NULL; 486 len[1] = 0; 329 487 } 330 488 331 if (len 1== 0)332 return len 2== 0 ? 0 : -1;333 if (len 2== 0)489 if (len[0] == 0) 490 return len[1] == 0 ? 0 : -1; 491 if (len[1] == 0) 334 492 return 1; 335 493 336 494 if (ignore_case) 337 495 { 338 /* FIXME: ignore_case does not work with NLS (in particular, 339 with multibyte chars). */ 340 diff = memcasecmp (beg1, beg2, MIN (len1, len2)); 496 #ifdef HAVE_MBRTOWC 497 if (MB_CUR_MAX > 1) 498 { 499 size_t mblength; 500 wchar_t wc, uwc; 501 mbstate_t state, state_bak; 502 503 memset (&state, '\0', sizeof (mbstate_t)); 504 505 for (i = 0; i < 2; i++) 506 { 507 copy[i] = alloca (len[i] + 1); 508 509 for (j = 0; j < MIN (len[0], len[1]);) 510 { 511 state_bak = state; 512 mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state); 513 514 switch (mblength) 515 { 516 case (size_t) -1: 517 case (size_t) -2: 518 state = state_bak; 519 /* Fall through */ 520 case 0: 521 mblength = 1; 522 break; 523 524 default: 525 uwc = towupper (wc); 526 527 if (uwc != wc) 528 { 529 mbstate_t state_wc; 530 531 memset (&state_wc, '\0', sizeof (mbstate_t)); 532 wcrtomb (copy[i] + j, uwc, &state_wc); 533 } 534 else 535 memcpy (copy[i] + j, beg[i] + j, mblength); 536 } 537 j += mblength; 538 } 539 copy[i][j] = '\0'; 540 } 541 } 542 else 543 #endif 544 { 545 for (i = 0; i < 2; i++) 546 { 547 copy[i] = alloca (len[i] + 1); 548 549 for (j = 0; j < 
MIN (len[0], len[1]); j++) 550 copy[i][j] = toupper (beg[i][j]); 551 552 copy[i][j] = '\0'; 553 } 554 } 341 555 } 342 556 else 343 557 { 344 if (hard_LC_COLLATE) 345 return xmemcoll (beg1, len1, beg2, len2); 346 diff = memcmp (beg1, beg2, MIN (len1, len2)); 558 copy[0] = (unsigned char *) beg[0]; 559 copy[1] = (unsigned char *) beg[1]; 347 560 } 348 561 562 if (hard_LC_COLLATE) 563 return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]); 564 diff = memcmp (copy[0], copy[1], MIN (len[0], len[1])); 565 566 349 567 if (diff) 350 568 return diff; 351 return len 1 < len2 ? -1 : len1 != len2;569 return len[0] - len[1]; 352 570 } 353 571 354 572 /* Check that successive input lines PREV and CURRENT from input file … … 429 647 return false; 430 648 } 431 649 650 #if HAVE_MBRTOWC 651 if (MB_CUR_MAX > 1) 652 xfields_multibyte (line); 653 else 654 #endif 432 655 xfields (line); 433 656 434 657 if (prevline[which - 1]) … … 528 751 529 752 /* Print the join of LINE1 and LINE2. */ 530 753 754 #define PUT_TAB_CHAR \ 755 do \ 756 { \ 757 (tab != NULL) ? \ 758 fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \ 759 } \ 760 while (0) 761 531 762 static void 532 763 prjoin (struct line const *line1, struct line const *line2) 533 764 { 534 765 const struct outlist *outlist; 535 char output_separator = tab < 0 ? 
' ' : tab;536 766 537 767 outlist = outlist_head.next; 538 768 if (outlist) … … 567 797 o = o->next; 568 798 if (o == NULL) 569 799 break; 570 putchar (output_separator);800 PUT_TAB_CHAR; 571 801 } 572 802 putchar ('\n'); 573 803 } … … 585 815 prfield (join_field_1, line1); 586 816 for (i = 0; i < join_field_1 && i < line1->nfields; ++i) 587 817 { 588 putchar (output_separator);818 PUT_TAB_CHAR; 589 819 prfield (i, line1); 590 820 } 591 821 for (i = join_field_1 + 1; i < line1->nfields; ++i) 592 822 { 593 putchar (output_separator);823 PUT_TAB_CHAR; 594 824 prfield (i, line1); 595 825 } 596 826 597 827 for (i = 0; i < join_field_2 && i < line2->nfields; ++i) 598 828 { 599 putchar (output_separator);829 PUT_TAB_CHAR; 600 830 prfield (i, line2); 601 831 } 602 832 for (i = join_field_2 + 1; i < line2->nfields; ++i) 603 833 { 604 putchar (output_separator);834 PUT_TAB_CHAR; 605 835 prfield (i, line2); 606 836 } 607 837 putchar ('\n'); … … 1039 1269 1040 1270 case 't': 1041 1271 { 1042 unsigned char newtab = optarg[0]; 1272 char *newtab; 1273 size_t newtablen; 1274 newtab = xstrdup (optarg); 1275 #if HAVE_MBRTOWC 1276 if (MB_CUR_MAX > 1) 1277 { 1278 mbstate_t state; 1279 1280 memset (&state, 0, sizeof (mbstate_t)); 1281 newtablen = mbrtowc (NULL, newtab, 1282 strnlen (newtab, MB_LEN_MAX), 1283 &state); 1284 if (newtablen == (size_t) 0 1285 || newtablen == (size_t) -1 1286 || newtablen == (size_t) -2) 1287 newtablen = 1; 1288 } 1289 else 1290 #endif 1291 newtablen = 1; 1043 1292 if (! newtab) 1293 { 1044 1294 newtab = '\n'; /* '' => process the whole line. 
*/ 1295 } 1045 1296 else if (optarg[1]) 1046 1297 { 1047 if (STREQ (optarg, "\\0")) 1048 newtab = '\0'; 1049 else 1050 error (EXIT_FAILURE, 0, _("multi-character tab %s"), 1051 quote (optarg)); 1298 if (newtablen == 1 && newtab[1]) 1299 { 1300 if (STREQ (newtab, "\\0")) 1301 newtab[0] = '\0'; 1302 } 1303 } 1304 if (tab != NULL && strcmp (tab, newtab)) 1305 { 1306 free (newtab); 1307 error (EXIT_FAILURE, 0, _("incompatible tabs")); 1052 1308 } 1053 if (0 <= tab && tab != newtab)1054 error (EXIT_FAILURE, 0, _("incompatible tabs"));1055 1309 tab = newtab; 1056 } 1310 tablen = newtablen; 1311 } 1057 1312 break; 1058 1313 1059 1314 case NOCHECK_ORDER_OPTION: -
coreutils-8.5
diff -Naur coreutils-8.5.orig/src/pr.c coreutils-8.5/src/pr.c
old new 312 312 313 313 #include <getopt.h> 314 314 #include <sys/types.h> 315 316 /* Get MB_LEN_MAX. */ 317 #include <limits.h> 318 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 319 installation; work around this configuration error. */ 320 #if !defined MB_LEN_MAX || MB_LEN_MAX == 1 321 # define MB_LEN_MAX 16 322 #endif 323 324 /* Get MB_CUR_MAX. */ 325 #include <stdlib.h> 326 327 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ 328 /* Get mbstate_t, mbrtowc(), wcwidth(). */ 329 #if HAVE_WCHAR_H 330 # include <wchar.h> 331 #endif 332 333 /* Get iswprint(). -- for wcwidth(). */ 334 #if HAVE_WCTYPE_H 335 # include <wctype.h> 336 #endif 337 #if !defined iswprint && !HAVE_ISWPRINT 338 # define iswprint(wc) 1 339 #endif 340 315 341 #include "system.h" 316 342 #include "error.h" 317 343 #include "hard-locale.h" … … 322 348 #include "strftime.h" 323 349 #include "xstrtol.h" 324 350 351 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 352 #if HAVE_MBRTOWC && defined mbstate_t 353 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 354 #endif 355 356 #ifndef HAVE_DECL_WCWIDTH 357 "this configure-time declaration test was not run" 358 #endif 359 #if !HAVE_DECL_WCWIDTH 360 extern int wcwidth (); 361 #endif 362 325 363 /* The official name of this program (e.g., no `g' prefix). */ 326 364 #define PROGRAM_NAME "pr" 327 365 … … 414 452 415 453 typedef struct COLUMN COLUMN; 416 454 417 static int char_to_clump (char c); 455 /* Function pointers to switch functions for single byte locale or for 456 multibyte locale. If multibyte functions do not exist in your system, 457 these pointers always point to the function for single byte locale. */ 458 static void (*print_char) (char c); 459 static int (*char_to_clump) (char c); 460 461 /* Functions for single byte locale. */ 462 static void print_char_single (char c); 463 static int char_to_clump_single (char c); 464 465 /* Functions for multibyte locale. 
*/ 466 static void print_char_multi (char c); 467 static int char_to_clump_multi (char c); 468 418 469 static bool read_line (COLUMN *p); 419 470 static bool print_page (void); 420 471 static bool print_stored (COLUMN *p); … … 424 475 static void pad_across_to (int position); 425 476 static void add_line_number (COLUMN *p); 426 477 static void getoptarg (char *arg, char switch_char, char *character, 478 int *character_length, int *character_width, 427 479 int *number); 428 480 void usage (int status); 429 481 static void print_files (int number_of_files, char **av); … … 438 490 static void pad_down (int lines); 439 491 static void read_rest_of_line (COLUMN *p); 440 492 static void skip_read (COLUMN *p, int column_number); 441 static void print_char (char c);442 493 static void cleanup (void); 443 494 static void print_sep_string (void); 444 495 static void separator_string (const char *optarg_S); … … 450 501 we store the leftmost columns contiguously in buff. 451 502 To print a line from buff, get the index of the first character 452 503 from line_vector[i], and print up to line_vector[i + 1]. */ 453 static char *buff;504 static unsigned char *buff; 454 505 455 506 /* Index of the position in buff where the next character 456 507 will be stored. */ … … 554 605 static bool untabify_input = false; 555 606 556 607 /* (-e) The input tab character. */ 557 static char input_tab_char = '\t';608 static char input_tab_char[MB_LEN_MAX] = "\t"; 558 609 559 610 /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ... 560 611 where the leftmost column is 1. */ … … 564 615 static bool tabify_output = false; 565 616 566 617 /* (-i) The output tab character. */ 567 static char output_tab_char = '\t'; 618 static char output_tab_char[MB_LEN_MAX] = "\t"; 619 620 /* (-i) The byte length of output tab character. */ 621 static int output_tab_char_length = 1; 568 622 569 623 /* (-i) The width of the output tab. 
*/ 570 624 static int chars_per_output_tab = 8; … … 638 692 static bool numbered_lines = false; 639 693 640 694 /* (-n) Character which follows each line number. */ 641 static char number_separator = '\t'; 695 static char number_separator[MB_LEN_MAX] = "\t"; 696 697 /* (-n) The byte length of the character which follows each line number. */ 698 static int number_separator_length = 1; 699 700 /* (-n) The character width of the character which follows each line number. */ 701 static int number_separator_width = 0; 642 702 643 703 /* (-n) line counting starts with 1st line of input file (not with 1st 644 704 line of 1st page printed). */ … … 691 751 -a|COLUMN|-m is a `space' and with the -J option a `tab'. */ 692 752 static char *col_sep_string = (char *) ""; 693 753 static int col_sep_length = 0; 754 static int col_sep_width = 0; 694 755 static char *column_separator = (char *) " "; 695 756 static char *line_separator = (char *) "\t"; 696 757 … … 847 908 col_sep_length = (int) strlen (optarg_S); 848 909 col_sep_string = xmalloc (col_sep_length + 1); 849 910 strcpy (col_sep_string, optarg_S); 911 912 #if HAVE_MBRTOWC 913 if (MB_CUR_MAX > 1) 914 col_sep_width = mbswidth (col_sep_string, 0); 915 else 916 #endif 917 col_sep_width = col_sep_length; 850 918 } 851 919 852 920 int … … 871 939 872 940 atexit (close_stdout); 873 941 942 /* Define which functions are used, the ones for single byte locale or the ones 943 for multibyte locale. */ 944 #if HAVE_MBRTOWC 945 if (MB_CUR_MAX > 1) 946 { 947 print_char = print_char_multi; 948 char_to_clump = char_to_clump_multi; 949 } 950 else 951 #endif 952 { 953 print_char = print_char_single; 954 char_to_clump = char_to_clump_single; 955 } 956 874 957 n_files = 0; 875 958 file_names = (argc > 1 876 959 ? 
xmalloc ((argc - 1) * sizeof (char *)) … … 947 1030 break; 948 1031 case 'e': 949 1032 if (optarg) 950 getoptarg (optarg, 'e', &input_tab_char, 951 &chars_per_input_tab); 1033 { 1034 int dummy_length, dummy_width; 1035 1036 getoptarg (optarg, 'e', input_tab_char, &dummy_length, 1037 &dummy_width, &chars_per_input_tab); 1038 } 952 1039 /* Could check tab width > 0. */ 953 1040 untabify_input = true; 954 1041 break; … … 961 1048 break; 962 1049 case 'i': 963 1050 if (optarg) 964 getoptarg (optarg, 'i', &output_tab_char, 965 &chars_per_output_tab); 1051 { 1052 int dummy_width; 1053 1054 getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length, 1055 &dummy_width, &chars_per_output_tab); 1056 } 966 1057 /* Could check tab width > 0. */ 967 1058 tabify_output = true; 968 1059 break; … … 989 1080 case 'n': 990 1081 numbered_lines = true; 991 1082 if (optarg) 992 getoptarg (optarg, 'n', &number_separator,993 & chars_per_number);1083 getoptarg (optarg, 'n', number_separator, &number_separator_length, 1084 &number_separator_width, &chars_per_number); 994 1085 break; 995 1086 case 'N': 996 1087 skip_count = false; … … 1029 1120 old_s = false; 1030 1121 /* Reset an additional input of -s, -S dominates -s */ 1031 1122 col_sep_string = bad_cast (""); 1032 col_sep_length = 0;1123 col_sep_length = col_sep_width = 0; 1033 1124 use_col_separator = true; 1034 1125 if (optarg) 1035 1126 separator_string (optarg); … … 1186 1277 a number. */ 1187 1278 1188 1279 static void 1189 getoptarg (char *arg, char switch_char, char *character, int *number) 1280 getoptarg (char *arg, char switch_char, char *character, int *character_length, 1281 int *character_width, int *number) 1190 1282 { 1191 1283 if (!ISDIGIT (*arg)) 1192 *character = *arg++; 1284 { 1285 #ifdef HAVE_MBRTOWC 1286 if (MB_CUR_MAX > 1) /* for multibyte locale. 
*/ 1287 { 1288 wchar_t wc; 1289 size_t mblength; 1290 int width; 1291 mbstate_t state = {'\0'}; 1292 1293 mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state); 1294 1295 if (mblength == (size_t)-1 || mblength == (size_t)-2) 1296 { 1297 *character_length = 1; 1298 *character_width = 1; 1299 } 1300 else 1301 { 1302 *character_length = (mblength < 1) ? 1 : mblength; 1303 width = wcwidth (wc); 1304 *character_width = (width < 0) ? 0 : width; 1305 } 1306 1307 strncpy (character, arg, *character_length); 1308 arg += *character_length; 1309 } 1310 else /* for single byte locale. */ 1311 #endif 1312 { 1313 *character = *arg++; 1314 *character_length = 1; 1315 *character_width = 1; 1316 } 1317 } 1318 1193 1319 if (*arg) 1194 1320 { 1195 1321 long int tmp_long; … … 1248 1374 else 1249 1375 col_sep_string = column_separator; 1250 1376 1251 col_sep_length = 1;1377 col_sep_length = col_sep_width = 1; 1252 1378 use_col_separator = true; 1253 1379 } 1254 1380 /* It's rather pointless to define a TAB separator with column … … 1279 1405 TAB_WIDTH (chars_per_input_tab, chars_per_number); */ 1280 1406 1281 1407 /* Estimate chars_per_text without any margin and keep it constant. */ 1282 if (number_separator == '\t')1408 if (number_separator[0] == '\t') 1283 1409 number_width = chars_per_number + 1284 1410 TAB_WIDTH (chars_per_default_tab, chars_per_number); 1285 1411 else 1286 number_width = chars_per_number + 1;1412 number_width = chars_per_number + number_separator_width; 1287 1413 1288 1414 /* The number is part of the column width unless we are 1289 1415 printing files in parallel. 
*/ … … 1298 1424 } 1299 1425 1300 1426 chars_per_column = (chars_per_line - chars_used_by_number - 1301 (columns - 1) * col_sep_ length) / columns;1427 (columns - 1) * col_sep_width) / columns; 1302 1428 1303 1429 if (chars_per_column < 1) 1304 1430 error (EXIT_FAILURE, 0, _("page width too narrow")); … … 1423 1549 1424 1550 /* Enlarge p->start_position of first column to use the same form of 1425 1551 padding_not_printed with all columns. */ 1426 h = h + col_sep_ length;1552 h = h + col_sep_width; 1427 1553 1428 1554 /* This loop takes care of all but the rightmost column. */ 1429 1555 … … 1457 1583 } 1458 1584 else 1459 1585 { 1460 h = h_next + col_sep_ length;1586 h = h_next + col_sep_width; 1461 1587 h_next = h + chars_per_column; 1462 1588 } 1463 1589 } … … 1747 1873 align_column (COLUMN *p) 1748 1874 { 1749 1875 padding_not_printed = p->start_position; 1750 if (padding_not_printed - col_sep_ length > 0)1876 if (padding_not_printed - col_sep_width > 0) 1751 1877 { 1752 pad_across_to (padding_not_printed - col_sep_ length);1878 pad_across_to (padding_not_printed - col_sep_width); 1753 1879 padding_not_printed = ANYWHERE; 1754 1880 } 1755 1881 … … 2020 2146 /* May be too generous. */ 2021 2147 buff = X2REALLOC (buff, &buff_allocated); 2022 2148 } 2023 buff[buff_current++] = c;2149 buff[buff_current++] = (unsigned char) c; 2024 2150 } 2025 2151 2026 2152 static void 2027 2153 add_line_number (COLUMN *p) 2028 2154 { 2029 int i ;2155 int i, j; 2030 2156 char *s; 2031 2157 int left_cut; 2032 2158 … … 2049 2175 /* Tabification is assumed for multiple columns, also for n-separators, 2050 2176 but `default n-separator = TAB' hasn't been given priority over 2051 2177 equal column_width also specified by POSIX. 
*/ 2052 if (number_separator == '\t')2178 if (number_separator[0] == '\t') 2053 2179 { 2054 2180 i = number_width - chars_per_number; 2055 2181 while (i-- > 0) 2056 2182 (p->char_func) (' '); 2057 2183 } 2058 2184 else 2059 (p->char_func) (number_separator); 2185 for (j = 0; j < number_separator_length; j++) 2186 (p->char_func) (number_separator[j]); 2060 2187 } 2061 2188 else 2062 2189 /* To comply with POSIX, we avoid any expansion of default TAB 2063 2190 separator with a single column output. No column_width requirement 2064 2191 has to be considered. */ 2065 2192 { 2066 (p->char_func) (number_separator); 2067 if (number_separator == '\t') 2193 for (j = 0; j < number_separator_length; j++) 2194 (p->char_func) (number_separator[j]); 2195 if (number_separator[0] == '\t') 2068 2196 output_position = POS_AFTER_TAB (chars_per_output_tab, 2069 2197 output_position); 2070 2198 } … … 2225 2353 while (goal - h_old > 1 2226 2354 && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal) 2227 2355 { 2228 putchar (output_tab_char);2356 fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout); 2229 2357 h_old = h_new; 2230 2358 } 2231 2359 while (++h_old <= goal) … … 2245 2373 { 2246 2374 char *s; 2247 2375 int l = col_sep_length; 2376 int not_space_flag; 2248 2377 2249 2378 s = col_sep_string; 2250 2379 … … 2258 2387 { 2259 2388 for (; separators_not_printed > 0; --separators_not_printed) 2260 2389 { 2390 not_space_flag = 0; 2261 2391 while (l-- > 0) 2262 2392 { 2263 2393 /* 3 types of sep_strings: spaces only, spaces and chars, … … 2271 2401 } 2272 2402 else 2273 2403 { 2404 not_space_flag = 1; 2274 2405 if (spaces_not_printed > 0) 2275 2406 print_white_space (); 2276 2407 putchar (*s++); 2277 ++output_position;2278 2408 } 2279 2409 } 2410 if (not_space_flag) 2411 output_position += col_sep_width; 2412 2280 2413 /* sep_string ends with some spaces */ 2281 2414 if (spaces_not_printed > 0) 2282 2415 print_white_space (); … … 2304 2437 required number of 
tabs and spaces. */ 2305 2438 2306 2439 static void 2307 print_char (char c)2440 print_char_single (char c) 2308 2441 { 2309 2442 if (tabify_output) 2310 2443 { … … 2328 2461 putchar (c); 2329 2462 } 2330 2463 2464 #ifdef HAVE_MBRTOWC 2465 static void 2466 print_char_multi (char c) 2467 { 2468 static size_t mbc_pos = 0; 2469 static char mbc[MB_LEN_MAX] = {'\0'}; 2470 static mbstate_t state = {'\0'}; 2471 mbstate_t state_bak; 2472 wchar_t wc; 2473 size_t mblength; 2474 int width; 2475 2476 if (tabify_output) 2477 { 2478 state_bak = state; 2479 mbc[mbc_pos++] = c; 2480 mblength = mbrtowc (&wc, mbc, mbc_pos, &state); 2481 2482 while (mbc_pos > 0) 2483 { 2484 switch (mblength) 2485 { 2486 case (size_t)-2: 2487 state = state_bak; 2488 return; 2489 2490 case (size_t)-1: 2491 state = state_bak; 2492 ++output_position; 2493 putchar (mbc[0]); 2494 memmove (mbc, mbc + 1, MB_CUR_MAX - 1); 2495 --mbc_pos; 2496 break; 2497 2498 case 0: 2499 mblength = 1; 2500 2501 default: 2502 if (wc == L' ') 2503 { 2504 memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); 2505 --mbc_pos; 2506 ++spaces_not_printed; 2507 return; 2508 } 2509 else if (spaces_not_printed > 0) 2510 print_white_space (); 2511 2512 /* Nonprintables are assumed to have width 0, except L'\b'. */ 2513 if ((width = wcwidth (wc)) < 1) 2514 { 2515 if (wc == L'\b') 2516 --output_position; 2517 } 2518 else 2519 output_position += width; 2520 2521 fwrite (mbc, sizeof(char), mblength, stdout); 2522 memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); 2523 mbc_pos -= mblength; 2524 } 2525 } 2526 return; 2527 } 2528 putchar (c); 2529 } 2530 #endif 2531 2331 2532 /* Skip to page PAGE before printing. 2332 2533 PAGE may be larger than total number of pages. 
*/ 2333 2534 … … 2507 2708 align_empty_cols = false; 2508 2709 } 2509 2710 2510 if (padding_not_printed - col_sep_ length > 0)2711 if (padding_not_printed - col_sep_width > 0) 2511 2712 { 2512 pad_across_to (padding_not_printed - col_sep_ length);2713 pad_across_to (padding_not_printed - col_sep_width); 2513 2714 padding_not_printed = ANYWHERE; 2514 2715 } 2515 2716 … … 2610 2811 } 2611 2812 } 2612 2813 2613 if (padding_not_printed - col_sep_ length > 0)2814 if (padding_not_printed - col_sep_width > 0) 2614 2815 { 2615 pad_across_to (padding_not_printed - col_sep_ length);2816 pad_across_to (padding_not_printed - col_sep_width); 2616 2817 padding_not_printed = ANYWHERE; 2617 2818 } 2618 2819 … … 2625 2826 if (spaces_not_printed == 0) 2626 2827 { 2627 2828 output_position = p->start_position + end_vector[line]; 2628 if (p->start_position - col_sep_ length == chars_per_margin)2629 output_position -= col_sep_ length;2829 if (p->start_position - col_sep_width == chars_per_margin) 2830 output_position -= col_sep_width; 2630 2831 } 2631 2832 2632 2833 return true; … … 2645 2846 number of characters is 1.) 
*/ 2646 2847 2647 2848 static int 2648 char_to_clump (char c)2849 char_to_clump_single (char c) 2649 2850 { 2650 2851 unsigned char uc = c; 2651 2852 char *s = clump_buff; … … 2655 2856 int chars; 2656 2857 int chars_per_c = 8; 2657 2858 2658 if (c == input_tab_char )2859 if (c == input_tab_char[0]) 2659 2860 chars_per_c = chars_per_input_tab; 2660 2861 2661 if (c == input_tab_char || c == '\t')2862 if (c == input_tab_char[0] || c == '\t') 2662 2863 { 2663 2864 width = TAB_WIDTH (chars_per_c, input_position); 2664 2865 … … 2739 2940 return chars; 2740 2941 } 2741 2942 2943 #ifdef HAVE_MBRTOWC 2944 static int 2945 char_to_clump_multi (char c) 2946 { 2947 static size_t mbc_pos = 0; 2948 static char mbc[MB_LEN_MAX] = {'\0'}; 2949 static mbstate_t state = {'\0'}; 2950 mbstate_t state_bak; 2951 wchar_t wc; 2952 size_t mblength; 2953 int wc_width; 2954 register char *s = clump_buff; 2955 register int i, j; 2956 char esc_buff[4]; 2957 int width; 2958 int chars; 2959 int chars_per_c = 8; 2960 2961 state_bak = state; 2962 mbc[mbc_pos++] = c; 2963 mblength = mbrtowc (&wc, mbc, mbc_pos, &state); 2964 2965 width = 0; 2966 chars = 0; 2967 while (mbc_pos > 0) 2968 { 2969 switch (mblength) 2970 { 2971 case (size_t)-2: 2972 state = state_bak; 2973 return 0; 2974 2975 case (size_t)-1: 2976 state = state_bak; 2977 mblength = 1; 2978 2979 if (use_esc_sequence || use_cntrl_prefix) 2980 { 2981 width = +4; 2982 chars = +4; 2983 *s++ = '\\'; 2984 sprintf (esc_buff, "%03o", mbc[0]); 2985 for (i = 0; i <= 2; ++i) 2986 *s++ = (int) esc_buff[i]; 2987 } 2988 else 2989 { 2990 width += 1; 2991 chars += 1; 2992 *s++ = mbc[0]; 2993 } 2994 break; 2995 2996 case 0: 2997 mblength = 1; 2998 /* Fall through */ 2999 3000 default: 3001 if (memcmp (mbc, input_tab_char, mblength) == 0) 3002 chars_per_c = chars_per_input_tab; 3003 3004 if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t') 3005 { 3006 int width_inc; 3007 3008 width_inc = TAB_WIDTH (chars_per_c, input_position); 3009 width += 
width_inc; 3010 3011 if (untabify_input) 3012 { 3013 for (i = width_inc; i; --i) 3014 *s++ = ' '; 3015 chars += width_inc; 3016 } 3017 else 3018 { 3019 for (i = 0; i < mblength; i++) 3020 *s++ = mbc[i]; 3021 chars += mblength; 3022 } 3023 } 3024 else if ((wc_width = wcwidth (wc)) < 1) 3025 { 3026 if (use_esc_sequence) 3027 { 3028 for (i = 0; i < mblength; i++) 3029 { 3030 width += 4; 3031 chars += 4; 3032 *s++ = '\\'; 3033 sprintf (esc_buff, "%03o", c); 3034 for (j = 0; j <= 2; ++j) 3035 *s++ = (int) esc_buff[j]; 3036 } 3037 } 3038 else if (use_cntrl_prefix) 3039 { 3040 if (wc < 0200) 3041 { 3042 width += 2; 3043 chars += 2; 3044 *s++ = '^'; 3045 *s++ = wc ^ 0100; 3046 } 3047 else 3048 { 3049 for (i = 0; i < mblength; i++) 3050 { 3051 width += 4; 3052 chars += 4; 3053 *s++ = '\\'; 3054 sprintf (esc_buff, "%03o", c); 3055 for (j = 0; j <= 2; ++j) 3056 *s++ = (int) esc_buff[j]; 3057 } 3058 } 3059 } 3060 else if (wc == L'\b') 3061 { 3062 width += -1; 3063 chars += 1; 3064 *s++ = c; 3065 } 3066 else 3067 { 3068 width += 0; 3069 chars += mblength; 3070 for (i = 0; i < mblength; i++) 3071 *s++ = mbc[i]; 3072 } 3073 } 3074 else 3075 { 3076 width += wc_width; 3077 chars += mblength; 3078 for (i = 0; i < mblength; i++) 3079 *s++ = mbc[i]; 3080 } 3081 } 3082 memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); 3083 mbc_pos -= mblength; 3084 } 3085 3086 input_position += width; 3087 return chars; 3088 } 3089 #endif 3090 2742 3091 /* We've just printed some files and need to clean up things before 2743 3092 looking for more options and printing the next batch of files. 2744 3093 -
coreutils-8.5
diff -Naur coreutils-8.5.orig/src/sort.c coreutils-8.5/src/sort.c
old new 22 22 23 23 #include <config.h> 24 24 25 #include <assert.h> 25 26 #include <getopt.h> 26 27 #include <sys/types.h> 27 28 #include <sys/wait.h> 28 29 #include <signal.h> 30 #if HAVE_WCHAR_H 31 # include <wchar.h> 32 #endif 33 /* Get isw* functions. */ 34 #if HAVE_WCTYPE_H 35 # include <wctype.h> 36 #endif 37 29 38 #include "system.h" 30 39 #include "argmatch.h" 31 40 #include "error.h" … … 124 133 /* Thousands separator; if -1, then there isn't one. */ 125 134 static int thousands_sep; 126 135 136 static int force_general_numcompare = 0; 137 127 138 /* Nonzero if the corresponding locales are hard. */ 128 139 static bool hard_LC_COLLATE; 129 #if HAVE_ NL_LANGINFO140 #if HAVE_LANGINFO_CODESET 130 141 static bool hard_LC_TIME; 131 142 #endif 132 143 133 144 #define NONZERO(x) ((x) != 0) 134 145 146 /* get a multibyte character's byte length. */ 147 #define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \ 148 do \ 149 { \ 150 wchar_t wc; \ 151 mbstate_t state_bak; \ 152 \ 153 state_bak = STATE; \ 154 mblength = mbrtowc (&wc, PTR, LIM - PTR, &STATE); \ 155 \ 156 switch (MBLENGTH) \ 157 { \ 158 case (size_t)-1: \ 159 case (size_t)-2: \ 160 STATE = state_bak; \ 161 /* Fall through. */ \ 162 case 0: \ 163 MBLENGTH = 1; \ 164 } \ 165 } \ 166 while (0) 167 135 168 /* The kind of blanks for '-b' to skip in various options. */ 136 169 enum blanktype { bl_start, bl_end, bl_both }; 137 170 … … 270 303 they were read if all keys compare equal. */ 271 304 static bool stable; 272 305 273 /* If TAB has this value, blanks separate fields. */ 274 enum { TAB_DEFAULT = CHAR_MAX + 1 }; 275 276 /* Tab character separating fields. If TAB_DEFAULT, then fields are 306 /* Tab character separating fields. If tab_length is 0, then fields are 277 307 separated by the empty string between a non-blank character and a blank 278 308 character. 
*/ 279 static int tab = TAB_DEFAULT; 309 static char tab[MB_LEN_MAX + 1]; 310 static size_t tab_length = 0; 280 311 281 312 /* Flag to remove consecutive duplicate lines from the output. 282 313 Only the last of a sequence of equal lines will be output. */ … … 714 745 update_proc (pid); 715 746 } 716 747 748 /* Function pointers. */ 749 static void 750 (*inittables) (void); 751 static char * 752 (*begfield) (const struct line*, const struct keyfield *); 753 static char * 754 (*limfield) (const struct line*, const struct keyfield *); 755 static int 756 (*getmonth) (char const *, size_t); 757 static int 758 (*keycompare) (const struct line *, const struct line *); 759 static int 760 (*numcompare) (const char *, const char *); 761 762 /* Test whether the multibyte character at STR is a blank. 763 Store in *LENGTH the byte length of the examined multibyte character. */ 764 #if HAVE_MBRTOWC 765 static int 766 ismbblank (const char *str, size_t len, size_t *length) 767 { 768 size_t mblength; 769 wchar_t wc; 770 mbstate_t state; 771 772 memset (&state, '\0', sizeof(mbstate_t)); 773 mblength = mbrtowc (&wc, str, len, &state); 774 775 if (mblength == (size_t)-1 || mblength == (size_t)-2) 776 { 777 *length = 1; 778 return 0; 779 } 780 781 *length = (mblength < 1) ? 1 : mblength; 782 return iswblank (wc); 783 } 784 #endif 785 717 786 /* Clean up any remaining temporary files. */ 718 787 719 788 static void … … 1158 1227 free (node); 1159 1228 } 1160 1229 1161 #if HAVE_ NL_LANGINFO1230 #if HAVE_LANGINFO_CODESET 1162 1231 1163 1232 static int 1164 1233 struct_month_cmp (const void *m1, const void *m2) … … 1173 1242 /* Initialize the character class tables. */ 1174 1243 1175 1244 static void 1176 inittables (void)1245 inittables_uni (void) 1177 1246 { 1178 1247 size_t i; 1179 1248 … … 1185 1254 fold_toupper[i] = toupper (i); 1186 1255 } 1187 1256 1188 #if HAVE_ NL_LANGINFO1257 #if HAVE_LANGINFO_CODESET 1189 1258 /* If we're not in the "C" locale, read different names for months. 
*/ 1190 1259 if (hard_LC_TIME) 1191 1260 { … … 1268 1337 xstrtol_fatal (e, oi, c, long_options, s); 1269 1338 } 1270 1339 1340 #if HAVE_MBRTOWC 1341 static void 1342 inittables_mb (void) 1343 { 1344 int i, j, k, l; 1345 char *name, *s; 1346 size_t s_len, mblength; 1347 char mbc[MB_LEN_MAX]; 1348 wchar_t wc, pwc; 1349 mbstate_t state_mb, state_wc; 1350 1351 for (i = 0; i < MONTHS_PER_YEAR; i++) 1352 { 1353 s = (char *) nl_langinfo (ABMON_1 + i); 1354 s_len = strlen (s); 1355 monthtab[i].name = name = (char *) xmalloc (s_len + 1); 1356 monthtab[i].val = i + 1; 1357 1358 memset (&state_mb, '\0', sizeof (mbstate_t)); 1359 memset (&state_wc, '\0', sizeof (mbstate_t)); 1360 1361 for (j = 0; j < s_len;) 1362 { 1363 if (!ismbblank (s + j, s_len - j, &mblength)) 1364 break; 1365 j += mblength; 1366 } 1367 1368 for (k = 0; j < s_len;) 1369 { 1370 mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb); 1371 assert (mblength != (size_t)-1 && mblength != (size_t)-2); 1372 if (mblength == 0) 1373 break; 1374 1375 pwc = towupper (wc); 1376 if (pwc == wc) 1377 { 1378 memcpy (mbc, s + j, mblength); 1379 j += mblength; 1380 } 1381 else 1382 { 1383 j += mblength; 1384 mblength = wcrtomb (mbc, pwc, &state_wc); 1385 assert (mblength != (size_t)0 && mblength != (size_t)-1); 1386 } 1387 1388 for (l = 0; l < mblength; l++) 1389 name[k++] = mbc[l]; 1390 } 1391 name[k] = '\0'; 1392 } 1393 qsort ((void *) monthtab, MONTHS_PER_YEAR, 1394 sizeof (struct month), struct_month_cmp); 1395 } 1396 #endif 1397 1271 1398 /* Specify the amount of main memory to use when sorting. */ 1272 1399 static void 1273 1400 specify_sort_size (int oi, char c, char const *s) … … 1478 1605 by KEY in LINE. 
*/ 1479 1606 1480 1607 static char * 1481 begfield (const struct line *line, const struct keyfield *key)1608 begfield_uni (const struct line *line, const struct keyfield *key) 1482 1609 { 1483 1610 char *ptr = line->text, *lim = ptr + line->length - 1; 1484 1611 size_t sword = key->sword; … … 1487 1614 /* The leading field separator itself is included in a field when -t 1488 1615 is absent. */ 1489 1616 1490 if (tab != TAB_DEFAULT)1617 if (tab_length) 1491 1618 while (ptr < lim && sword--) 1492 1619 { 1493 while (ptr < lim && *ptr != tab )1620 while (ptr < lim && *ptr != tab[0]) 1494 1621 ++ptr; 1495 1622 if (ptr < lim) 1496 1623 ++ptr; … … 1516 1643 return ptr; 1517 1644 } 1518 1645 1646 #if HAVE_MBRTOWC 1647 static char * 1648 begfield_mb (const struct line *line, const struct keyfield *key) 1649 { 1650 int i; 1651 char *ptr = line->text, *lim = ptr + line->length - 1; 1652 size_t sword = key->sword; 1653 size_t schar = key->schar; 1654 size_t mblength; 1655 mbstate_t state; 1656 1657 memset (&state, '\0', sizeof(mbstate_t)); 1658 1659 if (tab_length) 1660 while (ptr < lim && sword--) 1661 { 1662 while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) 1663 { 1664 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1665 ptr += mblength; 1666 } 1667 if (ptr < lim) 1668 { 1669 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1670 ptr += mblength; 1671 } 1672 } 1673 else 1674 while (ptr < lim && sword--) 1675 { 1676 while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) 1677 ptr += mblength; 1678 if (ptr < lim) 1679 { 1680 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1681 ptr += mblength; 1682 } 1683 while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength)) 1684 ptr += mblength; 1685 } 1686 1687 if (key->skipsblanks) 1688 while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) 1689 ptr += mblength; 1690 1691 for (i = 0; i < schar; i++) 1692 { 1693 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1694 1695 if (ptr + mblength > lim) 1696 break; 1697 else 1698 
ptr += mblength; 1699 } 1700 1701 return ptr; 1702 } 1703 #endif 1704 1519 1705 /* Return the limit of (a pointer to the first character after) the field 1520 1706 in LINE specified by KEY. */ 1521 1707 1522 1708 static char * 1523 limfield (const struct line *line, const struct keyfield *key)1709 limfield_uni (const struct line *line, const struct keyfield *key) 1524 1710 { 1525 1711 char *ptr = line->text, *lim = ptr + line->length - 1; 1526 1712 size_t eword = key->eword, echar = key->echar; … … 1535 1721 `beginning' is the first character following the delimiting TAB. 1536 1722 Otherwise, leave PTR pointing at the first `blank' character after 1537 1723 the preceding field. */ 1538 if (tab != TAB_DEFAULT)1724 if (tab_length) 1539 1725 while (ptr < lim && eword--) 1540 1726 { 1541 while (ptr < lim && *ptr != tab )1727 while (ptr < lim && *ptr != tab[0]) 1542 1728 ++ptr; 1543 1729 if (ptr < lim && (eword || echar)) 1544 1730 ++ptr; … … 1584 1770 */ 1585 1771 1586 1772 /* Make LIM point to the end of (one byte past) the current field. */ 1587 if (tab != TAB_DEFAULT)1773 if (tab_length) 1588 1774 { 1589 1775 char *newlim; 1590 newlim = memchr (ptr, tab , lim - ptr);1776 newlim = memchr (ptr, tab[0], lim - ptr); 1591 1777 if (newlim) 1592 1778 lim = newlim; 1593 1779 } … … 1618 1804 return ptr; 1619 1805 } 1620 1806 1807 #if HAVE_MBRTOWC 1808 static char * 1809 limfield_mb (const struct line *line, const struct keyfield *key) 1810 { 1811 char *ptr = line->text, *lim = ptr + line->length - 1; 1812 size_t eword = key->eword, echar = key->echar; 1813 int i; 1814 size_t mblength; 1815 mbstate_t state; 1816 1817 if (echar == 0) 1818 eword++; /* skip all of end field. 
*/ 1819 1820 memset (&state, '\0', sizeof(mbstate_t)); 1821 1822 if (tab_length) 1823 while (ptr < lim && eword--) 1824 { 1825 while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) 1826 { 1827 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1828 ptr += mblength; 1829 } 1830 if (ptr < lim && (eword | echar)) 1831 { 1832 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1833 ptr += mblength; 1834 } 1835 } 1836 else 1837 while (ptr < lim && eword--) 1838 { 1839 while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) 1840 ptr += mblength; 1841 if (ptr < lim) 1842 { 1843 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1844 ptr += mblength; 1845 } 1846 while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength)) 1847 ptr += mblength; 1848 } 1849 1850 1851 # ifdef POSIX_UNSPECIFIED 1852 /* Make LIM point to the end of (one byte past) the current field. */ 1853 if (tab_length) 1854 { 1855 char *newlim, *p; 1856 1857 newlim = NULL; 1858 for (p = ptr; p < lim;) 1859 { 1860 if (memcmp (p, tab, tab_length) == 0) 1861 { 1862 newlim = p; 1863 break; 1864 } 1865 1866 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1867 p += mblength; 1868 } 1869 } 1870 else 1871 { 1872 char *newlim; 1873 newlim = ptr; 1874 1875 while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength)) 1876 newlim += mblength; 1877 if (ptr < lim) 1878 { 1879 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1880 ptr += mblength; 1881 } 1882 while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength)) 1883 newlim += mblength; 1884 lim = newlim; 1885 } 1886 # endif 1887 1888 if (echar != 0) 1889 { 1890 /* If we're skipping leading blanks, don't start counting characters 1891 * until after skipping past any leading blanks. */ 1892 if (key->skipsblanks) 1893 while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) 1894 ptr += mblength; 1895 1896 memset (&state, '\0', sizeof(mbstate_t)); 1897 1898 /* Advance PTR by ECHAR (if possible), but no further than LIM. 
*/ 1899 for (i = 0; i < echar; i++) 1900 { 1901 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1902 1903 if (ptr + mblength > lim) 1904 break; 1905 else 1906 ptr += mblength; 1907 } 1908 } 1909 1910 return ptr; 1911 } 1912 #endif 1913 1621 1914 /* Fill BUF reading from FP, moving buf->left bytes from the end 1622 1915 of buf->buf to the beginning first. If EOF is reached and the 1623 1916 file wasn't terminated by a newline, supply one. Set up BUF's line … … 1700 1993 else 1701 1994 { 1702 1995 if (key->skipsblanks) 1703 while (blanks[to_uchar (*line_start)]) 1704 line_start++; 1996 { 1997 #if HAVE_MBRTOWC 1998 if (MB_CUR_MAX > 1) 1999 { 2000 size_t mblength; 2001 mbstate_t state; 2002 memset (&state, '\0', sizeof(mbstate_t)); 2003 while (line_start < line->keylim && 2004 ismbblank (line_start, 2005 line->keylim - line_start, 2006 &mblength)) 2007 line_start += mblength; 2008 } 2009 else 2010 #endif 2011 while (blanks[to_uchar (*line_start)]) 2012 line_start++; 2013 } 1705 2014 line->keybeg = line_start; 1706 2015 } 1707 2016 } … … 1739 2048 hideously fast. */ 1740 2049 1741 2050 static int 1742 numcompare (const char *a, const char *b)2051 numcompare_uni (const char *a, const char *b) 1743 2052 { 1744 2053 while (blanks[to_uchar (*a)]) 1745 2054 a++; … … 1848 2157 : strnumcmp (a, b, decimal_point, thousands_sep)); 1849 2158 } 1850 2159 2160 #if HAVE_MBRTOWC 2161 static int 2162 numcompare_mb (const char *a, const char *b) 2163 { 2164 size_t mblength, len; 2165 len = strlen (a); /* okay for UTF-8 */ 2166 while (*a && ismbblank (a, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength)) 2167 { 2168 a += mblength; 2169 len -= mblength; 2170 } 2171 len = strlen (b); /* okay for UTF-8 */ 2172 while (*b && ismbblank (b, len > MB_CUR_MAX ? 
MB_CUR_MAX : len, &mblength)) 2173 b += mblength; 2174 2175 return strnumcmp (a, b, decimal_point, thousands_sep); 2176 } 2177 #endif /* HAV_EMBRTOWC */ 2178 1851 2179 static int 1852 2180 general_numcompare (const char *sa, const char *sb) 1853 2181 { … … 1881 2209 Return 0 if the name in S is not recognized. */ 1882 2210 1883 2211 static int 1884 getmonth (char const *month, size_t len)2212 getmonth_uni (char const *month, size_t len) 1885 2213 { 1886 2214 size_t lo = 0; 1887 2215 size_t hi = MONTHS_PER_YEAR; … … 2062 2390 return diff; 2063 2391 } 2064 2392 2393 #if HAVE_MBRTOWC 2394 static int 2395 getmonth_mb (const char *s, size_t len) 2396 { 2397 char *month; 2398 register size_t i; 2399 register int lo = 0, hi = MONTHS_PER_YEAR, result; 2400 char *tmp; 2401 size_t wclength, mblength; 2402 const char **pp; 2403 const wchar_t **wpp; 2404 wchar_t *month_wcs; 2405 mbstate_t state; 2406 2407 while (len > 0 && ismbblank (s, len, &mblength)) 2408 { 2409 s += mblength; 2410 len -= mblength; 2411 } 2412 2413 if (len == 0) 2414 return 0; 2415 2416 month = (char *) alloca (len + 1); 2417 2418 tmp = (char *) alloca (len + 1); 2419 memcpy (tmp, s, len); 2420 tmp[len] = '\0'; 2421 pp = (const char **)&tmp; 2422 month_wcs = (wchar_t *) alloca ((len + 1) * sizeof (wchar_t)); 2423 memset (&state, '\0', sizeof(mbstate_t)); 2424 2425 wclength = mbsrtowcs (month_wcs, pp, len + 1, &state); 2426 assert (wclength != (size_t)-1 && *pp == NULL); 2427 2428 for (i = 0; i < wclength; i++) 2429 { 2430 month_wcs[i] = towupper(month_wcs[i]); 2431 if (iswblank (month_wcs[i])) 2432 { 2433 month_wcs[i] = L'\0'; 2434 break; 2435 } 2436 } 2437 2438 wpp = (const wchar_t **)&month_wcs; 2439 2440 mblength = wcsrtombs (month, wpp, len + 1, &state); 2441 assert (mblength != (-1) && *wpp == NULL); 2442 2443 do 2444 { 2445 int ix = (lo + hi) / 2; 2446 2447 if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0) 2448 hi = ix; 2449 else 2450 lo = ix; 2451 } 2452 while (hi - lo > 1); 
2453 2454 result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name)) 2455 ? monthtab[lo].val : 0); 2456 2457 return result; 2458 } 2459 #endif 2460 2065 2461 /* Compare two lines A and B trying every key in sequence until there 2066 2462 are no more keys or a difference is found. */ 2067 2463 2068 2464 static int 2069 keycompare (const struct line *a, const struct line *b)2465 keycompare_uni (const struct line *a, const struct line *b) 2070 2466 { 2071 2467 struct keyfield *key = keylist; 2072 2468 … … 2246 2642 return key->reverse ? -diff : diff; 2247 2643 } 2248 2644 2645 #if HAVE_MBRTOWC 2646 static int 2647 keycompare_mb (const struct line *a, const struct line *b) 2648 { 2649 struct keyfield *key = keylist; 2650 2651 /* For the first iteration only, the key positions have been 2652 precomputed for us. */ 2653 char *texta = a->keybeg; 2654 char *textb = b->keybeg; 2655 char *lima = a->keylim; 2656 char *limb = b->keylim; 2657 2658 size_t mblength_a, mblength_b; 2659 wchar_t wc_a, wc_b; 2660 mbstate_t state_a, state_b; 2661 2662 int diff; 2663 2664 memset (&state_a, '\0', sizeof(mbstate_t)); 2665 memset (&state_b, '\0', sizeof(mbstate_t)); 2666 2667 for (;;) 2668 { 2669 char const *translate = key->translate; 2670 bool const *ignore = key->ignore; 2671 2672 /* Find the lengths. */ 2673 size_t lena = lima <= texta ? 0 : lima - texta; 2674 size_t lenb = limb <= textb ? 0 : limb - textb; 2675 2676 /* Actually compare the fields. */ 2677 if (key->random) 2678 diff = compare_random (texta, lena, textb, lenb); 2679 else if (key->numeric | key->general_numeric | key->human_numeric) 2680 { 2681 char savea = *lima, saveb = *limb; 2682 2683 *lima = *limb = '\0'; 2684 diff = (key->numeric ? numcompare (texta, textb) 2685 : key->general_numeric ? 
general_numcompare (texta, textb) 2686 : human_numcompare (texta, textb, key)); 2687 *lima = savea, *limb = saveb; 2688 } 2689 else if (key->version) 2690 diff = compare_version (texta, lena, textb, lenb); 2691 else if (key->month) 2692 diff = getmonth (texta, lena) - getmonth (textb, lenb); 2693 else 2694 { 2695 if (ignore || translate) 2696 { 2697 char *copy_a = (char *) alloca (lena + 1 + lenb + 1); 2698 char *copy_b = copy_a + lena + 1; 2699 size_t new_len_a, new_len_b; 2700 size_t i, j; 2701 2702 /* Ignore and/or translate chars before comparing. */ 2703 # define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \ 2704 do \ 2705 { \ 2706 wchar_t uwc; \ 2707 char mbc[MB_LEN_MAX]; \ 2708 mbstate_t state_wc; \ 2709 \ 2710 for (NEW_LEN = i = 0; i < LEN;) \ 2711 { \ 2712 mbstate_t state_bak; \ 2713 \ 2714 state_bak = STATE; \ 2715 MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \ 2716 \ 2717 if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \ 2718 || MBLENGTH == 0) \ 2719 { \ 2720 if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \ 2721 STATE = state_bak; \ 2722 if (!ignore) \ 2723 COPY[NEW_LEN++] = TEXT[i++]; \ 2724 continue; \ 2725 } \ 2726 \ 2727 if (ignore) \ 2728 { \ 2729 if ((ignore == nonprinting && !iswprint (WC)) \ 2730 || (ignore == nondictionary \ 2731 && !iswalnum (WC) && !iswblank (WC))) \ 2732 { \ 2733 i += MBLENGTH; \ 2734 continue; \ 2735 } \ 2736 } \ 2737 \ 2738 if (translate) \ 2739 { \ 2740 \ 2741 uwc = towupper(WC); \ 2742 if (WC == uwc) \ 2743 { \ 2744 memcpy (mbc, TEXT + i, MBLENGTH); \ 2745 i += MBLENGTH; \ 2746 } \ 2747 else \ 2748 { \ 2749 i += MBLENGTH; \ 2750 WC = uwc; \ 2751 memset (&state_wc, '\0', sizeof (mbstate_t)); \ 2752 \ 2753 MBLENGTH = wcrtomb (mbc, WC, &state_wc); \ 2754 assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \ 2755 } \ 2756 \ 2757 for (j = 0; j < MBLENGTH; j++) \ 2758 COPY[NEW_LEN++] = mbc[j]; \ 2759 } \ 2760 else \ 2761 for (j = 0; j < MBLENGTH; j++) \ 2762 COPY[NEW_LEN++] = TEXT[i++]; \ 2763 
} \ 2764 COPY[NEW_LEN] = '\0'; \ 2765 } \ 2766 while (0) 2767 IGNORE_CHARS (new_len_a, lena, texta, copy_a, 2768 wc_a, mblength_a, state_a); 2769 IGNORE_CHARS (new_len_b, lenb, textb, copy_b, 2770 wc_b, mblength_b, state_b); 2771 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b); 2772 } 2773 else if (lena == 0) 2774 diff = - NONZERO (lenb); 2775 else if (lenb == 0) 2776 goto greater; 2777 else 2778 diff = xmemcoll (texta, lena, textb, lenb); 2779 } 2780 2781 if (diff) 2782 goto not_equal; 2783 2784 key = key->next; 2785 if (! key) 2786 break; 2787 2788 /* Find the beginning and limit of the next field. */ 2789 if (key->eword != -1) 2790 lima = limfield (a, key), limb = limfield (b, key); 2791 else 2792 lima = a->text + a->length - 1, limb = b->text + b->length - 1; 2793 2794 if (key->sword != -1) 2795 texta = begfield (a, key), textb = begfield (b, key); 2796 else 2797 { 2798 texta = a->text, textb = b->text; 2799 if (key->skipsblanks) 2800 { 2801 while (texta < lima && ismbblank (texta, lima - texta, &mblength_a)) 2802 texta += mblength_a; 2803 while (textb < limb && ismbblank (textb, limb - textb, &mblength_b)) 2804 textb += mblength_b; 2805 } 2806 } 2807 } 2808 2809 return 0; 2810 2811 greater: 2812 diff = 1; 2813 not_equal: 2814 return key->reverse ? -diff : diff; 2815 } 2816 #endif 2817 2249 2818 /* Compare two lines A and B, returning negative, zero, or positive 2250 2819 depending on whether A compares less than, equal to, or greater than B. 
*/ 2251 2820 … … 3244 3813 initialize_exit_failure (SORT_FAILURE); 3245 3814 3246 3815 hard_LC_COLLATE = hard_locale (LC_COLLATE); 3247 #if HAVE_ NL_LANGINFO3816 #if HAVE_LANGINFO_CODESET 3248 3817 hard_LC_TIME = hard_locale (LC_TIME); 3249 3818 #endif 3250 3819 … … 3265 3834 thousands_sep = -1; 3266 3835 } 3267 3836 3837 #if HAVE_MBRTOWC 3838 if (MB_CUR_MAX > 1) 3839 { 3840 inittables = inittables_mb; 3841 begfield = begfield_mb; 3842 limfield = limfield_mb; 3843 getmonth = getmonth_mb; 3844 keycompare = keycompare_mb; 3845 numcompare = numcompare_mb; 3846 } 3847 else 3848 #endif 3849 { 3850 inittables = inittables_uni; 3851 begfield = begfield_uni; 3852 limfield = limfield_uni; 3853 getmonth = getmonth_uni; 3854 keycompare = keycompare_uni; 3855 numcompare = numcompare_uni; 3856 } 3857 3268 3858 have_read_stdin = false; 3269 3859 inittables (); 3270 3860 … … 3536 4126 3537 4127 case 't': 3538 4128 { 3539 char newtab = optarg[0]; 3540 if (! newtab) 4129 char newtab[MB_LEN_MAX + 1]; 4130 size_t newtab_length = 1; 4131 strncpy (newtab, optarg, MB_LEN_MAX); 4132 if (! newtab[0]) 3541 4133 error (SORT_FAILURE, 0, _("empty tab")); 3542 if (optarg[1]) 4134 #if HAVE_MBRTOWC 4135 if (MB_CUR_MAX > 1) 4136 { 4137 wchar_t wc; 4138 mbstate_t state; 4139 size_t i; 4140 4141 memset (&state, '\0', sizeof (mbstate_t)); 4142 newtab_length = mbrtowc (&wc, newtab, strnlen (newtab, 4143 MB_LEN_MAX), 4144 &state); 4145 switch (newtab_length) 4146 { 4147 case (size_t) -1: 4148 case (size_t) -2: 4149 case 0: 4150 newtab_length = 1; 4151 } 4152 } 4153 #endif 4154 if (newtab_length == 1 && optarg[1]) 3543 4155 { 3544 4156 if (STREQ (optarg, "\\0")) 3545 newtab = '\0';4157 newtab[0] = '\0'; 3546 4158 else 3547 4159 { 3548 4160 /* Provoke with `sort -txx'. 
Complain about … … 3553 4165 quote (optarg)); 3554 4166 } 3555 4167 } 3556 if (tab != TAB_DEFAULT && tab != newtab) 4168 if (tab_length 4169 && (tab_length != newtab_length 4170 || memcmp (tab, newtab, tab_length) != 0)) 3557 4171 error (SORT_FAILURE, 0, _("incompatible tabs")); 3558 tab = newtab; 4172 memcpy (tab, newtab, newtab_length); 4173 tab_length = newtab_length; 3559 4174 } 3560 4175 break; 3561 4176 -
src/unexpand.c
diff -Naur coreutils-8.5.orig/src/unexpand.c coreutils-8.5/src/unexpand.c
--- coreutils-8.5.orig/src/unexpand.c
+++ coreutils-8.5/src/unexpand.c
@@ -39,11 +39,29 @@
 #include <stdio.h>
 #include <getopt.h>
 #include <sys/types.h>
+
+/* Get mbstate_t, mbrtowc(), wcwidth(). */
+#if HAVE_WCHAR_H
+# include <wchar.h>
+#endif
+
 #include "system.h"
 #include "error.h"
 #include "quote.h"
 #include "xstrndup.h"
 
+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
+   installation; work around this configuration error. */
+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
+# undef MB_LEN_MAX
+# define MB_LEN_MAX 16
+#endif
+
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
+#if HAVE_MBRTOWC && defined mbstate_t
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
+#endif
+
 /* The official name of this program (e.g., no `g' prefix). */
 #define PROGRAM_NAME "unexpand"
 
@@ -103,6 +121,209 @@
   {NULL, 0, NULL, 0}
 };
 
+static FILE *next_file (FILE *fp);
+
+#if HAVE_MBRTOWC
+static void
+unexpand_multibyte (void)
+{
+  FILE *fp;                     /* Input stream. */
+  mbstate_t i_state;            /* Current shift state of the input stream. */
+  mbstate_t i_state_bak;        /* Back up the I_STATE. */
+  mbstate_t o_state;            /* Current shift state of the output stream. */
+  char buf[MB_LEN_MAX + BUFSIZ];  /* For spooling a read byte sequence. */
+  char *bufpos = buf;           /* Next read position of BUF. */
+  size_t buflen = 0;            /* The length of the byte sequence in buf. */
+  wint_t wc;                    /* The wide character just read. */
+  size_t mblength;              /* The byte length of the multibyte
+                                   character that WC was decoded from. */
+
+  /* Index in `tab_list' of next tabstop: */
+  int tab_index = 0;            /* For calculating width of pending tabs. */
+  int print_tab_index = 0;      /* For printing as many tabs as possible. */
+  unsigned int column = 0;      /* Column on screen of next char. */
+  int next_tab_column;          /* Column the next tab stop is on. */
+  int convert = 1;              /* If nonzero, perform translations. */
+  unsigned int pending = 0;     /* Pending columns of blanks. */
+
+  fp = next_file ((FILE *) NULL);
+  if (fp == NULL)
+    return;
+
+  memset (&o_state, '\0', sizeof(mbstate_t));
+  memset (&i_state, '\0', sizeof(mbstate_t));
+
+  for (;;)
+    {
+      if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
+        {
+          memmove (buf, bufpos, buflen);
+          buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
+          bufpos = buf;
+        }
+
+      /* Get a wide character. */
+      if (buflen < 1)
+        {
+          mblength = 1;
+          wc = WEOF;
+        }
+      else
+        {
+          i_state_bak = i_state;
+          mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &i_state);
+        }
+
+      if (mblength == (size_t)-1 || mblength == (size_t)-2)
+        {
+          i_state = i_state_bak;
+          wc = L'\0';
+        }
+
+      if (wc == L' ' && convert && column < INT_MAX)
+        {
+          ++pending;
+          ++column;
+        }
+      else if (wc == L'\t' && convert)
+        {
+          if (tab_size == 0)
+            {
+              /* Do not let tab_index == first_free_tab;
+                 stop when it is 1 less. */
+              while (tab_index < first_free_tab - 1
+                     && column >= tab_list[tab_index])
+                tab_index++;
+              next_tab_column = tab_list[tab_index];
+              if (tab_index < first_free_tab - 1)
+                tab_index++;
+              if (column >= next_tab_column)
+                {
+                  convert = 0;  /* Ran out of tab stops. */
+                  goto flush_pend_mb;
+                }
+            }
+          else
+            {
+              next_tab_column = column + tab_size - column % tab_size;
+            }
+          pending += next_tab_column - column;
+          column = next_tab_column;
+        }
+      else
+        {
+flush_pend_mb:
+          /* Flush pending spaces. Print as many tabs as possible,
+             then print the rest as spaces. */
+          if (pending == 1)
+            {
+              putchar (' ');
+              pending = 0;
+            }
+          column -= pending;
+          while (pending > 0)
+            {
+              if (tab_size == 0)
+                {
+                  /* Do not let print_tab_index == first_free_tab;
+                     stop when it is 1 less. */
+                  while (print_tab_index < first_free_tab - 1
+                         && column >= tab_list[print_tab_index])
+                    print_tab_index++;
+                  next_tab_column = tab_list[print_tab_index];
+                  if (print_tab_index < first_free_tab - 1)
+                    print_tab_index++;
+                }
+              else
+                {
+                  next_tab_column =
+                    column + tab_size - column % tab_size;
+                }
+              if (next_tab_column - column <= pending)
+                {
+                  putchar ('\t');
+                  pending -= next_tab_column - column;
+                  column = next_tab_column;
+                }
+              else
+                {
+                  --print_tab_index;
+                  column += pending;
+                  while (pending != 0)
+                    {
+                      putchar (' ');
+                      pending--;
+                    }
+                }
+            }
+
+          if (wc == WEOF)
+            {
+              fp = next_file (fp);
+              if (fp == NULL)
+                break;          /* No more files. */
+              else
+                {
+                  memset (&i_state, '\0', sizeof(mbstate_t));
+                  continue;
+                }
+            }
+
+          if (mblength == (size_t)-1 || mblength == (size_t)-2)
+            {
+              if (convert)
+                {
+                  ++column;
+                  if (convert_entire_line == 0)
+                    convert = 0;
+                }
+              mblength = 1;
+              /* Echo the offending byte itself, not the start of BUF. */
+              putchar (*bufpos);
+            }
+          else if (mblength == 0)
+            {
+              if (convert && convert_entire_line == 0)
+                convert = 0;
+              mblength = 1;
+              putchar ('\0');
+            }
+          else
+            {
+              if (convert)
+                {
+                  if (wc == L'\b')
+                    {
+                      if (column > 0)
+                        --column;
+                    }
+                  else
+                    {
+                      int width;            /* The width of WC. */
+
+                      width = wcwidth (wc);
+                      column += (width > 0) ? width : 0;
+                      if (convert_entire_line == 0)
+                        convert = 0;
+                    }
+                }
+
+              if (wc == L'\n')
+                {
+                  tab_index = print_tab_index = 0;
+                  column = pending = 0;
+                  convert = 1;
+                }
+              fwrite (bufpos, sizeof(char), mblength, stdout);
+            }
+        }
+      buflen -= mblength;
+      bufpos += mblength;
+    }
+}
+#endif
+
+
 void
 usage (int status)
 {
@@ -524,7 +745,12 @@
 
   file_list = (optind < argc ? &argv[optind] : stdin_argv);
 
-  unexpand ();
+#if HAVE_MBRTOWC
+  if (MB_CUR_MAX > 1)
+    unexpand_multibyte ();
+  else
+#endif
+    unexpand ();
 
   if (have_read_stdin && fclose (stdin) != 0)
     error (EXIT_FAILURE, errno, "-");
coreutils-8.5
diff -Naur coreutils-8.5.orig/src/uniq.c coreutils-8.5/src/uniq.c
--- coreutils-8.5.orig/src/uniq.c
+++ coreutils-8.5/src/uniq.c
@@ -21,6 +21,16 @@
 #include <getopt.h>
 #include <sys/types.h>
 
+/* Get mbstate_t, mbrtowc(). */
+#if HAVE_WCHAR_H
+# include <wchar.h>
+#endif
+
+/* Get isw* functions. */
+#if HAVE_WCTYPE_H
+# include <wctype.h>
+#endif
+
 #include "system.h"
 #include "argmatch.h"
 #include "linebuffer.h"
@@ -31,7 +41,19 @@
 #include "stdio--.h"
 #include "xmemcoll.h"
 #include "xstrtol.h"
-#include "memcasecmp.h"
+
+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
+   installation; work around this configuration error. */
+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
+# undef MB_LEN_MAX
+# define MB_LEN_MAX 16
+#endif
+
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
+#if HAVE_MBRTOWC && defined mbstate_t
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
+#endif
+
 
 /* The official name of this program (e.g., no `g' prefix). */
 #define PROGRAM_NAME "uniq"
@@ -107,6 +129,10 @@
 /* Select whether/how to delimit groups of duplicate lines. */
 static enum delimit_method delimit_groups;
 
+/* Function pointers. */
+static char *
+(*find_field) (struct linebuffer *line);
+
 static struct option const longopts[] =
 {
   {"count", no_argument, NULL, 'c'},
@@ -206,7 +232,7 @@
    return a pointer to the beginning of the line's field to be compared. */
 
 static char *
-find_field (struct linebuffer const *line)
+find_field_uni (struct linebuffer *line)
 {
   size_t count;
   char const *lp = line->buffer;
@@ -227,6 +253,83 @@
   return line->buffer + i;
 }
 
+#if HAVE_MBRTOWC
+
+# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL)  \
+  do                                                                    \
+    {                                                                   \
+      mbstate_t state_bak;                                              \
+                                                                        \
+      CONVFAIL = 0;                                                     \
+      state_bak = *STATEP;                                              \
+                                                                        \
+      MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP);           \
+                                                                        \
+      switch (MBLENGTH)                                                 \
+        {                                                               \
+        case (size_t)-2:                                                \
+        case (size_t)-1:                                                \
+          *STATEP = state_bak;                                          \
+          CONVFAIL++;                                                   \
+          /* Fall through */                                            \
+        case 0:                                                         \
+          MBLENGTH = 1;                                                 \
+        }                                                               \
+    }                                                                   \
+  while (0)
+
+static char *
+find_field_multi (struct linebuffer *line)
+{
+  size_t count;
+  char *lp = line->buffer;
+  size_t size = line->length - 1;
+  size_t pos;
+  size_t mblength;
+  wchar_t wc;
+  mbstate_t *statep;
+  int convfail;
+
+  pos = 0;
+  statep = &(line->state);
+
+  /* skip fields. */
+  for (count = 0; count < skip_fields && pos < size; count++)
+    {
+      while (pos < size)
+        {
+          MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
+
+          if (convfail || !iswblank (wc))
+            {
+              pos += mblength;
+              break;
+            }
+          pos += mblength;
+        }
+
+      while (pos < size)
+        {
+          MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
+
+          if (!convfail && iswblank (wc))
+            break;
+
+          pos += mblength;
+        }
+    }
+
+  /* skip chars. */
+  for (count = 0; count < skip_chars && pos < size; count++)
+    {
+      MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
+      pos += mblength;
+    }
+
+  return lp + pos;
+}
+#endif
+
 /* Return false if two strings OLD and NEW match, true if not.
    OLD and NEW point not to the beginnings of the lines
    but rather to the beginnings of the fields to compare.
@@ -235,6 +338,8 @@
 static bool
 different (char *old, char *new, size_t oldlen, size_t newlen)
 {
+  char *copy_old, *copy_new;
+
   if (check_chars < oldlen)
     oldlen = check_chars;
   if (check_chars < newlen)
@@ -242,14 +347,107 @@
 
   if (ignore_case)
     {
-      /* FIXME: This should invoke strcoll somehow. */
-      return oldlen != newlen || memcasecmp (old, new, oldlen);
+      size_t i;
+
+      /* Each copy is sized and filled from its own length: the two
+         fields need not be equally long. */
+      copy_old = alloca (oldlen + 1);
+      copy_new = alloca (newlen + 1);
+
+      for (i = 0; i < oldlen; i++)
+        copy_old[i] = toupper ((unsigned char) old[i]);
+      for (i = 0; i < newlen; i++)
+        copy_new[i] = toupper ((unsigned char) new[i]);
     }
-  else if (hard_LC_COLLATE)
-    return xmemcoll (old, oldlen, new, newlen) != 0;
   else
-    return oldlen != newlen || memcmp (old, new, oldlen);
+    {
+      copy_old = (char *)old;
+      copy_new = (char *)new;
+    }
+
+  return xmemcoll (copy_old, oldlen, copy_new, newlen);
+}
+
+#if HAVE_MBRTOWC
+static int
+different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate)
+{
+  size_t i, j, k, chars;
+  const char *str[2];
+  char *copy[2];
+  size_t len[2];
+  mbstate_t state[2];
+  size_t mblength;
+  wchar_t wc, uwc;
+  mbstate_t state_bak;
+
+  str[0] = old;
+  str[1] = new;
+  len[0] = oldlen;
+  len[1] = newlen;
+  state[0] = oldstate;
+  state[1] = newstate;
+
+  for (i = 0; i < 2; i++)
+    {
+      /* J indexes the input and K the output. Case folding can change a
+         character's encoded length, so allow for the worst case. */
+      copy[i] = alloca ((ignore_case ? len[i] * MB_CUR_MAX : len[i]) + 1);
+
+      for (j = 0, k = 0, chars = 0; j < len[i] && chars < check_chars; chars++)
+        {
+          char mbc[MB_LEN_MAX];
+          size_t outlen = 0;
+
+          state_bak = state[i];
+          mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i]));
+
+          switch (mblength)
+            {
+            case (size_t)-1:
+            case (size_t)-2:
+              state[i] = state_bak;
+              /* Fall through */
+            case 0:
+              mblength = 1;
+              break;
+
+            default:
+              if (ignore_case)
+                {
+                  uwc = towupper (wc);
+
+                  if (uwc != wc)
+                    {
+                      mbstate_t state_wc;
+
+                      memset (&state_wc, '\0', sizeof(mbstate_t));
+                      outlen = wcrtomb (mbc, uwc, &state_wc);
+                      if (outlen == (size_t)-1)
+                        outlen = 0;
+                    }
+                }
+            }
+
+          /* Emit the folded form if there is one; otherwise copy the
+             input bytes, including invalid ones, unchanged. */
+          if (outlen)
+            memcpy (copy[i] + k, mbc, outlen);
+          else
+            {
+              memcpy (copy[i] + k, str[i] + j, mblength);
+              outlen = mblength;
+            }
+          k += outlen;
+          j += mblength;
+        }
+      copy[i][k] = '\0';
+      len[i] = k;
+    }
+
+  return xmemcoll (copy[0], len[0], copy[1], len[1]);
 }
+#endif
 
 /* Output the line in linebuffer LINE to standard output
    provided that the switches say it should be output.
@@ -303,15 +501,43 @@
 {
   char *prevfield IF_LINT (= NULL);
   size_t prevlen IF_LINT (= 0);
+#if HAVE_MBRTOWC
+  mbstate_t prevstate;
+
+  memset (&prevstate, '\0', sizeof (mbstate_t));
+#endif
 
   while (!feof (stdin))
     {
       char *thisfield;
       size_t thislen;
+#if HAVE_MBRTOWC
+      mbstate_t thisstate;
+#endif
+
       if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
         break;
       thisfield = find_field (thisline);
       thislen = thisline->length - 1 - (thisfield - thisline->buffer);
+#if HAVE_MBRTOWC
+      if (MB_CUR_MAX > 1)
+        {
+          thisstate = thisline->state;
+
+          if (prevline->length == 0 || different_multi
+              (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
+            {
+              fwrite (thisline->buffer, sizeof (char),
+                      thisline->length, stdout);
+
+              SWAP_LINES (prevline, thisline);
+              prevfield = thisfield;
+              prevlen = thislen;
+              prevstate = thisstate;
+            }
+        }
+      else
+#endif
       if (prevline->length == 0
           || different (thisfield, prevfield, thislen, prevlen))
         {
@@ -330,17 +556,26 @@
   size_t prevlen;
   uintmax_t match_count = 0;
   bool first_delimiter = true;
+#if HAVE_MBRTOWC
+  mbstate_t prevstate;
+#endif
 
   if (readlinebuffer_delim (prevline, stdin, delimiter) == 0)
     goto closefiles;
   prevfield = find_field (prevline);
   prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
+#if HAVE_MBRTOWC
+  prevstate = prevline->state;
+#endif
 
   while (!feof (stdin))
     {
       bool match;
       char *thisfield;
       size_t thislen;
+#if HAVE_MBRTOWC
+      mbstate_t thisstate;
+#endif
       if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
         {
           if (ferror (stdin))
@@ -349,6 +584,15 @@
         }
       thisfield = find_field (thisline);
       thislen = thisline->length - 1 - (thisfield - thisline->buffer);
+#if HAVE_MBRTOWC
+      if (MB_CUR_MAX > 1)
+        {
+          thisstate = thisline->state;
+          match = !different_multi (thisfield, prevfield,
+                                    thislen, prevlen, thisstate, prevstate);
+        }
+      else
+#endif
       match = !different (thisfield, prevfield, thislen, prevlen);
       match_count += match;
 
@@ -381,6 +625,9 @@
           SWAP_LINES (prevline, thisline);
           prevfield = thisfield;
           prevlen = thislen;
+#if HAVE_MBRTOWC
+          prevstate = thisstate;
+#endif
           if (!match)
             match_count = 0;
         }
@@ -426,6 +673,19 @@
 
   atexit (close_stdout);
 
+#if HAVE_MBRTOWC
+  if (MB_CUR_MAX > 1)
+    {
+      find_field = find_field_multi;
+    }
+  else
+#endif
+    {
+      find_field = find_field_uni;
+    }
+
+
+
   skip_chars = 0;
   skip_fields = 0;
   check_chars = SIZE_MAX;
tests/Makefile.am
diff -Naur coreutils-8.5.orig/tests/Makefile.am coreutils-8.5/tests/Makefile.am
old new 224 224 misc/sort-compress \ 225 225 misc/sort-continue \ 226 226 misc/sort-files0-from \ 227 misc/sort-mb-tests \ 227 228 misc/sort-merge \ 228 229 misc/sort-merge-fdlimit \ 229 230 misc/sort-month \ … … 474 475 $(root_tests) 475 476 476 477 pr_data = \ 478 misc/mb1.X \ 479 misc/mb1.I \ 480 misc/mb2.X \ 481 misc/mb2.I \ 477 482 pr/0F \ 478 483 pr/0FF \ 479 484 pr/0FFnt \ -
tests/misc/cut
diff -Naur coreutils-8.5.orig/tests/misc/cut coreutils-8.5/tests/misc/cut
old new 26 26 my $prog = 'cut'; 27 27 my $try = "Try \`$prog --help' for more information.\n"; 28 28 my $from_1 = "$prog: fields and positions are numbered from 1\n$try"; 29 my $inval = "$prog: invalid byte or field list\n$try";29 my $inval = "$prog: invalid byte, character or field list\n$try"; 30 30 my $no_endpoint = "$prog: invalid range with no endpoint: -\n$try"; 31 31 32 32 my @Tests = … … 141 141 142 142 # None of the following invalid ranges provoked an error up to coreutils-6.9. 143 143 ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1}, 144 {ERR=>"$prog: invalid decreasing range\n$try"}],144 {ERR=>"$prog: invalid byte, character or field list\n$try"}], 145 145 ['inval2', qw(-f -), {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}], 146 146 ['inval3', '-f', '4,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}], 147 147 ['inval4', '-f', '1-2,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}], -
tests/misc/mb1.I
diff -Naur coreutils-8.5.orig/tests/misc/mb1.I coreutils-8.5/tests/misc/mb1.I
old new 1 Apple@10 2 Banana@5 3 Citrus@20 4 Cherry@30 -
tests/misc/mb1.X
diff -Naur coreutils-8.5.orig/tests/misc/mb1.X coreutils-8.5/tests/misc/mb1.X
old new 1 Banana@5 2 Apple@10 3 Citrus@20 4 Cherry@30 -
tests/misc/mb2.I
diff -Naur coreutils-8.5.orig/tests/misc/mb2.I coreutils-8.5/tests/misc/mb2.I
old new 1 Apple@AA10@@20 2 Banana@AA5@@30 3 Citrus@AA20@@5 4 Cherry@AA30@@10 -
tests/misc/mb2.X
diff -Naur coreutils-8.5.orig/tests/misc/mb2.X coreutils-8.5/tests/misc/mb2.X
old new 1 Citrus@AA20@@5 2 Cherry@AA30@@10 3 Apple@AA10@@20 4 Banana@AA5@@30 -
tests/misc/sort-mb-tests
diff -Naur coreutils-8.5.orig/tests/misc/sort-mb-tests coreutils-8.5/tests/misc/sort-mb-tests
#! /bin/sh
# Regression tests for sort(1) run under the en_US.UTF-8 locale.
#
# Usage: sort-mb-tests [SORT-PROGRAM]
#   SORT-PROGRAM  program under test; defaults to ../src/sort.
#
# Environment:
#   VERBOSE  when non-empty, progress and failure messages are printed.
#   srcdir   directory holding the expected-output files; defaults to ".".
#
# Exit status: 0 when every test passed, 1 when at least one failed, and
# 77 when the locale does not report a UTF-8 charmap (Automake's test
# harness treats 77 as "skipped").

case $# in
  0) xx='../src/sort';;
  *) xx="$1";;
esac

# Messages go through $echo so that they vanish unless VERBOSE is set.
test "$VERBOSE" && echo=echo || echo=:
$echo testing program: $xx
errors=0
tests=0
test "$srcdir" || srcdir=.
test "$VERBOSE" && $xx --version 2> /dev/null

# Assign and export separately: "export VAR=value" is rejected by
# traditional Bourne shells.
LC_ALL=en_US.UTF-8
export LC_ALL
# Quote the pattern so the shell cannot glob-expand it before grep sees it.
locale -k LC_CTYPE 2>&1 | grep -q 'charmap.*UTF-8' || exit 77

# run_test NAME KEY
# Sort misc/NAME.I numerically on '@'-separated field KEY into misc/NAME.O
# and compare the result against $srcdir/misc/NAME.X.  Always increments
# $tests; increments $errors when sort exits non-zero, when the output
# differs from the expected file, or when cmp itself fails.
run_test ()
{
  name=$1
  key=$2
  tests=`expr $tests + 1`

  $xx -t @ -k$key -n misc/$name.I > misc/$name.O
  code=$?
  if test $code != 0; then
    $echo "Test $name failed: $xx return code $code differs from expected value 0" 1>&2
    errors=`expr $errors + 1`
  else
    cmp misc/$name.O $srcdir/misc/$name.X > /dev/null 2>&1
    # cmp exits 0 for identical files, 1 for differing files, 2 on trouble.
    case $? in
      0) if test "$VERBOSE"; then $echo "passed $name"; fi;;
      1) $echo "Test $name failed: files misc/$name.O and $srcdir/misc/$name.X differ" 1>&2
         (diff -c misc/$name.O $srcdir/misc/$name.X) 2> /dev/null
         errors=`expr $errors + 1`;;
      2) $echo "Test $name may have failed." 1>&2
         $echo The command "cmp misc/$name.O $srcdir/misc/$name.X" failed. 1>&2
         errors=`expr $errors + 1`;;
    esac
  fi
}

run_test mb1 2
run_test mb2 4

# Report the number of tests actually run instead of a hard-coded count.
if test $errors = 0; then
  $echo Passed all $tests tests. 1>&2
else
  $echo Failed $errors tests. 1>&2
fi
test $errors = 0 || errors=1
exit $errors