Skip to content

Commit d09297c

Browse files
authored
Fix reading UTF-8 literals (#592)
1 parent 3052a3b commit d09297c

File tree

4 files changed

+93
-1
lines changed

4 files changed

+93
-1
lines changed

cobj/pplex.l.m4

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -981,6 +981,7 @@ ppinput (char *buff, int max_size)
981981
int i;
982982
int n;
983983
int coln;
984+
int buff_len = 0;
984985
char *str1 = NULL;
985986
char *str2 = NULL;
986987
int comment_counter = 0;
@@ -1152,8 +1153,19 @@ start:
11521153
within_comment = 0;
11531154
}
11541155

1156+
#ifdef I18N_UTF8
1157+
unsigned char *p = (unsigned char *)buff;
1158+
if(utf8_ext_pick(p)){
1159+
buff_len = (int) utf8_calc_sjis_size(p, strlen(buff));
1160+
}else{
1161+
buff_len = n;
1162+
}
1163+
#else /*!I18N_UTF8*/
1164+
buff_len = n;
1165+
#endif /*I18N_UTF8*/
1166+
11551167
/* check for text that is longer than cb_text_column */
1156-
if (n > cb_text_column + 1 && cb_source_format != CB_FORMAT_VARIABLE) {
1168+
if (buff_len > cb_text_column + 1 && cb_source_format != CB_FORMAT_VARIABLE) {
11571169

11581170
/* show warning if it is not whitespace */
11591171
if (cb_warn_column_overflow && last_line_2 < cb_source_line - 1) {

cobj/scanner.l.m4

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -815,6 +815,12 @@ read_literal (int mark, enum cb_category category)
815815
}
816816

817817
while ((c = input ()) != EOF) {
818+
#if EOF != 0
819+
if (unlikely (c == 0)){
820+
cb_error(_("The literal is not properly closed by %c."), mark);
821+
break;
822+
}
823+
#endif
818824
plexbuff[i++] = c;
819825
if (c == mark && (c = input ()) != mark) {
820826
i--;

tests/cobol_utf8.src/pic-n.at

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,3 +520,40 @@ AT_CHECK([echo -n '゜ポンデリング' | nkf --ic=UTF-8 --oc=Shift_JIS > out2
520520
AT_CHECK([diff out1.txt out2.txt])
521521

522522
AT_CLEANUP
523+
524+
AT_SETUP([PIC N WRITE FROM Japanese literal])
525+
export LC_ALL=''
526+
527+
AT_DATA([prog.cob], [
528+
IDENTIFICATION DIVISION.
529+
PROGRAM-ID. prog.
530+
ENVIRONMENT DIVISION.
531+
INPUT-OUTPUT SECTION.
532+
FILE-CONTROL.
533+
SELECT TEST-FILE ASSIGN TO "TESTFILE"
534+
ORGANIZATION IS SEQUENTIAL.
535+
DATA DIVISION.
536+
FILE SECTION.
537+
FD TEST-FILE.
538+
01 TEST-DATA PIC N(18).
539+
WORKING-STORAGE SECTION.
540+
PROCEDURE DIVISION.
541+
OPEN OUTPUT TEST-FILE.
542+
WRITE TEST-DATA FROM "縄文弥生古墳飛鳥奈良平安鎌倉室町江戸".
543+
CLOSE TEST-FILE.
544+
545+
OPEN INPUT TEST-FILE.
546+
READ TEST-FILE
547+
NOT AT END
548+
DISPLAY TEST-DATA
549+
END-READ.
550+
CLOSE TEST-FILE.
551+
])
552+
553+
AT_CHECK([cobj prog.cob])
554+
AT_CHECK([java prog > out1.txt])
555+
AT_CHECK([echo -n '縄文弥生古墳飛鳥奈良平安鎌倉室町江戸
556+
' | nkf --ic=UTF-8 --oc=Shift_JIS > out2.txt])
557+
AT_CHECK([diff out1.txt out2.txt])
558+
559+
AT_CLEANUP

tests/cobol_utf8.src/pic-x.at

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,43 @@ AT_CHECK([diff out1.txt out2.txt])
524524

525525
AT_CLEANUP
526526

527+
AT_SETUP([WRITE FROM Japanese literal])
528+
export LC_ALL=''
529+
530+
AT_DATA([prog.cob], [
531+
IDENTIFICATION DIVISION.
532+
PROGRAM-ID. prog.
533+
ENVIRONMENT DIVISION.
534+
INPUT-OUTPUT SECTION.
535+
FILE-CONTROL.
536+
SELECT TEST-FILE ASSIGN TO "TESTFILE"
537+
ORGANIZATION IS SEQUENTIAL.
538+
DATA DIVISION.
539+
FILE SECTION.
540+
FD TEST-FILE.
541+
01 TEST-DATA PIC X(36).
542+
WORKING-STORAGE SECTION.
543+
PROCEDURE DIVISION.
544+
OPEN OUTPUT TEST-FILE.
545+
WRITE TEST-DATA FROM "縄文弥生古墳飛鳥奈良平安鎌倉室町江戸".
546+
CLOSE TEST-FILE.
547+
548+
OPEN INPUT TEST-FILE.
549+
READ TEST-FILE
550+
NOT AT END
551+
DISPLAY TEST-DATA
552+
END-READ.
553+
CLOSE TEST-FILE.
554+
])
555+
556+
AT_CHECK([cobj prog.cob])
557+
AT_CHECK([java prog > out1.txt])
558+
AT_CHECK([echo -n '縄文弥生古墳飛鳥奈良平安鎌倉室町江戸
559+
' | nkf --ic=UTF-8 --oc=Shift_JIS > out2.txt])
560+
AT_CHECK([diff out1.txt out2.txt])
561+
562+
AT_CLEANUP
563+
527564
#AT_SETUP([Readable string literals])
528565
#export LC_ALL=''
529566
## Older compilers convert string literals "日本語" in COBOL source code

0 commit comments

Comments
 (0)