From e1b370f6269a0bffac54fe20dfcb7ecc3d1a5fc1 Mon Sep 17 00:00:00 2001 From: Victor Wagner Date: Sat, 15 Nov 2025 11:35:00 +0300 Subject: [PATCH] Fix handling of dest charsets with chars > U+7FFF and missing charsets Previously, if a nonexistent charset was specified on the command line, catdoc/xls2csv/catppt silently fell back to the default charset. Now it exits with a non-zero exit code and an appropriate message. It was also found that in make_reverse_map the Unicode code was incorrectly cast to (unsigned) instead of (unsigned short), which caused a segfault if a character whose 16-bit Unicode code has its highest bit set is present in the destination charset (e.g. in mac-roman). --- src/catdoc.h | 2 +- src/charsets.c | 2 +- src/confutil.c | 23 ++++++++++------------- src/fileutil.c | 13 +++++++------ 4 files changed, 19 insertions(+), 21 deletions(-) diff --git a/src/catdoc.h b/src/catdoc.h index 3a4010f..4c189de 100644 --- a/src/catdoc.h +++ b/src/catdoc.h @@ -233,7 +233,7 @@ extern int longest_sequence;/* for checking which value of wrap_margin can cause buffer overflow*/ char *map_subst(SUBSTMAP map,int uc); -int check_charset(char **filename,const char *charset); +void check_charset(char **filename,const char *charset); int process_file(FILE *f,long stop); void copy_out(FILE *f, char *header); void output_paragraph(unsigned short int *buffer) ; diff --git a/src/charsets.c b/src/charsets.c index e64e24c..3f63e74 100644 --- a/src/charsets.c +++ b/src/charsets.c @@ -51,7 +51,7 @@ CHARSET make_reverse_map(short int *charset) { } for (i=0;i<256;i++) { k= charset[i]; - j= (unsigned)k>>8; + j= (unsigned short int)k>>8; if (!newmap[j]) { newmap[j] = (short int *)malloc(sizeof(short int)*256); if (!newmap[j]) { diff --git a/src/confutil.c b/src/confutil.c index 1bfe199..a10aa67 100644 --- a/src/confutil.c +++ b/src/confutil.c @@ -110,10 +110,9 @@ void get_locale_charset() { if (!strncmp(codeset,"646",3)) { /* ISO 646 is another name for us=ascii */ check_charset(&dest_csname,"us-ascii") ; - } else { 
- if (check_charset(&dest_csname,codeset)) { - locale_charset = dest_csname; - } + } else { + check_charset(&dest_csname,codeset); + locale_charset = dest_csname; } } else if (!strcmp(codeset,"ANSI_X3.4-1968")) { check_charset(&dest_csname,"us-ascii"); @@ -125,9 +124,8 @@ void get_locale_charset() { newstr = malloc(strlen(codeset)-4+2+1); strcpy(newstr,"cp"); strcpy(newstr+2,codeset+4); - if (check_charset(&dest_csname,newstr)) { - locale_charset = dest_csname; - } + check_charset(&dest_csname,newstr); + locale_charset = dest_csname; free(newstr); } else if (!strncmp(codeset,"IBM",3)) { char *newstr; @@ -136,18 +134,17 @@ void get_locale_charset() { newstr=malloc(strlen(codeset)+2+1); strcpy(newstr,"cp"); strcpy(newstr+2,codeset); - if (check_charset(&dest_csname, newstr)) { - locale_charset=dest_csname; - } + check_charset(&dest_csname, newstr); + locale_charset=dest_csname; free(newstr); } else { char *i,*newstr = strdup(codeset); for (i=newstr;*i;i++) { *i=tolower(*i); } - if (check_charset(&dest_csname,newstr)) { - locale_charset = dest_csname; - } + check_charset(&dest_csname,newstr); + locale_charset = dest_csname; + } } diff --git a/src/fileutil.c b/src/fileutil.c index fc8e7f7..201f259 100644 --- a/src/fileutil.c +++ b/src/fileutil.c @@ -95,24 +95,25 @@ char *find_file(char *name, const char *path) /************************************************************************/ /* Searches for charset with given name and put pointer to malloced copy*/ /* of its name into first arg if found. Otherwise leaves first arg */ -/* unchanged. Returns non-zero on success */ +/* unchanged. 
Terminates program if charset not found */ /************************************************************************/ -int check_charset(char **filename,const char *charset) { +void check_charset(char **filename,const char *charset) { char *tmppath; if (charset == NULL ) { - return 0; + exit(1); } if (!strncmp(charset,"utf-8",6)) { *filename=strdup("utf-8"); - return 1; + return; } tmppath=find_file(stradd(charset,CHARSET_EXT),charset_path); if (tmppath && *tmppath) { *filename=strdup(charset); free(tmppath); - return 1; + return; } - return 0; + fprintf(stderr, "charset %s not found\n", charset); + exit(1); } /**********************************************************************/ -- 2.47.3