From 790ecc75063e718e33528060ce966088e9aa99db Mon Sep 17 00:00:00 2001 From: Victor Wagner Date: Tue, 18 Jul 2006 11:20:01 +0000 Subject: [PATCH] Similar fix in reader.c --- src/reader.c | 2 +- src/rtfread.c | 224 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 225 insertions(+), 1 deletion(-) diff --git a/src/reader.c b/src/reader.c index b51996e..4db9ffb 100644 --- a/src/reader.c +++ b/src/reader.c @@ -170,7 +170,7 @@ int process_file(FILE *f,long stop) { * else*/ buffer[++bufptr]=c; } - } while (bufptr0) { diff --git a/src/rtfread.c b/src/rtfread.c index 8ed1be6..cbfc103 100644 --- a/src/rtfread.c +++ b/src/rtfread.c @@ -173,6 +173,230 @@ void add_to_buffer(int *bufptr,unsigned short int c) { buffer[++(*bufptr)]=c; if (*bufptr >= PARAGRAPH_BUFFER-2) { buffer[++(*bufptr)]=0; +/*****************************************************************/ +/* Reading routines for MS-Word, MS-Write and text files */ +/* */ +/* This file is part of catdoc project */ +/* (c) Victor Wagner 1996-2003, (c) Alex Ott 2003 */ +/*****************************************************************/ +#ifdef HAVE_CONFIG_H +#include +#endif +#include +#include +#include "catdoc.h" +unsigned short int buffer[PARAGRAPH_BUFFER]; +static unsigned char read_buf[256]; +static int buf_is_unicode; + +/**************************************************************************/ +/* Just prints out content of input file. Called when file is not OLE */ +/* stream */ +/* Parameters - f - file to copy out. header - first few bytes of file, */ +/* which have been already read by format recognition code, but should */ +/* be output anyway */ +/**************************************************************************/ +void copy_out (FILE *f,char *header) { + char *buf=(char *)buffer; + int count,i; + long offset; + if (get_unicode_char == get_word8_char) { + /* non-word file and -u specified. 
Trying to guess which kind of + * unicode is used + */ + if ((unsigned char)header[0]==0xFE && (unsigned char)header[1]==0xFF) { + get_unicode_char = get_utf16msb; + fputs(convert_char(header[2]<<8|header[3]),stdout); + fputs(convert_char(header[4]<<8|header[5]),stdout); + fputs(convert_char(header[6]<<8|header[7]),stdout); + } else if ((unsigned char)header[0]!=0xFF || + (unsigned char)header[1]!=0xFE) { + int c,j,d; + /* if it is not utf16, assume it is UTF8. We are told -u, + * aren't we */ + get_unicode_char = get_utf8; + i=0; + while (i<8) { + c=(unsigned char)header[i++]; + if (c >=0x80) { + if ( c<0xE0) { + c=(c & 0x1F); + count =1; + } else { + c=(c & 0xF); + count = 2; + } + for (j=0;j0) { + buffer[++bufptr]=0; + output_paragraph(buffer); + } + } + return 0; +} +/**********************************************************************/ +/* Reads file from MS-Word 97 and above file. Takes into account the * + * strange situation that unicode and non-unicode 256-byte blocks * + * could be intermixed in word file * + * * + * Parameters: * + * * + * f - file to read * + * offset - position of the character inside file (to determine * + * possible block boundaries) * + **********************************************************************/ +int get_word8_char(FILE *f,long *offset,long fileend) { + int count,i,u; + char c; + if ((i=(*offset)%256) ==0) { + count=catdoc_read(read_buf,1,256,f); + memset(read_buf+count,0,256-count); + buf_is_unicode=0; + if (*offset+(long)count>fileend) { + count=fileend-*offset; + } + while (i