X-Git-Url: http://www.wagner.pp.ru/gitweb/?a=blobdiff_plain;f=src%2Frtfread.c;h=86a57fbb771a4f04a15b27a8890263cbf963f826;hb=59325793f5368e5eb82da49328ae9a580c33b8e0;hp=cbfc103b896bf953712bbf1137e6fe23fe4ae009;hpb=790ecc75063e718e33528060ce966088e9aa99db;p=oss%2Fcatdoc.git diff --git a/src/rtfread.c b/src/rtfread.c index cbfc103..86a57fb 100644 --- a/src/rtfread.c +++ b/src/rtfread.c @@ -103,6 +103,7 @@ RTFTypeMap rtf_types[]={ #define RTFNAMEMAXLEN 32 #define RTFARGSMAXLEN 64 +#define MAX_DIGITS_IN_NUMBER 10 /** * Structure describing rtf command @@ -173,230 +174,6 @@ void add_to_buffer(int *bufptr,unsigned short int c) { buffer[++(*bufptr)]=c; if (*bufptr >= PARAGRAPH_BUFFER-2) { buffer[++(*bufptr)]=0; -/*****************************************************************/ -/* Reading routines for MS-Word, MS-Write and text files */ -/* */ -/* This file is part of catdoc project */ -/* (c) Victor Wagner 1996-2003, (c) Alex Ott 2003 */ -/*****************************************************************/ -#ifdef HAVE_CONFIG_H -#include -#endif -#include -#include -#include "catdoc.h" -unsigned short int buffer[PARAGRAPH_BUFFER]; -static unsigned char read_buf[256]; -static int buf_is_unicode; - -/**************************************************************************/ -/* Just prints out content of input file. Called when file is not OLE */ -/* stream */ -/* Parameters - f - file to copy out. header - first few bytes of file, */ -/* which have been already read by format recognition code, but should */ -/* be output anyway */ -/**************************************************************************/ -void copy_out (FILE *f,char *header) { - char *buf=(char *)buffer; - int count,i; - long offset; - if (get_unicode_char == get_word8_char) { - /* non-word file and -u specified. Trying to guess which kind of - * unicode is used - */ - if ((unsigned char)header[0]==0xFE && (unsigned char)header[1]==0xFF) { - get_unicode_char = get_utf16msb; - fputs(convert_char(header[2]<<8|header[3]),stdout); - fputs(convert_char(header[4]<<8|header[5]),stdout); - fputs(convert_char(header[6]<<8|header[7]),stdout); - } else if ((unsigned char)header[0]!=0xFF || - (unsigned char)header[1]!=0xFE) { - int c,j,d; - /* if it is not utf16, assume it is UTF8. We are told -u, - * aren't we */ - get_unicode_char = get_utf8; - i=0; - while (i<8) { - c=(unsigned char)header[i++]; - if (c >=0x80) { - if ( c<0xE0) { - c=(c & 0x1F); - count =1; - } else { - c=(c & 0xF); - count = 2; - } - for (j=0;j0) { - buffer[++bufptr]=0; - output_paragraph(buffer); - } - } - return 0; -} -/**********************************************************************/ -/* Reads file from MS-Word 97 and above file. Takes in account strange* - * situation that unicode and non-unicode 256-byte blocks could be * - * intermixed in word file * - * * - * Parameters: * - * * - * f - file to read * - * offset - position of the character inside file (to determine * - * possible block boundaries * - **********************************************************************/ -int get_word8_char(FILE *f,long *offset,long fileend) { - int count,i,u; - char c; - if ((i=(*offset)%256) ==0) { - count=catdoc_read(read_buf,1,256,f); - memset(read_buf+count,0,256-count); - buf_is_unicode=0; - if (*offset+(long)count>fileend) { - count=fileend-*offset; - } - while (i0) - fgetc(f); + while((--i)>0) { + int c=fgetc(f); + if (c == '\\') { + c = fgetc(f); + switch (c) { + case '\\': break; + case '\'': + /* skip two hex digits */ + fgetc(f); + fgetc(f); + break; + default: + break; + } + } + } + break; case RTF_PARA: /*if (para_mode > 0) {*/ @@ -579,6 +371,8 @@ signed long getNumber(FILE *f) { while(isdigit(c=fgetc(f)) || c=='-') { if(feof(f)) return -1; + if (count > MAX_DIGITS_IN_NUMBER) + break; buf[count++]=(char)c; } ungetc(c,f);