]> www.wagner.pp.ru Git - oss/catdoc.git/blob - src/analyze.c
Remove clever hack which relies on undefined behavior. As reported by Dmitry Makarasov...
[oss/catdoc.git] / src / analyze.c
1 /*
2   Copyright 1998-2003 Victor Wagner
3   Copyright 2003 Alex Ott
4   This file is released under the GPL.  Details can be
5   found in the file COPYING accompanying this distribution.
6 */
7 #ifdef HAVE_CONFIG_H
8 #include <config.h>
9 #endif
10 #include <stdio.h>
11 #include <limits.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include "catdoc.h"
/* Magic byte sequences compared against the start of the input file by
 * analyze_format(). Each array carries a trailing 0 byte. */

/* 8-byte signature checked before the file is handed to ole_init(). */
char ole_sign[]={0xD0,0xCF,0x11,0xE0,0xA1,0xB1,0x1A,0xE1,0};
/* RTF prefix; only its first 4 characters are compared. */
char rtf_sign[]="{\\rtf";
/* 2-byte signature of Word files whose header is read directly from
 * the file rather than through the OLE layer. */
char old_word_sign[]={0xdb,0xa5,0};
/* 2-byte signature reported as "Windows Write file". */
char write_sign[]={0x31,0xBE,0};
/* Non-zero makes parse_word_header() print details of the header. */
int verbose=0;
20 /********************************************************************* 
21  * Determines format of input file and calls parse_word_header or 
22  * process_file if
23  * it is word processor file or copy_out if it is plain text file
24  * return not 0 when error
25  ********************************************************************/ 
26 int analyze_format(FILE *f) {
27         unsigned char buffer[129];
28         long offset=0;
29         FILE *new_file, *ole_file;
30         int ret_code=69;
31
32         if (!signature_check) {
33                 /* forced parsing */
34                 /* no autodetect possible. Assume 8-bit if not overriden on
35                  * command line */ 
36                 if (!get_unicode_char) 
37                         get_unicode_char=get_8bit_char;
38                 return process_file(f,LONG_MAX);
39         }
40         catdoc_read(buffer,4,1,f);
41         buffer[4]=0;
42         if (strncmp(buffer,write_sign,2)==0) {
43                 printf("[Windows Write file. Some garbage expected]\n");
44                 get_unicode_char=get_8bit_char;
45                 return process_file(f,LONG_MAX);
46         } else if (strncmp(buffer,rtf_sign,4)==0) {
47                 return parse_rtf(f);
48         } else if (strncmp(buffer,old_word_sign,2)==0) {
49            fread(buffer+4,1,124,f);     
50            return parse_word_header(buffer,f,128,0);
51         }       
52         fread(buffer+4,1,4,f);
53         if (strncmp(buffer,ole_sign,8)==0) {
54                 if ((new_file=ole_init(f, buffer, 8)) != NULL) {
55                         set_ole_func();
56                         while((ole_file=ole_readdir(new_file)) != NULL) {
57                                 int res=ole_open(ole_file);
58                                 if (res >= 0) {
59                                         if (strcmp(((oleEntry*)ole_file)->name , "WordDocument") == 0) {
60                                                 offset=catdoc_read(buffer, 1, 128, ole_file);
61                                                 ret_code=parse_word_header(buffer,ole_file,-offset,offset);
62                                         }
63                                 } 
64                                 ole_close(ole_file);
65                         }
66                         set_std_func();
67                         ole_finish();
68                 } else {
69                         fprintf(stderr,"Broken OLE file. Try using -b switch");
70                         exit(1);
71                 }       
72         } else {
73                 set_std_func();
74                 copy_out(f,buffer);
75                 return 0;
76         }
77         
78         return ret_code;
79 }   
/* Bit masks for the flags word that parse_word_header() reads with
 * getshort(buffer,10). The descriptions follow the messages printed
 * for each bit. */
#define fDot 0x0001   /* template (DOT) file rather than document (DOC) */
#define fGlsy 0x0002  /* glossary file */
#define fComplex 0x0004 /* file was fast-saved */
#define fPictures 0x0008 /* not tested in this file; presumably "contains pictures" - TODO confirm */
#define fEncrypted 0x100 /* file is encrypted; parsing is refused */
#define fReadOnly 0x400 /* file is meant to be read-only */
#define fReserved 0x800 /* file is write-reserved */
#define fExtChar 0x1000 /* file uses extended character set */
88
89 /*******************************************************************/
90 /* parses word file info block passed in buffer.  
91  * Determines actual size of text stream and calls process_file  
92  ********************************************************************/
93 int parse_word_header(unsigned char * buffer,FILE *f,int offset,long curpos) {
94         int flags,charset, ret_code=0;
95         long textstart,textlen,i;
96         char buf[2];
97         
98         if (verbose) {
99                 printf("File Info block version %d\n",getshort(buffer,2));
100                 printf("Found at file offset %ld (hex %lx)\n",curpos,curpos);
101                 printf("Written by product version %d\n",getshort(buffer,4));
102                 printf("Language %d\n",getshort(buffer,6));
103         }
104         flags = getshort(buffer,10);
105         if (verbose) {
106                 if ((flags & fDot)) {
107                         printf("This is template (DOT) file\n");
108                 } else {
109                         printf("This is document (DOC) file\n");
110                 }
111                 if (flags & fGlsy) {
112                         printf("This is glossary file\n");
113                 }
114         }
115         if (flags & fComplex) {
116                 fprintf(stderr,"[This was fast-saved %2d times. Some information is lost]\n",
117                                 (flags & 0xF0)>>4);
118 /*              ret_code=69;*/
119         }
120         if (verbose) {
121                 if (flags & fReadOnly) {
122                         printf("File is meant to be read-only\n");
123                 }
124                 if (flags & fReserved) {
125                         printf("File is write-reserved\n");
126                 }
127         }
128         if (flags & fExtChar) {
129                 if (verbose) {
130                         printf ("File uses extended character set\n");
131                 }
132                 if (!get_unicode_char) 
133                         get_unicode_char=get_word8_char;
134
135         } else if (!get_unicode_char) 
136                 get_unicode_char=get_8bit_char;
137
138         if (verbose) {
139                 if (buffer[18]) {
140                         printf("File created on Macintosh\n");
141                 } else {
142                         printf("File created on Windows\n");
143                 } 
144         }
145         if (flags & fEncrypted) {
146                 fprintf(stderr,"[File is encrypted. Encryption key = %08lx]\n",
147                                 getlong(buffer,14));
148                 return 69;
149         }
150         if (verbose) {
151                 charset=getshort(buffer,20);
152                 if (charset&&charset !=256) {
153                         printf("Using character set %d\n",charset);
154                 } else {
155                         printf("Using default character set\n");
156                 }
157         }
158         /* skipping to textstart and computing textend */
159         textstart=getlong(buffer,24);
160         textlen=getlong(buffer,28)-textstart;
161         textstart+=offset;
162         if (verbose) {
163                 printf ("Textstart = %ld (hex %lx)\n",textstart+curpos,textstart+curpos);
164                 printf ("Textlen =   %ld (hex %lx)\n",textlen,textlen);
165         }   
166         for (i=0;i<textstart;i++) {
167                 catdoc_read(buf, 1, 1, f);
168                 if (catdoc_eof(f)) {
169                         fprintf(stderr,"File ended before textstart. Probably it is broken. Try -b switch\n");
170                         exit(1);
171                 }
172         }    
173         return process_file(f,textlen) || ret_code;
174 }   
175