2 Copyright 1998-2003 Victor Wagner
3 Copyright 2003 Alex Ott
4 This file is released under the GPL. Details can be
5 found in the file COPYING accompanying this distribution.
/*
 * File signatures that analyze_format() compares, via strncmp(), against the
 * leading bytes of the input file.
 *
 * The byte values are spelled as hexadecimal escapes inside string literals
 * rather than as integer constants: values above 0x7F do not fit a signed
 * char, so a brace initialiser such as {0xD0, ...} relies on an
 * implementation-defined conversion and draws an overflow warning.  The
 * arrays keep exactly the same bytes, the same terminating 0 and the same
 * sizes as before.
 */

/* 8-byte signature; a file that matches it is handed to ole_init(). */
char ole_sign[] = "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1";

/* Text prefix "{\rtf"; analyze_format() compares its first 4 characters. */
char rtf_sign[] = "{\\rtf";

/* 2-byte signature; a match is parsed directly with parse_word_header(). */
char old_word_sign[] = "\xdb\xa5";

/* 2-byte signature reported by analyze_format() as a Windows Write file. */
char write_sign[] = "\x31\xBE";
20 /*********************************************************************
21 * Determines the format of the input file and calls parse_word_header or
23 * process_file if it is a word processor file, or copy_out if it is a plain text file.
24 * Returns non-zero on error.
25 ********************************************************************/
/*
 * analyze_format - identify the input by its leading bytes and dispatch it.
 *
 * f: input stream, read from its current position.
 *
 * Flow, as far as it is visible in this excerpt:
 *   - signature_check off: no detection is attempted; get_unicode_char
 *     defaults to get_8bit_char when still unset and the whole stream goes
 *     to process_file(f, LONG_MAX).
 *   - otherwise the first 4 bytes are read into buffer and compared with
 *     write_sign (2 bytes), rtf_sign (4 bytes) and old_word_sign (2 bytes).
 *       write_sign    -> prints a notice, forces get_8bit_char and calls
 *                        process_file(f, LONG_MAX).
 *       old_word_sign -> reads 124 more bytes (128 in total) and calls
 *                        parse_word_header(buffer, f, 128, 0).
 *   - 4 further bytes are then read so that all 8 bytes of ole_sign can be
 *     compared; on a match ole_init() is called and the entries returned by
 *     ole_readdir() are walked.  For the entry named "WordDocument" the
 *     first 128 bytes are read and given to parse_word_header().
 *
 * Returns the result of process_file() / parse_word_header() on the paths
 * shown here.
 *
 * NOTE(review): several lines of this function are missing from this
 * excerpt (the body of the rtf_sign branch, the declarations of offset and
 * ret_code, the end of the directory loop and the final return); confirm
 * those parts against the complete source.
 */
26 int analyze_format(FILE *f) {
/* Holds the file's leading bytes: at most 128 are stored below
 * (4 + 124 in the old_word_sign branch, 128 from the "WordDocument" entry). */
27 unsigned char buffer[129];
/* new_file receives the result of ole_init(); ole_file is the entry
 * currently returned by ole_readdir(). */
29 FILE *new_file, *ole_file;
/* With signature checking switched off the input is processed as-is. */
32 if (!signature_check) {
34 /* no autodetect possible. Assume 8-bit if not overriden on
36 if (!get_unicode_char)
37 get_unicode_char=get_8bit_char;
38 return process_file(f,LONG_MAX);
40 catdoc_read(buffer,4,1,f);
42 if (strncmp(buffer,write_sign,2)==0) {
43 printf("[Windows Write file. Some garbage expected]\n");
44 get_unicode_char=get_8bit_char;
45 return process_file(f,LONG_MAX);
46 } else if (strncmp(buffer,rtf_sign,4)==0) {
48 } else if (strncmp(buffer,old_word_sign,2)==0) {
49 fread(buffer+4,1,124,f);
50 return parse_word_header(buffer,f,128,0);
52 fread(buffer+4,1,4,f);
53 if (strncmp(buffer,ole_sign,8)==0) {
54 if ((new_file=ole_init(f, buffer, 8)) != NULL) {
56 while((ole_file=ole_readdir(new_file)) != NULL) {
57 int res=ole_open(ole_file);
59 if (strcmp(((oleEntry*)ole_file)->name , "WordDocument") == 0) {
60 offset=catdoc_read(buffer, 1, 128, ole_file);
61 ret_code=parse_word_header(buffer,ole_file,-offset,offset);
69 fprintf(stderr,"Broken OLE file. Try using -b switch");
/*
 * Bit masks tested against the flag word that parse_word_header() reads with
 * getshort(buffer,10).
 */
/* Set when the file was fast-saved; parse_word_header() warns that some
 * information is lost. */
82 #define fComplex 0x0004
/* Not tested in the code visible here; presumably marks a file that
 * contains pictures - TODO confirm. */
83 #define fPictures 0x0008
/* Set when the file is encrypted; parse_word_header() reports this on stderr. */
84 #define fEncrypted 0x100
/* Reported as "File is meant to be read-only". */
85 #define fReadOnly 0x400
/* Reported as "File is write-reserved". */
86 #define fReserved 0x800
/* Set when the file uses the extended character set; selects get_word8_char
 * instead of get_8bit_char when no reader has been chosen yet. */
87 #define fExtChar 0x1000
89 /*******************************************************************/
90 /* parses word file info block passed in buffer.
91 * Determines actual size of text stream and calls process_file
92 ********************************************************************/
/*
 * parse_word_header - interpret the Word file information block held in
 * buffer, pick a character reader, skip to the text and run process_file().
 *
 * buffer: header bytes already read by the caller.  The lines visible here
 *         read 16-bit fields at offsets 2, 4, 6, 10 and 20 and 32-bit fields
 *         at offsets 24 and 28.
 * f:      stream to keep reading from; textstart bytes are consumed from it
 *         before it is passed to process_file().
 * offset: not referenced in the lines visible in this excerpt.
 *         NOTE(review): confirm its use against the complete source.
 * curpos: position of the header in the file; in the lines visible here it
 *         only appears in the informational printouts.
 *
 * Returns the logical OR of process_file()'s result and ret_code, i.e. 0
 * only when both are zero.
 *
 * NOTE(review): many lines are missing from this excerpt (the conditions
 * guarding the printouts, several else branches, the continuation arguments
 * of two fprintf calls, the declaration of buf and the handling after the
 * "File ended before textstart" message).
 */
93 int parse_word_header(unsigned char * buffer,FILE *f,int offset,long curpos) {
94 int flags,charset, ret_code=0;
95 long textstart,textlen,i;
/* Informational dump of the leading header fields. */
99 printf("File Info block version %d\n",getshort(buffer,2));
100 printf("Found at file offset %ld (hex %lx)\n",curpos,curpos);
101 printf("Written by product version %d\n",getshort(buffer,4));
102 printf("Language %d\n",getshort(buffer,6));
/* Flag word; every test below masks it with one of the f* constants. */
104 flags = getshort(buffer,10);
106 if ((flags & fDot)) {
107 printf("This is template (DOT) file\n");
109 printf("This is document (DOC) file\n");
112 printf("This is glossary file\n");
/* A fast-saved file is still processed, but the user is warned on stderr. */
115 if (flags & fComplex) {
116 fprintf(stderr,"[This was fast-saved %2d times. Some information is lost]\n",
121 if (flags & fReadOnly) {
122 printf("File is meant to be read-only\n");
124 if (flags & fReserved) {
125 printf("File is write-reserved\n");
/* Choose the character reader, but only when none is set yet:
 * get_word8_char for the extended character set, get_8bit_char otherwise. */
128 if (flags & fExtChar) {
130 printf ("File uses extended character set\n");
132 if (!get_unicode_char)
133 get_unicode_char=get_word8_char;
135 } else if (!get_unicode_char)
136 get_unicode_char=get_8bit_char;
140 printf("File created on Macintosh\n");
142 printf("File created on Windows\n");
/* Encryption is reported on stderr. */
145 if (flags & fEncrypted) {
146 fprintf(stderr,"[File is encrypted. Encryption key = %08lx]\n",
/* Character set id from offset 20; a value other than 0 or 256 is printed
 * as an explicit character set. */
151 charset=getshort(buffer,20);
152 if (charset&&charset !=256) {
153 printf("Using character set %d\n",charset);
155 printf("Using default character set\n");
158 /* skip forward to textstart and compute the length of the text (textlen) */
159 textstart=getlong(buffer,24);
160 textlen=getlong(buffer,28)-textstart;
163 printf ("Textstart = %ld (hex %lx)\n",textstart+curpos,textstart+curpos);
164 printf ("Textlen = %ld (hex %lx)\n",textlen,textlen);
/* Consume textstart bytes, one per iteration, before the text is processed.
 * NOTE(review): buf is not declared in the lines visible here. */
166 for (i=0;i<textstart;i++) {
167 catdoc_read(buf, 1, 1, f);
169 fprintf(stderr,"File ended before textstart. Probably it is broken. Try -b switch\n");
/* Non-zero when either the text processing or ret_code signals a problem. */
173 return process_file(f,textlen) || ret_code;