2 Copyright 1998-2003 Victor Wagner
3 Copyright 2003 Alex Ott
4 This file is released under the GPL. Details can be
5 found in the file COPYING accompanying this distribution.
/* File-format signatures that analyze_format() compares against the first
 * bytes of the input. Each array carries a trailing 0 byte. */
15 extern char ole_sign[], zip_sign[]; /* from ole.c */
/* Leading bytes of an RTF file; analyze_format() compares the first 4. */
16 char rtf_sign[]="{\\rtf";
/* Two-byte signature for which analyze_format() reads a 128-byte header and
 * calls parse_word_header() directly, without going through ole_init(). */
17 char old_word_sign[]={0xdb,0xa5,0};
/* Two-byte signature that analyze_format() reports as a Windows Write file. */
18 char write_sign[]={0x31,0xBE,0};
20 /*********************************************************************
21 * Determines format of input file and calls parse_word_header or
23 * process_file if it is a word processor file, or copy_out if it is a
24 * plain text file. Returns non-zero on error.
 *
 * Steps visible in this listing, in order:
 *  - signature_check is false: no autodetection; get_unicode_char defaults
 *    to get_8bit_char when unset and the whole stream goes to
 *    process_file(f,LONG_MAX).
 *  - otherwise the first 4 bytes are read with catdoc_read and compared:
 *      write_sign (2 bytes)    - prints a notice, forces get_8bit_char and
 *                                returns process_file(f,LONG_MAX)
 *      rtf_sign (4 bytes)      - branch body is not visible in this listing
 *      zip_sign (4 bytes)      - prints "not supported" on stderr
 *      old_word_sign (2 bytes) - reads 124 more bytes and returns
 *                                parse_word_header(buffer,f,128,0)
 *  - otherwise 4 more bytes are read and 8 bytes are compared to ole_sign;
 *    when ole_init succeeds every entry returned by ole_readdir is opened
 *    and, for the entry named "WordDocument", up to 128 bytes are read and
 *    passed to parse_word_header(buffer,ole_file,-offset,offset), whose
 *    result is stored in ret_code.
 *  - "Broken OLE file" is printed on stderr, presumably when ole_init
 *    returns NULL; copy_out(f,buffer) is presumably the fallback for input
 *    that matches no signature. The conditions guarding both calls are not
 *    visible here - TODO confirm.
 *
 * NOTE(review): this listing skips source lines (see the embedded numbers):
 * the declarations of offset and ret_code, several branch bodies and the
 * closing braces are not shown, and the comment opened inside the
 * signature_check branch has no visible terminator.
 * NOTE(review): in the visible lines the results of the initial
 * catdoc_read and of both fread calls are not checked, and buffer has no
 * initializer, so a short input may leave part of buffer unset before it is
 * compared or parsed - confirm against the full source.
25 ********************************************************************/
26 int analyze_format(FILE *f) {
/* Leading bytes of the input: 4 or 8 signature bytes, or a 128-byte header
 * handed to parse_word_header. */
27 unsigned char buffer[129];
29 FILE *new_file, *ole_file;
32 if (!signature_check) {
34 /* no autodetect possible. Assume 8-bit if not overriden on
36 if (!get_unicode_char)
37 get_unicode_char=get_8bit_char;
38 return process_file(f,LONG_MAX);
40 catdoc_read(buffer,4,1,f);
42 if (strncmp((char *)&buffer,write_sign,2)==0) {
43 printf("[Windows Write file. Some garbage expected]\n");
44 get_unicode_char=get_8bit_char;
45 return process_file(f,LONG_MAX);
46 } else if (strncmp((char *)&buffer,rtf_sign,4)==0) {
48 } else if (strncmp((char *)&buffer, zip_sign,4) == 0) {
49 fprintf(stderr,"This file looks like ZIP archive or Office 2007 "
50 "or later file.\nNot supported by catdoc\n");
52 } else if (strncmp((char *)&buffer,old_word_sign,2)==0) {
53 fread(buffer+4,1,124,f);
54 return parse_word_header(buffer,f,128,0);
56 fread(buffer+4,1,4,f);
57 if (strncmp((char *)&buffer,ole_sign,8)==0) {
58 if ((new_file=ole_init(f, buffer, 8)) != NULL) {
60 while((ole_file=ole_readdir(new_file)) != NULL) {
61 int res=ole_open(ole_file);
63 if (strcmp(((oleEntry*)ole_file)->name , "WordDocument") == 0) {
64 offset=catdoc_read(buffer, 1, 128, ole_file);
65 ret_code=parse_word_header(buffer,ole_file,-offset,offset);
73 fprintf(stderr,"Broken OLE file. Try using -b switch\n");
78 copy_out(f,(char *)&buffer);
/* Bit masks applied by parse_word_header() to the 16-bit flag word it reads
 * with getshort(buffer,10).
 * NOTE(review): fDot is tested there as well, but its definition is not
 * visible in this listing. */
/* File was fast-saved; parse_word_header() warns that information is lost. */
86 #define fComplex 0x0004
/* Not referenced in the lines visible in this listing. */
87 #define fPictures 0x0008
/* File is encrypted; parse_word_header() reports it on stderr. */
88 #define fEncrypted 0x100
/* Reported as "File is meant to be read-only". */
89 #define fReadOnly 0x400
/* Reported as "File is write-reserved". */
90 #define fReserved 0x800
/* Extended character set; selects get_word8_char when no reader is set. */
91 #define fExtChar 0x1000
93 /*******************************************************************/
94 /* parses word file info block passed in buffer.
95 * Determines actual size of text stream and calls process_file
 *
 * buffer - start of the file info block; fields are read from it with
 *          getshort()/getlong() at fixed byte offsets
 * f      - stream the text is subsequently read from
 * offset - not referenced in the lines visible in this listing
 * curpos - position of the info block; in the visible lines it is used
 *          only in printed diagnostics
 *
 * Returns process_file(f,textlen) || ret_code, i.e. 0 or 1.
 *
 * NOTE(review): this listing skips source lines (see the embedded numbers),
 * so some conditions, printf/fprintf arguments, closing braces and the
 * declaration of buf are not shown; the comments below describe only what
 * is visible.
96 ********************************************************************/
97 int parse_word_header(unsigned char * buffer,FILE *f,int offset,long curpos) {
98 int flags,charset, ret_code=0;
99 long textstart,textlen,i;
/* Diagnostic dump of the header fields at byte offsets 2, 4 and 6. */
103 printf("File Info block version %d\n",getshort(buffer,2));
104 printf("Found at file offset %ld (hex %lx)\n",curpos,curpos);
105 printf("Written by product version %d\n",getshort(buffer,4));
106 printf("Language %d\n",getshort(buffer,6));
/* Flag word at byte offset 10, tested against the f* bit masks below. */
108 flags = getshort(buffer,10);
110 if ((flags & fDot)) {
111 printf("This is template (DOT) file\n");
113 printf("This is document (DOC) file\n");
/* NOTE(review): the condition guarding the glossary message is not visible. */
116 printf("This is glossary file\n");
/* Fast-saved files trigger a warning on stderr. */
119 if (flags & fComplex) {
120 fprintf(stderr,"[This was fast-saved %2d times. Some information is lost]\n",
125 if (flags & fReadOnly) {
126 printf("File is meant to be read-only\n");
128 if (flags & fReserved) {
129 printf("File is write-reserved\n");
/* A character reader is chosen only when none has been selected yet:
 * get_word8_char for extended-charset files, get_8bit_char otherwise. */
132 if (flags & fExtChar) {
134 printf ("File uses extended character set\n");
136 if (!get_unicode_char)
137 get_unicode_char=get_word8_char;
139 } else if (!get_unicode_char)
140 get_unicode_char=get_8bit_char;
/* NOTE(review): the condition choosing between the next two messages is not
 * visible in this listing. */
144 printf("File created on Macintosh\n");
146 printf("File created on Windows\n");
/* Encrypted files are reported on stderr; the rest of this branch is not
 * visible here. */
149 if (flags & fEncrypted) {
150 fprintf(stderr,"[File is encrypted. Encryption key = %08lx]\n",
/* Character set id at byte offset 20. Values other than 0 and 256 are
 * printed; the default-set message is presumably the else branch. */
155 charset=getshort(buffer,20);
156 if (charset&&charset !=256) {
157 printf("Using character set %d\n",charset);
159 printf("Using default character set\n");
162 /* skipping to textstart and computing textend */
/* Text length is the difference of the longs at byte offsets 28 and 24.
 * NOTE(review): no check that the result is non-negative is visible. */
163 textstart=getlong(buffer,24);
164 textlen=getlong(buffer,28)-textstart;
167 printf ("Textstart = %ld (hex %lx)\n",textstart+curpos,textstart+curpos);
168 printf ("Textlen = %ld (hex %lx)\n",textlen,textlen);
/* Consume textstart bytes from f, one catdoc_read call per byte. The check
 * that leads to the error message below is not visible in this listing. */
170 for (i=0;i<textstart;i++) {
171 catdoc_read(buf, 1, 1, f);
173 fprintf(stderr,"File ended before textstart. Probably it is broken. Try -b switch\n");
/* Logical OR collapses the result to 0 (both zero) or 1. */
177 return process_file(f,textlen) || ret_code;