]> www.wagner.pp.ru Git - oss/catdoc.git/blob - src/ole.c
Add detection of ZIP-archive and report that this type of file (i.e. OOXML or OpenDoc...
[oss/catdoc.git] / src / ole.c
1 /**
2  * @file   ole.c
3  * @author Alex Ott, Victor B Wagner
4  * @date   Wed Jun 11 12:33:01 2003
5  * Version: $Id: ole.c,v 1.2 2006-02-25 15:28:14 vitus Exp $
6  * Copyright: Victor B Wagner, 1996-2003 Alex Ott, 2003
7  * 
8  * @brief  Parsing structure of MS Office compound document
9  * 
10  * This file is part of catdoc project
11  * and distributed under GNU Public License
12  * 
13  */
14 #ifdef HAVE_CONFIG_H
15 #include <config.h>
16 #endif
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <errno.h>
22
23 #include "catdoc.h"
24
/* Smaller of two values; either argument may be evaluated twice. */
#define min(x,y) (((x) < (y)) ? (x) : (y))
26
27 long int sectorSize, shortSectorSize;
28 /* BBD Info */
29 long int  bbdNumBlocks;
30 unsigned char *BBD=NULL;
31 /* SBD Info */
32 long int sbdNumber, sbdStart, sbdLen;
33 unsigned char *SBD=NULL;
34 oleEntry *rootEntry=NULL;
35 /* Properties Info */
36 long propCurNumber, propLen, propNumber, propStart;
37 unsigned char *properties=NULL;
38 long int fileLength=0;
39
/* First eight bytes of an OLE compound document, followed by a NUL.
 * Spelled as hex escapes so that the bytes above 0x7F are not int
 * constants narrowed into plain char by the initializer. */
char ole_sign[]="\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1";
/* First four bytes of a ZIP archive; ole_init() reports these files as
 * unsupported. */
char zip_sign[]="PK\003\004";
42
43 /** 
44  * Initializes ole structure
45  * 
46  * @param f (FILE *) compound document file, positioned at bufSize
47  *           byte. Might be pipe or socket 
48  * @param buffer (void *) bytes already read from f
49  * @param bufSize number of bytes already read from f should be less
50  *                than 512 
51  * 
52  * @return 
53  */
54 FILE* ole_init(FILE *f, void *buffer, size_t bufSize)  {
55         unsigned char oleBuf[BBD_BLOCK_SIZE];
56         unsigned char *tmpBuf;
57         FILE *newfile;
58         int ret=0, i;
59         long int sbdMaxLen, sbdCurrent, propMaxLen, propCurrent, mblock, msat_size;
60         oleEntry *tEntry;
61
62         /* deleting old data (if it was allocated) */
63         ole_finish();
64         
65         if (fseek(f,0,SEEK_SET) == -1) {
66                 if ( errno == ESPIPE ) {
67                         /* We got non-seekable file, create temp file */
68                         if((newfile=tmpfile()) == NULL) {
69                                 perror("Can't create tmp file");
70                                 return NULL;
71                         }
72                         if (bufSize > 0) {
73                                 ret=fwrite(buffer, 1, bufSize, newfile);
74                                 if(ret != bufSize) {
75                                         perror("Can't write to tmp file");
76                                         return NULL;
77                                 }
78                         }
79                         
80                         while(!feof(f)){
81                                 ret=fread(oleBuf,1,BBD_BLOCK_SIZE,f);
82                                 fwrite(oleBuf, 1, ret, newfile);
83                         }
84                         fseek(newfile,0,SEEK_SET);
85                 } else {
86                         perror("Can't seek in file");
87                         return NULL;
88                 }
89         } else {
90                 newfile=f;
91         }       
92         fseek(newfile,0,SEEK_END);
93         fileLength=ftell(newfile);
94 /*      fprintf(stderr, "fileLength=%ld\n", fileLength); */
95         fseek(newfile,0,SEEK_SET);
96         ret=fread(oleBuf,1,BBD_BLOCK_SIZE,newfile);
97         if ( ret != BBD_BLOCK_SIZE ) {
98                 return NULL;
99         }
100         if (strncmp((char *)&oleBuf,zip_sign,4) == 0) {
101                 fprintf(stderr,"Looks like ZIP archive or Office 2007 or later. Not supported\n");
102                 return NULL;
103         } else if (strncmp((char *)&oleBuf,ole_sign,8) != 0) {
104                 return NULL;
105         }
106         sectorSize = 1<<getshort(oleBuf,0x1e);
107         shortSectorSize=1<<getshort(oleBuf,0x20);
108         
109 /* Read BBD into memory */
110         bbdNumBlocks = getulong(oleBuf,0x2c);
111         if((BBD=malloc(bbdNumBlocks*sectorSize)) == NULL ) {
112                 return NULL;
113         }
114         
115         if((tmpBuf=malloc(MSAT_ORIG_SIZE)) == NULL ) {
116                 return NULL;
117         }
118         memcpy(tmpBuf,oleBuf+0x4c,MSAT_ORIG_SIZE);
119         mblock=getlong(oleBuf,0x44);
120         msat_size=getlong(oleBuf,0x48);
121
122 /*      fprintf(stderr, "msat_size=%ld\n", msat_size); */
123
124         i=0;
125         while((mblock >= 0) && (i < msat_size)) {
126                 unsigned char *newbuf;
127 /*              fprintf(stderr, "i=%d mblock=%ld\n", i, mblock); */
128                 if ((newbuf=realloc(tmpBuf, sectorSize*(i+1)+MSAT_ORIG_SIZE)) != NULL) {
129                         tmpBuf=newbuf;
130                 } else {
131                         perror("MSAT realloc error");
132                         free(tmpBuf);
133                         ole_finish();
134                         return NULL;
135                 }
136                 
137                 fseek(newfile, 512+mblock*sectorSize, SEEK_SET);
138                 if(fread(tmpBuf+MSAT_ORIG_SIZE+(sectorSize-4)*i,
139                                                  1, sectorSize, newfile) != sectorSize) {
140                         fprintf(stderr, "Error read MSAT!\n");
141                         ole_finish();
142                         return NULL;
143                 }
144
145                 i++;
146                 mblock=getlong(tmpBuf, MSAT_ORIG_SIZE+(sectorSize-4)*i);
147         }
148         
149 /*      fprintf(stderr, "bbdNumBlocks=%ld\n", bbdNumBlocks); */
150         for(i=0; i< bbdNumBlocks; i++) {
151                 long int bbdSector=getlong(tmpBuf,4*i);
152                 
153                 if (bbdSector >= fileLength/sectorSize || bbdSector < 0) {
154                         fprintf(stderr, "Bad BBD entry!\n");
155                         ole_finish();
156                         return NULL;
157                 }
158                 fseek(newfile, 512+bbdSector*sectorSize, SEEK_SET);
159                 if ( fread(BBD+i*sectorSize, 1, sectorSize, newfile) != sectorSize ) {
160                         fprintf(stderr, "Can't read BBD!\n");
161                         free(tmpBuf);
162                         ole_finish();
163                         return NULL;
164                 }
165         }
166         free(tmpBuf);
167         
168 /* Read SBD into memory */
169         sbdLen=0;
170         sbdMaxLen=10;
171         sbdCurrent = sbdStart = getlong(oleBuf,0x3c);
172         if (sbdStart > 0) {
173                 if((SBD=malloc(sectorSize*sbdMaxLen)) == NULL ) {
174                         ole_finish();
175                         return NULL;
176                 }
177                 while(1) {
178                         fseek(newfile, 512+sbdCurrent*sectorSize, SEEK_SET);
179                         fread(SBD+sbdLen*sectorSize, 1, sectorSize, newfile);
180                         sbdLen++;
181                         if (sbdLen >= sbdMaxLen) {
182                                 unsigned char *newSBD;
183                                 
184                                 sbdMaxLen+=5;
185                                 if ((newSBD=realloc(SBD, sectorSize*sbdMaxLen)) != NULL) {
186                                         SBD=newSBD;
187                                 } else {
188                                         perror("SBD realloc error");
189                                         ole_finish();
190                                         return NULL;
191                                 }
192                         }
193                         sbdCurrent = getlong(BBD, sbdCurrent*4);
194                         if(sbdCurrent < 0 ||
195                                 sbdCurrent >= fileLength/sectorSize)
196                                 break;
197                 }
198                 sbdNumber = (sbdLen*sectorSize)/shortSectorSize;
199 /*              fprintf(stderr, "sbdLen=%ld sbdNumber=%ld\n",sbdLen, sbdNumber); */
200         } else {
201                 SBD=NULL;
202         }
203 /* Read property catalog into memory */
204         propLen = 0;
205         propMaxLen = 5;
206         propCurrent = propStart = getlong(oleBuf,0x30);
207         if (propStart >= 0) {
208                 if((properties=malloc(propMaxLen*sectorSize)) == NULL ) {
209                         ole_finish();
210                         return NULL;
211                 }
212                 while(1) {
213 /*                      fprintf(stderr, "propCurrent=%ld\n",propCurrent); */
214                         fseek(newfile, 512+propCurrent*sectorSize, SEEK_SET);
215                         fread(properties+propLen*sectorSize,
216                                   1, sectorSize, newfile);
217                         propLen++;
218                         if (propLen >= propMaxLen) {
219                                 unsigned char *newProp;
220                                 
221                                 propMaxLen+=5;
222                                 if ((newProp=realloc(properties, propMaxLen*sectorSize)) != NULL)
223                                         properties=newProp;
224                                 else {
225                                         perror("Properties realloc error");
226                                         ole_finish();
227                                         return NULL;
228                                 }
229                         }
230                         
231                         propCurrent = getlong(BBD, propCurrent*4);
232                         if(propCurrent < 0 ||
233                            propCurrent >= fileLength/sectorSize ) {
234                                 break;
235                         }
236                 }
237 /*              fprintf(stderr, "propLen=%ld\n",propLen); */
238                 propNumber = (propLen*sectorSize)/PROP_BLOCK_SIZE;
239                 propCurNumber = 0;
240         } else {
241                 ole_finish();
242                 properties = NULL;
243                 return NULL;
244         }
245         
246         
247 /* Find Root Entry */
248         while((tEntry=(oleEntry*)ole_readdir(newfile)) != NULL) {
249                 if (tEntry->type == oleRootDir ) {
250                         rootEntry=tEntry;
251                         break;
252                 }
253                 ole_close((FILE*)tEntry);
254         }
255         propCurNumber = 0;
256         fseek(newfile, 0, SEEK_SET);
257         if (!rootEntry) {
258                 fprintf(stderr,"Broken OLE structure. Cannot find root entry in this file!\n");         ole_finish();
259                 return NULL;
260         }       
261         return newfile;
262 }
263
/**
 * Tells whether a directory entry carries one of the accepted type codes.
 *
 * @param oleBuf start of the directory entry; the byte at offset 0x42 is
 *               examined
 *
 * @return 1 when that byte is 1, 2, 3 or 5, otherwise 0
 */
int rightOleType(unsigned char *oleBuf) {
	switch (oleBuf[0x42]) {
	case 1:
	case 2:
	case 3:
	case 5:
		return 1;
	default:
		return 0;
	}
}
275
276 /** 
277  * 
278  * 
279  * @param oleBuf 
280  * 
281  * @return 
282  */
283 oleType getOleType(unsigned char *oleBuf) {
284         return (oleType)((unsigned char)oleBuf[0x42]);
285 }
286
287 /** 
288  * Reads next directory entry from file
289  * 
290  * @param name buffer for name converted to us-ascii should be at least 33 chars long
291  * @param size size of file 
292  * 
293  * @return 0 if everything is ok -1 on error
294  */
295 FILE *ole_readdir(FILE *f) {
296         int i, nLen;
297         unsigned char *oleBuf;
298         oleEntry *e=NULL;
299         long int chainMaxLen, chainCurrent;
300         
301         if ( properties == NULL || propCurNumber >= propNumber || f == NULL )
302                 return NULL;
303         oleBuf=properties + propCurNumber*PROP_BLOCK_SIZE;
304         if( !rightOleType(oleBuf))
305                 return NULL;
306         if ((e = (oleEntry*)malloc(sizeof(oleEntry))) == NULL) {
307                 perror("Can\'t allocate memory");
308                 return NULL;
309         }
310         e->dirPos=oleBuf;
311         e->type=getOleType(oleBuf);
312         e->file=f;
313         e->startBlock=getlong(oleBuf,0x74);
314         e->blocks=NULL;
315         
316         nLen=getshort(oleBuf,0x40);
317         for (i=0 ; i < nLen /2; i++)
318                 e->name[i]=(char)oleBuf[i*2];
319         e->name[i]='\0';
320         propCurNumber++;
321         e->length=getulong(oleBuf,0x78);
322 /* Read sector chain for object */
323         chainMaxLen = 25;
324         e->numOfBlocks = 0;
325         chainCurrent = e->startBlock;
326         e->isBigBlock = (e->length >= 0x1000) || !strcmp(e->name, "Root Entry");
327 /*      fprintf(stderr, "e->name=%s e->length=%ld\n", e->name, e->length); */
328 /*      fprintf(stderr, "e->startBlock=%ld BBD=%p\n", e->startBlock, BBD); */
329         if (e->startBlock >= 0 &&
330                 e->length >= 0 &&
331                 (e->startBlock <=
332                  fileLength/(e->isBigBlock ? sectorSize : shortSectorSize))) {
333                 if((e->blocks=malloc(chainMaxLen*sizeof(long int))) == NULL ) {
334                         free(e);
335                         return NULL;
336                 }
337                 while(1) {
338                         if(chainCurrent < 0 ||
339                            chainCurrent >= (
340                                e->isBigBlock ?
341                                ((bbdNumBlocks*sectorSize)/4) :
342                                ((sbdNumber*shortSectorSize)/4)
343                                ) ||
344                            (e->numOfBlocks >
345                                 e->length/(
346                                     e->isBigBlock ?
347                                     sectorSize :
348                                     shortSectorSize
349                                     )
350                                 )
351                            ) {
352 /*                              fprintf(stderr, "chain End=%ld\n", chainCurrent);   */
353                                 break;
354                         }
355 /*                      fprintf(stderr, "chainCurrent=%ld\n", chainCurrent); */
356                         e->blocks[e->numOfBlocks++] = chainCurrent;
357                         if (e->numOfBlocks >= chainMaxLen) {
358                                 long int *newChain;
359                                 chainMaxLen+=25;
360                                 if ((newChain=realloc(e->blocks,
361                                                                           chainMaxLen*sizeof(long int))) != NULL)
362                                         e->blocks=newChain;
363                                 else {
364                                         perror("Properties realloc error");
365                                         free(e->blocks);
366                                         e->blocks=NULL;
367                                         return NULL;
368                                 }
369                         }
370                         if ( e->isBigBlock ) {
371                                 chainCurrent = getlong(BBD, chainCurrent*4);
372                         } else if ( SBD != NULL ) {
373                                 chainCurrent = getlong(SBD, chainCurrent*4);
374                         } else {
375                                 chainCurrent=-1;
376                         }
377                 }
378         }
379         
380         if(e->length > (e->isBigBlock ? sectorSize : shortSectorSize)*e->numOfBlocks)
381                 e->length = (e->isBigBlock ? sectorSize : shortSectorSize)*e->numOfBlocks;
382 /*      fprintf(stderr, "READDIR: e->name=%s e->numOfBlocks=%ld length=%ld\n", */
383 /*                                      e->name, e->numOfBlocks, e->length); */
384         
385         return (FILE*)e;
386 }
387
388 /** 
389  * Open stream, which correspond to directory entry last read by
390  * ole_readdir 
391  * 
392  * 
393  * @return opaque pointer to pass to ole_read, casted to (FILE *)
394  */
395 int ole_open(FILE *stream) {
396         oleEntry *e=(oleEntry *)stream;
397         if ( e->type != oleStream)
398                 return -2;
399         
400         e->ole_offset=0;
401         e->file_offset= ftell(e->file);
402         return 0;
403 }
404
405 /** 
406  * 
407  * 
408  * @param e 
409  * @param blk 
410  * 
411  * @return 
412  */
413 long int calcFileBlockOffset(oleEntry *e, long int blk) {
414         long int res;
415         if ( e->isBigBlock ) {
416                 res=512+e->blocks[blk]*sectorSize;
417         } else {
418                 long int sbdPerSector=sectorSize/shortSectorSize;
419                 long int sbdSecNum=e->blocks[blk]/sbdPerSector;
420                 long int sbdSecMod=e->blocks[blk]%sbdPerSector;
421 /*              fprintf(stderr, "calcoffset: e->name=%s e->numOfBlocks=%ld length=%ld sbdSecNum=%ld rootEntry->blocks=%p\n", 
422                                                 e->name, e->numOfBlocks, e->length, sbdSecNum, rootEntry->blocks);*/
423                 res=512 + rootEntry->blocks[sbdSecNum]*sectorSize + sbdSecMod*shortSectorSize;
424         }
425         return res;
426 }
427
428
429 /** 
430  * Reads block from open ole stream interface-compatible with fread
431  * 
432  * @param ptr pointer to buffer for read to
433  * @param size size of block
434  * @param nmemb size in blocks 
435  * @param stream pointer to FILE* structure
436  * 
437  * @return number of readed blocks
438  */
439 size_t ole_read(void *ptr, size_t size, size_t nmemb, FILE *stream) {
440         oleEntry *e = (oleEntry*)stream;
441         long int llen = size*nmemb, rread=0, i;
442         long int blockNumber, modBlock, toReadBlocks, toReadBytes, bytesInBlock;
443         long int ssize;                         /**< Size of block */
444         long int newoffset;
445         unsigned char *cptr = ptr;      
446         if( e->ole_offset+llen > e->length )
447                 llen= e->length - e->ole_offset;
448         
449         ssize = (e->isBigBlock ? sectorSize : shortSectorSize);
450         blockNumber=e->ole_offset/ssize;
451 /*      fprintf(stderr, "blockNumber=%ld e->numOfBlocks=%ld llen=%ld\n", */
452 /*                      blockNumber, e->numOfBlocks, llen); */
453         if ( blockNumber >= e->numOfBlocks || llen <=0 )
454                 return 0;
455         
456         modBlock=e->ole_offset%ssize;
457         bytesInBlock = ssize - modBlock;
458         if(bytesInBlock < llen) {
459                 toReadBlocks = (llen-bytesInBlock)/ssize;
460                 toReadBytes = (llen-bytesInBlock)%ssize; 
461         } else {
462                 toReadBlocks = toReadBytes = 0;
463         }
464 /*      fprintf(stderr, "llen=%ld toReadBlocks=%ld toReadBytes=%ld bytesInBlock=%ld blockNumber=%ld modBlock=%ld\n", */
465 /*                      llen, toReadBlocks, toReadBytes, bytesInBlock, blockNumber, modBlock); */
466         newoffset = calcFileBlockOffset(e,blockNumber)+modBlock;
467         if (e->file_offset != newoffset) {
468                 fseek(e->file, e->file_offset=newoffset, SEEK_SET);
469         }
470         rread=fread(ptr, 1, min(llen,bytesInBlock), e->file);
471         e->file_offset += rread;
472         for(i=0; i<toReadBlocks; i++) {
473                 int readbytes;
474                 blockNumber++;
475                 newoffset = calcFileBlockOffset(e,blockNumber);
476                 if (newoffset != e->file_offset);
477                 fseek(e->file, e->file_offset=newoffset , SEEK_SET);
478                 readbytes=fread(cptr+rread, 1, min(llen-rread, ssize), e->file);
479                 rread +=readbytes;
480                 e->file_offset +=readbytes;
481         }
482         if(toReadBytes > 0) {
483                 int readbytes;
484                 blockNumber++;
485                 newoffset = calcFileBlockOffset(e,blockNumber);
486                 fseek(e->file, e->file_offset=newoffset, SEEK_SET);
487         readbytes=fread(cptr+rread, 1, toReadBytes,e ->file);
488                 rread +=readbytes;
489                 e->file_offset +=readbytes;
490         }
491 /*      fprintf(stderr, "ole_offset=%ld rread=%ld llen=%ld\n",
492         e->ole_offset, rread, llen);*/
493         e->ole_offset+=rread;
494         return rread;
495 }       
496
497 /** 
498  * 
499  * 
500  * @param stream 
501  * 
502  * @return 
503  */
504 int ole_eof(FILE *stream) {
505         oleEntry *e=(oleEntry*)stream;
506 /*      fprintf(stderr, "EOF: e->ole_offset=%ld  e->length=%ld\n",
507         e->ole_offset,  e->length);*/
508         return (e->ole_offset >=  e->length);
509 }
510
511 /** 
512  * 
513  * 
514  */
515 void ole_finish(void) {
516         if ( BBD != NULL ) free(BBD);
517         if ( SBD != NULL ) free(SBD);
518         if ( properties != NULL ) free(properties);
519         if ( rootEntry != NULL ) ole_close((FILE*)rootEntry);
520         properties = SBD = BBD = NULL;
521         rootEntry = NULL;
522 }
523
524 /** 
525  * 
526  * 
527  * @param stream 
528  * 
529  * @return 
530  */
531 int ole_close(FILE *stream) {
532         oleEntry *e=(oleEntry*)stream;
533         if(e == NULL)
534                 return -1;
535         if (e->blocks != NULL)
536                 free(e->blocks);
537         free(e);
538         return 0;
539 }
540
541 /**
542  * 
543  * 
544  * @param stream pointer to OLE stream structure
545  * @param offset 
546  * @param whence 
547  * 
548  * @return 
549  */
550 int ole_seek(FILE *stream, long offset, int whence) {
551         oleEntry *e=(oleEntry*)stream;
552         long int new_ole_offset=0, new_file_offset;
553         int ssize, modBlock, blockNumber;
554         
555         switch(whence) {
556         case SEEK_SET:
557                 new_ole_offset=offset;
558                 break;
559                 
560         case SEEK_CUR:
561                 new_ole_offset=e->ole_offset+offset;
562                 break;
563                 
564         case SEEK_END:
565                 new_ole_offset=e->length+offset;
566                 break;
567                 
568         default:
569                 errno=EINVAL;
570                 return -1;
571         }
572         if(new_ole_offset<0)
573                 new_ole_offset=0;
574         if(new_ole_offset >= e->length)
575                 new_ole_offset=e->length;
576
577         ssize = (e->isBigBlock ? sectorSize : shortSectorSize);
578         blockNumber=new_ole_offset/ssize;
579         if ( blockNumber >= e->numOfBlocks )
580                 return -1;
581         
582         modBlock=new_ole_offset%ssize;
583         new_file_offset = calcFileBlockOffset(e,blockNumber)+modBlock;
584         fseek(e->file, e->file_offset=new_file_offset, SEEK_SET);
585         e->ole_offset=new_ole_offset;
586         
587         return 0;
588 }
589
590 /** 
591  * Tell position inside OLE stream
592  * 
593  * @param stream pointer to OLE stream
594  * 
595  * @return current position inside OLE stream
596  */
597 long ole_tell(FILE *stream) {
598         oleEntry *e=(oleEntry*)stream;
599         return e->ole_offset;
600 }
601
602
603 /**
604  * 
605  * 
606  */
607 size_t (*catdoc_read)(void *ptr, size_t size, size_t nmemb, FILE *stream);
608 int (*catdoc_eof)(FILE *stream);
609 int (*catdoc_seek)(FILE *stream, long offset, int whence);
610 long (*catdoc_tell)(FILE *stream);
611
612 void set_ole_func(void) {
613         catdoc_read=ole_read;
614         catdoc_eof=ole_eof;
615         catdoc_seek=ole_seek;
616         catdoc_tell=ole_tell;
617 }
618
/* FEOF names something whose address can be stored in a function pointer:
 * feof itself, or a real function wrapping it where feof is a macro
 * (as it is in Turbo C).
 */
#ifdef feof
int my_feof(FILE *f) {
    int atEnd = feof(f);

    return atEnd;
}
#define FEOF my_feof
#else
#define FEOF feof
#endif
630
631 void set_std_func(void) {
632         catdoc_read=fread;
633         catdoc_eof=FEOF;
634         catdoc_seek=fseek;
635         catdoc_tell=ftell;
636 }
637