]> www.wagner.pp.ru Git - oss/catdoc.git/blob - src/ole.c
1e26dd797a190d98712ddce6d5a1927471bfae3c
[oss/catdoc.git] / src / ole.c
1 /**
2  * @file   ole.c
3  * @author Alex Ott, Victor B Wagner
4  * @date   Wed Jun 11 12:33:01 2003
5  * Version: $Id: ole.c,v 1.2 2006-02-25 15:28:14 vitus Exp $
6  * Copyright: Victor B Wagner, 1996-2003 Alex Ott, 2003
7  *
8  * @brief  Parsing structure of MS Office compound document
9  *
10  * This file is part of catdoc project
11  * and distributed under GNU Public License
12  *
13  */
14 #ifdef HAVE_CONFIG_H
15 #include <config.h>
16 #endif
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <errno.h>
22
23 #include "catdoc.h"
24
25 #define min(a,b) ((a) < (b) ? (a) : (b))
26
27 long int sectorSize, shortSectorSize;
28 /* BBD Info */
29 long int  bbdNumBlocks;
30 unsigned char *BBD=NULL;
31 /* SBD Info */
32 long int sbdNumber, sbdStart, sbdLen;
33 unsigned char *SBD=NULL;
34 oleEntry *rootEntry=NULL;
35 /* Properties Info */
36 long propCurNumber, propLen, propNumber, propStart;
37 unsigned char *properties=NULL;
38 long int fileLength=0;
39
40 char ole_sign[]={0xD0,0xCF,0x11,0xE0,0xA1,0xB1,0x1A,0xE1,0};
41 char zip_sign[]="PK\003\004";
42
43 /**
44  * Initializes ole structure
45  *
46  * @param f (FILE *) compound document file, positioned at bufSize
47  *           byte. Might be pipe or socket
48  * @param buffer (void *) bytes already read from f
49  * @param bufSize number of bytes already read from f should be less
50  *                than 512
51  *
52  * @return
53  */
54 FILE* ole_init(FILE *f, void *buffer, size_t bufSize)  {
55         unsigned char oleBuf[BBD_BLOCK_SIZE];
56         unsigned char *tmpBuf;
57         FILE *newfile;
58         int ret=0, i;
59         long int bbdSize;
60         long int sbdMaxLen, sbdCurrent, propMaxLen, propCurrent, mblock, msat_size;
61         oleEntry *tEntry;
62
63         /* deleting old data (if it was allocated) */
64         ole_finish();
65
66         if (fseek(f,0,SEEK_SET) == -1) {
67                 if ( errno == ESPIPE ) {
68                         /* We got non-seekable file, create temp file */
69                         if((newfile=tmpfile()) == NULL) {
70                                 perror("Can't create tmp file");
71                                 return NULL;
72                         }
73                         if (bufSize > 0) {
74                                 ret=fwrite(buffer, 1, bufSize, newfile);
75                                 if(ret != bufSize) {
76                                         perror("Can't write to tmp file");
77                                         return NULL;
78                                 }
79                         }
80
81                         while(!feof(f)){
82                                 ret=fread(oleBuf,1,BBD_BLOCK_SIZE,f);
83                                 fwrite(oleBuf, 1, ret, newfile);
84                         }
85                         fseek(newfile,0,SEEK_SET);
86                 } else {
87                         perror("Can't seek in file");
88                         return NULL;
89                 }
90         } else {
91                 newfile=f;
92         }
93         fseek(newfile,0,SEEK_END);
94         fileLength=ftell(newfile);
95 /*      fprintf(stderr, "fileLength=%ld\n", fileLength); */
96         fseek(newfile,0,SEEK_SET);
97         ret=fread(oleBuf,1,BBD_BLOCK_SIZE,newfile);
98         if ( ret != BBD_BLOCK_SIZE ) {
99                 return NULL;
100         }
101         if (strncmp((char *)&oleBuf,zip_sign,4) == 0) {
102                 fprintf(stderr,"Looks like ZIP archive or Office 2007 or later. Not supported\n");
103                 return NULL;
104         } else if (strncmp((char *)&oleBuf,ole_sign,8) != 0) {
105                 return NULL;
106         }
107         sectorSize = 1<<getshort(oleBuf,0x1e);
108         shortSectorSize=1<<getshort(oleBuf,0x20);
109
110 /* Read BBD into memory */
111         bbdNumBlocks = getulong(oleBuf,0x2c);
112         bbdSize = bbdNumBlocks * sectorSize;
113         if (bbdSize > fileLength) {
114                 /* broken file, BBD size greater than entire file*/
115                 return NULL;
116         }
117
118         if((BBD=malloc(bbdNumBlocks*sectorSize)) == NULL ) {
119                 return NULL;
120         }
121
122         if((tmpBuf=malloc(MSAT_ORIG_SIZE)) == NULL ) {
123                 return NULL;
124         }
125         memcpy(tmpBuf,oleBuf+0x4c,MSAT_ORIG_SIZE);
126         mblock=getlong(oleBuf,0x44);
127         msat_size=getlong(oleBuf,0x48);
128         if (msat_size * sectorSize)
129 /*      fprintf(stderr, "msat_size=%ld\n", msat_size); */
130
131         i=0;
132         while((mblock >= 0) && (i < msat_size)) {
133                 unsigned char *newbuf;
134 /*              fprintf(stderr, "i=%d mblock=%ld\n", i, mblock); */
135                 if ((newbuf=realloc(tmpBuf, sectorSize*(i+1)+MSAT_ORIG_SIZE)) != NULL) {
136                         tmpBuf=newbuf;
137                 } else {
138                         perror("MSAT realloc error");
139                         free(tmpBuf);
140                         ole_finish();
141                         return NULL;
142                 }
143
144                 fseek(newfile, 512+mblock*sectorSize, SEEK_SET);
145                 if(fread(tmpBuf+MSAT_ORIG_SIZE+(sectorSize-4)*i,
146                                                  1, sectorSize, newfile) != sectorSize) {
147                         fprintf(stderr, "Error read MSAT!\n");
148                         ole_finish();
149                         return NULL;
150                 }
151
152                 i++;
153                 mblock=getlong(tmpBuf, MSAT_ORIG_SIZE+(sectorSize-4)*i);
154         }
155
156 /*      fprintf(stderr, "bbdNumBlocks=%ld\n", bbdNumBlocks); */
157         for(i=0; i< bbdNumBlocks; i++) {
158                 long int bbdSector=getlong(tmpBuf,4*i);
159
160                 if (bbdSector >= fileLength/sectorSize || bbdSector < 0) {
161                         fprintf(stderr, "Bad BBD entry!\n");
162                         ole_finish();
163                         return NULL;
164                 }
165                 fseek(newfile, 512+bbdSector*sectorSize, SEEK_SET);
166                 if ( fread(BBD+i*sectorSize, 1, sectorSize, newfile) != sectorSize ) {
167                         fprintf(stderr, "Can't read BBD!\n");
168                         free(tmpBuf);
169                         ole_finish();
170                         return NULL;
171                 }
172         }
173         free(tmpBuf);
174
175 /* Read SBD into memory */
176         sbdLen=0;
177         sbdMaxLen=10;
178         sbdCurrent = sbdStart = getlong(oleBuf,0x3c);
179         if (sbdStart > 0) {
180                 if((SBD=malloc(sectorSize*sbdMaxLen)) == NULL ) {
181                         ole_finish();
182                         return NULL;
183                 }
184                 while(1) {
185                         fseek(newfile, 512+sbdCurrent*sectorSize, SEEK_SET);
186                         fread(SBD+sbdLen*sectorSize, 1, sectorSize, newfile);
187                         sbdLen++;
188                         if (sbdLen >= sbdMaxLen) {
189                                 unsigned char *newSBD;
190
191                                 sbdMaxLen+=5;
192                                 if ((newSBD=realloc(SBD, sectorSize*sbdMaxLen)) != NULL) {
193                                         SBD=newSBD;
194                                 } else {
195                                         perror("SBD realloc error");
196                                         ole_finish();
197                                         return NULL;
198                                 }
199                         }
200                         if (sbdCurrent * 4 > bbdSize) {
201                                 ole_finish();
202                                 return NULL;
203                         }
204                         sbdCurrent = getlong(BBD, sbdCurrent*4);
205                         if(sbdCurrent < 0 ||
206                                 sbdCurrent >= fileLength/sectorSize)
207                                 break;
208                 }
209                 sbdNumber = (sbdLen*sectorSize)/shortSectorSize;
210 /*              fprintf(stderr, "sbdLen=%ld sbdNumber=%ld\n",sbdLen, sbdNumber); */
211         } else {
212                 SBD=NULL;
213         }
214 /* Read property catalog into memory */
215         propLen = 0;
216         propMaxLen = 5;
217         propCurrent = propStart = getlong(oleBuf,0x30);
218         if (propStart >= 0) {
219                 if((properties=malloc(propMaxLen*sectorSize)) == NULL ) {
220                         ole_finish();
221                         return NULL;
222                 }
223                 while(1) {
224 /*                      fprintf(stderr, "propCurrent=%ld\n",propCurrent); */
225                         fseek(newfile, 512+propCurrent*sectorSize, SEEK_SET);
226                         errno=0;
227                         if (fread(properties+propLen*sectorSize,
228                                   1, sectorSize, newfile)!=sectorSize) {
229                                   if (errno != 0) {
230                                         perror("reading properties catalog");
231                                   }
232                                   ole_finish();
233                                   return NULL;
234                         }
235                         propLen++;
236                         if (propLen >= propMaxLen) {
237                                 unsigned char *newProp;
238
239                                 propMaxLen+=5;
240                                 if ((newProp=realloc(properties, propMaxLen*sectorSize)) != NULL)
241                                         properties=newProp;
242                                 else {
243                                         perror("Properties realloc error");
244                                         ole_finish();
245                                         return NULL;
246                                 }
247                         }
248
249                         propCurrent = getlong(BBD, propCurrent*4);
250                         if(propCurrent < 0 ||
251                            propCurrent >= bbdSize/4 ) {
252                            break;
253                         }
254                 }
255 /*              fprintf(stderr, "propLen=%ld\n",propLen); */
256                 propNumber = (propLen*sectorSize)/PROP_BLOCK_SIZE;
257                 propCurNumber = 0;
258         } else {
259                 ole_finish();
260                 properties = NULL;
261                 return NULL;
262         }
263
264
265 /* Find Root Entry */
266         while((tEntry=(oleEntry*)ole_readdir(newfile)) != NULL) {
267                 if (tEntry->type == oleRootDir ) {
268                         rootEntry=tEntry;
269                         break;
270                 }
271                 ole_close((FILE*)tEntry);
272         }
273         propCurNumber = 0;
274         fseek(newfile, 0, SEEK_SET);
275         if (!rootEntry) {
276                 fprintf(stderr,"Broken OLE structure. Cannot find root entry in this file!\n");         ole_finish();
277                 return NULL;
278         }
279         return newfile;
280 }
281
/**
 * Tells whether a directory entry carries an accepted type code.
 *
 * @param oleBuf start of a directory entry; the byte at offset 0x42 is
 *               examined
 *
 * @return 1 if that byte is 1, 2, 3 or 5, otherwise 0
 */
int rightOleType(unsigned char *oleBuf) {
	switch (oleBuf[0x42]) {
	case 1:
	case 2:
	case 3:
	case 5:
		return 1;
	default:
		return 0;
	}
}
293
294 /**
295  *
296  *
297  * @param oleBuf
298  *
299  * @return
300  */
301 oleType getOleType(unsigned char *oleBuf) {
302         return (oleType)((unsigned char)oleBuf[0x42]);
303 }
304
305 /**
306  * Reads next directory entry from file
307  *
308  * @param name buffer for name converted to us-ascii should be at least 33 chars long
309  * @param size size of file
310  *
311  * @return 0 if everything is ok -1 on error
312  */
313 FILE *ole_readdir(FILE *f) {
314         int i, nLen;
315         unsigned char *oleBuf;
316         oleEntry *e=NULL;
317         long int chainMaxLen, chainCurrent;
318
319         if ( properties == NULL || propCurNumber >= propNumber || f == NULL )
320                 return NULL;
321         oleBuf=properties + propCurNumber*PROP_BLOCK_SIZE;
322         if( !rightOleType(oleBuf))
323                 return NULL;
324         if ((e = (oleEntry*) calloc(sizeof(oleEntry),1)) == NULL) {
325                 perror("Can\'t allocate memory");
326                 return NULL;
327         }
328         e->dirPos=oleBuf;
329         e->type=getOleType(oleBuf);
330         e->file=f;
331         e->startBlock=getlong(oleBuf,0x74);
332         e->blocks=NULL;
333
334         nLen=getshort(oleBuf,0x40);
335         if (nLen > OLENAMELENGTH) {
336                 free(e);
337                 return NULL;
338         }
339         for (i=0 ; i < nLen /2; i++)
340                 e->name[i]=(char)oleBuf[i*2];
341         e->name[i]='\0';
342         propCurNumber++;
343         e->length=getulong(oleBuf,0x78);
344 /* Read sector chain for object */
345         chainMaxLen = 25;
346         e->numOfBlocks = 0;
347         chainCurrent = e->startBlock;
348         e->isBigBlock = (e->length >= 0x1000) || !strcmp(e->name, "Root Entry");
349 /*      fprintf(stderr, "e->name=%s e->length=%ld\n", e->name, e->length); */
350 /*      fprintf(stderr, "e->startBlock=%ld BBD=%p\n", e->startBlock, BBD); */
351         if (e->startBlock >= 0 &&
352                 e->length >= 0 &&
353                 (e->startBlock <=
354                  fileLength/(e->isBigBlock ? sectorSize : shortSectorSize))) {
355                 if((e->blocks=malloc(chainMaxLen*sizeof(long int))) == NULL ) {
356                         free(e);
357                         return NULL;
358                 }
359                 while(1) {
360                         if(chainCurrent < 0 ||
361                            chainCurrent >= (
362                                e->isBigBlock ?
363                                ((bbdNumBlocks*sectorSize)/4) :
364                                ((sbdNumber*shortSectorSize)/4)
365                                ) ||
366                            (e->numOfBlocks >
367                                 e->length/(
368                                     e->isBigBlock ?
369                                     sectorSize :
370                                     shortSectorSize
371                                     )
372                                 )
373                            ) {
374 /*                              fprintf(stderr, "chain End=%ld\n", chainCurrent);   */
375                                 break;
376                         }
377 /*                      fprintf(stderr, "chainCurrent=%ld\n", chainCurrent); */
378                         e->blocks[e->numOfBlocks++] = chainCurrent;
379                         if (e->numOfBlocks >= chainMaxLen) {
380                                 long int *newChain;
381                                 chainMaxLen+=25;
382                                 if ((newChain=realloc(e->blocks,
383                                                                           chainMaxLen*sizeof(long int))) != NULL) {
384                                         e->blocks=newChain;
385                                 } else {
386                                         perror("Properties realloc error");
387                                         free(e->blocks);
388                                         e->blocks=NULL;
389                                         return NULL;
390                                 }
391                         }
392                         if ( e->isBigBlock ) {
393                                 chainCurrent = getlong(BBD, chainCurrent*4);
394                         } else if ( SBD != NULL ) {
395                                 chainCurrent = getlong(SBD, chainCurrent*4);
396                         } else {
397                                 chainCurrent=-1;
398                         }
399                 }
400         }
401
402         if(e->length > (e->isBigBlock ? sectorSize : shortSectorSize)*e->numOfBlocks)
403                 e->length = (e->isBigBlock ? sectorSize : shortSectorSize)*e->numOfBlocks;
404 /*      fprintf(stderr, "READDIR: e->name=%s e->numOfBlocks=%ld length=%ld\n", */
405 /*                                      e->name, e->numOfBlocks, e->length); */
406
407         return (FILE*)e;
408 }
409
410 /**
411  * Open stream, which correspond to directory entry last read by
412  * ole_readdir
413  *
414  *
415  * @return opaque pointer to pass to ole_read, casted to (FILE *)
416  */
417 int ole_open(FILE *stream) {
418         oleEntry *e=(oleEntry *)stream;
419         if ( e->type != oleStream)
420                 return -2;
421
422         e->ole_offset=0;
423         e->file_offset= ftell(e->file);
424         return 0;
425 }
426
427 /**
428  *
429  *
430  * @param e
431  * @param blk
432  *
433  * @return
434  */
435 long int calcFileBlockOffset(oleEntry *e, long int blk) {
436         long int res;
437         if ( e->isBigBlock ) {
438                 res=512+e->blocks[blk]*sectorSize;
439         } else {
440                 long int sbdPerSector=sectorSize/shortSectorSize;
441                 long int sbdSecNum=e->blocks[blk]/sbdPerSector;
442                 long int sbdSecMod=e->blocks[blk]%sbdPerSector;
443 /*              fprintf(stderr, "calcoffset: e->name=%s e->numOfBlocks=%ld length=%ld sbdSecNum=%ld rootEntry->blocks=%p\n",
444                                                 e->name, e->numOfBlocks, e->length, sbdSecNum, rootEntry->blocks);*/
445                 res=512 + rootEntry->blocks[sbdSecNum]*sectorSize + sbdSecMod*shortSectorSize;
446         }
447         return res;
448 }
449
450
451 /**
452  * Reads block from open ole stream interface-compatible with fread
453  *
454  * @param ptr pointer to buffer for read to
455  * @param size size of block
456  * @param nmemb size in blocks
457  * @param stream pointer to FILE* structure
458  *
459  * @return number of readed blocks
460  */
461 size_t ole_read(void *ptr, size_t size, size_t nmemb, FILE *stream) {
462         oleEntry *e = (oleEntry*)stream;
463         long int llen = size*nmemb, rread=0, i;
464         long int blockNumber, modBlock, toReadBlocks, toReadBytes, bytesInBlock;
465         long int ssize;                         /**< Size of block */
466         long int newoffset;
467         unsigned char *cptr = ptr;
468         if( e->ole_offset+llen > e->length )
469                 llen= e->length - e->ole_offset;
470
471         ssize = (e->isBigBlock ? sectorSize : shortSectorSize);
472         blockNumber=e->ole_offset/ssize;
473 /*      fprintf(stderr, "blockNumber=%ld e->numOfBlocks=%ld llen=%ld\n", */
474 /*                      blockNumber, e->numOfBlocks, llen); */
475         if ( blockNumber >= e->numOfBlocks || llen <=0 )
476                 return 0;
477
478         modBlock=e->ole_offset%ssize;
479         bytesInBlock = ssize - modBlock;
480         if(bytesInBlock < llen) {
481                 toReadBlocks = (llen-bytesInBlock)/ssize;
482                 toReadBytes = (llen-bytesInBlock)%ssize;
483         } else {
484                 toReadBlocks = toReadBytes = 0;
485         }
486 /*      fprintf(stderr, "llen=%ld toReadBlocks=%ld toReadBytes=%ld bytesInBlock=%ld blockNumber=%ld modBlock=%ld\n", */
487 /*                      llen, toReadBlocks, toReadBytes, bytesInBlock, blockNumber, modBlock); */
488         newoffset = calcFileBlockOffset(e,blockNumber)+modBlock;
489         if (e->file_offset != newoffset) {
490                 fseek(e->file, e->file_offset=newoffset, SEEK_SET);
491         }
492         rread=fread(ptr, 1, min(llen,bytesInBlock), e->file);
493         e->file_offset += rread;
494         for(i=0; i<toReadBlocks; i++) {
495                 int readbytes;
496                 blockNumber++;
497                 newoffset = calcFileBlockOffset(e,blockNumber);
498                 if (newoffset != e->file_offset);
499                 fseek(e->file, e->file_offset=newoffset , SEEK_SET);
500                 readbytes=fread(cptr+rread, 1, min(llen-rread, ssize), e->file);
501                 rread +=readbytes;
502                 e->file_offset +=readbytes;
503         }
504         if(toReadBytes > 0) {
505                 int readbytes;
506                 blockNumber++;
507                 newoffset = calcFileBlockOffset(e,blockNumber);
508                 fseek(e->file, e->file_offset=newoffset, SEEK_SET);
509         readbytes=fread(cptr+rread, 1, toReadBytes,e ->file);
510                 rread +=readbytes;
511                 e->file_offset +=readbytes;
512         }
513 /*      fprintf(stderr, "ole_offset=%ld rread=%ld llen=%ld\n",
514         e->ole_offset, rread, llen);*/
515         e->ole_offset+=rread;
516         return rread;
517 }
518
519 /**
520  *
521  *
522  * @param stream
523  *
524  * @return
525  */
526 int ole_eof(FILE *stream) {
527         oleEntry *e=(oleEntry*)stream;
528 /*      fprintf(stderr, "EOF: e->ole_offset=%ld  e->length=%ld\n",
529         e->ole_offset,  e->length);*/
530         return (e->ole_offset >=  e->length);
531 }
532
533 /**
534  *
535  *
536  */
537 void ole_finish(void) {
538         if ( BBD != NULL ) free(BBD);
539         if ( SBD != NULL ) free(SBD);
540         if ( properties != NULL ) free(properties);
541         if ( rootEntry != NULL ) ole_close((FILE*)rootEntry);
542         properties = SBD = BBD = NULL;
543         rootEntry = NULL;
544 }
545
546 /**
547  *
548  *
549  * @param stream
550  *
551  * @return
552  */
553 int ole_close(FILE *stream) {
554         oleEntry *e=(oleEntry*)stream;
555         if(e == NULL)
556                 return -1;
557         if (e->blocks != NULL)
558                 free(e->blocks);
559         free(e);
560         return 0;
561 }
562
563 /**
564  *
565  *
566  * @param stream pointer to OLE stream structure
567  * @param offset
568  * @param whence
569  *
570  * @return
571  */
572 int ole_seek(FILE *stream, long offset, int whence) {
573         oleEntry *e=(oleEntry*)stream;
574         long int new_ole_offset=0, new_file_offset;
575         int ssize, modBlock, blockNumber;
576
577         switch(whence) {
578         case SEEK_SET:
579                 new_ole_offset=offset;
580                 break;
581
582         case SEEK_CUR:
583                 new_ole_offset=e->ole_offset+offset;
584                 break;
585
586         case SEEK_END:
587                 new_ole_offset=e->length+offset;
588                 break;
589
590         default:
591                 errno=EINVAL;
592                 return -1;
593         }
594         if(new_ole_offset<0)
595                 new_ole_offset=0;
596         if(new_ole_offset >= e->length)
597                 new_ole_offset=e->length;
598
599         ssize = (e->isBigBlock ? sectorSize : shortSectorSize);
600         blockNumber=new_ole_offset/ssize;
601         if ( blockNumber >= e->numOfBlocks )
602                 return -1;
603
604         modBlock=new_ole_offset%ssize;
605         new_file_offset = calcFileBlockOffset(e,blockNumber)+modBlock;
606         fseek(e->file, e->file_offset=new_file_offset, SEEK_SET);
607         e->ole_offset=new_ole_offset;
608
609         return 0;
610 }
611
612 /**
613  * Tell position inside OLE stream
614  *
615  * @param stream pointer to OLE stream
616  *
617  * @return current position inside OLE stream
618  */
619 long ole_tell(FILE *stream) {
620         oleEntry *e=(oleEntry*)stream;
621         return e->ole_offset;
622 }
623
624
625 /**
626  *
627  *
628  */
629 size_t (*catdoc_read)(void *ptr, size_t size, size_t nmemb, FILE *stream);
630 int (*catdoc_eof)(FILE *stream);
631 int (*catdoc_seek)(FILE *stream, long offset, int whence);
632 long (*catdoc_tell)(FILE *stream);
633
634 void set_ole_func(void) {
635         catdoc_read=ole_read;
636         catdoc_eof=ole_eof;
637         catdoc_seek=ole_seek;
638         catdoc_tell=ole_tell;
639 }
640
641 #ifdef feof
642 /* feof is macro in Turbo C, so we need a real function to assign to
643  * pointer
644  */
645 int my_feof(FILE *f) {
646     return feof(f);
647 }
648 #define FEOF my_feof
649 #else
650 #define FEOF feof
651 #endif
652
653 void set_std_func(void) {
654         catdoc_read=fread;
655         catdoc_eof=FEOF;
656         catdoc_seek=fseek;
657         catdoc_tell=ftell;
658 }
659