]> www.wagner.pp.ru Git - oss/catdoc.git/blob - src/ole.c
Merge branch 'master' of http://wagner.pp.ru/git/oss/catdoc
[oss/catdoc.git] / src / ole.c
1 /**
2  * @file   ole.c
3  * @author Alex Ott, Victor B Wagner
4  * @date   Wed Jun 11 12:33:01 2003
5  * Version: $Id: ole.c,v 1.2 2006-02-25 15:28:14 vitus Exp $
6  * Copyright: Victor B Wagner, 1996-2003 Alex Ott, 2003
7  *
8  * @brief  Parsing structure of MS Office compound document
9  *
10  * This file is part of catdoc project
11  * and distributed under GNU Public License
12  *
13  */
14 #ifdef HAVE_CONFIG_H
15 #include <config.h>
16 #endif
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <errno.h>
22
23 #include "catdoc.h"
24
25 #define min(a,b) ((a) < (b) ? (a) : (b))
26
27 long int sectorSize, shortSectorSize;
28 /* BBD Info */
29 long int  bbdNumBlocks;
30 unsigned char *BBD=NULL;
31 /* SBD Info */
32 long int sbdNumber, sbdStart, sbdLen;
33 unsigned char *SBD=NULL;
34 oleEntry *rootEntry=NULL;
35 /* Properties Info */
36 long propCurNumber, propLen, propNumber, propStart;
37 unsigned char *properties=NULL;
38 long int fileLength=0;
39
/* First eight bytes of a compound document, followed by a terminating NUL */
char ole_sign[]="\xD0\xCF\x11\xE0" "\xA1\xB1\x1A\xE1";
/* First four bytes of a ZIP archive ("PK", 3, 4), followed by a terminating NUL */
char zip_sign[]={'P','K',3,4,0};
42
43 /**
44  * Initializes ole structure
45  *
46  * @param f (FILE *) compound document file, positioned at bufSize
47  *           byte. Might be pipe or socket
48  * @param buffer (void *) bytes already read from f
49  * @param bufSize number of bytes already read from f should be less
50  *                than 512
51  *
52  * @return
53  */
54 FILE* ole_init(FILE *f, void *buffer, size_t bufSize)  {
55         unsigned char oleBuf[BBD_BLOCK_SIZE];
56         unsigned char *tmpBuf;
57         FILE *newfile;
58         int ret=0, i;
59         long int bbdSize;
60         long int sbdMaxLen, sbdCurrent, propMaxLen, propCurrent, mblock, msat_size;
61         oleEntry *tEntry;
62
63         /* deleting old data (if it was allocated) */
64         ole_finish();
65
66         if (fseek(f,0,SEEK_SET) == -1) {
67                 if ( errno == ESPIPE ) {
68                         /* We got non-seekable file, create temp file */
69                         if((newfile=tmpfile()) == NULL) {
70                                 perror("Can't create tmp file");
71                                 return NULL;
72                         }
73                         if (bufSize > 0) {
74                                 ret=fwrite(buffer, 1, bufSize, newfile);
75                                 if(ret != bufSize) {
76                                         perror("Can't write to tmp file");
77                                         fclose(newfile);
78                                         return NULL;
79                                 }
80                         }
81
82                         while(!feof(f)){
83                                 ret=fread(oleBuf,1,BBD_BLOCK_SIZE,f);
84                                 fwrite(oleBuf, 1, ret, newfile);
85                         }
86                         fseek(newfile,0,SEEK_SET);
87                 } else {
88                         perror("Can't seek in file");
89                         return NULL;
90                 }
91         } else {
92                 newfile=f;
93         }
94         fseek(newfile,0,SEEK_END);
95         fileLength=ftell(newfile);
96 /*      fprintf(stderr, "fileLength=%ld\n", fileLength); */
97         fseek(newfile,0,SEEK_SET);
98         ret=fread(oleBuf,1,BBD_BLOCK_SIZE,newfile);
99         if ( ret != BBD_BLOCK_SIZE ) {
100                 return NULL;
101         }
102         if (strncmp((char *)&oleBuf,zip_sign,4) == 0) {
103                 fprintf(stderr,"Looks like ZIP archive or Office 2007 or later. Not supported\n");
104                 return NULL;
105         } else if (strncmp((char *)&oleBuf,ole_sign,8) != 0) {
106                 return NULL;
107         }
108         sectorSize = 1<<getshort(oleBuf,0x1e);
109         shortSectorSize=1<<getshort(oleBuf,0x20);
110
111 /* Read BBD into memory */
112         bbdNumBlocks = getulong(oleBuf,0x2c);
113         bbdSize = bbdNumBlocks * sectorSize;
114         if (bbdSize > fileLength) {
115                 /* broken file, BBD size greater than entire file*/
116                 return NULL;
117         }
118
119         if((BBD=malloc(bbdNumBlocks*sectorSize)) == NULL ) {
120                 return NULL;
121         }
122
123         if((tmpBuf=malloc(MSAT_ORIG_SIZE)) == NULL ) {
124                 return NULL;
125         }
126         memcpy(tmpBuf,oleBuf+0x4c,MSAT_ORIG_SIZE);
127         mblock=getlong(oleBuf,0x44);
128         msat_size=getlong(oleBuf,0x48);
129         if (msat_size * sectorSize > fileLength) {
130                 free(tmpBuf);
131                 return NULL;
132         }
133                 
134 /*      fprintf(stderr, "msat_size=%ld\n", msat_size); */
135
136         i=0;
137         while((mblock >= 0) && (i < msat_size)) {
138                 unsigned char *newbuf;
139 /*              fprintf(stderr, "i=%d mblock=%ld\n", i, mblock); */
140                 if ((newbuf=realloc(tmpBuf, sectorSize*(i+1)+MSAT_ORIG_SIZE)) != NULL) {
141                         tmpBuf=newbuf;
142                 } else {
143                         perror("MSAT realloc error");
144                         free(tmpBuf);
145                         ole_finish();
146                         return NULL;
147                 }
148
149                 fseek(newfile, 512+mblock*sectorSize, SEEK_SET);
150                 if(fread(tmpBuf+MSAT_ORIG_SIZE+(sectorSize-4)*i,
151                                                  1, sectorSize, newfile) != sectorSize) {
152                         fprintf(stderr, "Error read MSAT!\n");
153                         ole_finish();
154                         return NULL;
155                 }
156
157                 i++;
158                 mblock=getlong(tmpBuf, MSAT_ORIG_SIZE+(sectorSize-4)*i);
159         }
160
161 /*      fprintf(stderr, "bbdNumBlocks=%ld\n", bbdNumBlocks); */
162         for(i=0; i< bbdNumBlocks; i++) {
163                 long int bbdSector=getlong(tmpBuf,4*i);
164
165                 if (bbdSector >= fileLength/sectorSize || bbdSector < 0) {
166                         fprintf(stderr, "Bad BBD entry!\n");
167                         ole_finish();
168                         return NULL;
169                 }
170                 fseek(newfile, 512+bbdSector*sectorSize, SEEK_SET);
171                 if ( fread(BBD+i*sectorSize, 1, sectorSize, newfile) != sectorSize ) {
172                         fprintf(stderr, "Can't read BBD!\n");
173                         free(tmpBuf);
174                         ole_finish();
175                         return NULL;
176                 }
177         }
178         free(tmpBuf);
179
180 /* Read SBD into memory */
181         sbdLen=0;
182         sbdMaxLen=10;
183         sbdCurrent = sbdStart = getlong(oleBuf,0x3c);
184         if (sbdStart > 0) {
185                 if((SBD=malloc(sectorSize*sbdMaxLen)) == NULL ) {
186                         ole_finish();
187                         return NULL;
188                 }
189                 while(1) {
190                         fseek(newfile, 512+sbdCurrent*sectorSize, SEEK_SET);
191                         fread(SBD+sbdLen*sectorSize, 1, sectorSize, newfile);
192                         sbdLen++;
193                         if (sbdLen >= sbdMaxLen) {
194                                 unsigned char *newSBD;
195
196                                 sbdMaxLen+=5;
197                                 if ((newSBD=realloc(SBD, sectorSize*sbdMaxLen)) != NULL) {
198                                         SBD=newSBD;
199                                 } else {
200                                         perror("SBD realloc error");
201                                         ole_finish();
202                                         return NULL;
203                                 }
204                         }
205                         if (sbdCurrent * 4 > bbdSize) {
206                                 ole_finish();
207                                 return NULL;
208                         }
209                         sbdCurrent = getlong(BBD, sbdCurrent*4);
210                         if(sbdCurrent < 0 ||
211                                 sbdCurrent >= fileLength/sectorSize)
212                                 break;
213                 }
214                 sbdNumber = (sbdLen*sectorSize)/shortSectorSize;
215 /*              fprintf(stderr, "sbdLen=%ld sbdNumber=%ld\n",sbdLen, sbdNumber); */
216         } else {
217                 SBD=NULL;
218         }
219 /* Read property catalog into memory */
220         propLen = 0;
221         propMaxLen = 5;
222         propCurrent = propStart = getlong(oleBuf,0x30);
223         if (propStart >= 0) {
224                 if((properties=malloc(propMaxLen*sectorSize)) == NULL ) {
225                         ole_finish();
226                         return NULL;
227                 }
228                 while(1) {
229 /*                      fprintf(stderr, "propCurrent=%ld\n",propCurrent); */
230                         fseek(newfile, 512+propCurrent*sectorSize, SEEK_SET);
231                         errno=0;
232                         if (fread(properties+propLen*sectorSize,
233                                   1, sectorSize, newfile)!=sectorSize) {
234                                   if (errno != 0) {
235                                         perror("reading properties catalog");
236                                   }
237                                   ole_finish();
238                                   return NULL;
239                         }
240                         propLen++;
241                         if (propLen >= propMaxLen) {
242                                 unsigned char *newProp;
243
244                                 propMaxLen+=5;
245                                 if ((newProp=realloc(properties, propMaxLen*sectorSize)) != NULL)
246                                         properties=newProp;
247                                 else {
248                                         perror("Properties realloc error");
249                                         ole_finish();
250                                         return NULL;
251                                 }
252                         }
253
254                         propCurrent = getlong(BBD, propCurrent*4);
255                         if(propCurrent < 0 ||
256                            propCurrent >= bbdSize/4 ) {
257                            break;
258                         }
259                 }
260 /*              fprintf(stderr, "propLen=%ld\n",propLen); */
261                 propNumber = (propLen*sectorSize)/PROP_BLOCK_SIZE;
262                 propCurNumber = 0;
263         } else {
264                 ole_finish();
265                 properties = NULL;
266                 return NULL;
267         }
268
269
270 /* Find Root Entry */
271         while((tEntry=(oleEntry*)ole_readdir(newfile)) != NULL) {
272                 if (tEntry->type == oleRootDir ) {
273                         rootEntry=tEntry;
274                         break;
275                 }
276                 ole_close((FILE*)tEntry);
277         }
278         propCurNumber = 0;
279         fseek(newfile, 0, SEEK_SET);
280         if (!rootEntry) {
281                 fprintf(stderr,"Broken OLE structure. Cannot find root entry in this file!\n");         ole_finish();
282                 return NULL;
283         }
284         return newfile;
285 }
286
/**
 * Checks the type byte (offset 0x42) of a raw directory entry.
 *
 * @param oleBuf raw directory entry, at least 0x43 bytes long
 *
 * @return 1 if the type byte is 1, 2, 3 or 5, otherwise 0
 */
int rightOleType(unsigned char *oleBuf) {
	switch (oleBuf[0x42]) {
	case 1:
	case 2:
	case 3:
	case 5:
		return 1;
	default:
		return 0;
	}
}
298
299 /**
300  *
301  *
302  * @param oleBuf
303  *
304  * @return
305  */
306 oleType getOleType(unsigned char *oleBuf) {
307         return (oleType)((unsigned char)oleBuf[0x42]);
308 }
309
310 /**
311  * Reads next directory entry from file
312  *
313  * @param name buffer for name converted to us-ascii should be at least 33 chars long
314  * @param size size of file
315  *
316  * @return 0 if everything is ok -1 on error
317  */
318 FILE *ole_readdir(FILE *f) {
319         int i, nLen;
320         unsigned char *oleBuf;
321         oleEntry *e=NULL;
322         long int chainMaxLen, chainCurrent;
323
324         if ( properties == NULL || propCurNumber >= propNumber || f == NULL )
325                 return NULL;
326         oleBuf=properties + propCurNumber*PROP_BLOCK_SIZE;
327         if( !rightOleType(oleBuf))
328                 return NULL;
329         if ((e = (oleEntry*) calloc(sizeof(oleEntry),1)) == NULL) {
330                 perror("Can\'t allocate memory");
331                 return NULL;
332         }
333         e->dirPos=oleBuf;
334         e->type=getOleType(oleBuf);
335         e->file=f;
336         e->startBlock=getlong(oleBuf,0x74);
337         e->blocks=NULL;
338
339         nLen=getshort(oleBuf,0x40);
340         if (nLen > OLENAMELENGTH) {
341                 free(e);
342                 return NULL;
343         }
344         for (i=0 ; i < nLen /2; i++)
345                 e->name[i]=(char)oleBuf[i*2];
346         e->name[i]='\0';
347         propCurNumber++;
348         e->length=getulong(oleBuf,0x78);
349 /* Read sector chain for object */
350         chainMaxLen = 25;
351         e->numOfBlocks = 0;
352         chainCurrent = e->startBlock;
353         e->isBigBlock = (e->length >= 0x1000) || !strcmp(e->name, "Root Entry");
354 /*      fprintf(stderr, "e->name=%s e->length=%ld\n", e->name, e->length); */
355 /*      fprintf(stderr, "e->startBlock=%ld BBD=%p\n", e->startBlock, BBD); */
356         if (e->startBlock >= 0 &&
357                 e->length >= 0 &&
358                 (e->startBlock <=
359                  fileLength/(e->isBigBlock ? sectorSize : shortSectorSize))) {
360                 if((e->blocks=malloc(chainMaxLen*sizeof(long int))) == NULL ) {
361                         free(e);
362                         return NULL;
363                 }
364                 while(1) {
365                         if(chainCurrent < 0 ||
366                            chainCurrent >= (
367                                e->isBigBlock ?
368                                ((bbdNumBlocks*sectorSize)/4) :
369                                ((sbdNumber*shortSectorSize)/4)
370                                ) ||
371                            (e->numOfBlocks >
372                                 e->length/(
373                                     e->isBigBlock ?
374                                     sectorSize :
375                                     shortSectorSize
376                                     )
377                                 )
378                            ) {
379 /*                              fprintf(stderr, "chain End=%ld\n", chainCurrent);   */
380                                 break;
381                         }
382 /*                      fprintf(stderr, "chainCurrent=%ld\n", chainCurrent); */
383                         e->blocks[e->numOfBlocks++] = chainCurrent;
384                         if (e->numOfBlocks >= chainMaxLen) {
385                                 long int *newChain;
386                                 chainMaxLen+=25;
387                                 if ((newChain=realloc(e->blocks,
388                                                                           chainMaxLen*sizeof(long int))) != NULL) {
389                                         e->blocks=newChain;
390                                 } else {
391                                         perror("Properties realloc error");
392                                         free(e->blocks);
393                                         e->blocks=NULL;
394                                         return NULL;
395                                 }
396                         }
397                         if ( e->isBigBlock ) {
398                                 chainCurrent = getlong(BBD, chainCurrent*4);
399                         } else if ( SBD != NULL ) {
400                                 chainCurrent = getlong(SBD, chainCurrent*4);
401                         } else {
402                                 chainCurrent=-1;
403                         }
404                 }
405         }
406
407         if(e->length > (e->isBigBlock ? sectorSize : shortSectorSize)*e->numOfBlocks)
408                 e->length = (e->isBigBlock ? sectorSize : shortSectorSize)*e->numOfBlocks;
409 /*      fprintf(stderr, "READDIR: e->name=%s e->numOfBlocks=%ld length=%ld\n", */
410 /*                                      e->name, e->numOfBlocks, e->length); */
411
412         return (FILE*)e;
413 }
414
415 /**
416  * Open stream, which correspond to directory entry last read by
417  * ole_readdir
418  *
419  *
420  * @return opaque pointer to pass to ole_read, casted to (FILE *)
421  */
422 int ole_open(FILE *stream) {
423         oleEntry *e=(oleEntry *)stream;
424         if ( e->type != oleStream)
425                 return -2;
426
427         e->ole_offset=0;
428         e->file_offset= ftell(e->file);
429         return 0;
430 }
431
432 /**
433  *
434  *
435  * @param e
436  * @param blk
437  *
438  * @return
439  */
440 long int calcFileBlockOffset(oleEntry *e, long int blk) {
441         long int res;
442         if ( e->isBigBlock ) {
443                 res=512+e->blocks[blk]*sectorSize;
444         } else {
445                 long int sbdPerSector=sectorSize/shortSectorSize;
446                 long int sbdSecNum=e->blocks[blk]/sbdPerSector;
447                 long int sbdSecMod=e->blocks[blk]%sbdPerSector;
448 /*              fprintf(stderr, "calcoffset: e->name=%s e->numOfBlocks=%ld length=%ld sbdSecNum=%ld rootEntry->blocks=%p\n",
449                                                 e->name, e->numOfBlocks, e->length, sbdSecNum, rootEntry->blocks);*/
450                 res=512 + rootEntry->blocks[sbdSecNum]*sectorSize + sbdSecMod*shortSectorSize;
451         }
452         return res;
453 }
454
455
456 /**
457  * Reads block from open ole stream interface-compatible with fread
458  *
459  * @param ptr pointer to buffer for read to
460  * @param size size of block
461  * @param nmemb size in blocks
462  * @param stream pointer to FILE* structure
463  *
464  * @return number of readed blocks
465  */
466 size_t ole_read(void *ptr, size_t size, size_t nmemb, FILE *stream) {
467         oleEntry *e = (oleEntry*)stream;
468         long int llen = size*nmemb, rread=0, i;
469         long int blockNumber, modBlock, toReadBlocks, toReadBytes, bytesInBlock;
470         long int ssize;                         /**< Size of block */
471         long int newoffset;
472         unsigned char *cptr = ptr;
473         if( e->ole_offset+llen > e->length )
474                 llen= e->length - e->ole_offset;
475
476         ssize = (e->isBigBlock ? sectorSize : shortSectorSize);
477         blockNumber=e->ole_offset/ssize;
478 /*      fprintf(stderr, "blockNumber=%ld e->numOfBlocks=%ld llen=%ld\n", */
479 /*                      blockNumber, e->numOfBlocks, llen); */
480         if ( blockNumber >= e->numOfBlocks || llen <=0 )
481                 return 0;
482
483         modBlock=e->ole_offset%ssize;
484         bytesInBlock = ssize - modBlock;
485         if(bytesInBlock < llen) {
486                 toReadBlocks = (llen-bytesInBlock)/ssize;
487                 toReadBytes = (llen-bytesInBlock)%ssize;
488         } else {
489                 toReadBlocks = toReadBytes = 0;
490         }
491 /*      fprintf(stderr, "llen=%ld toReadBlocks=%ld toReadBytes=%ld bytesInBlock=%ld blockNumber=%ld modBlock=%ld\n", */
492 /*                      llen, toReadBlocks, toReadBytes, bytesInBlock, blockNumber, modBlock); */
493         newoffset = calcFileBlockOffset(e,blockNumber)+modBlock;
494         if (e->file_offset != newoffset) {
495                 fseek(e->file, e->file_offset=newoffset, SEEK_SET);
496         }
497         rread=fread(ptr, 1, min(llen,bytesInBlock), e->file);
498         e->file_offset += rread;
499         for(i=0; i<toReadBlocks; i++) {
500                 int readbytes;
501                 blockNumber++;
502                 newoffset = calcFileBlockOffset(e,blockNumber);
503                 if (newoffset != e->file_offset)
504                         fseek(e->file, e->file_offset=newoffset , SEEK_SET);
505                 readbytes=fread(cptr+rread, 1, min(llen-rread, ssize), e->file);
506                 rread +=readbytes;
507                 e->file_offset +=readbytes;
508         }
509         if(toReadBytes > 0) {
510                 int readbytes;
511                 blockNumber++;
512                 newoffset = calcFileBlockOffset(e,blockNumber);
513                 fseek(e->file, e->file_offset=newoffset, SEEK_SET);
514         readbytes=fread(cptr+rread, 1, toReadBytes,e ->file);
515                 rread +=readbytes;
516                 e->file_offset +=readbytes;
517         }
518 /*      fprintf(stderr, "ole_offset=%ld rread=%ld llen=%ld\n",
519         e->ole_offset, rread, llen);*/
520         e->ole_offset+=rread;
521         return rread;
522 }
523
524 /**
525  *
526  *
527  * @param stream
528  *
529  * @return
530  */
531 int ole_eof(FILE *stream) {
532         oleEntry *e=(oleEntry*)stream;
533 /*      fprintf(stderr, "EOF: e->ole_offset=%ld  e->length=%ld\n",
534         e->ole_offset,  e->length);*/
535         return (e->ole_offset >=  e->length);
536 }
537
538 /**
539  *
540  *
541  */
542 void ole_finish(void) {
543         if ( BBD != NULL ) free(BBD);
544         if ( SBD != NULL ) free(SBD);
545         if ( properties != NULL ) free(properties);
546         if ( rootEntry != NULL ) ole_close((FILE*)rootEntry);
547         properties = SBD = BBD = NULL;
548         rootEntry = NULL;
549 }
550
551 /**
552  *
553  *
554  * @param stream
555  *
556  * @return
557  */
558 int ole_close(FILE *stream) {
559         oleEntry *e=(oleEntry*)stream;
560         if(e == NULL)
561                 return -1;
562         if (e->blocks != NULL)
563                 free(e->blocks);
564         free(e);
565         return 0;
566 }
567
568 /**
569  *
570  *
571  * @param stream pointer to OLE stream structure
572  * @param offset
573  * @param whence
574  *
575  * @return
576  */
577 int ole_seek(FILE *stream, long offset, int whence) {
578         oleEntry *e=(oleEntry*)stream;
579         long int new_ole_offset=0, new_file_offset;
580         int ssize, modBlock, blockNumber;
581
582         switch(whence) {
583         case SEEK_SET:
584                 new_ole_offset=offset;
585                 break;
586
587         case SEEK_CUR:
588                 new_ole_offset=e->ole_offset+offset;
589                 break;
590
591         case SEEK_END:
592                 new_ole_offset=e->length+offset;
593                 break;
594
595         default:
596                 errno=EINVAL;
597                 return -1;
598         }
599         if(new_ole_offset<0)
600                 new_ole_offset=0;
601         if(new_ole_offset >= e->length)
602                 new_ole_offset=e->length;
603
604         ssize = (e->isBigBlock ? sectorSize : shortSectorSize);
605         blockNumber=new_ole_offset/ssize;
606         if ( blockNumber >= e->numOfBlocks )
607                 return -1;
608
609         modBlock=new_ole_offset%ssize;
610         new_file_offset = calcFileBlockOffset(e,blockNumber)+modBlock;
611         fseek(e->file, e->file_offset=new_file_offset, SEEK_SET);
612         e->ole_offset=new_ole_offset;
613
614         return 0;
615 }
616
617 /**
618  * Tell position inside OLE stream
619  *
620  * @param stream pointer to OLE stream
621  *
622  * @return current position inside OLE stream
623  */
624 long ole_tell(FILE *stream) {
625         oleEntry *e=(oleEntry*)stream;
626         return e->ole_offset;
627 }
628
629
630 /**
631  *
632  *
633  */
634 size_t (*catdoc_read)(void *ptr, size_t size, size_t nmemb, FILE *stream);
635 int (*catdoc_eof)(FILE *stream);
636 int (*catdoc_seek)(FILE *stream, long offset, int whence);
637 long (*catdoc_tell)(FILE *stream);
638
639 void set_ole_func(void) {
640         catdoc_read=ole_read;
641         catdoc_eof=ole_eof;
642         catdoc_seek=ole_seek;
643         catdoc_tell=ole_tell;
644 }
645
646 #ifdef feof
647 /* feof is macro in Turbo C, so we need a real function to assign to
648  * pointer
649  */
650 int my_feof(FILE *f) {
651     return feof(f);
652 }
653 #define FEOF my_feof
654 #else
655 #define FEOF feof
656 #endif
657
658 void set_std_func(void) {
659         catdoc_read=fread;
660         catdoc_eof=FEOF;
661         catdoc_seek=fseek;
662         catdoc_tell=ftell;
663 }
664