]> www.wagner.pp.ru Git - oss/catdoc.git/blob - src/ole.c
e131a0dac2140921121edfeda7c65dc14fa3ffca
[oss/catdoc.git] / src / ole.c
1 /**
2  * @file   ole.c
3  * @author Alex Ott, Victor B Wagner
4  * @date   Wed Jun 11 12:33:01 2003
5  * Version: $Id: ole.c,v 1.2 2006-02-25 15:28:14 vitus Exp $
6  * Copyright: Victor B Wagner, 1996-2003 Alex Ott, 2003
7  *
8  * @brief  Parsing structure of MS Office compound document
9  *
10  * This file is part of catdoc project
11  * and distributed under GNU Public License
12  *
13  */
14 #ifdef HAVE_CONFIG_H
15 #include <config.h>
16 #endif
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <errno.h>
22
23 #include "catdoc.h"
24
25 #define min(a,b) ((a) < (b) ? (a) : (b))
26
27 long int sectorSize, shortSectorSize;
28 /* BBD Info */
29 long int  bbdNumBlocks;
30 unsigned char *BBD=NULL;
31 /* SBD Info */
32 long int sbdNumber, sbdStart, sbdLen;
33 unsigned char *SBD=NULL;
34 oleEntry *rootEntry=NULL;
35 /* Properties Info */
36 long propCurNumber, propLen, propNumber, propStart;
37 unsigned char *properties=NULL;
38 long int fileLength=0;
39
40 char ole_sign[]={0xD0,0xCF,0x11,0xE0,0xA1,0xB1,0x1A,0xE1,0};
41 char zip_sign[]="PK\003\004";
42
43 /**
44  * Initializes ole structure
45  *
46  * @param f (FILE *) compound document file, positioned at bufSize
47  *           byte. Might be pipe or socket
48  * @param buffer (void *) bytes already read from f
49  * @param bufSize number of bytes already read from f should be less
50  *                than 512
51  *
52  * @return
53  */
54 FILE* ole_init(FILE *f, void *buffer, size_t bufSize)  {
55         unsigned char oleBuf[BBD_BLOCK_SIZE];
56         unsigned char *tmpBuf;
57         FILE *newfile;
58         int ret=0, i;
59         long int bbdSize;
60         long int sbdMaxLen, sbdCurrent, propMaxLen, propCurrent, mblock, msat_size;
61         oleEntry *tEntry;
62
63         /* deleting old data (if it was allocated) */
64         ole_finish();
65
66         if (fseek(f,0,SEEK_SET) == -1) {
67                 if ( errno == ESPIPE ) {
68                         /* We got non-seekable file, create temp file */
69                         if((newfile=tmpfile()) == NULL) {
70                                 perror("Can't create tmp file");
71                                 return NULL;
72                         }
73                         if (bufSize > 0) {
74                                 ret=fwrite(buffer, 1, bufSize, newfile);
75                                 if(ret != bufSize) {
76                                         perror("Can't write to tmp file");
77                                         return NULL;
78                                 }
79                         }
80
81                         while(!feof(f)){
82                                 ret=fread(oleBuf,1,BBD_BLOCK_SIZE,f);
83                                 fwrite(oleBuf, 1, ret, newfile);
84                         }
85                         fseek(newfile,0,SEEK_SET);
86                 } else {
87                         perror("Can't seek in file");
88                         return NULL;
89                 }
90         } else {
91                 newfile=f;
92         }
93         fseek(newfile,0,SEEK_END);
94         fileLength=ftell(newfile);
95 /*      fprintf(stderr, "fileLength=%ld\n", fileLength); */
96         fseek(newfile,0,SEEK_SET);
97         ret=fread(oleBuf,1,BBD_BLOCK_SIZE,newfile);
98         if ( ret != BBD_BLOCK_SIZE ) {
99                 return NULL;
100         }
101         if (strncmp((char *)&oleBuf,zip_sign,4) == 0) {
102                 fprintf(stderr,"Looks like ZIP archive or Office 2007 or later. Not supported\n");
103                 return NULL;
104         } else if (strncmp((char *)&oleBuf,ole_sign,8) != 0) {
105                 return NULL;
106         }
107         sectorSize = 1<<getshort(oleBuf,0x1e);
108         shortSectorSize=1<<getshort(oleBuf,0x20);
109
110 /* Read BBD into memory */
111         bbdNumBlocks = getulong(oleBuf,0x2c);
112         bbdSize = bbdNumBlocks * sectorSize;
113         if (bbdSize > fileLength) {
114                 /* broken file, BBD size greater than entire file*/
115                 return NULL;
116         }
117
118         if((BBD=malloc(bbdNumBlocks*sectorSize)) == NULL ) {
119                 return NULL;
120         }
121
122         if((tmpBuf=malloc(MSAT_ORIG_SIZE)) == NULL ) {
123                 return NULL;
124         }
125         memcpy(tmpBuf,oleBuf+0x4c,MSAT_ORIG_SIZE);
126         mblock=getlong(oleBuf,0x44);
127         msat_size=getlong(oleBuf,0x48);
128         if (msat_size * sectorSize > fileLength) {
129                 free(tmpBuf);
130                 return NULL;
131         }
132                 
133 /*      fprintf(stderr, "msat_size=%ld\n", msat_size); */
134
135         i=0;
136         while((mblock >= 0) && (i < msat_size)) {
137                 unsigned char *newbuf;
138 /*              fprintf(stderr, "i=%d mblock=%ld\n", i, mblock); */
139                 if ((newbuf=realloc(tmpBuf, sectorSize*(i+1)+MSAT_ORIG_SIZE)) != NULL) {
140                         tmpBuf=newbuf;
141                 } else {
142                         perror("MSAT realloc error");
143                         free(tmpBuf);
144                         ole_finish();
145                         return NULL;
146                 }
147
148                 fseek(newfile, 512+mblock*sectorSize, SEEK_SET);
149                 if(fread(tmpBuf+MSAT_ORIG_SIZE+(sectorSize-4)*i,
150                                                  1, sectorSize, newfile) != sectorSize) {
151                         fprintf(stderr, "Error read MSAT!\n");
152                         ole_finish();
153                         return NULL;
154                 }
155
156                 i++;
157                 mblock=getlong(tmpBuf, MSAT_ORIG_SIZE+(sectorSize-4)*i);
158         }
159
160 /*      fprintf(stderr, "bbdNumBlocks=%ld\n", bbdNumBlocks); */
161         for(i=0; i< bbdNumBlocks; i++) {
162                 long int bbdSector=getlong(tmpBuf,4*i);
163
164                 if (bbdSector >= fileLength/sectorSize || bbdSector < 0) {
165                         fprintf(stderr, "Bad BBD entry!\n");
166                         ole_finish();
167                         return NULL;
168                 }
169                 fseek(newfile, 512+bbdSector*sectorSize, SEEK_SET);
170                 if ( fread(BBD+i*sectorSize, 1, sectorSize, newfile) != sectorSize ) {
171                         fprintf(stderr, "Can't read BBD!\n");
172                         free(tmpBuf);
173                         ole_finish();
174                         return NULL;
175                 }
176         }
177         free(tmpBuf);
178
179 /* Read SBD into memory */
180         sbdLen=0;
181         sbdMaxLen=10;
182         sbdCurrent = sbdStart = getlong(oleBuf,0x3c);
183         if (sbdStart > 0) {
184                 if((SBD=malloc(sectorSize*sbdMaxLen)) == NULL ) {
185                         ole_finish();
186                         return NULL;
187                 }
188                 while(1) {
189                         fseek(newfile, 512+sbdCurrent*sectorSize, SEEK_SET);
190                         fread(SBD+sbdLen*sectorSize, 1, sectorSize, newfile);
191                         sbdLen++;
192                         if (sbdLen >= sbdMaxLen) {
193                                 unsigned char *newSBD;
194
195                                 sbdMaxLen+=5;
196                                 if ((newSBD=realloc(SBD, sectorSize*sbdMaxLen)) != NULL) {
197                                         SBD=newSBD;
198                                 } else {
199                                         perror("SBD realloc error");
200                                         ole_finish();
201                                         return NULL;
202                                 }
203                         }
204                         if (sbdCurrent * 4 > bbdSize) {
205                                 ole_finish();
206                                 return NULL;
207                         }
208                         sbdCurrent = getlong(BBD, sbdCurrent*4);
209                         if(sbdCurrent < 0 ||
210                                 sbdCurrent >= fileLength/sectorSize)
211                                 break;
212                 }
213                 sbdNumber = (sbdLen*sectorSize)/shortSectorSize;
214 /*              fprintf(stderr, "sbdLen=%ld sbdNumber=%ld\n",sbdLen, sbdNumber); */
215         } else {
216                 SBD=NULL;
217         }
218 /* Read property catalog into memory */
219         propLen = 0;
220         propMaxLen = 5;
221         propCurrent = propStart = getlong(oleBuf,0x30);
222         if (propStart >= 0) {
223                 if((properties=malloc(propMaxLen*sectorSize)) == NULL ) {
224                         ole_finish();
225                         return NULL;
226                 }
227                 while(1) {
228 /*                      fprintf(stderr, "propCurrent=%ld\n",propCurrent); */
229                         fseek(newfile, 512+propCurrent*sectorSize, SEEK_SET);
230                         errno=0;
231                         if (fread(properties+propLen*sectorSize,
232                                   1, sectorSize, newfile)!=sectorSize) {
233                                   if (errno != 0) {
234                                         perror("reading properties catalog");
235                                   }
236                                   ole_finish();
237                                   return NULL;
238                         }
239                         propLen++;
240                         if (propLen >= propMaxLen) {
241                                 unsigned char *newProp;
242
243                                 propMaxLen+=5;
244                                 if ((newProp=realloc(properties, propMaxLen*sectorSize)) != NULL)
245                                         properties=newProp;
246                                 else {
247                                         perror("Properties realloc error");
248                                         ole_finish();
249                                         return NULL;
250                                 }
251                         }
252
253                         propCurrent = getlong(BBD, propCurrent*4);
254                         if(propCurrent < 0 ||
255                            propCurrent >= bbdSize/4 ) {
256                            break;
257                         }
258                 }
259 /*              fprintf(stderr, "propLen=%ld\n",propLen); */
260                 propNumber = (propLen*sectorSize)/PROP_BLOCK_SIZE;
261                 propCurNumber = 0;
262         } else {
263                 ole_finish();
264                 properties = NULL;
265                 return NULL;
266         }
267
268
269 /* Find Root Entry */
270         while((tEntry=(oleEntry*)ole_readdir(newfile)) != NULL) {
271                 if (tEntry->type == oleRootDir ) {
272                         rootEntry=tEntry;
273                         break;
274                 }
275                 ole_close((FILE*)tEntry);
276         }
277         propCurNumber = 0;
278         fseek(newfile, 0, SEEK_SET);
279         if (!rootEntry) {
280                 fprintf(stderr,"Broken OLE structure. Cannot find root entry in this file!\n");         ole_finish();
281                 return NULL;
282         }
283         return newfile;
284 }
285
/**
 * Tells whether a directory entry has one of the accepted type codes
 *
 * @param oleBuf raw directory entry; only the byte at offset 0x42 is read
 *
 * @return 1 if that byte is 1, 2, 3 or 5, otherwise 0
 */
int rightOleType(unsigned char *oleBuf) {
	switch (oleBuf[0x42]) {
	case 1:
	case 2:
	case 3:
	case 5:
		return 1;
	default:
		return 0;
	}
}
297
298 /**
299  *
300  *
301  * @param oleBuf
302  *
303  * @return
304  */
305 oleType getOleType(unsigned char *oleBuf) {
306         return (oleType)((unsigned char)oleBuf[0x42]);
307 }
308
309 /**
310  * Reads next directory entry from file
311  *
312  * @param name buffer for name converted to us-ascii should be at least 33 chars long
313  * @param size size of file
314  *
315  * @return 0 if everything is ok -1 on error
316  */
317 FILE *ole_readdir(FILE *f) {
318         int i, nLen;
319         unsigned char *oleBuf;
320         oleEntry *e=NULL;
321         long int chainMaxLen, chainCurrent;
322
323         if ( properties == NULL || propCurNumber >= propNumber || f == NULL )
324                 return NULL;
325         oleBuf=properties + propCurNumber*PROP_BLOCK_SIZE;
326         if( !rightOleType(oleBuf))
327                 return NULL;
328         if ((e = (oleEntry*) calloc(sizeof(oleEntry),1)) == NULL) {
329                 perror("Can\'t allocate memory");
330                 return NULL;
331         }
332         e->dirPos=oleBuf;
333         e->type=getOleType(oleBuf);
334         e->file=f;
335         e->startBlock=getlong(oleBuf,0x74);
336         e->blocks=NULL;
337
338         nLen=getshort(oleBuf,0x40);
339         if (nLen > OLENAMELENGTH) {
340                 free(e);
341                 return NULL;
342         }
343         for (i=0 ; i < nLen /2; i++)
344                 e->name[i]=(char)oleBuf[i*2];
345         e->name[i]='\0';
346         propCurNumber++;
347         e->length=getulong(oleBuf,0x78);
348 /* Read sector chain for object */
349         chainMaxLen = 25;
350         e->numOfBlocks = 0;
351         chainCurrent = e->startBlock;
352         e->isBigBlock = (e->length >= 0x1000) || !strcmp(e->name, "Root Entry");
353 /*      fprintf(stderr, "e->name=%s e->length=%ld\n", e->name, e->length); */
354 /*      fprintf(stderr, "e->startBlock=%ld BBD=%p\n", e->startBlock, BBD); */
355         if (e->startBlock >= 0 &&
356                 e->length >= 0 &&
357                 (e->startBlock <=
358                  fileLength/(e->isBigBlock ? sectorSize : shortSectorSize))) {
359                 if((e->blocks=malloc(chainMaxLen*sizeof(long int))) == NULL ) {
360                         free(e);
361                         return NULL;
362                 }
363                 while(1) {
364                         if(chainCurrent < 0 ||
365                            chainCurrent >= (
366                                e->isBigBlock ?
367                                ((bbdNumBlocks*sectorSize)/4) :
368                                ((sbdNumber*shortSectorSize)/4)
369                                ) ||
370                            (e->numOfBlocks >
371                                 e->length/(
372                                     e->isBigBlock ?
373                                     sectorSize :
374                                     shortSectorSize
375                                     )
376                                 )
377                            ) {
378 /*                              fprintf(stderr, "chain End=%ld\n", chainCurrent);   */
379                                 break;
380                         }
381 /*                      fprintf(stderr, "chainCurrent=%ld\n", chainCurrent); */
382                         e->blocks[e->numOfBlocks++] = chainCurrent;
383                         if (e->numOfBlocks >= chainMaxLen) {
384                                 long int *newChain;
385                                 chainMaxLen+=25;
386                                 if ((newChain=realloc(e->blocks,
387                                                                           chainMaxLen*sizeof(long int))) != NULL) {
388                                         e->blocks=newChain;
389                                 } else {
390                                         perror("Properties realloc error");
391                                         free(e->blocks);
392                                         e->blocks=NULL;
393                                         return NULL;
394                                 }
395                         }
396                         if ( e->isBigBlock ) {
397                                 chainCurrent = getlong(BBD, chainCurrent*4);
398                         } else if ( SBD != NULL ) {
399                                 chainCurrent = getlong(SBD, chainCurrent*4);
400                         } else {
401                                 chainCurrent=-1;
402                         }
403                 }
404         }
405
406         if(e->length > (e->isBigBlock ? sectorSize : shortSectorSize)*e->numOfBlocks)
407                 e->length = (e->isBigBlock ? sectorSize : shortSectorSize)*e->numOfBlocks;
408 /*      fprintf(stderr, "READDIR: e->name=%s e->numOfBlocks=%ld length=%ld\n", */
409 /*                                      e->name, e->numOfBlocks, e->length); */
410
411         return (FILE*)e;
412 }
413
414 /**
415  * Open stream, which correspond to directory entry last read by
416  * ole_readdir
417  *
418  *
419  * @return opaque pointer to pass to ole_read, casted to (FILE *)
420  */
421 int ole_open(FILE *stream) {
422         oleEntry *e=(oleEntry *)stream;
423         if ( e->type != oleStream)
424                 return -2;
425
426         e->ole_offset=0;
427         e->file_offset= ftell(e->file);
428         return 0;
429 }
430
431 /**
432  *
433  *
434  * @param e
435  * @param blk
436  *
437  * @return
438  */
439 long int calcFileBlockOffset(oleEntry *e, long int blk) {
440         long int res;
441         if ( e->isBigBlock ) {
442                 res=512+e->blocks[blk]*sectorSize;
443         } else {
444                 long int sbdPerSector=sectorSize/shortSectorSize;
445                 long int sbdSecNum=e->blocks[blk]/sbdPerSector;
446                 long int sbdSecMod=e->blocks[blk]%sbdPerSector;
447 /*              fprintf(stderr, "calcoffset: e->name=%s e->numOfBlocks=%ld length=%ld sbdSecNum=%ld rootEntry->blocks=%p\n",
448                                                 e->name, e->numOfBlocks, e->length, sbdSecNum, rootEntry->blocks);*/
449                 res=512 + rootEntry->blocks[sbdSecNum]*sectorSize + sbdSecMod*shortSectorSize;
450         }
451         return res;
452 }
453
454
455 /**
456  * Reads block from open ole stream interface-compatible with fread
457  *
458  * @param ptr pointer to buffer for read to
459  * @param size size of block
460  * @param nmemb size in blocks
461  * @param stream pointer to FILE* structure
462  *
463  * @return number of readed blocks
464  */
465 size_t ole_read(void *ptr, size_t size, size_t nmemb, FILE *stream) {
466         oleEntry *e = (oleEntry*)stream;
467         long int llen = size*nmemb, rread=0, i;
468         long int blockNumber, modBlock, toReadBlocks, toReadBytes, bytesInBlock;
469         long int ssize;                         /**< Size of block */
470         long int newoffset;
471         unsigned char *cptr = ptr;
472         if( e->ole_offset+llen > e->length )
473                 llen= e->length - e->ole_offset;
474
475         ssize = (e->isBigBlock ? sectorSize : shortSectorSize);
476         blockNumber=e->ole_offset/ssize;
477 /*      fprintf(stderr, "blockNumber=%ld e->numOfBlocks=%ld llen=%ld\n", */
478 /*                      blockNumber, e->numOfBlocks, llen); */
479         if ( blockNumber >= e->numOfBlocks || llen <=0 )
480                 return 0;
481
482         modBlock=e->ole_offset%ssize;
483         bytesInBlock = ssize - modBlock;
484         if(bytesInBlock < llen) {
485                 toReadBlocks = (llen-bytesInBlock)/ssize;
486                 toReadBytes = (llen-bytesInBlock)%ssize;
487         } else {
488                 toReadBlocks = toReadBytes = 0;
489         }
490 /*      fprintf(stderr, "llen=%ld toReadBlocks=%ld toReadBytes=%ld bytesInBlock=%ld blockNumber=%ld modBlock=%ld\n", */
491 /*                      llen, toReadBlocks, toReadBytes, bytesInBlock, blockNumber, modBlock); */
492         newoffset = calcFileBlockOffset(e,blockNumber)+modBlock;
493         if (e->file_offset != newoffset) {
494                 fseek(e->file, e->file_offset=newoffset, SEEK_SET);
495         }
496         rread=fread(ptr, 1, min(llen,bytesInBlock), e->file);
497         e->file_offset += rread;
498         for(i=0; i<toReadBlocks; i++) {
499                 int readbytes;
500                 blockNumber++;
501                 newoffset = calcFileBlockOffset(e,blockNumber);
502                 if (newoffset != e->file_offset);
503                 fseek(e->file, e->file_offset=newoffset , SEEK_SET);
504                 readbytes=fread(cptr+rread, 1, min(llen-rread, ssize), e->file);
505                 rread +=readbytes;
506                 e->file_offset +=readbytes;
507         }
508         if(toReadBytes > 0) {
509                 int readbytes;
510                 blockNumber++;
511                 newoffset = calcFileBlockOffset(e,blockNumber);
512                 fseek(e->file, e->file_offset=newoffset, SEEK_SET);
513         readbytes=fread(cptr+rread, 1, toReadBytes,e ->file);
514                 rread +=readbytes;
515                 e->file_offset +=readbytes;
516         }
517 /*      fprintf(stderr, "ole_offset=%ld rread=%ld llen=%ld\n",
518         e->ole_offset, rread, llen);*/
519         e->ole_offset+=rread;
520         return rread;
521 }
522
523 /**
524  *
525  *
526  * @param stream
527  *
528  * @return
529  */
530 int ole_eof(FILE *stream) {
531         oleEntry *e=(oleEntry*)stream;
532 /*      fprintf(stderr, "EOF: e->ole_offset=%ld  e->length=%ld\n",
533         e->ole_offset,  e->length);*/
534         return (e->ole_offset >=  e->length);
535 }
536
537 /**
538  *
539  *
540  */
541 void ole_finish(void) {
542         if ( BBD != NULL ) free(BBD);
543         if ( SBD != NULL ) free(SBD);
544         if ( properties != NULL ) free(properties);
545         if ( rootEntry != NULL ) ole_close((FILE*)rootEntry);
546         properties = SBD = BBD = NULL;
547         rootEntry = NULL;
548 }
549
550 /**
551  *
552  *
553  * @param stream
554  *
555  * @return
556  */
557 int ole_close(FILE *stream) {
558         oleEntry *e=(oleEntry*)stream;
559         if(e == NULL)
560                 return -1;
561         if (e->blocks != NULL)
562                 free(e->blocks);
563         free(e);
564         return 0;
565 }
566
567 /**
568  *
569  *
570  * @param stream pointer to OLE stream structure
571  * @param offset
572  * @param whence
573  *
574  * @return
575  */
576 int ole_seek(FILE *stream, long offset, int whence) {
577         oleEntry *e=(oleEntry*)stream;
578         long int new_ole_offset=0, new_file_offset;
579         int ssize, modBlock, blockNumber;
580
581         switch(whence) {
582         case SEEK_SET:
583                 new_ole_offset=offset;
584                 break;
585
586         case SEEK_CUR:
587                 new_ole_offset=e->ole_offset+offset;
588                 break;
589
590         case SEEK_END:
591                 new_ole_offset=e->length+offset;
592                 break;
593
594         default:
595                 errno=EINVAL;
596                 return -1;
597         }
598         if(new_ole_offset<0)
599                 new_ole_offset=0;
600         if(new_ole_offset >= e->length)
601                 new_ole_offset=e->length;
602
603         ssize = (e->isBigBlock ? sectorSize : shortSectorSize);
604         blockNumber=new_ole_offset/ssize;
605         if ( blockNumber >= e->numOfBlocks )
606                 return -1;
607
608         modBlock=new_ole_offset%ssize;
609         new_file_offset = calcFileBlockOffset(e,blockNumber)+modBlock;
610         fseek(e->file, e->file_offset=new_file_offset, SEEK_SET);
611         e->ole_offset=new_ole_offset;
612
613         return 0;
614 }
615
616 /**
617  * Tell position inside OLE stream
618  *
619  * @param stream pointer to OLE stream
620  *
621  * @return current position inside OLE stream
622  */
623 long ole_tell(FILE *stream) {
624         oleEntry *e=(oleEntry*)stream;
625         return e->ole_offset;
626 }
627
628
/**
 * Stream access functions used through these pointers. They are
 * pointed either at the ole_* implementations (set_ole_func) or at the
 * stdio ones (set_std_func).
 */
/* read items from the stream into a buffer */
size_t (*catdoc_read)(void *buf, size_t itemSize, size_t itemCount, FILE *in);
/* test for end of stream */
int (*catdoc_eof)(FILE *in);
/* change the read position */
int (*catdoc_seek)(FILE *in, long pos, int origin);
/* report the read position */
long (*catdoc_tell)(FILE *in);
637
638 void set_ole_func(void) {
639         catdoc_read=ole_read;
640         catdoc_eof=ole_eof;
641         catdoc_seek=ole_seek;
642         catdoc_tell=ole_tell;
643 }
644
/* FEOF names something whose address can be stored in a function
 * pointer. feof is macro in Turbo C, so there a real function wrapping
 * it is needed; elsewhere feof itself is used.
 */
#ifndef feof
#define FEOF feof
#else
int my_feof(FILE *f) {
	return feof(f);
}
#define FEOF my_feof
#endif
656
657 void set_std_func(void) {
658         catdoc_read=fread;
659         catdoc_eof=FEOF;
660         catdoc_seek=fseek;
661         catdoc_tell=ftell;
662 }
663