]> www.wagner.pp.ru Git - oss/catdoc.git/blob - src/ole.c
More uses of uint16_t instead of short
[oss/catdoc.git] / src / ole.c
1 /**
2  * @file   ole.c
3  * @author Alex Ott, Victor B Wagner
4  * @date   Wed Jun 11 12:33:01 2003
5  * Version: $Id: ole.c,v 1.2 2006-02-25 15:28:14 vitus Exp $
6  * Copyright: Victor B Wagner, 1996-2003 Alex Ott, 2003
7  * 
8  * @brief  Parsing structure of MS Office compound document
9  * 
10  * This file is part of catdoc project
11  * and distributed under GNU Public License
12  * 
13  */
14 #ifdef HAVE_CONFIG_H
15 #include <config.h>
16 #endif
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <errno.h>
22
23 #include "catdoc.h"
24
25 #define min(a,b) ((a) < (b) ? (a) : (b))
26
27 long int sectorSize, shortSectorSize;
28 /* BBD Info */
29 long int  bbdNumBlocks;
30 unsigned char *BBD=NULL;
31 /* SBD Info */
32 long int sbdNumber, sbdStart, sbdLen;
33 unsigned char *SBD=NULL;
34 oleEntry *rootEntry=NULL;
35 /* Properties Info */
36 long propCurNumber, propLen, propNumber, propStart;
37 unsigned char *properties=NULL;
38 long int fileLength=0;
39
40 static char ole_sign[]={0xD0,0xCF,0x11,0xE0,0xA1,0xB1,0x1A,0xE1,0};
41
42
43 /** 
44  * Initializes ole structure
45  * 
46  * @param f (FILE *) compound document file, positioned at bufSize
47  *           byte. Might be pipe or socket 
48  * @param buffer (void *) bytes already read from f
49  * @param bufSize number of bytes already read from f should be less
50  *                than 512 
51  * 
52  * @return 
53  */
54 FILE* ole_init(FILE *f, void *buffer, size_t bufSize)  {
55         unsigned char oleBuf[BBD_BLOCK_SIZE];
56         unsigned char *tmpBuf;
57         FILE *newfile;
58         int ret=0, i;
59         long int sbdMaxLen, sbdCurrent, propMaxLen, propCurrent, mblock, msat_size;
60         oleEntry *tEntry;
61
62         /* deleting old data (if it was allocated) */
63         ole_finish();
64         
65         if (fseek(f,0,SEEK_SET) == -1) {
66                 if ( errno == ESPIPE ) {
67                         /* We got non-seekable file, create temp file */
68                         if((newfile=tmpfile()) == NULL) {
69                                 perror("Can't create tmp file");
70                                 return NULL;
71                         }
72                         if (bufSize > 0) {
73                                 ret=fwrite(buffer, 1, bufSize, newfile);
74                                 if(ret != bufSize) {
75                                         perror("Can't write to tmp file");
76                                         return NULL;
77                                 }
78                         }
79                         
80                         while(!feof(f)){
81                                 ret=fread(oleBuf,1,BBD_BLOCK_SIZE,f);
82                                 fwrite(oleBuf, 1, ret, newfile);
83                         }
84                         fseek(newfile,0,SEEK_SET);
85                 } else {
86                         perror("Can't seek in file");
87                         return NULL;
88                 }
89         } else {
90                 newfile=f;
91         }       
92         fseek(newfile,0,SEEK_END);
93         fileLength=ftell(newfile);
94 /*      fprintf(stderr, "fileLength=%ld\n", fileLength); */
95         fseek(newfile,0,SEEK_SET);
96         ret=fread(oleBuf,1,BBD_BLOCK_SIZE,newfile);
97         if ( ret != BBD_BLOCK_SIZE ) {
98                 return NULL;
99         }
100         if (strncmp((char *)&oleBuf,ole_sign,8) != 0) {
101                 return NULL;
102         }
103         sectorSize = 1<<getshort(oleBuf,0x1e);
104         shortSectorSize=1<<getshort(oleBuf,0x20);
105         
106 /* Read BBD into memory */
107         bbdNumBlocks = getulong(oleBuf,0x2c);
108         if((BBD=malloc(bbdNumBlocks*sectorSize)) == NULL ) {
109                 return NULL;
110         }
111         
112         if((tmpBuf=malloc(MSAT_ORIG_SIZE)) == NULL ) {
113                 return NULL;
114         }
115         memcpy(tmpBuf,oleBuf+0x4c,MSAT_ORIG_SIZE);
116         mblock=getlong(oleBuf,0x44);
117         msat_size=getlong(oleBuf,0x48);
118
119 /*      fprintf(stderr, "msat_size=%ld\n", msat_size); */
120
121         i=0;
122         while((mblock >= 0) && (i < msat_size)) {
123                 unsigned char *newbuf;
124 /*              fprintf(stderr, "i=%d mblock=%ld\n", i, mblock); */
125                 if ((newbuf=realloc(tmpBuf, sectorSize*(i+1)+MSAT_ORIG_SIZE)) != NULL) {
126                         tmpBuf=newbuf;
127                 } else {
128                         perror("MSAT realloc error");
129                         free(tmpBuf);
130                         ole_finish();
131                         return NULL;
132                 }
133                 
134                 fseek(newfile, 512+mblock*sectorSize, SEEK_SET);
135                 if(fread(tmpBuf+MSAT_ORIG_SIZE+(sectorSize-4)*i,
136                                                  1, sectorSize, newfile) != sectorSize) {
137                         fprintf(stderr, "Error read MSAT!\n");
138                         ole_finish();
139                         return NULL;
140                 }
141
142                 i++;
143                 mblock=getlong(tmpBuf, MSAT_ORIG_SIZE+(sectorSize-4)*i);
144         }
145         
146 /*      fprintf(stderr, "bbdNumBlocks=%ld\n", bbdNumBlocks); */
147         for(i=0; i< bbdNumBlocks; i++) {
148                 long int bbdSector=getlong(tmpBuf,4*i);
149                 
150                 if (bbdSector >= fileLength/sectorSize || bbdSector < 0) {
151                         fprintf(stderr, "Bad BBD entry!\n");
152                         ole_finish();
153                         return NULL;
154                 }
155                 fseek(newfile, 512+bbdSector*sectorSize, SEEK_SET);
156                 if ( fread(BBD+i*sectorSize, 1, sectorSize, newfile) != sectorSize ) {
157                         fprintf(stderr, "Can't read BBD!\n");
158                         free(tmpBuf);
159                         ole_finish();
160                         return NULL;
161                 }
162         }
163         free(tmpBuf);
164         
165 /* Read SBD into memory */
166         sbdLen=0;
167         sbdMaxLen=10;
168         sbdCurrent = sbdStart = getlong(oleBuf,0x3c);
169         if (sbdStart > 0) {
170                 if((SBD=malloc(sectorSize*sbdMaxLen)) == NULL ) {
171                         ole_finish();
172                         return NULL;
173                 }
174                 while(1) {
175                         fseek(newfile, 512+sbdCurrent*sectorSize, SEEK_SET);
176                         fread(SBD+sbdLen*sectorSize, 1, sectorSize, newfile);
177                         sbdLen++;
178                         if (sbdLen >= sbdMaxLen) {
179                                 unsigned char *newSBD;
180                                 
181                                 sbdMaxLen+=5;
182                                 if ((newSBD=realloc(SBD, sectorSize*sbdMaxLen)) != NULL) {
183                                         SBD=newSBD;
184                                 } else {
185                                         perror("SBD realloc error");
186                                         ole_finish();
187                                         return NULL;
188                                 }
189                         }
190                         sbdCurrent = getlong(BBD, sbdCurrent*4);
191                         if(sbdCurrent < 0 ||
192                                 sbdCurrent >= fileLength/sectorSize)
193                                 break;
194                 }
195                 sbdNumber = (sbdLen*sectorSize)/shortSectorSize;
196 /*              fprintf(stderr, "sbdLen=%ld sbdNumber=%ld\n",sbdLen, sbdNumber); */
197         } else {
198                 SBD=NULL;
199         }
200 /* Read property catalog into memory */
201         propLen = 0;
202         propMaxLen = 5;
203         propCurrent = propStart = getlong(oleBuf,0x30);
204         if (propStart >= 0) {
205                 if((properties=malloc(propMaxLen*sectorSize)) == NULL ) {
206                         ole_finish();
207                         return NULL;
208                 }
209                 while(1) {
210 /*                      fprintf(stderr, "propCurrent=%ld\n",propCurrent); */
211                         fseek(newfile, 512+propCurrent*sectorSize, SEEK_SET);
212                         fread(properties+propLen*sectorSize,
213                                   1, sectorSize, newfile);
214                         propLen++;
215                         if (propLen >= propMaxLen) {
216                                 unsigned char *newProp;
217                                 
218                                 propMaxLen+=5;
219                                 if ((newProp=realloc(properties, propMaxLen*sectorSize)) != NULL)
220                                         properties=newProp;
221                                 else {
222                                         perror("Properties realloc error");
223                                         ole_finish();
224                                         return NULL;
225                                 }
226                         }
227                         
228                         propCurrent = getlong(BBD, propCurrent*4);
229                         if(propCurrent < 0 ||
230                            propCurrent >= fileLength/sectorSize ) {
231                                 break;
232                         }
233                 }
234 /*              fprintf(stderr, "propLen=%ld\n",propLen); */
235                 propNumber = (propLen*sectorSize)/PROP_BLOCK_SIZE;
236                 propCurNumber = 0;
237         } else {
238                 ole_finish();
239                 properties = NULL;
240                 return NULL;
241         }
242         
243         
244 /* Find Root Entry */
245         while((tEntry=(oleEntry*)ole_readdir(newfile)) != NULL) {
246                 if (tEntry->type == oleRootDir ) {
247                         rootEntry=tEntry;
248                         break;
249                 }
250                 ole_close((FILE*)tEntry);
251         }
252         propCurNumber = 0;
253         fseek(newfile, 0, SEEK_SET);
254         if (!rootEntry) {
255                 fprintf(stderr,"Broken OLE structure. Cannot find root entry in this file!\n");         ole_finish();
256                 return NULL;
257         }       
258         return newfile;
259 }
260
/** 
 * Tells whether a property (directory) entry has a type this module
 * handles.
 * 
 * @param oleBuf start of the raw property entry; the type byte is read
 *               at offset 0x42
 * 
 * @return 1 if the type byte is 1, 2, 3 or 5, otherwise 0
 */
int rightOleType(unsigned char *oleBuf) {
	switch (oleBuf[0x42]) {
	case 1:
	case 2:
	case 3:
	case 5:
		return 1;
	default:
		return 0;
	}
}
272
273 /** 
274  * 
275  * 
276  * @param oleBuf 
277  * 
278  * @return 
279  */
280 oleType getOleType(unsigned char *oleBuf) {
281         return (oleType)((unsigned char)oleBuf[0x42]);
282 }
283
284 /** 
285  * Reads next directory entry from file
286  * 
287  * @param name buffer for name converted to us-ascii should be at least 33 chars long
288  * @param size size of file 
289  * 
290  * @return 0 if everything is ok -1 on error
291  */
292 FILE *ole_readdir(FILE *f) {
293         int i, nLen;
294         unsigned char *oleBuf;
295         oleEntry *e=NULL;
296         long int chainMaxLen, chainCurrent;
297         
298         if ( properties == NULL || propCurNumber >= propNumber || f == NULL )
299                 return NULL;
300         oleBuf=properties + propCurNumber*PROP_BLOCK_SIZE;
301         if( !rightOleType(oleBuf))
302                 return NULL;
303         if ((e = (oleEntry*)malloc(sizeof(oleEntry))) == NULL) {
304                 perror("Can\'t allocate memory");
305                 return NULL;
306         }
307         e->dirPos=oleBuf;
308         e->type=getOleType(oleBuf);
309         e->file=f;
310         e->startBlock=getlong(oleBuf,0x74);
311         e->blocks=NULL;
312         
313         nLen=getshort(oleBuf,0x40);
314         for (i=0 ; i < nLen /2; i++)
315                 e->name[i]=(char)oleBuf[i*2];
316         e->name[i]='\0';
317         propCurNumber++;
318         e->length=getulong(oleBuf,0x78);
319 /* Read sector chain for object */
320         chainMaxLen = 25;
321         e->numOfBlocks = 0;
322         chainCurrent = e->startBlock;
323         e->isBigBlock = (e->length >= 0x1000) || !strcmp(e->name, "Root Entry");
324 /*      fprintf(stderr, "e->name=%s e->length=%ld\n", e->name, e->length); */
325 /*      fprintf(stderr, "e->startBlock=%ld BBD=%p\n", e->startBlock, BBD); */
326         if (e->startBlock >= 0 &&
327                 e->length >= 0 &&
328                 (e->startBlock <=
329                  fileLength/(e->isBigBlock ? sectorSize : shortSectorSize))) {
330                 if((e->blocks=malloc(chainMaxLen*sizeof(long int))) == NULL ) {
331                         free(e);
332                         return NULL;
333                 }
334                 while(1) {
335                         if(chainCurrent < 0 ||
336                            chainCurrent >= (
337                                e->isBigBlock ?
338                                ((bbdNumBlocks*sectorSize)/4) :
339                                ((sbdNumber*shortSectorSize)/4)
340                                ) ||
341                            (e->numOfBlocks >
342                                 e->length/(
343                                     e->isBigBlock ?
344                                     sectorSize :
345                                     shortSectorSize
346                                     )
347                                 )
348                            ) {
349 /*                              fprintf(stderr, "chain End=%ld\n", chainCurrent);   */
350                                 break;
351                         }
352 /*                      fprintf(stderr, "chainCurrent=%ld\n", chainCurrent); */
353                         e->blocks[e->numOfBlocks++] = chainCurrent;
354                         if (e->numOfBlocks >= chainMaxLen) {
355                                 long int *newChain;
356                                 chainMaxLen+=25;
357                                 if ((newChain=realloc(e->blocks,
358                                                                           chainMaxLen*sizeof(long int))) != NULL)
359                                         e->blocks=newChain;
360                                 else {
361                                         perror("Properties realloc error");
362                                         free(e->blocks);
363                                         e->blocks=NULL;
364                                         return NULL;
365                                 }
366                         }
367                         if ( e->isBigBlock ) {
368                                 chainCurrent = getlong(BBD, chainCurrent*4);
369                         } else if ( SBD != NULL ) {
370                                 chainCurrent = getlong(SBD, chainCurrent*4);
371                         } else {
372                                 chainCurrent=-1;
373                         }
374                 }
375         }
376         
377         if(e->length > (e->isBigBlock ? sectorSize : shortSectorSize)*e->numOfBlocks)
378                 e->length = (e->isBigBlock ? sectorSize : shortSectorSize)*e->numOfBlocks;
379 /*      fprintf(stderr, "READDIR: e->name=%s e->numOfBlocks=%ld length=%ld\n", */
380 /*                                      e->name, e->numOfBlocks, e->length); */
381         
382         return (FILE*)e;
383 }
384
385 /** 
386  * Open stream, which correspond to directory entry last read by
387  * ole_readdir 
388  * 
389  * 
390  * @return opaque pointer to pass to ole_read, casted to (FILE *)
391  */
392 int ole_open(FILE *stream) {
393         oleEntry *e=(oleEntry *)stream;
394         if ( e->type != oleStream)
395                 return -2;
396         
397         e->ole_offset=0;
398         e->file_offset= ftell(e->file);
399         return 0;
400 }
401
402 /** 
403  * 
404  * 
405  * @param e 
406  * @param blk 
407  * 
408  * @return 
409  */
410 long int calcFileBlockOffset(oleEntry *e, long int blk) {
411         long int res;
412         if ( e->isBigBlock ) {
413                 res=512+e->blocks[blk]*sectorSize;
414         } else {
415                 long int sbdPerSector=sectorSize/shortSectorSize;
416                 long int sbdSecNum=e->blocks[blk]/sbdPerSector;
417                 long int sbdSecMod=e->blocks[blk]%sbdPerSector;
418 /*              fprintf(stderr, "calcoffset: e->name=%s e->numOfBlocks=%ld length=%ld sbdSecNum=%ld rootEntry->blocks=%p\n", 
419                                                 e->name, e->numOfBlocks, e->length, sbdSecNum, rootEntry->blocks);*/
420                 res=512 + rootEntry->blocks[sbdSecNum]*sectorSize + sbdSecMod*shortSectorSize;
421         }
422         return res;
423 }
424
425
426 /** 
427  * Reads block from open ole stream interface-compatible with fread
428  * 
429  * @param ptr pointer to buffer for read to
430  * @param size size of block
431  * @param nmemb size in blocks 
432  * @param stream pointer to FILE* structure
433  * 
434  * @return number of readed blocks
435  */
436 size_t ole_read(void *ptr, size_t size, size_t nmemb, FILE *stream) {
437         oleEntry *e = (oleEntry*)stream;
438         long int llen = size*nmemb, rread=0, i;
439         long int blockNumber, modBlock, toReadBlocks, toReadBytes, bytesInBlock;
440         long int ssize;                         /**< Size of block */
441         long int newoffset;
442         unsigned char *cptr = ptr;      
443         if( e->ole_offset+llen > e->length )
444                 llen= e->length - e->ole_offset;
445         
446         ssize = (e->isBigBlock ? sectorSize : shortSectorSize);
447         blockNumber=e->ole_offset/ssize;
448 /*      fprintf(stderr, "blockNumber=%ld e->numOfBlocks=%ld llen=%ld\n", */
449 /*                      blockNumber, e->numOfBlocks, llen); */
450         if ( blockNumber >= e->numOfBlocks || llen <=0 )
451                 return 0;
452         
453         modBlock=e->ole_offset%ssize;
454         bytesInBlock = ssize - modBlock;
455         if(bytesInBlock < llen) {
456                 toReadBlocks = (llen-bytesInBlock)/ssize;
457                 toReadBytes = (llen-bytesInBlock)%ssize; 
458         } else {
459                 toReadBlocks = toReadBytes = 0;
460         }
461 /*      fprintf(stderr, "llen=%ld toReadBlocks=%ld toReadBytes=%ld bytesInBlock=%ld blockNumber=%ld modBlock=%ld\n", */
462 /*                      llen, toReadBlocks, toReadBytes, bytesInBlock, blockNumber, modBlock); */
463         newoffset = calcFileBlockOffset(e,blockNumber)+modBlock;
464         if (e->file_offset != newoffset) {
465                 fseek(e->file, e->file_offset=newoffset, SEEK_SET);
466         }
467         rread=fread(ptr, 1, min(llen,bytesInBlock), e->file);
468         e->file_offset += rread;
469         for(i=0; i<toReadBlocks; i++) {
470                 int readbytes;
471                 blockNumber++;
472                 newoffset = calcFileBlockOffset(e,blockNumber);
473                 if (newoffset != e->file_offset);
474                 fseek(e->file, e->file_offset=newoffset , SEEK_SET);
475                 readbytes=fread(cptr+rread, 1, min(llen-rread, ssize), e->file);
476                 rread +=readbytes;
477                 e->file_offset +=readbytes;
478         }
479         if(toReadBytes > 0) {
480                 int readbytes;
481                 blockNumber++;
482                 newoffset = calcFileBlockOffset(e,blockNumber);
483                 fseek(e->file, e->file_offset=newoffset, SEEK_SET);
484         readbytes=fread(cptr+rread, 1, toReadBytes,e ->file);
485                 rread +=readbytes;
486                 e->file_offset +=readbytes;
487         }
488 /*      fprintf(stderr, "ole_offset=%ld rread=%ld llen=%ld\n",
489         e->ole_offset, rread, llen);*/
490         e->ole_offset+=rread;
491         return rread;
492 }       
493
494 /** 
495  * 
496  * 
497  * @param stream 
498  * 
499  * @return 
500  */
501 int ole_eof(FILE *stream) {
502         oleEntry *e=(oleEntry*)stream;
503 /*      fprintf(stderr, "EOF: e->ole_offset=%ld  e->length=%ld\n",
504         e->ole_offset,  e->length);*/
505         return (e->ole_offset >=  e->length);
506 }
507
508 /** 
509  * 
510  * 
511  */
512 void ole_finish(void) {
513         if ( BBD != NULL ) free(BBD);
514         if ( SBD != NULL ) free(SBD);
515         if ( properties != NULL ) free(properties);
516         if ( rootEntry != NULL ) ole_close((FILE*)rootEntry);
517         properties = SBD = BBD = NULL;
518         rootEntry = NULL;
519 }
520
521 /** 
522  * 
523  * 
524  * @param stream 
525  * 
526  * @return 
527  */
528 int ole_close(FILE *stream) {
529         oleEntry *e=(oleEntry*)stream;
530         if(e == NULL)
531                 return -1;
532         if (e->blocks != NULL)
533                 free(e->blocks);
534         free(e);
535         return 0;
536 }
537
538 /**
539  * 
540  * 
541  * @param stream pointer to OLE stream structure
542  * @param offset 
543  * @param whence 
544  * 
545  * @return 
546  */
547 int ole_seek(FILE *stream, long offset, int whence) {
548         oleEntry *e=(oleEntry*)stream;
549         long int new_ole_offset=0, new_file_offset;
550         int ssize, modBlock, blockNumber;
551         
552         switch(whence) {
553         case SEEK_SET:
554                 new_ole_offset=offset;
555                 break;
556                 
557         case SEEK_CUR:
558                 new_ole_offset=e->ole_offset+offset;
559                 break;
560                 
561         case SEEK_END:
562                 new_ole_offset=e->length+offset;
563                 break;
564                 
565         default:
566                 errno=EINVAL;
567                 return -1;
568         }
569         if(new_ole_offset<0)
570                 new_ole_offset=0;
571         if(new_ole_offset >= e->length)
572                 new_ole_offset=e->length;
573
574         ssize = (e->isBigBlock ? sectorSize : shortSectorSize);
575         blockNumber=new_ole_offset/ssize;
576         if ( blockNumber >= e->numOfBlocks )
577                 return -1;
578         
579         modBlock=new_ole_offset%ssize;
580         new_file_offset = calcFileBlockOffset(e,blockNumber)+modBlock;
581         fseek(e->file, e->file_offset=new_file_offset, SEEK_SET);
582         e->ole_offset=new_ole_offset;
583         
584         return 0;
585 }
586
587 /** 
588  * Tell position inside OLE stream
589  * 
590  * @param stream pointer to OLE stream
591  * 
592  * @return current position inside OLE stream
593  */
594 long ole_tell(FILE *stream) {
595         oleEntry *e=(oleEntry*)stream;
596         return e->ole_offset;
597 }
598
599
600 /**
601  * 
602  * 
603  */
604 size_t (*catdoc_read)(void *ptr, size_t size, size_t nmemb, FILE *stream);
605 int (*catdoc_eof)(FILE *stream);
606 int (*catdoc_seek)(FILE *stream, long offset, int whence);
607 long (*catdoc_tell)(FILE *stream);
608
609 void set_ole_func(void) {
610         catdoc_read=ole_read;
611         catdoc_eof=ole_eof;
612         catdoc_seek=ole_seek;
613         catdoc_tell=ole_tell;
614 }
615
#if defined(feof)
/* feof is macro in Turbo C, so we need a real function to assign to
 * pointer
 */ 
int my_feof(FILE *f) {
	int atEnd = feof(f);

	return atEnd;
}
#define FEOF my_feof
#else
/* feof is an ordinary function here, so its address can be used directly */
#define FEOF feof
#endif
627
628 void set_std_func(void) {
629         catdoc_read=fread;
630         catdoc_eof=FEOF;
631         catdoc_seek=fseek;
632         catdoc_tell=ftell;
633 }
634