]> www.wagner.pp.ru Git - oss/catdoc.git/blob - src/ole.c
Recreated CVS repository from working copy
[oss/catdoc.git] / src / ole.c
1 /**
2  * @file   ole.c
3  * @author Alex Ott, Victor B Wagner
4  * @date   Wed Jun 11 12:33:01 2003
5  * Version: $Id: ole.c,v 1.1 2006-02-24 17:44:06 vitus Exp $
6  * Copyright: Victor B Wagner, 1996-2003 Alex Ott, 2003
7  * 
8  * @brief  Parsing structure of MS Office compound document
9  * 
10  * This file is part of catdoc project
11  * and distributed under GNU Public License
12  * 
13  */
14 #ifdef HAVE_CONFIG_H
15 #include <config.h>
16 #endif
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <errno.h>
22
23 #include "catdoc.h"
24
25 #define min(a,b) ((a) < (b) ? (a) : (b))
26
27 long int sectorSize, shortSectorSize;
28 /* BBD Info */
29 long int  bbdNumBlocks;
30 unsigned char *BBD=NULL;
31 /* SBD Info */
32 long int sbdNumber, sbdStart, sbdLen;
33 unsigned char *SBD=NULL;
34 oleEntry *rootEntry=NULL;
35 /* Properties Info */
36 long propCurNumber, propLen, propNumber, propStart;
37 unsigned char *properties=NULL;
38 long int fileLength=0;
39
40 static unsigned char ole_sign[]={0xD0,0xCF,0x11,0xE0,0xA1,0xB1,0x1A,0xE1,0};
41
42
43 /** 
44  * Initializes ole structure
45  * 
46  * @param f (FILE *) compound document file, positioned at bufSize
47  *           byte. Might be pipe or socket 
48  * @param buffer (void *) bytes already read from f
49  * @param bufSize number of bytes already read from f should be less
50  *                than 512 
51  * 
52  * @return 
53  */
54 FILE* ole_init(FILE *f, void *buffer, size_t bufSize)  {
55         unsigned char oleBuf[BBD_BLOCK_SIZE];
56         unsigned char *tmpBuf;
57         FILE *newfile;
58         int ret=0, i;
59         long int sbdMaxLen, sbdCurrent, propMaxLen, propCurrent, mblock, msat_size;
60         oleEntry *tEntry;
61
62         /* deleting old data (if it was allocated) */
63         ole_finish();
64         
65         if (fseek(f,0,SEEK_SET) == -1) {
66                 if ( errno == ESPIPE ) {
67                         /* We got non-seekable file, create temp file */
68                         if((newfile=tmpfile()) == NULL) {
69                                 perror("Can't create tmp file");
70                                 return NULL;
71                         }
72                         if (bufSize > 0) {
73                                 ret=fwrite(buffer, 1, bufSize, newfile);
74                                 if(ret != bufSize) {
75                                         perror("Can't write to tmp file");
76                                         return NULL;
77                                 }
78                         }
79                         
80                         while(!feof(f)){
81                                 ret=fread(oleBuf,1,BBD_BLOCK_SIZE,f);
82                                 fwrite(oleBuf, 1, ret, newfile);
83                         }
84                         fseek(newfile,0,SEEK_SET);
85                 } else {
86                         perror("Can't seek in file");
87                         return NULL;
88                 }
89         } else {
90                 newfile=f;
91         }       
92         fseek(newfile,0,SEEK_END);
93         fileLength=ftell(newfile);
94 /*      fprintf(stderr, "fileLength=%ld\n", fileLength); */
95         fseek(newfile,0,SEEK_SET);
96         ret=fread(oleBuf,1,BBD_BLOCK_SIZE,newfile);
97         if ( ret != BBD_BLOCK_SIZE ) {
98                 return NULL;
99         }
100         if (strncmp(oleBuf,ole_sign,8) != 0) {
101                 return NULL;
102         }
103         sectorSize = 1<<getshort(oleBuf,0x1e);
104         shortSectorSize=1<<getshort(oleBuf,0x20);
105         
106 /* Read BBD into memory */
107         bbdNumBlocks = getulong(oleBuf,0x2c);
108         if((BBD=malloc(bbdNumBlocks*sectorSize)) == NULL ) {
109                 return NULL;
110         }
111         
112         if((tmpBuf=malloc(MSAT_ORIG_SIZE)) == NULL ) {
113                 return NULL;
114         }
115         memcpy(tmpBuf,oleBuf+0x4c,MSAT_ORIG_SIZE);
116         mblock=getlong(oleBuf,0x44);
117         msat_size=getlong(oleBuf,0x48);
118
119 /*      fprintf(stderr, "msat_size=%ld\n", msat_size); */
120
121         i=0;
122         while((mblock >= 0) && (i < msat_size)) {
123                 unsigned char *newbuf;
124 /*              fprintf(stderr, "i=%d mblock=%ld\n", i, mblock); */
125                 if ((newbuf=realloc(tmpBuf, sectorSize*(i+1)+MSAT_ORIG_SIZE)) != NULL) {
126                         tmpBuf=newbuf;
127                 } else {
128                         perror("MSAT realloc error");
129                         free(tmpBuf);
130                         ole_finish();
131                         return NULL;
132                 }
133                 
134                 fseek(newfile, 512+mblock*sectorSize, SEEK_SET);
135                 if(fread(tmpBuf+MSAT_ORIG_SIZE+(sectorSize-4)*i,
136                                                  1, sectorSize, newfile) != sectorSize) {
137                         fprintf(stderr, "Error read MSAT!\n");
138                         ole_finish();
139                         return NULL;
140                 }
141
142                 i++;
143                 mblock=getlong(tmpBuf, MSAT_ORIG_SIZE+(sectorSize-4)*i);
144         }
145         
146 /*      fprintf(stderr, "bbdNumBlocks=%ld\n", bbdNumBlocks); */
147         for(i=0; i< bbdNumBlocks; i++) {
148                 long int bbdSector=getlong(tmpBuf,4*i);
149                 
150                 if (bbdSector >= fileLength/sectorSize || bbdSector < 0) {
151                         fprintf(stderr, "Bad BBD entry!\n");
152                         ole_finish();
153                         return NULL;
154                 }
155                 fseek(newfile, 512+bbdSector*sectorSize, SEEK_SET);
156                 if ( fread(BBD+i*sectorSize, 1, sectorSize, newfile) != sectorSize ) {
157                         fprintf(stderr, "Can't read BBD!\n");
158                         free(tmpBuf);
159                         ole_finish();
160                         return NULL;
161                 }
162         }
163         free(tmpBuf);
164         
165 /* Read SBD into memory */
166         sbdLen=0;
167         sbdMaxLen=10;
168         sbdCurrent = sbdStart = getlong(oleBuf,0x3c);
169         if (sbdStart > 0) {
170                 if((SBD=malloc(sectorSize*sbdMaxLen)) == NULL ) {
171                         ole_finish();
172                         return NULL;
173                 }
174                 while(1) {
175                         fseek(newfile, 512+sbdCurrent*sectorSize, SEEK_SET);
176                         fread(SBD+sbdLen*sectorSize, 1, sectorSize, newfile);
177                         sbdLen++;
178                         if (sbdLen >= sbdMaxLen) {
179                                 unsigned char *newSBD;
180                                 
181                                 sbdMaxLen+=5;
182                                 if ((newSBD=realloc(SBD, sectorSize*sbdMaxLen)) != NULL) {
183                                         SBD=newSBD;
184                                 } else {
185                                         perror("SBD realloc error");
186                                         ole_finish();
187                                         return NULL;
188                                 }
189                         }
190                         sbdCurrent = getlong(BBD, sbdCurrent*4);
191                         if(sbdCurrent < 0 ||
192                                 sbdCurrent >= fileLength/sectorSize)
193                                 break;
194                 }
195                 sbdNumber = (sbdLen*sectorSize)/shortSectorSize;
196 /*              fprintf(stderr, "sbdLen=%ld sbdNumber=%ld\n",sbdLen, sbdNumber); */
197         } else {
198                 SBD=NULL;
199         }
200 /* Read property catalog into memory */
201         propLen = 0;
202         propMaxLen = 5;
203         propCurrent = propStart = getlong(oleBuf,0x30);
204         if (propStart >= 0) {
205                 if((properties=malloc(propMaxLen*sectorSize)) == NULL ) {
206                         ole_finish();
207                         return NULL;
208                 }
209                 while(1) {
210 /*                      fprintf(stderr, "propCurrent=%ld\n",propCurrent); */
211                         fseek(newfile, 512+propCurrent*sectorSize, SEEK_SET);
212                         fread(properties+propLen*sectorSize,
213                                   1, sectorSize, newfile);
214                         propLen++;
215                         if (propLen >= propMaxLen) {
216                                 unsigned char *newProp;
217                                 
218                                 propMaxLen+=5;
219                                 if ((newProp=realloc(properties, propMaxLen*sectorSize)) != NULL)
220                                         properties=newProp;
221                                 else {
222                                         perror("Properties realloc error");
223                                         ole_finish();
224                                         return NULL;
225                                 }
226                         }
227                         
228                         propCurrent = getlong(BBD, propCurrent*4);
229                         if(propCurrent < 0 ||
230                            propCurrent >= fileLength/sectorSize ) {
231                                 break;
232                         }
233                 }
234 /*              fprintf(stderr, "propLen=%ld\n",propLen); */
235                 propNumber = (propLen*sectorSize)/PROP_BLOCK_SIZE;
236                 propCurNumber = 0;
237         } else {
238                 ole_finish();
239                 properties = NULL;
240                 return NULL;
241         }
242         
243         
244 /* Find Root Entry */
245         while((tEntry=(oleEntry*)ole_readdir(newfile)) != NULL) {
246                 if (!tEntry->name[0]||strcmp(tEntry->name,"Root Entry") == 0) {
247                         rootEntry=tEntry;
248                         break;
249                 }
250                 ole_close((FILE*)tEntry);
251         }
252         propCurNumber = 0;
253         fseek(newfile, 0, SEEK_SET);
254         if (!rootEntry) {
255                 fprintf(stderr,"Cannot find root entry in this file!\n");
256                 ole_finish();
257                 return NULL;
258         }       
259         return newfile;
260 }
261
/**
 * Tells whether a property catalog entry has a type this parser handles.
 *
 * @param oleBuf start of the entry; the type byte is read at offset 0x42
 *
 * @return 1 when the type byte is 1, 2, 3 or 5, otherwise 0
 */
int rightOleType(unsigned char *oleBuf) {
	switch (oleBuf[0x42]) {
	case 1:
	case 2:
	case 3:
	case 5:
		return 1;
	default:
		return 0;
	}
}
273
274 /** 
275  * 
276  * 
277  * @param oleBuf 
278  * 
279  * @return 
280  */
281 oleType getOleType(unsigned char *oleBuf) {
282         return (oleType)((unsigned char)oleBuf[0x42]);
283 }
284
285 /** 
286  * Reads next directory entry from file
287  * 
288  * @param name buffer for name converted to us-ascii should be at least 33 chars long
289  * @param size size of file 
290  * 
291  * @return 0 if everything is ok -1 on error
292  */
293 FILE *ole_readdir(FILE *f) {
294         int i, nLen;
295         unsigned char *oleBuf;
296         oleEntry *e=NULL;
297         long int chainMaxLen, chainCurrent;
298         
299         if ( properties == NULL || propCurNumber >= propNumber || f == NULL )
300                 return NULL;
301         oleBuf=properties + propCurNumber*PROP_BLOCK_SIZE;
302         if( !rightOleType(oleBuf))
303                 return NULL;
304         if ((e = (oleEntry*)malloc(sizeof(oleEntry))) == NULL) {
305                 perror("Can\'t allocate memory");
306                 return NULL;
307         }
308         e->dirPos=oleBuf;
309         e->type=getOleType(oleBuf);
310         e->file=f;
311         e->startBlock=getlong(oleBuf,0x74);
312         e->blocks=NULL;
313         
314         nLen=getshort(oleBuf,0x40);
315         for (i=0 ; i < nLen /2; i++)
316                 e->name[i]=(char)oleBuf[i*2];
317         e->name[i]='\0';
318         propCurNumber++;
319         e->length=getulong(oleBuf,0x78);
320 /* Read sector chain for object */
321         chainMaxLen = 25;
322         e->numOfBlocks = 0;
323         chainCurrent = e->startBlock;
324         e->isBigBlock = (e->length >= 0x1000) || !strcmp(e->name, "Root Entry");
325 /*      fprintf(stderr, "e->name=%s e->length=%ld\n", e->name, e->length); */
326 /*      fprintf(stderr, "e->startBlock=%ld BBD=%p\n", e->startBlock, BBD); */
327         if (e->startBlock >= 0 &&
328                 e->length >= 0 &&
329                 (e->startBlock <=
330                  fileLength/(e->isBigBlock ? sectorSize : shortSectorSize))) {
331                 if((e->blocks=malloc(chainMaxLen*sizeof(long int))) == NULL ) {
332                         return NULL;
333                 }
334                 while(1) {
335 /*                      fprintf(stderr, "chainCurrent=%ld\n", chainCurrent); */
336                         e->blocks[e->numOfBlocks++] = chainCurrent;
337                         if (e->numOfBlocks >= chainMaxLen) {
338                                 long int *newChain;
339                                 chainMaxLen+=25;
340                                 if ((newChain=realloc(e->blocks,
341                                                                           chainMaxLen*sizeof(long int))) != NULL)
342                                         e->blocks=newChain;
343                                 else {
344                                         perror("Properties realloc error");
345                                         free(e->blocks);
346                                         e->blocks=NULL;
347                                         return NULL;
348                                 }
349                         }
350                         if ( e->isBigBlock ) {
351                                 chainCurrent = getlong(BBD, chainCurrent*4);
352                         } else if ( SBD != NULL ) {
353                                 chainCurrent = getlong(SBD, chainCurrent*4);
354                         } else {
355                                 chainCurrent=-1;
356                         }
357                         if(chainCurrent <= 0 ||
358                            chainCurrent >= ( e->isBigBlock ?
359                                                                  ((bbdNumBlocks*sectorSize)/4)
360                                                                  : ((sbdNumber*shortSectorSize)/4) ) ||
361                            (e->numOfBlocks >
362                                 e->length/(e->isBigBlock ? sectorSize : shortSectorSize))) {
363 /*                              fprintf(stderr, "chain End=%ld\n", chainCurrent);   */
364                                 break;
365                         }
366                 }
367         }
368         
369         if(e->length > (e->isBigBlock ? sectorSize : shortSectorSize)*e->numOfBlocks)
370                 e->length = (e->isBigBlock ? sectorSize : shortSectorSize)*e->numOfBlocks;
371 /*      fprintf(stderr, "READDIR: e->name=%s e->numOfBlocks=%ld length=%ld\n", */
372 /*                                      e->name, e->numOfBlocks, e->length); */
373         
374         return (FILE*)e;
375 }
376
377 /** 
378  * Open stream, which correspond to directory entry last read by
379  * ole_readdir 
380  * 
381  * 
382  * @return opaque pointer to pass to ole_read, casted to (FILE *)
383  */
384 int ole_open(FILE *stream) {
385         oleEntry *e=(oleEntry *)stream;
386         if ( e->type != oleStream)
387                 return -2;
388         
389         e->ole_offset=0;
390         e->file_offset= ftell(e->file);
391         return 0;
392 }
393
394 /** 
395  * 
396  * 
397  * @param e 
398  * @param blk 
399  * 
400  * @return 
401  */
402 long int calcFileBlockOffset(oleEntry *e, long int blk) {
403         long int res;
404         if ( e->isBigBlock ) {
405                 res=512+e->blocks[blk]*sectorSize;
406         } else {
407                 long int sbdPerSector=sectorSize/shortSectorSize;
408                 long int sbdSecNum=e->blocks[blk]/sbdPerSector;
409                 long int sbdSecMod=e->blocks[blk]%sbdPerSector;
410 /*              fprintf(stderr, "calcoffset: e->name=%s e->numOfBlocks=%ld length=%ld sbdSecNum=%ld rootEntry->blocks=%p\n", 
411                                                 e->name, e->numOfBlocks, e->length, sbdSecNum, rootEntry->blocks);*/
412                 res=512 + rootEntry->blocks[sbdSecNum]*sectorSize + sbdSecMod*shortSectorSize;
413         }
414         return res;
415 }
416
417
418 /** 
419  * Reads block from open ole stream interface-compatible with fread
420  * 
421  * @param ptr pointer to buffer for read to
422  * @param size size of block
423  * @param nmemb size in blocks 
424  * @param stream pointer to FILE* structure
425  * 
426  * @return number of readed blocks
427  */
428 size_t ole_read(void *ptr, size_t size, size_t nmemb, FILE *stream) {
429         oleEntry *e = (oleEntry*)stream;
430         long int llen = size*nmemb, rread=0, i;
431         long int blockNumber, modBlock, toReadBlocks, toReadBytes, bytesInBlock;
432         long int ssize;                         /**< Size of block */
433         long int newoffset;
434         unsigned char *cptr = ptr;      
435         if( e->ole_offset+llen > e->length )
436                 llen= e->length - e->ole_offset;
437         
438         ssize = (e->isBigBlock ? sectorSize : shortSectorSize);
439         blockNumber=e->ole_offset/ssize;
440 /*      fprintf(stderr, "blockNumber=%ld e->numOfBlocks=%ld llen=%ld\n", */
441 /*                      blockNumber, e->numOfBlocks, llen); */
442         if ( blockNumber >= e->numOfBlocks || llen <=0 )
443                 return 0;
444         
445         modBlock=e->ole_offset%ssize;
446         bytesInBlock = ssize - modBlock;
447         if(bytesInBlock < llen) {
448                 toReadBlocks = (llen-bytesInBlock)/ssize;
449                 toReadBytes = (llen-bytesInBlock)%ssize; 
450         } else {
451                 toReadBlocks = toReadBytes = 0;
452         }
453 /*      fprintf(stderr, "llen=%ld toReadBlocks=%ld toReadBytes=%ld bytesInBlock=%ld blockNumber=%ld modBlock=%ld\n", */
454 /*                      llen, toReadBlocks, toReadBytes, bytesInBlock, blockNumber, modBlock); */
455         newoffset = calcFileBlockOffset(e,blockNumber)+modBlock;
456         if (e->file_offset != newoffset) {
457                 fseek(e->file, e->file_offset=newoffset, SEEK_SET);
458         }
459         rread=fread(ptr, 1, min(llen,bytesInBlock), e->file);
460         e->file_offset += rread;
461         for(i=0; i<toReadBlocks; i++) {
462                 int readbytes;
463                 blockNumber++;
464                 newoffset = calcFileBlockOffset(e,blockNumber);
465                 if (newoffset != e->file_offset);
466                 fseek(e->file, e->file_offset=newoffset , SEEK_SET);
467                 readbytes=fread(cptr+rread, 1, min(llen-rread, ssize), e->file);
468                 rread +=readbytes;
469                 e->file_offset +=readbytes;
470         }
471         if(toReadBytes > 0) {
472                 int readbytes;
473                 blockNumber++;
474                 newoffset = calcFileBlockOffset(e,blockNumber);
475                 fseek(e->file, e->file_offset=newoffset, SEEK_SET);
476         readbytes=fread(cptr+rread, 1, toReadBytes,e ->file);
477                 rread +=readbytes;
478                 e->file_offset +=readbytes;
479         }
480 /*      fprintf(stderr, "ole_offset=%ld rread=%ld llen=%ld\n",
481         e->ole_offset, rread, llen);*/
482         e->ole_offset+=rread;
483         return rread;
484 }       
485
486 /** 
487  * 
488  * 
489  * @param stream 
490  * 
491  * @return 
492  */
493 int ole_eof(FILE *stream) {
494         oleEntry *e=(oleEntry*)stream;
495 /*      fprintf(stderr, "EOF: e->ole_offset=%ld  e->length=%ld\n",
496         e->ole_offset,  e->length);*/
497         return (e->ole_offset >=  e->length);
498 }
499
500 /** 
501  * 
502  * 
503  */
504 void ole_finish(void) {
505         if ( BBD != NULL ) free(BBD);
506         if ( SBD != NULL ) free(SBD);
507         if ( properties != NULL ) free(properties);
508         if ( rootEntry != NULL ) ole_close((FILE*)rootEntry);
509         properties = SBD = BBD = NULL;
510         rootEntry = NULL;
511 }
512
513 /** 
514  * 
515  * 
516  * @param stream 
517  * 
518  * @return 
519  */
520 int ole_close(FILE *stream) {
521         oleEntry *e=(oleEntry*)stream;
522         if(e == NULL)
523                 return -1;
524         if (e->blocks != NULL)
525                 free(e->blocks);
526         free(e);
527         return 0;
528 }
529
530 /**
531  * 
532  * 
533  * @param stream pointer to OLE stream structure
534  * @param offset 
535  * @param whence 
536  * 
537  * @return 
538  */
539 int ole_seek(FILE *stream, long offset, int whence) {
540         oleEntry *e=(oleEntry*)stream;
541         long int new_ole_offset=0, new_file_offset;
542         int ssize, modBlock, blockNumber;
543         
544         switch(whence) {
545         case SEEK_SET:
546                 new_ole_offset=offset;
547                 break;
548                 
549         case SEEK_CUR:
550                 new_ole_offset=e->ole_offset+offset;
551                 break;
552                 
553         case SEEK_END:
554                 new_ole_offset=e->length+offset;
555                 break;
556                 
557         default:
558                 errno=EINVAL;
559                 return -1;
560         }
561         if(new_ole_offset<0)
562                 new_ole_offset=0;
563         if(new_ole_offset >= e->length)
564                 new_ole_offset=e->length;
565
566         ssize = (e->isBigBlock ? sectorSize : shortSectorSize);
567         blockNumber=new_ole_offset/ssize;
568         if ( blockNumber >= e->numOfBlocks )
569                 return -1;
570         
571         modBlock=new_ole_offset%ssize;
572         new_file_offset = calcFileBlockOffset(e,blockNumber)+modBlock;
573         fseek(e->file, e->file_offset=new_file_offset, SEEK_SET);
574         e->ole_offset=new_ole_offset;
575         
576         return 0;
577 }
578
579 /** 
580  * Tell position inside OLE stream
581  * 
582  * @param stream pointer to OLE stream
583  * 
584  * @return current position inside OLE stream
585  */
586 long ole_tell(FILE *stream) {
587         oleEntry *e=(oleEntry*)stream;
588         return e->ole_offset;
589 }
590
591
592 /**
593  * 
594  * 
595  */
596 size_t (*catdoc_read)(void *ptr, size_t size, size_t nmemb, FILE *stream);
597 int (*catdoc_eof)(FILE *stream);
598 int (*catdoc_seek)(FILE *stream, long offset, int whence);
599 long (*catdoc_tell)(FILE *stream);
600
601 void set_ole_func(void) {
602         catdoc_read=ole_read;
603         catdoc_eof=ole_eof;
604         catdoc_seek=ole_seek;
605         catdoc_tell=ole_tell;
606 }
607
#ifdef feof
/* When feof is a macro (as in Turbo C) its address cannot be stored
 * in a function pointer, so wrap it in a real function.
 */
int my_feof(FILE *f) {
    int atEnd = feof(f);

    return atEnd;
}
#define FEOF my_feof
#else
#define FEOF feof
#endif
619
620 void set_std_func(void) {
621         catdoc_read=fread;
622         catdoc_eof=FEOF;
623         catdoc_seek=fseek;
624         catdoc_tell=ftell;
625 }
626