3 * @author Alex Ott, Victor B Wagner
4 * @date Wed Jun 11 12:33:01 2003
5 * Version: $Id: ole.c,v 1.2 2006-02-25 15:28:14 vitus Exp $
6 * Copyright: Victor B Wagner, 1996-2003 Alex Ott, 2003
8 * @brief Parsing structure of MS Office compound document
10 * This file is part of catdoc project
11 * and distributed under GNU Public License
25 #define min(a,b) ((a) < (b) ? (a) : (b))
/* min(): smaller of two values. Function-like macro, so each argument may be
 * evaluated twice; do not pass expressions with side effects. */
/* Big-sector and short-sector sizes in bytes. ole_init computes them as
 * 1 << getshort(header, 0x1e) and 1 << getshort(header, 0x20). */
27 long int sectorSize, shortSectorSize;
/* Number of BBD sectors, read by ole_init from header offset 0x2c. */
29 long int bbdNumBlocks;
/* In-memory copy of the BBD (bbdNumBlocks * sectorSize bytes), allocated by
 * ole_init and freed by ole_finish. */
30 unsigned char *BBD=NULL;
/* sbdStart: first SBD sector (header offset 0x3c). sbdLen: index of the SBD
 * sector slot being filled in ole_init. sbdNumber: (sbdLen*sectorSize)/shortSectorSize. */
32 long int sbdNumber, sbdStart, sbdLen;
/* In-memory copy of the SBD, allocated by ole_init and freed by ole_finish. */
33 unsigned char *SBD=NULL;
/* Root directory entry; calcFileBlockOffset uses its block chain to locate
 * short sectors, and ole_finish closes it.
 * NOTE(review): presumably assigned in ole_init when the oleRootDir entry is
 * found - the assignment itself is not visible here; confirm. */
34 oleEntry *rootEntry=NULL;
/* propStart: first property-catalog sector (header offset 0x30).
 * propLen: index of the catalog sector slot being filled in ole_init.
 * propNumber: (propLen*sectorSize)/PROP_BLOCK_SIZE, i.e. number of entries.
 * propCurNumber: index of the entry ole_readdir examines. */
36 long propCurNumber, propLen, propNumber, propStart;
/* In-memory copy of the property catalog, indexed in PROP_BLOCK_SIZE steps. */
37 unsigned char *properties=NULL;
/* Length of the input file in bytes, measured by ole_init with ftell(). */
38 long int fileLength=0;
/* First 8 bytes of a compound document; ole_init compares them with strncmp.
 * NOTE(review): plain char initialised with values above 0x7f; the result is
 * implementation-defined where char is signed - consider unsigned char. */
40 char ole_sign[]={0xD0,0xCF,0x11,0xE0,0xA1,0xB1,0x1A,0xE1,0};
/* First 4 bytes checked by ole_init to report ZIP / Office 2007+ input. */
41 char zip_sign[]="PK\003\004";
44 * Initializes ole structure
46 * @param f (FILE *) compound document file, positioned at bufSize
47 * byte. Might be pipe or socket
48 * @param buffer (void *) bytes already read from f
49 * @param bufSize number of bytes already read from f should be less
54 FILE* ole_init(FILE *f, void *buffer, size_t bufSize) {
/*
 * Validates the compound-document header and loads the BBD, the SBD and the
 * property catalog into the file-level globals, then scans the directory
 * for the root entry.
 *
 * f       input stream; if it cannot be rewound (ESPIPE) its data is spooled
 *         into a tmpfile() and that file is used instead.
 * buffer  bytes already consumed from f; written first into the temp file.
 * bufSize number of bytes in buffer.
 *
 * NOTE(review): presumably returns the stream to use for later ole_* calls
 * and NULL on failure - the return statements are not visible here; confirm.
 */
55 unsigned char oleBuf[BBD_BLOCK_SIZE];
56 unsigned char *tmpBuf;
60 long int sbdMaxLen, sbdCurrent, propMaxLen, propCurrent, mblock, msat_size;
63 /* deleting old data (if it was allocated) */
/* Try to rewind f; failure with ESPIPE means a pipe/socket, handled below. */
66 if (fseek(f,0,SEEK_SET) == -1) {
67 if ( errno == ESPIPE ) {
68 /* We got non-seekable file, create temp file */
69 if((newfile=tmpfile()) == NULL) {
70 perror("Can't create tmp file");
/* Replay the bytes the caller already consumed from f. */
74 ret=fwrite(buffer, 1, bufSize, newfile);
76 perror("Can't write to tmp file");
/* Copy data from f in BBD_BLOCK_SIZE chunks.
 * NOTE(review): the fwrite result is not checked on this line. */
82 ret=fread(oleBuf,1,BBD_BLOCK_SIZE,f);
83 fwrite(oleBuf, 1, ret, newfile);
85 fseek(newfile,0,SEEK_SET);
87 perror("Can't seek in file");
/* Measure the file; fileLength bounds every size read from the header. */
93 fseek(newfile,0,SEEK_END);
94 fileLength=ftell(newfile);
95 /* fprintf(stderr, "fileLength=%ld\n", fileLength); */
96 fseek(newfile,0,SEEK_SET);
/* Read the header block and check its signature. */
97 ret=fread(oleBuf,1,BBD_BLOCK_SIZE,newfile);
98 if ( ret != BBD_BLOCK_SIZE ) {
101 if (strncmp((char *)&oleBuf,zip_sign,4) == 0) {
102 fprintf(stderr,"Looks like ZIP archive or Office 2007 or later. Not supported\n");
104 } else if (strncmp((char *)&oleBuf,ole_sign,8) != 0) {
/* NOTE(review): the shift counts come straight from the file and are not
 * range-checked in the lines shown; a large value is undefined behaviour
 * and a zero-sized sector would break the divisions below. */
107 sectorSize = 1<<getshort(oleBuf,0x1e);
108 shortSectorSize=1<<getshort(oleBuf,0x20);
110 /* Read BBD into memory */
111 bbdNumBlocks = getulong(oleBuf,0x2c);
112 bbdSize = bbdNumBlocks * sectorSize;
113 if (bbdSize > fileLength) {
114 /* broken file, BBD size greater than entire file*/
118 if((BBD=malloc(bbdNumBlocks*sectorSize)) == NULL ) {
/* tmpBuf holds the sector-number table (MSAT): it starts as the
 * MSAT_ORIG_SIZE bytes stored in the header at offset 0x4c. */
122 if((tmpBuf=malloc(MSAT_ORIG_SIZE)) == NULL ) {
125 memcpy(tmpBuf,oleBuf+0x4c,MSAT_ORIG_SIZE);
/* First additional MSAT sector and the count of such sectors. */
126 mblock=getlong(oleBuf,0x44);
127 msat_size=getlong(oleBuf,0x48);
128 if (msat_size * sectorSize > fileLength) {
133 /* fprintf(stderr, "msat_size=%ld\n", msat_size); */
/* Append each additional MSAT sector to tmpBuf. sectorSize bytes are read
 * per step but the write position advances by sectorSize-4, and the next
 * sector number is then taken from tmpBuf. */
136 while((mblock >= 0) && (i < msat_size)) {
137 unsigned char *newbuf;
138 /* fprintf(stderr, "i=%d mblock=%ld\n", i, mblock); */
139 if ((newbuf=realloc(tmpBuf, sectorSize*(i+1)+MSAT_ORIG_SIZE)) != NULL) {
142 perror("MSAT realloc error");
148 fseek(newfile, 512+mblock*sectorSize, SEEK_SET);
149 if(fread(tmpBuf+MSAT_ORIG_SIZE+(sectorSize-4)*i,
150 1, sectorSize, newfile) != sectorSize) {
151 fprintf(stderr, "Error read MSAT!\n");
157 mblock=getlong(tmpBuf, MSAT_ORIG_SIZE+(sectorSize-4)*i);
160 /* fprintf(stderr, "bbdNumBlocks=%ld\n", bbdNumBlocks); */
/* Load every BBD sector listed in tmpBuf into BBD.
 * NOTE(review): 4*i is not checked against tmpBuf's allocated size in the
 * lines shown, while bbdNumBlocks comes from the file - confirm a guard. */
161 for(i=0; i< bbdNumBlocks; i++) {
162 long int bbdSector=getlong(tmpBuf,4*i);
164 if (bbdSector >= fileLength/sectorSize || bbdSector < 0) {
165 fprintf(stderr, "Bad BBD entry!\n");
169 fseek(newfile, 512+bbdSector*sectorSize, SEEK_SET);
170 if ( fread(BBD+i*sectorSize, 1, sectorSize, newfile) != sectorSize ) {
171 fprintf(stderr, "Can't read BBD!\n");
179 /* Read SBD into memory */
182 sbdCurrent = sbdStart = getlong(oleBuf,0x3c);
184 if((SBD=malloc(sectorSize*sbdMaxLen)) == NULL ) {
/* Read one SBD sector into slot sbdLen.
 * NOTE(review): this fread result is not checked, unlike the BBD read. */
189 fseek(newfile, 512+sbdCurrent*sectorSize, SEEK_SET);
190 fread(SBD+sbdLen*sectorSize, 1, sectorSize, newfile);
/* Grow SBD once all sbdMaxLen slots are used. */
192 if (sbdLen >= sbdMaxLen) {
193 unsigned char *newSBD;
196 if ((newSBD=realloc(SBD, sectorSize*sbdMaxLen)) != NULL) {
199 perror("SBD realloc error");
/* Reject an index outside the BBD before following the chain. The BBD holds
 * bbdSize bytes, so the last valid 4-byte entry starts at bbdSize-4; the
 * comparison must be >= (the property chain check below uses the same
 * bound, propCurrent >= bbdSize/4). */
204 if (sbdCurrent * 4 >= bbdSize) {
208 sbdCurrent = getlong(BBD, sbdCurrent*4);
210 sbdCurrent >= fileLength/sectorSize)
/* Number of short sectors the SBD sectors read so far can describe. */
213 sbdNumber = (sbdLen*sectorSize)/shortSectorSize;
214 /* fprintf(stderr, "sbdLen=%ld sbdNumber=%ld\n",sbdLen, sbdNumber); */
218 /* Read property catalog into memory */
221 propCurrent = propStart = getlong(oleBuf,0x30);
222 if (propStart >= 0) {
223 if((properties=malloc(propMaxLen*sectorSize)) == NULL ) {
228 /* fprintf(stderr, "propCurrent=%ld\n",propCurrent); */
229 fseek(newfile, 512+propCurrent*sectorSize, SEEK_SET);
231 if (fread(properties+propLen*sectorSize,
232 1, sectorSize, newfile)!=sectorSize) {
234 perror("reading properties catalog");
/* Grow the catalog buffer once all propMaxLen slots are used. */
240 if (propLen >= propMaxLen) {
241 unsigned char *newProp;
244 if ((newProp=realloc(properties, propMaxLen*sectorSize)) != NULL)
247 perror("Properties realloc error");
/* Follow the chain through the BBD; the range check applies to the value
 * just fetched.
 * NOTE(review): the first index (propStart) is only shown checked for >= 0
 * before being used here - confirm an upper-bound check exists. */
253 propCurrent = getlong(BBD, propCurrent*4);
254 if(propCurrent < 0 ||
255 propCurrent >= bbdSize/4 ) {
259 /* fprintf(stderr, "propLen=%ld\n",propLen); */
260 propNumber = (propLen*sectorSize)/PROP_BLOCK_SIZE;
269 /* Find Root Entry */
/* Walk directory entries until one of type oleRootDir turns up; entries
 * passed to ole_close here are released again. */
270 while((tEntry=(oleEntry*)ole_readdir(newfile)) != NULL) {
271 if (tEntry->type == oleRootDir ) {
275 ole_close((FILE*)tEntry);
278 fseek(newfile, 0, SEEK_SET);
280 fprintf(stderr,"Broken OLE structure. Cannot find root entry in this file!\n"); ole_finish();
293 int rightOleType(unsigned char *oleBuf) {
/* Returns non-zero when the type byte at offset 0x42 of a directory entry
 * is one of the accepted values 1, 2, 3 or 5, and 0 otherwise. */
294 return (oleBuf[0x42] == 1 || oleBuf[0x42] == 2 ||
295 oleBuf[0x42] == 3 || oleBuf[0x42] == 5 );
305 oleType getOleType(unsigned char *oleBuf) {
/* Returns the type byte at offset 0x42 of a directory entry converted to
 * oleType; the value is not validated here (see rightOleType). */
306 return (oleType)((unsigned char)oleBuf[0x42]);
310 * Reads next directory entry from file
312 * @param f (FILE *) compound document file; must not be NULL
313 *
315 * @return next directory entry (an oleEntry cast to FILE *), or NULL when no entry can be returned
317 FILE *ole_readdir(FILE *f) {
/* Builds an oleEntry for the property-catalog slot selected by
 * propCurNumber: type, start block, name, length and the chain of blocks
 * that hold the entry's data. Callers cast the result back to oleEntry *. */
319 unsigned char *oleBuf;
321 long int chainMaxLen, chainCurrent;
/* Nothing to read without a loaded catalog, past its last entry, or
 * without a file. */
323 if ( properties == NULL || propCurNumber >= propNumber || f == NULL )
/* Each catalog entry occupies PROP_BLOCK_SIZE bytes. */
325 oleBuf=properties + propCurNumber*PROP_BLOCK_SIZE;
326 if( !rightOleType(oleBuf))
/* NOTE(review): calloc arguments are given as (size, count); harmless,
 * but the conventional order is (count, size). */
328 if ((e = (oleEntry*) calloc(sizeof(oleEntry),1)) == NULL) {
329 perror("Can\'t allocate memory");
333 e->type=getOleType(oleBuf);
/* First block of the entry's data chain, from entry offset 0x74. */
335 e->startBlock=getlong(oleBuf,0x74);
/* Name length field at entry offset 0x40, bounded by OLENAMELENGTH. */
338 nLen=getshort(oleBuf,0x40);
339 if (nLen > OLENAMELENGTH) {
/* Keep every second byte of the stored name, giving nLen/2 chars. */
343 for (i=0 ; i < nLen /2; i++)
344 e->name[i]=(char)oleBuf[i*2];
/* Data length from entry offset 0x78. */
347 e->length=getulong(oleBuf,0x78);
348 /* Read sector chain for object */
351 chainCurrent = e->startBlock;
/* Data of 0x1000 bytes or more, and the entry named "Root Entry", are
 * addressed in big sectors; everything else in short sectors. */
352 e->isBigBlock = (e->length >= 0x1000) || !strcmp(e->name, "Root Entry");
353 /* fprintf(stderr, "e->name=%s e->length=%ld\n", e->name, e->length); */
354 /* fprintf(stderr, "e->startBlock=%ld BBD=%p\n", e->startBlock, BBD); */
355 if (e->startBlock >= 0 &&
358 fileLength/(e->isBigBlock ? sectorSize : shortSectorSize))) {
359 if((e->blocks=malloc(chainMaxLen*sizeof(long int))) == NULL ) {
364 if(chainCurrent < 0 ||
367 ((bbdNumBlocks*sectorSize)/4) :
368 ((sbdNumber*shortSectorSize)/4)
378 /* fprintf(stderr, "chain End=%ld\n", chainCurrent); */
381 /* fprintf(stderr, "chainCurrent=%ld\n", chainCurrent); */
/* Record the block, growing e->blocks once chainMaxLen slots are used. */
382 e->blocks[e->numOfBlocks++] = chainCurrent;
383 if (e->numOfBlocks >= chainMaxLen) {
386 if ((newChain=realloc(e->blocks,
387 chainMaxLen*sizeof(long int))) != NULL) {
/* NOTE(review): the message says "Properties" although this reallocation
 * is for the block chain. */
390 perror("Properties realloc error");
/* The next block comes from the BBD for big-sector entries, otherwise
 * from the SBD when one was loaded. */
396 if ( e->isBigBlock ) {
397 chainCurrent = getlong(BBD, chainCurrent*4);
398 } else if ( SBD != NULL ) {
399 chainCurrent = getlong(SBD, chainCurrent*4);
/* Clamp the declared length to what the collected blocks can hold. */
406 if(e->length > (e->isBigBlock ? sectorSize : shortSectorSize)*e->numOfBlocks)
407 e->length = (e->isBigBlock ? sectorSize : shortSectorSize)*e->numOfBlocks;
408 /* fprintf(stderr, "READDIR: e->name=%s e->numOfBlocks=%ld length=%ld\n", */
409 /* e->name, e->numOfBlocks, e->length); */
415 * Open stream, which corresponds to directory entry last read by
419 * @return opaque pointer to pass to ole_read, cast to (FILE *)
421 int ole_open(FILE *stream) {
/* Prepares a directory entry (an oleEntry passed as FILE *) for reading.
 * NOTE(review): the header comment's @return describes an opaque pointer,
 * but the function returns int; the returned values are not visible
 * here - confirm and update the header. */
422 oleEntry *e=(oleEntry *)stream;
/* Only entries of type oleStream are handled past this check. */
423 if ( e->type != oleStream)
/* Cache the current position of the underlying file; ole_read compares
 * against it to decide whether a seek is needed. */
427 e->file_offset= ftell(e->file);
439 long int calcFileBlockOffset(oleEntry *e, long int blk) {
/* Computes the byte offset in the underlying file of block number blk of
 * entry e's chain. blk is used as an index into e->blocks without a range
 * check here; callers compare against e->numOfBlocks first. */
441 if ( e->isBigBlock ) {
/* Big sectors follow the 512-byte header directly. */
442 res=512+e->blocks[blk]*sectorSize;
/* Short sectors are packed inside the big sectors of the root entry's
 * chain: pick the containing big sector, then the slot within it. */
444 long int sbdPerSector=sectorSize/shortSectorSize;
445 long int sbdSecNum=e->blocks[blk]/sbdPerSector;
446 long int sbdSecMod=e->blocks[blk]%sbdPerSector;
447 /* fprintf(stderr, "calcoffset: e->name=%s e->numOfBlocks=%ld length=%ld sbdSecNum=%ld rootEntry->blocks=%p\n",
448 e->name, e->numOfBlocks, e->length, sbdSecNum, rootEntry->blocks);*/
/* NOTE(review): sbdSecNum is not checked against rootEntry->numOfBlocks
 * in the lines shown, and rootEntry is dereferenced unconditionally. */
449 res=512 + rootEntry->blocks[sbdSecNum]*sectorSize + sbdSecMod*shortSectorSize;
456 * Reads a block from an open OLE stream; interface-compatible with fread
458 * @param ptr pointer to the buffer to read into
459 * @param size size of one block in bytes
460 * @param nmemb number of blocks to read
461 * @param stream pointer to the OLE stream (an oleEntry cast to FILE *)
463 * @return number of blocks read
465 size_t ole_read(void *ptr, size_t size, size_t nmemb, FILE *stream) {
/*
 * Copies up to size*nmemb bytes of the OLE stream into ptr, starting at
 * e->ole_offset and never reading past e->length. The request is split
 * into the rest of the current block, a run of whole blocks and a final
 * partial block, because consecutive stream blocks need not be adjacent
 * in the underlying file. e->file_offset caches the position of e->file
 * so that seeks can be skipped when the next block follows directly.
 */
466 oleEntry *e = (oleEntry*)stream;
467 long int llen = size*nmemb, rread=0, i;
468 long int blockNumber, modBlock, toReadBlocks, toReadBytes, bytesInBlock;
469 long int ssize; /**< Size of block */
471 unsigned char *cptr = ptr;
/* Clamp the request to the bytes remaining in the stream. */
472 if( e->ole_offset+llen > e->length )
473 llen= e->length - e->ole_offset;
475 ssize = (e->isBigBlock ? sectorSize : shortSectorSize);
476 blockNumber=e->ole_offset/ssize;
477 /* fprintf(stderr, "blockNumber=%ld e->numOfBlocks=%ld llen=%ld\n", */
478 /* blockNumber, e->numOfBlocks, llen); */
/* Nothing to do beyond the last block or for an empty request. */
479 if ( blockNumber >= e->numOfBlocks || llen <=0 )
/* Position inside the current block and bytes left in it. */
482 modBlock=e->ole_offset%ssize;
483 bytesInBlock = ssize - modBlock;
/* Whatever does not fit in the current block is split into whole blocks
 * plus a trailing partial block. */
484 if(bytesInBlock < llen) {
485 toReadBlocks = (llen-bytesInBlock)/ssize;
486 toReadBytes = (llen-bytesInBlock)%ssize;
488 toReadBlocks = toReadBytes = 0;
490 /* fprintf(stderr, "llen=%ld toReadBlocks=%ld toReadBytes=%ld bytesInBlock=%ld blockNumber=%ld modBlock=%ld\n", */
491 /* llen, toReadBlocks, toReadBytes, bytesInBlock, blockNumber, modBlock); */
/* First part: seek only if the file is not already positioned there. */
492 newoffset = calcFileBlockOffset(e,blockNumber)+modBlock;
493 if (e->file_offset != newoffset) {
494 fseek(e->file, e->file_offset=newoffset, SEEK_SET);
496 rread=fread(ptr, 1, min(llen,bytesInBlock), e->file);
497 e->file_offset += rread;
/* Whole blocks. */
498 for(i=0; i<toReadBlocks; i++) {
501 newoffset = calcFileBlockOffset(e,blockNumber);
/* Seek only when the next block does not directly follow the current file
 * position, matching the check used for the first part. A stray ';' after
 * this condition previously turned it into a no-op, so the fseek ran on
 * every iteration. */
502 if (newoffset != e->file_offset)
503 fseek(e->file, e->file_offset=newoffset , SEEK_SET);
504 readbytes=fread(cptr+rread, 1, min(llen-rread, ssize), e->file);
506 e->file_offset +=readbytes;
/* Trailing partial block; this seek is unconditional. */
508 if(toReadBytes > 0) {
511 newoffset = calcFileBlockOffset(e,blockNumber);
512 fseek(e->file, e->file_offset=newoffset, SEEK_SET);
513 readbytes=fread(cptr+rread, 1, toReadBytes,e ->file);
515 e->file_offset +=readbytes;
517 /* fprintf(stderr, "ole_offset=%ld rread=%ld llen=%ld\n",
518 e->ole_offset, rread, llen);*/
/* Advance the stream position by the bytes counted in rread. */
519 e->ole_offset+=rread;
530 int ole_eof(FILE *stream) {
/* Reports end of stream: non-zero once the stream position e->ole_offset
 * has reached or passed e->length. stream is an oleEntry cast to FILE *. */
531 oleEntry *e=(oleEntry*)stream;
532 /* fprintf(stderr, "EOF: e->ole_offset=%ld e->length=%ld\n",
533 e->ole_offset, e->length);*/
534 return (e->ole_offset >= e->length);
541 void ole_finish(void) {
/* Releases the in-memory BBD, SBD and property catalog, closes the root
 * entry, and resets the three buffer pointers to NULL.
 * The NULL guards before free() are redundant: free(NULL) is a no-op. */
542 if ( BBD != NULL ) free(BBD);
543 if ( SBD != NULL ) free(SBD);
544 if ( properties != NULL ) free(properties);
545 if ( rootEntry != NULL ) ole_close((FILE*)rootEntry);
/* NOTE(review): rootEntry is not reset on this line; confirm it is cleared
 * elsewhere, otherwise a second call would close it again. */
546 properties = SBD = BBD = NULL;
557 int ole_close(FILE *stream) {
/* Closes a directory entry previously obtained as an oleEntry cast to
 * FILE *. Only the check of the block chain pointer is visible here. */
558 oleEntry *e=(oleEntry*)stream;
/* The block chain is optional; it is handled only when present. */
561 if (e->blocks != NULL)
570 * @param stream pointer to OLE stream structure
576 int ole_seek(FILE *stream, long offset, int whence) {
/* Moves the position of an OLE stream (an oleEntry cast to FILE *) and
 * positions the underlying file at the matching byte. */
577 oleEntry *e=(oleEntry*)stream;
578 long int new_ole_offset=0, new_file_offset;
/* NOTE(review): these are int while sectorSize and the offsets are
 * long int, so the assignments below narrow. */
579 int ssize, modBlock, blockNumber;
/* Three candidate targets: absolute, relative to the current position,
 * and relative to the stream length. Presumably selected by whence - the
 * selecting construct is not visible here; confirm. */
583 new_ole_offset=offset;
587 new_ole_offset=e->ole_offset+offset;
591 new_ole_offset=e->length+offset;
/* Clamp to the end of the stream. */
600 if(new_ole_offset >= e->length)
601 new_ole_offset=e->length;
/* Translate the stream offset into block number and offset within it. */
603 ssize = (e->isBigBlock ? sectorSize : shortSectorSize);
604 blockNumber=new_ole_offset/ssize;
605 if ( blockNumber >= e->numOfBlocks )
608 modBlock=new_ole_offset%ssize;
609 new_file_offset = calcFileBlockOffset(e,blockNumber)+modBlock;
/* Move the underlying file and update the cached file position.
 * NOTE(review): the fseek result is not checked. */
610 fseek(e->file, e->file_offset=new_file_offset, SEEK_SET);
611 e->ole_offset=new_ole_offset;
617 * Tell position inside OLE stream
619 * @param stream pointer to OLE stream
621 * @return current position inside OLE stream
623 long ole_tell(FILE *stream) {
/* Returns the current position inside the OLE stream, e->ole_offset.
 * stream is an oleEntry cast to FILE *. */
624 oleEntry *e=(oleEntry*)stream;
625 return e->ole_offset;
633 size_t (*catdoc_read)(void *ptr, size_t size, size_t nmemb, FILE *stream);
/* catdoc_read and the three pointers below form an I/O dispatch table with
 * fread/feof/fseek/ftell-shaped signatures. set_ole_func points
 * catdoc_read, catdoc_seek and catdoc_tell at the ole_* implementations. */
634 int (*catdoc_eof)(FILE *stream);
635 int (*catdoc_seek)(FILE *stream, long offset, int whence);
636 long (*catdoc_tell)(FILE *stream);
638 void set_ole_func(void) {
/* Routes the catdoc_* function pointers to the OLE stream implementations
 * so that callers read from inside a compound document.
 * NOTE(review): no assignment to catdoc_eof is visible here; confirm it
 * is set to ole_eof. */
639 catdoc_read=ole_read;
641 catdoc_seek=ole_seek;
642 catdoc_tell=ole_tell;
646 /* feof is macro in Turbo C, so we need a real function to assign to
649 int my_feof(FILE *f) {
657 void set_std_func(void) {