1 /*****************************************************************/
2 /* Reading routines for rtf files */
4 /* This file is part of catdoc project */
5 /* (c) Victor Wagner 2003, (c) Alex Ott 2003 */
6 /*****************************************************************/
17 /********************************************************
18 * Datatypes declaration
41 RTF_LISTOVERRIDETABLE,
67 RTFTypeMap rtf_types[]={
69 {"ansicpg",RTF_CODEPAGE},
74 {"overlay",RTF_OVERLAY},
76 {"author",RTF_AUTHOR},
78 {"fonttbl",RTF_FONTTBL},
80 {"stylesheet",RTF_STYLESHEET},
81 {"colortbl",RTF_COLORTBL},
82 {"listtable",RTF_LISTTABLE},
83 {"listoverridetable",RTF_LISTOVERRIDETABLE},
84 {"rsidtbl",RTF_RSIDTBL},
85 {"generator",RTF_GENERATOR},
86 {"datafield",RTF_DATAFIELD},
89 {"emdash",RTF_EMDASH},
90 {"endash",RTF_ENDASH},
91 {"emspace",RTF_EMDASH},
92 {"enspace",RTF_ENDASH},
93 {"bullet",RTF_BULLET},
94 {"lquote",RTF_LQUOTE},
95 {"rquote",RTF_RQUOTE},
96 {"ldblquote",RTF_LDBLQUOTE},
97 {"rdblquote",RTF_RDBLQUOTE},
98 {"zwnj",RTF_ZWNONJOINER},
101 {"u",RTF_UNICODE_CHAR}
104 #define RTFNAMEMAXLEN 32
105 #define RTFARGSMAXLEN 64
106 #define MAX_DIGITS_IN_NUMBER 10
109 * Structure describing rtf command
114 char name[RTFNAMEMAXLEN+1];
120 #define MAXFONTNAME 64
127 char fontname[MAXFONTNAME+1];
131 * Structure to describe style
139 * Structure to store values, local to rtf group
143 int uc; /**< How many symbols to skip */
144 RTFStyle* style; /**< current style */
147 /********************************************************
148 * Functions declaration
152 extern int forced_charset;
153 signed long getNumber(FILE *f);
155 int getRtfCommand(FILE *f, RTFcommand *command );
156 unsigned short int rtf_to_unicode(int code);
157 RTFTypes getCommandType(char *name);
158 signed int getCharCode(FILE *f);
159 void rtfSetCharset(short int **charset_ptr,unsigned int codepage);
161 /********************************************************
165 short int *current_charset;
168 /********************************************************
169 * Functions implementation
172 extern unsigned short int buffer[];
173 void add_to_buffer(int *bufptr,unsigned short int c) {
174 buffer[++(*bufptr)]=c;
175 if (*bufptr >= PARAGRAPH_BUFFER-2) {
176 buffer[++(*bufptr)]=0;
177 output_paragraph(buffer);
182 void end_paragraph(int *bufptr) {
183 add_to_buffer(bufptr,0x000a);
184 add_to_buffer(bufptr,0);
185 output_paragraph(buffer);
190 * Parses RTF file from file stream
192 * @param f - file stream descriptor
194 int parse_rtf(FILE *f) {
195 int para_mode=0, data_skip_mode=0,i;
196 RTFGroupData *groups=NULL;
197 int group_count=0, group_store=20;
199 current_charset=source_charset;
201 if((groups=(RTFGroupData*)calloc(group_store,sizeof(RTFGroupData))) == NULL ) {
202 perror("Can\'t allocate memory: ");
205 groups[0].uc = 2; /* Default uc = 2 */
214 if ((code=getRtfCommand(f, &com)) != 0)
218 /* fprintf(stderr, "Spec Char found=%s and arg=%c\n", */
219 /* com.name, com.numarg); */
220 if (com.numarg == '*' && data_skip_mode == 0) {
221 data_skip_mode=group_count;
222 } else if (com.numarg == '\r') {
223 end_paragraph(&bufptr);
224 } else if (com.numarg == '~') {
225 add_to_buffer(&bufptr,0xA0);/* NO-BREAK SPACE */
226 } else if (com.numarg == '-') {
227 add_to_buffer(&bufptr,0xAD);/* Optional hyphen */
232 add_to_buffer(&bufptr,0x2014);/* EM DASH*/
235 add_to_buffer(&bufptr,0x2013);break;
237 add_to_buffer(&bufptr,0x2022);break;
238 case RTF_LQUOTE: add_to_buffer(&bufptr,0x2018);break;
239 case RTF_RQUOTE: add_to_buffer(&bufptr,0x2019);break;
240 case RTF_LDBLQUOTE: add_to_buffer(&bufptr,0x201C);break;
241 case RTF_RDBLQUOTE: add_to_buffer(&bufptr,0x201D);break;
242 case RTF_ZWNONJOINER: add_to_buffer(&bufptr,0xfeff);break;
245 add_to_buffer(&bufptr,' ');break;
247 /* fprintf(stderr, "RTF char %d\n", com.numarg); */
248 if (data_skip_mode == 0) {
249 add_to_buffer(&bufptr,rtf_to_unicode(com.numarg));
253 groups[group_count].uc=com.numarg;
256 add_to_buffer(&bufptr,0x0009);
258 case RTF_UNICODE_CHAR:
261 /* fprintf(stderr, "Unicode char %d\n", com.numarg); */
262 if (data_skip_mode == 0)
263 add_to_buffer(&bufptr,com.numarg);
264 i=groups[group_count].uc;
272 /* skip two hex digits */
284 /*if (para_mode > 0) {*/
285 end_paragraph(&bufptr);
287 para_mode=group_count;
295 case RTF_LISTOVERRIDETABLE:
299 if (data_skip_mode == 0){
300 data_skip_mode=group_count;
304 /* fprintf(stderr, "Selected lang = %d\n",com.numarg); */
307 rtfSetCharset(&current_charset,com.numarg);
309 /* fprintf(stderr, "Unknown command with name %s and arg=%d\n", */
310 /* com.name, com.numarg); */
317 if (group_count >= group_store ) {
319 if((groups=(RTFGroupData*)realloc(groups,
320 group_store*sizeof(RTFGroupData)))
322 perror("Can\'t allocate memory: ");
327 add_to_buffer(&bufptr,0x20);
328 groups[group_count]=groups[group_count-1];
334 if(para_mode > 0 && para_mode > group_count) {
335 /*add_to_buffer(&bufptr,0);
336 output_paragraph(buffer);
337 fprintf(stderr,"\nGROUP_END para_mode=%d group_count=%d bufptr=%d\n", para_mode,group_count,bufptr);
341 if(data_skip_mode > group_count) {
346 if (data_skip_mode == 0)
347 if (c != '\n' && c != '\r')
348 add_to_buffer(&bufptr,rtf_to_unicode(c));
352 add_to_buffer(&bufptr,'\n');
353 add_to_buffer(&bufptr,0);
354 output_paragraph(buffer);
361 * Convert text string to number
363 * @param f stream to read data from
365 * @return converted number
367 signed long getNumber(FILE *f) {
369 char buf[RTFARGSMAXLEN];
371 while(isdigit(c=fgetc(f)) || c=='-') {
374 if (count > MAX_DIGITS_IN_NUMBER)
376 buf[count++]=(char)c;
380 return strtol(buf, (char **)NULL, 10);
384 * Parse command stream from rtf file and fill command structure
386 * @param f - rtf file stream
387 * @param command - pointer to RTFcommand structure to fill
389 * @return parse status: 0 on success, non-zero on error
391 int getRtfCommand(FILE *f, RTFcommand *command ) {
395 command->name[0]=(char)c;
396 while(isalpha(c=fgetc(f)) && name_count < RTFNAMEMAXLEN) {
399 command->name[name_count++]=(char)c;
401 command->name[name_count]='\0';
402 command->type=getCommandType(command->name);
403 /* command->args=NULL; */
405 if (isdigit(c) || c == '-' )
406 command->numarg=getNumber(f);
410 if(!(c==' ' || c=='\t'))
413 command->name[0]=(char)c;
414 command->name[1]='\0';
415 /* command->args=NULL; */
417 command->type=RTF_CHAR;
418 command->numarg=getCharCode(f);
422 command->type=RTF_SPEC_CHAR;
431 * Converts char to unicode.
433 * @param code - integer code of char
435 * @return converted char
437 unsigned short int rtf_to_unicode(int code) {
439 if (code < 0 || (cc=to_unicode(current_charset, code)) < 0 ) return 0xFEFF;
444 * Convert name of RTF command to RTFType
446 * @param name name to convert
448 * @return RTFType, if unknown command, then return RTF_UNKNOWN
450 RTFTypes getCommandType(char *name) {
451 int i, olen=sizeof(rtf_types)/sizeof(RTFTypeMap);
452 for (i = 0; i < olen ; i++) {
453 if ( strcmp(name,rtf_types[i].name) == 0 ) {
454 return rtf_types[i].type;
461 * Return number representing char code in Hex
463 * @param f stream to read data from
465 * @return converted number
467 signed int getCharCode(FILE *f) {
469 char buf[RTFARGSMAXLEN];
471 if (isdigit(c=fgetc(f))||(c>='a' && c<='f')) {
474 buf[count++]=(char)c;
480 return strtol(buf, (char **)NULL, 16);
483 void rtfSetCharset(short int **charset_ptr,unsigned int codepage)
485 /* Do not override charset if it is specified in the command line */
486 const char *charset_name;
487 char *save_buf = input_buffer;
488 if (forced_charset) return;
489 charset_name = charset_from_codepage(codepage);
490 check_charset(&source_csname,charset_name);
492 *charset_ptr = read_charset(source_csname);
493 input_buffer = save_buf;