1 /*****************************************************************/
2 /* Reading routines for rtf files */
4 /* This file is part of catdoc project */
5 /* (c) Victor Wagner 2003, (c) Alex Ott 2003 */
6 /*****************************************************************/
17 /********************************************************
18 * Datatypes declaration
41 RTF_LISTOVERRIDETABLE,
67 RTFTypeMap rtf_types[]={
69 {"ansicpg",RTF_CODEPAGE},
74 {"overlay",RTF_OVERLAY},
76 {"author",RTF_AUTHOR},
78 {"fonttbl",RTF_FONTTBL},
80 {"stylesheet",RTF_STYLESHEET},
81 {"colortbl",RTF_COLORTBL},
82 {"listtable",RTF_LISTTABLE},
83 {"listoverridetable",RTF_LISTOVERRIDETABLE},
84 {"rsidtbl",RTF_RSIDTBL},
85 {"generator",RTF_GENERATOR},
86 {"datafield",RTF_DATAFIELD},
89 {"emdash",RTF_EMDASH},
90 {"endash",RTF_ENDASH},
91 {"emspace",RTF_EMDASH},
92 {"enspace",RTF_ENDASH},
93 {"bullet",RTF_BULLET},
94 {"lquote",RTF_LQUOTE},
95 {"rquote",RTF_RQUOTE},
96 {"ldblquote",RTF_LDBLQUOTE},
97 {"rdblquote",RTF_RDBLQUOTE},
98 {"zwnj",RTF_ZWNONJOINER},
101 {"u",RTF_UNICODE_CHAR}
104 #define RTFNAMEMAXLEN 32
105 #define RTFARGSMAXLEN 64
108 * Structure describing rtf command
113 char name[RTFNAMEMAXLEN+1];
119 #define MAXFONTNAME 64
126 char fontname[MAXFONTNAME+1];
130 * Structure to describe style
138 * Structure to store values, local to rtf group
142 int uc; /**< How many symbols to skip */
143 RTFStyle* style; /**< current style */
146 /********************************************************
147 * Functions declaration
151 extern int forced_charset;
152 signed long getNumber(FILE *f);
154 int getRtfCommand(FILE *f, RTFcommand *command );
155 unsigned short int rtf_to_unicode(int code);
156 RTFTypes getCommandType(char *name);
157 signed int getCharCode(FILE *f);
158 void rtfSetCharset(short int **charset_ptr,unsigned int codepage);
160 /********************************************************
164 short int *current_charset;
167 /********************************************************
168 * Functions implementation
171 extern unsigned short int buffer[];
172 void add_to_buffer(int *bufptr,unsigned short int c) {
173 buffer[++(*bufptr)]=c;
174 if (*bufptr > PARAGRAPH_BUFFER-2) {
175 buffer[++(*bufptr)]=0;
176 output_paragraph(buffer);
181 void end_paragraph(int *bufptr) {
182 add_to_buffer(bufptr,0x000a);
183 add_to_buffer(bufptr,0);
184 output_paragraph(buffer);
189 * Parses RTF file from file stream
191 * @param f - file stream descriptor
193 int parse_rtf(FILE *f) {
194 int para_mode=0, data_skip_mode=0,i;
195 RTFGroupData *groups=NULL;
196 int group_count=0, group_store=20;
198 current_charset=source_charset;
200 if((groups=(RTFGroupData*)calloc(group_store,sizeof(RTFGroupData))) == NULL ) {
201 perror("Can\'t allocate memory: ");
204 groups[0].uc = 2; /* Default uc = 2 */
213 if ((code=getRtfCommand(f, &com)) != 0)
217 /* fprintf(stderr, "Spec Char found=%s and arg=%c\n", */
218 /* com.name, com.numarg); */
219 if (com.numarg == '*' && data_skip_mode == 0) {
220 data_skip_mode=group_count;
221 } else if (com.numarg == '\r') {
222 end_paragraph(&bufptr);
223 } else if (com.numarg == '~') {
224 add_to_buffer(&bufptr,0xA0);/* NO-BREAK SPACE */
225 } else if (com.numarg == '-') {
226 add_to_buffer(&bufptr,0xAD);/* Optional hyphen */
231 add_to_buffer(&bufptr,0x2014);/* EM DASH*/
234 add_to_buffer(&bufptr,0x2013);break;
236 add_to_buffer(&bufptr,0x2022);break;
237 case RTF_LQUOTE: add_to_buffer(&bufptr,0x2018);break;
238 case RTF_RQUOTE: add_to_buffer(&bufptr,0x2019);break;
239 case RTF_LDBLQUOTE: add_to_buffer(&bufptr,0x201C);break;
240 case RTF_RDBLQUOTE: add_to_buffer(&bufptr,0x201D);break;
241 case RTF_ZWNONJOINER: add_to_buffer(&bufptr,0xfeff);break;
244 add_to_buffer(&bufptr,' ');break;
246 /* fprintf(stderr, "RTF char %d\n", com.numarg); */
247 if (data_skip_mode == 0) {
248 add_to_buffer(&bufptr,rtf_to_unicode(com.numarg));
252 groups[group_count].uc=com.numarg;
255 add_to_buffer(&bufptr,0x0009);
257 case RTF_UNICODE_CHAR:
260 /* fprintf(stderr, "Unicode char %d\n", com.numarg); */
261 if (data_skip_mode == 0)
262 add_to_buffer(&bufptr,com.numarg);
263 i=groups[group_count].uc;
268 /*if (para_mode > 0) {*/
269 end_paragraph(&bufptr);
271 para_mode=group_count;
279 case RTF_LISTOVERRIDETABLE:
283 if (data_skip_mode == 0){
284 data_skip_mode=group_count;
288 /* fprintf(stderr, "Selected lang = %d\n",com.numarg); */
291 rtfSetCharset(&current_charset,com.numarg);
293 /* fprintf(stderr, "Unknown command with name %s and arg=%d\n", */
294 /* com.name, com.numarg); */
301 if (group_count >= group_store ) {
303 if((groups=(RTFGroupData*)realloc(groups,
304 group_store*sizeof(RTFGroupData)))
306 perror("Can\'t allocate memory: ");
311 add_to_buffer(&bufptr,0x20);
312 groups[group_count]=groups[group_count-1];
318 if(para_mode > 0 && para_mode > group_count) {
319 /*add_to_buffer(&bufptr,0);
320 output_paragraph(buffer);
321 fprintf(stderr,"\nGROUP_END para_mode=%d group_count=%d bufptr=%d\n", para_mode,group_count,bufptr);
325 if(data_skip_mode > group_count) {
330 if (data_skip_mode == 0)
331 if (c != '\n' && c != '\r')
332 add_to_buffer(&bufptr,rtf_to_unicode(c));
336 add_to_buffer(&bufptr,'\n');
337 add_to_buffer(&bufptr,0);
338 output_paragraph(buffer);
345 * Convert text string to number
347 * @param f stream to read data from
349 * @return converted number
351 signed long getNumber(FILE *f) {
353 char buf[RTFARGSMAXLEN];
355 while(isdigit(c=fgetc(f)) || c=='-') {
358 buf[count++]=(char)c;
362 return strtol(buf, (char **)NULL, 10);
366 * Parse command stream from rtf file and fill command structure
368 * @param f - rtf file stream
369 * @param command - pointer to RTFcommand structure to fill
371 * @return parse code: non-zero on error, 0 on success
373 int getRtfCommand(FILE *f, RTFcommand *command ) {
377 command->name[0]=(char)c;
378 while(isalpha(c=fgetc(f)) && name_count < RTFNAMEMAXLEN) {
381 command->name[name_count++]=(char)c;
383 command->name[name_count]='\0';
384 command->type=getCommandType(command->name);
385 /* command->args=NULL; */
387 if (isdigit(c) || c == '-' )
388 command->numarg=getNumber(f);
392 if(!(c==' ' || c=='\t'))
395 command->name[0]=(char)c;
396 command->name[1]='\0';
397 /* command->args=NULL; */
399 command->type=RTF_CHAR;
400 command->numarg=getCharCode(f);
404 command->type=RTF_SPEC_CHAR;
413 * Converts char to unicode.
415 * @param code - integer code of char
417 * @return converted char
419 unsigned short int rtf_to_unicode(int code) {
421 if (code < 0 || (cc=to_unicode(current_charset, code)) < 0 ) return 0xFEFF;
426 * Convert name of RTF command to RTFType
428 * @param name name to convert
430 * @return RTFType, if unknown command, then return RTF_UNKNOWN
432 RTFTypes getCommandType(char *name) {
433 int i, olen=sizeof(rtf_types)/sizeof(RTFTypeMap);
434 for (i = 0; i < olen ; i++) {
435 if ( strcmp(name,rtf_types[i].name) == 0 ) {
436 return rtf_types[i].type;
443 * Returns the number representing a char code written in hexadecimal
445 * @param f stream to read data from
447 * @return converted number
449 signed int getCharCode(FILE *f) {
451 char buf[RTFARGSMAXLEN];
453 if (isdigit(c=fgetc(f))||(c>='a' && c<='f')) {
456 buf[count++]=(char)c;
462 return strtol(buf, (char **)NULL, 16);
465 void rtfSetCharset(short int **charset_ptr,unsigned int codepage)
467 /* Do not override charset if it is specified in the command line */
468 const char *charset_name;
469 char *save_buf = input_buffer;
470 if (forced_charset) return;
471 charset_name = charset_from_codepage(codepage);
472 check_charset(&source_csname,charset_name);
474 *charset_ptr = read_charset(source_csname);
475 input_buffer = save_buf;