]> www.wagner.pp.ru Git - oss/catdoc.git/blob - src/substmap.c
Recreated CVS repository from working copy
[oss/catdoc.git] / src / substmap.c
1 /*****************************************************************/
2 /* Substitution maps to replace some unicode characters with     */
3 /* multicharacter sequences                                      */
4 /*                                                               */
5 /* This file is part of catdoc project                           */
6 /* (c) Victor Wagner 1998-2003, (c) Alex Ott 2003                    */
7 /*****************************************************************/
8 #ifdef HAVE_CONFIG_H
9 #include <config.h>
10 #endif
11 #include <string.h>
12 #include <ctype.h>
13 #include <stdlib.h>
14 #include "catdoc.h"
15 SUBSTMAP spec_chars, replacements;
16 char *map_path=CHARSETPATH;
17 void map_insert(SUBSTMAP map, int uc, const char *s) ;
18 int longest_sequence=6;/* six is longest character sequence which can be
19                                                   generated by catdoc internally*/
20
/******************************************************************/
/* Checks for terminator of character sequence.
 *   c    - character under examination
 *   stop - expected terminator
 * If stop is something like a quote, c must be that very character.
 * If stop is '\n' (unquoted sequence), any whitespace character
 * terminates the sequence.
 * Returns non-zero if c terminates the sequence, zero otherwise.
 ********************************************************************/
int isstop(char  c, char stop) {
	if (stop=='\n') {
		/* <ctype.h> functions require a value representable as
		 * unsigned char (or EOF); a plain char holding a byte >= 0x80
		 * may be negative, which would be undefined behaviour. */
		return isspace((unsigned char)c);
	} else {
		return c==stop;
	}
}
32 /************************************************************************/
33 /* Reads substitution map file.                                         */
34 /************************************************************************/
35 SUBSTMAP read_substmap(char* filename) {
36         FILE *f;
37         SUBSTMAP map=calloc(sizeof(char **),256);
38         char *path, line[1024], *p, *q;
39         char s[256];
40         char stopchar;
41         int escaped, lineno=0, i;
42         unsigned int uc;
43         path=find_file(filename,add_exe_path(map_path));
44         if (!path) {
45                 free(map);
46                 return(NULL);
47         }   
48         if (!map) {
49                 fprintf(stderr,"Insufficient memory\n");
50                 exit(1);
51         }
52         f=fopen(path,"rb");
53         if (!f) {
54                 perror("catdoc");
55                 return NULL;
56         }
57         if (input_buffer)
58                 setvbuf(f,input_buffer,_IOFBF,FILE_BUFFER);
59
60         while (!feof(f)) {
61                 if (!fgets(line,1024,f)) continue;
62                 lineno++;
63                 /* parse line */
64
65                 /* skip leading space */
66                 for(p=line;*p && isspace(*p);p++);
67                 /* if #, it is comment */
68                 if (!*p ||
69 #ifdef  __MSDOS__
70                                 *p==0x1A || /* DOS have strange habit of using ^Z as eof */
71 #endif
72                                 *p=='#') continue;
73                 /* read hexadecimal code */
74                 uc = strtol(p,&p,16);
75                 if (!isspace(*p)|| uc<0 || uc>0xfffd) {
76                         fprintf(stderr,"Error parsing %s(%d)\n",path,lineno);
77                         continue;
78                 }
79                 /* skip space between  code and sequence */
80                 for(;*p && isspace(*p);p++);
81                 if (!p) continue;
82                 switch (*p) {
83                         case '\'':
84                         case '"':
85                                 stopchar=*p;
86                                 break;
87                         case '(':
88                                 stopchar=')';
89                                 break;
90                         case '[':
91                                 stopchar=']';
92                                 break;
93                         case '{':
94                                 stopchar='}';
95                                 break;
96                         default:
97                                 p--;
98                                 stopchar='\n';
99                 }
100                 p++;
101                 q=p;
102                 escaped=0;
103                 while (*q && (!isstop(*q,stopchar) || escaped)) {
104                         if (escaped) {
105                                 escaped=0;
106                         } else {
107                                 escaped= (*q=='\\');
108                         }
109                         q++;
110                 }
111                 if (*q!=stopchar && !(isspace(*q) && stopchar=='\n')) {
112                         fprintf(stderr,"Error parsing %s(%d): unterminated sequence\n",
113                                         path,lineno);
114                         continue;
115                 }
116                 /* HERE SHOULD BE BACKSLASH ESCAPE PROCESSING !!!*/
117                 *q=0;
118                 for (q=s,i=0;*p && i<256;q++,i++) {
119                         if (*p!='\\') {
120                                 *q=*p++;
121                         } else {
122                                 switch (*(++p)) {
123                                         case 'n': *q='\n'; break;
124                                         case 'r': *q='\r'; break;
125                                         case 't': *q='\t'; break;
126                                         case 'b': *q='\b'; break;
127                                                           case '\"': *q='\"'; break;
128                                         case '\'': *q='\''; break;
129                                         case '0': *q=strtol(p,&p,8); p--; break;
130                                         case '\\':
131                                         default:
132                                                           *q=*p;
133                                 }
134                                 p++;
135                         }
136                 }
137                 *q=0;
138                 if (i>longest_sequence)
139                         longest_sequence=i;
140                 map_insert(map,uc,s);
141         }
142         fclose(f);
143         free(path);
144         return map;
145 }
146
147 /*************************************************************************/
148 /* searches for string in the substituton map. Returns NULL if not found */
149 /*************************************************************************/
150 char * map_subst ( SUBSTMAP map, int uc) {
151         char **p=map[(unsigned)uc >>8];
152         if (!p) return NULL;
153         return p[uc & 0xff];
154 }
155
156 /*************************************************************************/
157 /*  inserts string + unicode code into map                               */
158 /*************************************************************************/
159 void map_insert(SUBSTMAP map, int uc, const char *s) {
160         SUBSTMAP p=map+((unsigned)uc>>8);
161
162         if (!*p) {
163                 *p= calloc(sizeof(char*),256);
164                 if (!*p) { 
165                         fprintf(stderr,"Insufficient memory\n");
166                         exit(1);
167                 }
168         }
169         (*p)[uc & 0xff] = strdup(s);
170 }