2 Copyright 1998-2003 Victor Wagner
3 Copyright 2003 Alex Ott
4 This file is released under the GPL. Details can be
5 found in the file COPYING accompanying this distribution.
17 /* If there is no stdint.h, let's define some common integer types
19 #define int16_t short int
20 #define uint16_t unsigned short int
22 /* MS-DOS is the only supported platform where int is 16-bit */
23 #define int32_t long int
24 #define uint32_t unsigned long int
27 #define uint32_t unsigned int
32 /* There is some strange thing on aix */
33 #if (defined(_AIX)||defined(___AIX)) && !defined(__unix)
37 /* These include files are always available */
41 /* This is our own file */
50 #if defined(__MSDOS__) || defined(_WIN32)
51 /* MS-DOS doesn't like dot at first char and thinks that suffix
52 * should be separated by dot. So we'd call personal config catdoc.rc
54 # define USERRC "catdoc.rc"
55 /* In DOS, %s in path gets replaced with full path to executable including
59 # define SYSTEMRC "%s\\catdoc.rc"
62 # define CHARSETPATH "%s\\charsets"
64 /* Function to add executable directory in place of %s in path.
65 Not usable in Unix, where executable can have more than one
66 link and configuration files are usually kept separately from executables
68 char *add_exe_path(const char* name);
69 /* Separator of directories in list, such as PATH env var. */
71 /* Separator of levels inside path */
74 /* On POSIX systems personal configuration files should start with dot*/
76 # define USERRC ".catdocrc"
80 # define SYSTEMRC "/usr/local/lib/catdoc/catdocrc"
84 # define CHARSETPATH "/usr/local/lib/catdoc"
86 /* Macro to add executable directory in place of %s in path.
87 Not usable in Unix, where executable can have more than one
88 link and configuration files are usually kept separately from executables
90 # define add_exe_path(name) name
91 /* Separator of directories in list, such as PATH env var. */
93 /* Separator of levels inside path */
97 /* Charset files distributed with catdoc always have .txt extension*/
99 # define CHARSET_EXT ".txt"
102 /* Default charsets. TARGET_CHARSET and SOURCE_CHARSET are each wrapped in
   an #ifndef, so a value defined earlier takes precedence over the default
   given here. "cp866" is the target default when __MSDOS__ or _WIN32 is
   defined.
   NOTE(review): "koi8-r" is presumably the default for all other
   platforms; the directive separating the two definitions is not visible
   here -- confirm. */
103 #ifndef TARGET_CHARSET
104 #if defined(__MSDOS__) || defined(_WIN32)
105 #define TARGET_CHARSET "cp866"
107 #define TARGET_CHARSET "koi8-r"
111 #ifndef SOURCE_CHARSET
112 #define SOURCE_CHARSET "cp1251"
/* NOTE(review): presumably the default replacement string for characters
   that cannot be mapped (compare bad_char) -- confirm against its users. */
116 #define UNKNOWN_CHAR "?"
118 /* On MS-DOS and WIN32 file names have to have a 3-character extension,
   so short extensions are used there.
   NOTE(review): SPEC_EXT and REPL_EXT presumably name the files loaded
   into spec_chars and replacements respectively -- confirm. */
119 #if defined(__MSDOS__) || defined(_WIN32)
121 # define SPEC_EXT ".spc"
124 # define REPL_EXT ".rpl"
128 /* On other systems we'll rename them to something more readable */
130 # define SPEC_EXT ".specchars"
133 # define REPL_EXT ".replchars"
136 #if defined(__MSDOS__) && !defined(__DJGPP__)
137 /* Buffer sizes for a 16-bit DOS program (MS-DOS build that is not DJGPP) */
138 #define PARAGRAPH_BUFFER 16384
139 #define FILE_BUFFER 32256
140 #define PATH_BUF_SIZE 80
142 /* Buffer sizes for 32-bit and larger programs */
143 #define PARAGRAPH_BUFFER 262144
144 #define FILE_BUFFER 262144
145 #define PATH_BUF_SIZE 1024
148 /* Buffer for a single line. Should be greater than the wrap margin +
149 the longest substitution sequence (see longest_sequence). */
150 #define LINE_BUF_SIZE 512
151 /* Default value for the wrap margin */
153 #define WRAP_MARGIN 72
155 /* Variable (defined in catdoc.c) which holds the actual value of the wrap margin */
156 extern int wrap_margin;
158 * Public types, variables and procedures which should be available
159 * to all files in the program
163 /* Turbo C defines a broken isspace, which works only for us-ascii.
   This replacement treats every character whose unsigned value is <= 32
   as whitespace; the argument is cast to unsigned char first, so negative
   char values are not matched.
   NOTE(review): presumably enabled only when building with Turbo C; the
   guarding directive is not visible here -- confirm. */
165 #define isspace(c) ((unsigned char)(c) <=32)
168 /* Structure to store Unicode -> target charset mappings. */
169 /* Array of 256 pointers (any of which may be null) to arrays of 256 short ints
170 which contain 8-bit character codes, or -1 if there is no matching char. */
171 typedef int16_t ** CHARSET;
173 /* Structure to store multi-character substitution mappings. */
174 /* Array of 256 pointers to arrays of 256 pointers to strings. */
175 /* Configuration variables defined in catdoc.c. NOTE(review): this remark presumably refers to the extern declarations that follow the typedef, not to SUBSTMAP itself. */
176 typedef char *** SUBSTMAP;
/* Global settings shared by all files of the program.
   NOTE(review): the per-variable meanings below, where marked "presumably",
   are inferred from the names only -- confirm against catdoc.c. */
178 extern uint16_t *source_charset; /* presumably the table passed to to_unicode() for the source charset */
179 extern char bad_char[]; /* Defines a one-symbol string used to replace unknown Unicode chars */
180 extern char *source_csname; /* presumably the name of the source charset */
181 extern char *dest_csname; /* presumably the name of the target charset */
182 extern char *format_name; /* NOTE(review): meaning not visible in this header */
183 extern CHARSET target_charset; /* Unicode -> target charset map (see CHARSET) */
184 extern SUBSTMAP spec_chars;
185 /* Defines unicode chars which should be
186 replaced by strings before UNICODE->target charset
187 mappings are applied, e.g. TeX special chars like %
189 extern SUBSTMAP replacements;
190 /* Defines unicode chars which could be
191 mapped to some character sequence if no
192 corresponding character exists in the target charset
193 e.g. the copyright sign */
194 extern int verbose; /* If true, some additional information is
195 printed. Defined in analyze.c. */
196 extern int (*get_unicode_char)(FILE *f,long *offset,long fileend);
197 /* Pointer to the function which gets
198 a char from the stream. Every get_* function declared below has the same
   signature, so any of them can be assigned to this pointer. */
/* NOTE(review): judging by the names only, these read one character in
   UTF-16 (least/most significant byte first), UTF-8, a plain 8-bit charset
   and a Word 8-bit encoding respectively. The exact meaning of offset and
   fileend, and the value returned at end of input, are not visible in this
   header -- confirm in the implementation. */
200 extern int get_utf16lsb (FILE *f,long *offset,long fileend);
201 extern int get_utf16msb (FILE *f,long *offset,long fileend);
202 extern int get_utf8 (FILE *f,long *offset,long fileend);
203 extern int get_8bit_char (FILE *f,long *offset,long fileend);
205 extern int get_word8_char (FILE *f,long *offset,long fileend);
/* Charset handling.
   NOTE(review): the descriptions marked "presumably" are inferred from
   names and signatures only -- confirm in the implementation. */
/* Presumably maps a numeric code page to a charset name. */
207 extern const char *charset_from_codepage(unsigned int codepage);
/* Presumably loads a charset table from the named file; returns a pointer
   to uint16_t entries. */
208 extern uint16_t *read_charset(const char *filename);
/* Builds a CHARSET from a charset table.
   NOTE(review): the parameter is declared as short int *, while
   read_charset() returns uint16_t * and to_unicode() takes uint16_t *;
   callers passing a read_charset() result need a conversion -- confirm
   whether the types should be unified. */
209 extern CHARSET make_reverse_map (short int *charset);
/* Presumably converts character c of the given charset to Unicode. */
211 extern int to_unicode (uint16_t *charset, int c) ;
/* Presumably converts Unicode char u to the given charset; per the CHARSET
   description, a missing mapping is represented by -1 in the table. */
213 extern int from_unicode (CHARSET charset, int u) ;
/* Presumably returns the output string for one Unicode character;
   ownership of the returned buffer is not visible here. */
215 extern char* convert_char(int unicode_char);
/* Presumably returns the UTF-8 encoding of uc; ownership of the returned
   buffer is not visible here. */
217 extern char* to_utf8(unsigned int uc);
/* Search paths and configuration.
   NOTE(review): the descriptions marked "presumably" are inferred from
   names and signatures only -- confirm in the implementation. */
219 extern char* map_path, *charset_path; /* presumably directory lists searched for map and charset files */
220 extern int signature_check; /* NOTE(review): meaning not visible in this header */
221 extern int unknown_as_hex; /* NOTE(review): meaning not visible in this header */
/* Presumably looks up name in the directory list path; the meaning of the
   return value is not visible here. */
222 char *find_file(char *name, const char *path);
/* Presumably returns the concatenation of s1 and s2; ownership of the
   result is not visible here. */
223 char *stradd(const char *s1, const char *s2);
/* Presumably reads settings from the named configuration file. */
224 void read_config_file(const char *filename);
/* Presumably derives charset settings from the current locale. */
226 void get_locale_charset(void);
/* set_time_locale() is declared only when strftime() is available and the
   compiler is not Turbo C. The guard used to test __TURB0C__ (with a digit
   zero), a macro no compiler defines, so the Turbo C exclusion never took
   effect; Turbo C predefines __TURBOC__.
   NOTE(review): confirm that the definition and the call sites of
   set_time_locale() use the same corrected guard. */
227 #if defined(HAVE_STRFTIME) && !defined(__TURBOC__)
/* Takes no arguments; declared with (void) so that it is a real prototype,
   like the other parameterless functions in this header. */
228 void set_time_locale(void);
/* Substitution maps and document processing.
   NOTE(review): the descriptions marked "presumably" are inferred from
   names and signatures only -- confirm in the implementation. */
/* Presumably loads a substitution map from the named file. */
231 SUBSTMAP read_substmap(char* filename);
232 extern int longest_sequence;/* For checking which value of wrap_margin
233 can cause a buffer overflow (compare LINE_BUF_SIZE). */
/* Presumably returns the substitution string for Unicode char uc in map;
   the result for an unmapped character is not visible here. */
234 char *map_subst(SUBSTMAP map,int uc);
/* NOTE(review): takes the file name by pointer-to-pointer, so it may
   replace *filename; meaning of the return value is not visible here. */
236 int check_charset(char **filename,const char *charset);
/* Presumably processes stream f up to position stop. */
237 int process_file(FILE *f,long stop);
/* NOTE(review): role of header is not visible in this declaration. */
238 void copy_out(FILE *f, char *header);
/* Presumably outputs one paragraph held in buffer as 16-bit character codes. */
239 void output_paragraph(unsigned short int *buffer) ;
/* Presumably parses an RTF document from f. */
240 int parse_rtf(FILE *f);
241 /* Format recognition */
242 int analyze_format(FILE *f);
/* Presumably prints the list of available charsets -- confirm. */
243 void list_charsets(void);
/* NOTE(review): meaning of buffer, offset and curpos is not visible in
   this declaration -- see the implementation. */
244 int parse_word_header(unsigned char *buffer,FILE *f,int offset,long curpos);
245 /* Large buffers for file I/O */
246 extern char *input_buffer,*output_buffer;
/* NOTE(review): declares strdup with the standard library signature;
   presumably meant only for platforms that lack it -- any guarding
   directive is not visible here. */
248 char *strdup(const char *s);
250 /* Numeric conversions: each function takes a byte buffer and an offset
   and returns a fixed-width integer (signed 32-bit, unsigned 32-bit and
   unsigned 16-bit respectively).
   NOTE(review): presumably the value is decoded from the bytes starting at
   buffer[offset]; the byte order is not visible in this header -- confirm
   in the implementation. */
251 int32_t getlong(unsigned char *buffer,int offset);
252 uint32_t getulong(unsigned char *buffer,int offset);
253 uint16_t getshort(unsigned char *buffer,int offset);