2 Copyright 1998-2003 Victor Wagner
3 Copyright 2003 Alex Ott
4 This file is released under the GPL. Details can be
5 found in the file COPYING accompanying this distribution.
14 /* There is some strange thing on aix */
15 #if (defined(_AIX)||defined(___AIX)) && !defined(__unix)
19 /* These include files are always available */
23 /* This is our own file */
32 #if defined(__MSDOS__) || defined(_WIN32)
33 /* MS-DOS doesn't like dot at first char and thinks that suffix
34 * should be separated by dot. So we'd call personal config catdoc.rc
36 # define USERRC "catdoc.rc"
37 /* In DOS, %s in path gets replaced with full path to executable including
41 # define SYSTEMRC "%s\\catdoc.rc"
44 # define CHARSETPATH "%s\\charsets"
46 /* Function to add executable directory in place of %s in path.
47 Not usable in Unix, where executable can have more than one
48 link and configuration files are usually kept separately from executables
50 char *add_exe_path(const char* name);
51 /* Separator of directories in list, such as PATH env var. */
53 /* Separator of levels inside path */
56 /* On POSIX systems personal configuration files should start with dot*/
58 # define USERRC ".catdocrc"
62 # define SYSTEMRC "/usr/local/lib/catdoc/catdocrc"
66 # define CHARSETPATH "/usr/local/lib/catdoc"
68 /* Macro to add executable directory in place of %s in path.
69 Not usable in Unix, where executable can have more than one
70 link and configuration files are usually kept separately from executables
72 # define add_exe_path(name) name
73 /* Separator of directories in list, such as PATH env var. */
75 /* Separator of levels inside path */
79 /* Charset files distributed with catdoc always have the .txt extension */
81 # define CHARSET_EXT ".txt"
84 /* Default charsets.  Each default is wrapped in #ifndef, so a value that
   is already defined before this point takes precedence. */
85 #ifndef TARGET_CHARSET
86 #if defined(__MSDOS__) || defined(_WIN32)
/* Default target charset when building for MS-DOS or Win32 */
87 #define TARGET_CHARSET "cp866"
/* NOTE(review): presumably the default for every other platform; the
   #else/#endif lines of this conditional are not visible here - confirm */
89 #define TARGET_CHARSET "koi8-r"
93 #ifndef SOURCE_CHARSET
94 #define SOURCE_CHARSET "cp1251"
/* NOTE(review): presumably the default replacement string for characters
   that cannot be converted (compare bad_char) - confirm in catdoc.c */
98 #define UNKNOWN_CHAR "?"
/* NOTE(review): SPEC_EXT and REPL_EXT look like the file name extensions of
   the special-character and replacement maps (see spec_chars and
   replacements) - confirm against the code that loads those files */
100 /* On MS-DOS and WIN32 files have to have 3-char extension */
101 #if defined(__MSDOS__) || defined(_WIN32)
103 # define SPEC_EXT ".spc"
106 # define REPL_EXT ".rpl"
110 /* On other systems we'll rename them to something more readable */
112 # define SPEC_EXT ".specchars"
115 # define REPL_EXT ".replchars"
/* Compile-time buffer sizes.  The small set is selected when __MSDOS__ is
   defined and __DJGPP__ is not.
   NOTE(review): the units (bytes vs. characters) of PARAGRAPH_BUFFER are
   not visible here - confirm at the allocation site */
118 #if defined(__MSDOS__) && !defined(__DJGPP__)
119 /* Buffer sizes for 16-bit DOS program */
120 #define PARAGRAPH_BUFFER 16384
121 #define FILE_BUFFER 32256
122 #define PATH_BUF_SIZE 80
124 /* Buffer sizes for 32-bit and larger programs */
125 #define PARAGRAPH_BUFFER 262144
126 #define FILE_BUFFER 262144
127 #define PATH_BUF_SIZE 1024
130 /* Buffer for single line. Should be greater than wrap margin +
131 longest substitution sequence */
132 #define LINE_BUF_SIZE 512
133 /* Default value for wrap margin */
135 #define WRAP_MARGIN 72
137 /* Variable (defined in catdoc.c) which holds the actual value of the wrap margin */
138 extern int wrap_margin;
140 * Public types, variables and procedures which should be available
141 * to all files in the program
145 /* Turbo C defines broken isspace, which works only for us-ascii */
/* The replacement below reports as whitespace every value that is <= 32
   after conversion to unsigned char, i.e. the space character and all
   control codes, including NUL. */
147 #define isspace(c) ((unsigned char)(c) <=32)
150 /* Structure to store UNICODE -> target charset mappings */
151 /* Array of 256 pointers (which may be null) to arrays of 256 short ints
152 which contain 8-bit character codes or -1 if no matching char */
/* NOTE(review): presumably the first index is the high byte of the Unicode
   value and the second index the low byte - confirm in from_unicode() */
153 typedef short int ** CHARSET;
155 /* Structure to store multicharacter substitution mapping */
156 /* Array of 256 pointers to arrays of 256 pointers to string */
157 /* Configuration variables defined in catdoc.c (applies to the extern declarations that follow the typedef) */
158 typedef char *** SUBSTMAP;
/* Same type as the table returned by read_charset() and accepted by
   to_unicode() */
160 extern short int *source_charset;
161 extern char bad_char[]; /* defines one-symbol string to replace unknown unicode chars */
/* NOTE(review): presumably the names of the source and destination charsets
   and of the output format - confirm in catdoc.c */
162 extern char *source_csname;
163 extern char *dest_csname;
164 extern char *format_name;
165 extern CHARSET target_charset;
166 extern SUBSTMAP spec_chars;
167 /* Defines unicode chars which should be
168 replaced by strings before UNICODE->target charset
169 mappings are applied, e.g. TeX special chars like %
171 extern SUBSTMAP replacements;
172 /* Defines unicode chars which could be
173 mapped to some character sequence if no
174 corresponding character exists in the target charset,
175 e.g. the copyright sign */
176 extern int verbose; /* if true, some additional information would be
177 printed. defined in analyze.c */
178 extern int (*get_unicode_char)(FILE *f,long *offset,long fileend);
179 /* pointer to function which gets
180 a char from stream */
/* Reader functions whose signature matches get_unicode_char, so any of them
   can be assigned to that pointer.
   NOTE(review): judging by the names, each one decodes a different input
   encoding (UTF-16 LSB-first / MSB-first, UTF-8, plain 8-bit, Word 8-bit).
   The exact meaning of *offset and fileend, and the value returned at end
   of input, are not visible here - confirm in the definitions. */
182 extern int get_utf16lsb (FILE *f,long *offset,long fileend);
183 extern int get_utf16msb (FILE *f,long *offset,long fileend);
184 extern int get_utf8 (FILE *f,long *offset,long fileend);
185 extern int get_8bit_char (FILE *f,long *offset,long fileend);
187 extern int get_word8_char (FILE *f,long *offset,long fileend);
/* Charset handling.
   NOTE(review): the descriptions below are inferred from names and types
   only - confirm against the definitions.
   - charset_from_codepage: presumably maps a numeric code page to a
     charset name.
   - read_charset: presumably loads a charset table from the named file; the
     result has the same type as source_charset.
   - make_reverse_map: builds a CHARSET from a table of that type.
   - to_unicode / from_unicode: presumably convert one character through a
     source table / a CHARSET respectively.
   - convert_char / to_utf8: return a string for one Unicode value;
     ownership of the returned pointer is not visible here. */
189 extern const char *charset_from_codepage(unsigned int codepage);
190 extern short int *read_charset(const char *filename);
191 extern CHARSET make_reverse_map (short int *charset);
193 extern int to_unicode (short int *charset, int c) ;
195 extern int from_unicode (CHARSET charset, int u) ;
197 extern char* convert_char(int unicode_char);
199 extern char* to_utf8(unsigned int uc);
/* NOTE(review): presumably the search paths used to locate substitution
   map files and charset files - confirm in catdoc.c */
201 extern char* map_path, *charset_path;
/* NOTE(review): flags whose exact meaning is not visible in this header;
   presumably both are used as booleans - confirm where they are set */
202 extern int signature_check;
203 extern int unknown_as_hex;
/* NOTE(review): presumably searches the directory list `path` for `name`;
   `name` is not const-qualified, so the callee may modify it - confirm */
204 char *find_file(char *name, const char *path);
/* NOTE(review): presumably returns a newly built concatenation of s1 and
   s2; ownership of the result is not visible here - confirm */
205 char *stradd(const char *s1, const char *s2);
206 void read_config_file(const char *filename);
208 void get_locale_charset(void);
/* set_time_locale() is declared only when strftime() is available and the
   compiler is not Turbo C.  Turbo C identifies itself with __TURBOC__
   (capital letter O); the guard previously tested a misspelling written
   with the digit zero, so the Turbo C exclusion never took effect.
   The prototype uses (void) so that calls with arguments are rejected,
   consistent with get_locale_charset(void) and list_charsets(void). */
209 #if defined(HAVE_STRFTIME) && !defined(__TURBOC__)
210 void set_time_locale(void);
/* NOTE(review): presumably loads a substitution map from the named file;
   the result has the same type as spec_chars and replacements */
213 SUBSTMAP read_substmap(char* filename);
214 extern int longest_sequence;/* for checking which value of wrap_margin
215 can cause buffer overflow*/
/* NOTE(review): presumably returns the substitution string stored in `map`
   for Unicode value `uc`; the result when there is no entry is not visible
   here - confirm */
216 char *map_subst(SUBSTMAP map,int uc);
/* NOTE(review): `filename` is passed by address, so the callee can replace
   the caller's pointer - confirm the return convention in the definition */
218 int check_charset(char **filename,const char *charset);
219 int process_file(FILE *f,long stop);
220 void copy_out(FILE *f, char *header);
/* Takes a buffer of unsigned short elements.
   NOTE(review): presumably Unicode code units with a terminator - confirm */
221 void output_paragraph(unsigned short int *buffer) ;
222 int parse_rtf(FILE *f);
223 /* Format recognition */
224 int analyze_format(FILE *f);
225 void list_charsets(void);
226 int parse_word_header(unsigned char *buffer,FILE *f,int offset,long curpos);
227 /* Large buffers for file I/O */
228 extern char *input_buffer,*output_buffer;
/* NOTE(review): redeclares strdup(), which POSIX and C23 already declare in
   <string.h>; presumably guarded by a feature test on a line that is not
   visible here - confirm */
230 char *strdup(const char *s);
232 /* Numeric conversions */
/* NOTE(review): presumably each reads an integer stored in `buffer`
   starting at byte `offset`; width and byte order are not visible here.
   Note that getshort() returns unsigned int. */
233 long int getlong(unsigned char *buffer,int offset);
234 unsigned long int getulong(unsigned char *buffer,int offset);
235 unsigned int getshort(unsigned char *buffer,int offset);