www.wagner.pp.ru Git - oss/catdoc.git/commitdiff
Fix bug #1 - avoid broken UTF-8 at line breaks
author Victor Wagner <vitus@wagner.pp.ru>
Tue, 17 Oct 2006 18:07:06 +0000 (18:07 +0000)
committer Victor Wagner <vitus@wagner.pp.ru>
Tue, 17 Oct 2006 18:07:06 +0000 (18:07 +0000)
src/writer.c

index 5448d6c0b805d1e5b26e7e9f10081663eec42be3..5f95ba2fdb3659f09ba1fe8c24e3a508bf89c423 100644 (file)
@@ -54,12 +54,18 @@ void out_char(const char *chunk) {
        } else if (bufpos>wrap_margin) {
                char *q=outputbuffer,*p=outputbuffer+wrap_margin;
                
-               while (p>outputbuffer&&!isspace(*p)) p--;
+               while (p>outputbuffer&&*p!=' '&& *p!='\t') p--;
                if (p==outputbuffer) {
                        /*worst case - nowhere to wrap. Will use brute force */
-                       fwrite(outputbuffer,wrap_margin,1,stdout);
+                       int i = wrap_margin;
+                       if (from_unicode == to_utf8) {
+                               /* go back to start of nearest utf-8 character */
+                               while(i>0 && (outputbuffer[i] & 0xC0) == 0x80) i--;
+                       }                       
+                       fwrite(outputbuffer,i,1,stdout);
+
                        fputc('\n',stdout);
-                       p=outputbuffer+wrap_margin;
+                       p=outputbuffer+i;
                } else {
                        *p=0;p++;
                        fputs(outputbuffer,stdout);