utf8towc() has to live in lib.c if strlower() is going to use it, because the scripts/*.c files build against lib.c but not linestack.c.

commit: 6e766936396e2da7fb3820cadb3a9ae823caa9a8 [log] [tgz]
author: Rob Landley <rob@landley.net> Sat Sep 02 20:40:24 2017 -0500
committer: Rob Landley <rob@landley.net> Sat Sep 02 20:40:24 2017 -0500
tree: 2f451efca683de7858d201fb8de7b3719f7429df
parent: 67ddade3373d0fefeff25b48430e5f08c3a7711b [diff]
diff --git a/lib/lib.c b/lib/lib.c
index d011af0..c482dca 100644
--- a/lib/lib.c
+++ b/lib/lib.c

@@ -335,6 +335,38 @@
   return off-haystack;
 }
 
+// Convert utf8 sequence to a unicode wide character
+int utf8towc(wchar_t *wc, char *str, unsigned len)
+{
+  unsigned result, mask, first;
+  char *s, c;
+
+  // fast path ASCII
+  if (len && *str<128) return !!(*wc = *str);
+
+  result = first = *(s = str++);
+  for (mask = 6; (first&0xc0)==0xc0; mask += 5, first <<= 1) {
+    if (mask>21) return -1;
+    if (!--len) return -2;
+    c = *(str++);
+    if ((c&0xc0) != 0x80) return -1;
+    result = (result<<6)|(c&0x3f);
+  }
+  result &= (1<<mask)-1;
+  c = str-s;
+  if (mask==6) return -1;
+
+  // Avoid overlong encodings
+  if (mask==6 || mask>21 || result<(unsigned []){0x80,0x800,0x10000}[c-2])
+    return -1;
+
+  // Limit unicode so it can't encode anything UTF-16 can't.
+  if (result>0x10ffff || (result>=0xd800 && result<=0xdfff)) return -1;
+  *wc = result;
+
+  return str-s;
+}
+
 char *strlower(char *s)
 {
   char *try, *new;
@@ -348,7 +380,7 @@
 
     while (*s) {
       wchar_t c;
-      int len = mbrtowc(&c, s, MB_CUR_MAX, 0);
+      int len = utf8towc(&c, s, MB_CUR_MAX);
 
       if (len < 1) *(new++) = *(s++);
       else {

diff --git a/lib/lib.h b/lib/lib.h
index 3756652..9325a89 100644
--- a/lib/lib.h
+++ b/lib/lib.h

@@ -204,6 +204,7 @@
 long long atolx(char *c);
 long long atolx_range(char *numstr, long long low, long long high);
 int stridx(char *haystack, char needle);
+int utf8towc(wchar_t *wc, char *str, unsigned len);
 char *strlower(char *s);
 char *strafter(char *haystack, char *needle);
 char *chomp(char *s);
@@ -257,7 +258,6 @@
 void linestack_insert(struct linestack **lls, long pos, char *line, long len);
 void linestack_append(struct linestack **lls, char *line);
 struct linestack *linestack_load(char *name);
-int utf8towc(wchar_t *wc, char *str, unsigned len);
 int crunch_escape(FILE *out, int cols, int wc);
 int crunch_rev_escape(FILE *out, int cols, int wc);
 int crunch_str(char **str, int width, FILE *out, char *escmore,

diff --git a/lib/linestack.c b/lib/linestack.c
index 4466710..91dec56 100644
--- a/lib/linestack.c
+++ b/lib/linestack.c

@@ -80,37 +80,6 @@
   return ls;
 }
 
-// Convert utf8 sequence to a unicode wide character
-int utf8towc(wchar_t *wc, char *str, unsigned len)
-{
-  unsigned result, mask, first;
-  char *s, c;
-
-  // fast path ASCII
-  if (len && *str<128) return !!(*wc = *str);
-
-  result = first = *(s = str++);
-  for (mask = 6; (first&0xc0)==0xc0; mask += 5, first <<= 1) {
-    if (!--len) return -2;
-    c = *(str++);
-    if ((c&0xc0) != 0x80) return -1;
-    result = (result<<6)|(c&0x3f);
-  }
-  result &= (1<<mask)-1;
-  c = str-s;
-  if (mask==6 || mask>21) return -1;
-
-  // Avoid overlong encodings
-  if (mask==6 || mask>21 || result<(unsigned []){0x80,0x800,0x10000}[c-2])
-    return -1;
-
-  // Limit unicode so it can't encode anything UTF-16 can't.
-  if (result>0x10ffff || (result>=0xd800 && result<=0xdfff)) return -1;
-  *wc = result;
-
-  return str-s;
-}
-
 // Show width many columns, negative means from right edge, out=0 just measure
 // if escout, send it unprintable chars, otherwise pass through raw data.
 // Returns width in columns, moves *str to end of data consumed.
@@ -123,7 +92,7 @@
   for (end = start = *str; *end; columns += col, end += bytes) {
     wchar_t wc;
 
-    if ((bytes = mbrtowc(&wc, end, MB_CUR_MAX, 0))>0 && (col = wcwidth(wc))>=0)
+    if ((bytes = utf8towc(&wc, end, 4))>0 && (col = wcwidth(wc))>=0)
     {
       if (!escmore || wc>255 || !strchr(escmore, wc)) {
         if (width-columns<col) break;
commit	6e766936396e2da7fb3820cadb3a9ae823caa9a8	[log] [tgz]
author	Rob Landley <rob@landley.net>	Sat Sep 02 20:40:24 2017 -0500
committer	Rob Landley <rob@landley.net>	Sat Sep 02 20:40:24 2017 -0500
tree	2f451efca683de7858d201fb8de7b3719f7429df
parent	67ddade3373d0fefeff25b48430e5f08c3a7711b [diff]