Tweak utf8towc() to return -1 earlier sometimes (instead of -2), and add test
program to compare against libc output.
diff --git a/lib/lib.c b/lib/lib.c
index c482dca..a4b7229 100644
--- a/lib/lib.c
+++ b/lib/lib.c
@@ -345,20 +345,17 @@
   if (len && *str<128) return !!(*wc = *str);
 
   result = first = *(s = str++);
+  if (result<0xc2 || result>0xf4) return -1;
   for (mask = 6; (first&0xc0)==0xc0; mask += 5, first <<= 1) {
-    if (mask>21) return -1;
     if (!--len) return -2;
-    c = *(str++);
-    if ((c&0xc0) != 0x80) return -1;
+    if (((c = *(str++))&0xc0) != 0x80) return -1;
     result = (result<<6)|(c&0x3f);
   }
   result &= (1<<mask)-1;
   c = str-s;
-  if (mask==6) return -1;
 
   // Avoid overlong encodings
-  if (mask==6 || mask>21 || result<(unsigned []){0x80,0x800,0x10000}[c-2])
-    return -1;
+  if (result<(unsigned []){0x80,0x800,0x10000}[c-2]) return -1;
 
   // Limit unicode so it can't encode anything UTF-16 can't.
   if (result>0x10ffff || (result>=0xd800 && result<=0xdfff)) return -1;
diff --git a/toys/example/test_utf8towc.c b/toys/example/test_utf8towc.c
new file mode 100644
index 0000000..f939eaa
--- /dev/null
+++ b/toys/example/test_utf8towc.c
@@ -0,0 +1,57 @@
+/* test_utf8towc() against libc mbrtowc()
+ *
+ * Copyright 2017 Rob Landley <rob@landley.net>
+
+USE_TEST_UTF8TOWC(NEWTOY(test_utf8towc, 0, TOYFLAG_USR|TOYFLAG_BIN))
+
+config TEST_UTF8TOWC
+  bool "test_utf8towc"
+  default n
+  help
+    usage: test_utf8towc
+
+    Print differences between toybox's utf8 conversion routines vs libc du jour.
+*/
+
+#include "toys.h"
+
+// Feed every nonzero 32-bit value, as a big-endian byte string with leading
+// zero bytes stripped, to both libc mbrtowc() and utf8towc(), printing a
+// line for each input where the return value or decoded character differ.
+// Output columns (hex/decimal): input, libc length, libc wchar, utf8towc
+// length, utf8towc wchar.
+void test_utf8towc_main(void)
+{
+  mbstate_t mb;
+  int len1, len2;
+  unsigned u, h;
+  wchar_t wc1, wc2;
+
+  // mbrtowc() only decodes UTF-8 under a UTF-8 locale; without one every
+  // comparison below would be meaningless, so refuse to run.
+  if (!setlocale(LC_ALL, "en_US.UTF-8") && !setlocale(LC_ALL, "C.UTF-8")) {
+    fprintf(stderr, "test_utf8towc: no UTF-8 locale available\n");
+    exit(1);
+  }
+
+  memset(&mb, 0, sizeof(mb));
+  for (u=1; u; u++) {
+    char *str = (void *)&h;
+
+    wc1 = wc2 = 0;
+    len2 = 4;
+    // Network byte order puts the most significant byte first in memory.
+    h = htonl(u);
+    // u is nonzero so h has a nonzero byte and this stops within 4 bytes.
+    while (!*str) str++, len2--;
+
+    len1 = mbrtowc(&wc1, str, len2, &mb);
+    // (size_t)-1 and -2 become negative in an int: reset the shift state
+    // so a failed or partial sequence can't leak into the next iteration.
+    if (len1<0) memset(&mb, 0, sizeof(mb));
+    len2 = utf8towc(&wc2, str, len2);
+    // %x takes unsigned int, but wchar_t's underlying type varies: cast.
+    if (len1 != len2 || wc1 != wc2)
+      printf("%x %d %x %d %x\n", u, len1, (unsigned)wc1, len2, (unsigned)wc2);
+  }
+}