working prototype of libtext - iomenu - interactive terminal-based selection menu
HTML git clone git://bitreich.org/iomenu git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/iomenu
DIR Log
DIR Files
DIR Refs
DIR Tags
DIR README
DIR LICENSE
---
DIR commit 353e9eb3f5f4df50ed5802ee14c593c29eba706e
DIR parent 61b9b7eeca080291752b9813705c17208e83505a
HTML Author: Josuah Demangeon⠠⠵ <mail@josuah.net>
Date: Sun, 2 Apr 2017 14:56:16 +0200
working prototype of libtext
Diffstat:
M text.c | 162 ++++++++++++++++---------------
M text.h | 15 ++++++++++-----
2 files changed, 93 insertions(+), 84 deletions(-)
---
DIR diff --git a/text.c b/text.c
@@ -2,8 +2,8 @@
* Functions handling UTF-8 strings:
*
* stdin -> buffer -> stdout
- * char[] -> long[] -> char[]
* UTF-8 -> rune -> UTF-8
+ * char[] -> long[] -> char[]
*/
@@ -15,15 +15,16 @@
/*
- * Return the number of bytes in rune for the `len` next char in `s`,
- * or 0 if `utf` is misencoded.
+ * Return the number of bytes in rune for the `n` next char in `s`,
+ * or 0 if it is misencoded.
*
- * Thanks to Connor Lane Smith for some ideas.
+ * Thanks to Connor Lane Smith for the idea of using 0x??.
*/
int
-utflen(char *s, int n) {
+utflen(char *s, int n)
+{
int len = 1;
- int contiunation_bytes =
+ int continuation_bytes =
(s[0] & 0x80) == 0x00 ? 0 : /* 0xxxxxxx */
(s[0] & 0xc0) == 0x80 ? 1 : /* 10xxxxxx */
(s[0] & 0xe0) == 0xc0 ? 2 : /* 110xxxxx */
@@ -34,11 +35,12 @@ utflen(char *s, int n) {
(s[0] & 0xff) == 0xfe ? 7 : /* 11111110 */
8; /* 11111111 */
- if (contiunation_bytes > 6 || contiunation_bytes > n)
+ if (continuation_bytes > 6 || continuation_bytes > n)
return 0;
/* check if continuation bytes are 10xxxxxx and increment `len` */
- switch (contiunation_bytes) { /* FALLTHROUGH */
+ switch (continuation_bytes) { /* FALLTHROUGH */
+ case 7: if ((s[6] & 0xc0) != 0x80) return 0; else len++;
case 6: if ((s[5] & 0xc0) != 0x80) return 0; else len++;
case 5: if ((s[4] & 0xc0) != 0x80) return 0; else len++;
case 4: if ((s[3] & 0xc0) != 0x80) return 0; else len++;
@@ -51,10 +53,11 @@ utflen(char *s, int n) {
/*
- * return the number of bytes required to display `rune`
+ * Return the number of bytes required to display `rune`
*/
int
-runelen(long r) {
+runelen(long r)
+{
if (r <= 0x0000007f) return 1;
if (r <= 0x000007ff) return 2;
if (r <= 0x0000ffff) return 3;
@@ -66,12 +69,13 @@ runelen(long r) {
/*
- * return the firsts `len` bytes in the sring poined by `utf` to a rune.
- * if the `utf` is misencoded, the first char is returned as a
- * negative value.
+ * Set `r` to the rune corresponding to the first `n` bytes of `s`
+ * and return the number of bytes read.
+ * If `s` is misencoded, the rune is stored as a negative value.
*/
int
-utftorune(long *r, char *s, int n) {
+utftorune(long *r, char *s, int n)
+{
int len = utflen(s, n);
/* first byte */
@@ -100,36 +104,37 @@ utftorune(long *r, char *s, int n) {
/*
- * return the next rune in the `len` next `utf`, or 0 if
- * `utf` is misencoded.
+ * Encode the rune `r` in utf-8 in `s`, null-terminated, and return
+ * the number of bytes written, 0 if `r` is invalid.
*/
int
-runetoutf(char *s, long r) {
+runetoutf(char *s, long r)
+{
switch (runelen(r)) {
case 1:
- s[0] = r; /* 0xxxxxxx */
+ s[0] = r; /* 0xxxxxxx */
s[1] = '\0';
return 1;
case 2:
- s[0] = 0xc0 | (0x3f & (r >> 6)); /* 110xxxxx */
- s[1] = 0x80 | (0x3f & (r)); /* 10xxxxxx */
+ s[0] = 0xc0 | (0x1f & (r >> 6)); /* 110xxxxx */
+ s[1] = 0x80 | (0x3f & (r)); /* 10xxxxxx */
s[2] = '\0';
return 2;
case 3:
- s[0] = 0xe0 | (0x3f & (r >> 12)); /* 1110xxxx */
- s[1] = 0x80 | (0x3f & (r >> 6)); /* 10xxxxxx */
- s[2] = 0x80 | (0x3f & (r)); /* 10xxxxxx */
+ s[0] = 0xe0 | (0x0f & (r >> 12)); /* 1110xxxx */
+ s[1] = 0x80 | (0x3f & (r >> 6)); /* 10xxxxxx */
+ s[2] = 0x80 | (0x3f & (r)); /* 10xxxxxx */
s[3] = '\0';
return 3;
case 4:
- s[0] = 0xf0 | (0x3f & (r >> 6)); /* 11110xxx */
- s[1] = 0x80 | (0x3f & (r >> 6)); /* 10xxxxxx */
- s[2] = 0x80 | (0x3f & (r >> 6)); /* 10xxxxxx */
- s[3] = 0x80 | (0x3f & (r)); /* 10xxxxxx */
+ s[0] = 0xf0 | (0x07 & (r >> 18)); /* 11110xxx */
+ s[1] = 0x80 | (0x3f & (r >> 12)); /* 10xxxxxx */
+ s[2] = 0x80 | (0x3f & (r >> 6)); /* 10xxxxxx */
+ s[3] = 0x80 | (0x3f & (r)); /* 10xxxxxx */
s[4] = '\0';
return 4;
case 5:
- s[0] = 0xf8 | (0x3f & (r >> 24)); /* 111110xx */
+ s[0] = 0xf8 | (0x03 & (r >> 24)); /* 111110xx */
s[1] = 0x80 | (0x3f & (r >> 18)); /* 10xxxxxx */
s[2] = 0x80 | (0x3f & (r >> 12)); /* 10xxxxxx */
s[3] = 0x80 | (0x3f & (r >> 6)); /* 10xxxxxx */
@@ -137,7 +142,7 @@ runetoutf(char *s, long r) {
s[5] = '\0';
return 5;
case 6:
- s[0] = 0xfc | (0x3f & (r >> 30)); /* 1111110x */
+ s[0] = 0xfc | (0x01 & (r >> 30)); /* 1111110x */
s[1] = 0x80 | (0x3f & (r >> 24)); /* 10xxxxxx */
s[2] = 0x80 | (0x3f & (r >> 18)); /* 10xxxxxx */
s[3] = 0x80 | (0x3f & (r >> 12)); /* 10xxxxxx */
@@ -152,31 +157,56 @@ runetoutf(char *s, long r) {
/*
+ * Read a newly allocated string from `f` up to the first '\n'
+ * character or the end of the file. It is stored as a rune array,
+ * and `r` is set to point to it.
+ */
+int
+getutf(long **r, FILE *f)
+{
+ int slen, rlen = 0, c, size = BUFSIZ;
+ char *s;
+
+ if (!(s = malloc(size))) return -1;
+ for (slen = 0; (c = fgetc(f)) != EOF && (c != '\n'); slen++) {
+ s[slen] = c;
+
+ if (slen >= size)
+ if (!(s = realloc(s, ++size))) return -1;
+ }
+
+ if (!(*r = malloc(size * sizeof (long)))) return -1;
+ for (int i = 0; i < slen; rlen++)
+ i += utftorune(*r + rlen, s + i, slen - i);
+
+ free(s);
+ return rlen;
+}
+
+
+/*
* Fill `s` with a printable representation of `r` and return the
- * width of the character
+ * width of the character. The tab characters are converted to
+ * spaces as if it was at the column `col`.
*/
int
runetoprint(char *s, long r, int col)
{
- /* ASCII control characters and invalid characters */
- if (r == '\t') {
+ /* invalid */
+ if (r < 0) {
+ sprintf(s, "[%02x]", (unsigned char) -r);
+
+ } else if (r == '\t') {
int i;
for (i = 0; i < (col + 1) % 8 - 1; i++)
s[i] = ' ';
- s[i] = '\0';
-
- } else if (r < ' ' || r == 0x7f) {
- sprintf(s, "[%02x]", (char) r);
-
- /* non-breaking space */
- } else if (r == 0xa0) {
- sprintf(s, "[ ]");
+ s[i] = '\0'; s[0] = '|';
- /* soft hyphen */
- } else if (r == 0xad) {
- sprintf(s, "[-]");
+ /* ascii control */
+ } else if (r == 0x7f || r < ' ') {
+ sprintf(s, "[%02lx]", r);
- /* valid UTF-8 but not printable Unicode code points */
+ /* utf-8 but not printable */
} else if (
/* unicode control */
(0x80 <= r && r < 0xa0) ||
@@ -209,47 +239,21 @@ runetoprint(char *s, long r, int col)
}
-/*
- * Read a newly allocated string `s` from `file` up to the first '\n'
- * character or the end of the file.
- */
-int
-getutf(char **s, FILE *file)
-{
- int i; int c;
-
- *s = malloc(BUFSIZ);
-
- for (i = 0; (c = fgetc(file)) != EOF && (c != '\n'); i++) {
- (*s)[i] = c;
-
- if ((size_t) i + 16 >= sizeof(s))
- *s = realloc(*s, sizeof(s) + BUFSIZ);
- }
-
- return i;
-}
-
-
int
main()
{
- char s[7];
- long r;
-
- for (int i = 0; i < 9000; i++) {
- runetoutf(s, i);
- utftorune(&r, s, 7);
- runetoutf(s, r);
- utftorune(&r, s, 7);
- runetoprint(s, r, 0);
+ char s[BUFSIZ];
+ long *r;
- printf("%5X: ", r);
- printf("'%s'\t", s);
+ for (int len; (len = getutf(&r, stdin)) >= 0 && !feof(stdin); free(r)) {
+ for (int i = 0; i < len; i++) {
+ runetoprint(s, r[i], 0);
+ fputs(s, stdout);
+ }
- if (i % 8 == 0)
- puts("");
+ putchar('\n');
}
+ free(r);
return 0;
}
DIR diff --git a/text.h b/text.h
@@ -1,6 +1,11 @@
-typedef int Rune;
-
+/* rune / utf length */
int utflen(char *, int);
-int runelen(Rune);
-int utftorune(Rune *, char *, int);
-int runetoutf(char *, Rune);
+int runelen(long);
+
+/* decode / encode */
+int utftorune(long *, char *, int);
+int runetoutf(char *, long);
+
+/* stdin / stdout */
+int getutf(long **, FILE *);
+int runetoprint(char *, long, int);