URI: 
       Initial commit. - utf8expr - expr(1) for UTF-8
  HTML git clone git://bitreich.org/utf8expr/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/utf8expr/
   DIR Log
   DIR Files
   DIR Refs
   DIR Tags
   DIR LICENSE
       ---
   DIR commit 4d85a682220a55d83e8c6460808329e72becca36
  HTML Author: Christoph Lohmann <20h@r-36.net>
       Date:   Mon, 21 May 2012 17:35:22 +0200
       
       Initial commit.
       
       Diffstat:
         A LICENSE                             |      21 +++++++++++++++++++++
         A Makefile                            |      56 +++++++++++++++++++++++++++++++
         A arg.h                               |      41 +++++++++++++++++++++++++++++++
         A config.mk                           |      23 +++++++++++++++++++++++
         A utf8expr.1                          |      41 +++++++++++++++++++++++++++++++
         A utf8expr.c                          |     173 +++++++++++++++++++++++++++++++
       
       6 files changed, 355 insertions(+), 0 deletions(-)
       ---
   DIR diff --git a/LICENSE b/LICENSE
       @@ -0,0 +1,21 @@
       +MIT/X Consortium License
       +
       +© 2012 Christoph Lohmann <20h@r-36.net>
       +
       +Permission is hereby granted, free of charge, to any person obtaining a
       +copy of this software and associated documentation files (the "Software"),
       +to deal in the Software without restriction, including without limitation
       +the rights to use, copy, modify, merge, publish, distribute, sublicense,
       +and/or sell copies of the Software, and to permit persons to whom the
       +Software is furnished to do so, subject to the following conditions:
       +
       +The above copyright notice and this permission notice shall be included in
       +all copies or substantial portions of the Software.
       +
       +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
       +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
       +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
       +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
       +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
       +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
       +DEALINGS IN THE SOFTWARE.
   DIR diff --git a/Makefile b/Makefile
       @@ -0,0 +1,56 @@
       +# utf8expr – expr(1) for utf8 
       +# See LICENSE file for copyright and license details.
       +
       +include config.mk
       +
       +SRC = ${NAME}.c
       +OBJ = ${SRC:.c=.o}
       +
       +all: options ${NAME}
       +
       +options:
       +        @echo ${NAME} build options:
       +        @echo "CFLAGS   = ${CFLAGS}"
       +        @echo "LDFLAGS  = ${LDFLAGS}"
       +        @echo "CC       = ${CC}"
       +
       +.c.o:
       +        @echo CC $<
       +        @${CC} -c ${CFLAGS} $<
       +
       +${OBJ}: config.mk
       +
       +${NAME}: ${OBJ}
       +        @echo CC -o $@
       +        @${CC} -o $@ ${OBJ} ${LDFLAGS}
       +
       +clean:
       +        @echo cleaning
       +        @rm -f ${NAME} ${OBJ} ${NAME}-${VERSION}.tar.gz
       +
       +# Create the release tarball ${NAME}-${VERSION}.tar.gz containing the
       +# license, the build files, the sources, the manual page and the headers.
       +dist: clean
       +        @echo creating dist tarball
       +        @mkdir -p ${NAME}-${VERSION}
       +        @cp -R LICENSE Makefile config.mk \
       +                ${SRC} ${NAME}.1 *.h ${NAME}-${VERSION}
       +        @tar -cf ${NAME}-${VERSION}.tar ${NAME}-${VERSION}
       +        @gzip ${NAME}-${VERSION}.tar
       +        @rm -rf ${NAME}-${VERSION}
       +
       +install: all
       +        @echo installing executable file to ${DESTDIR}${PREFIX}/bin
       +        @mkdir -p ${DESTDIR}${PREFIX}/bin
       +        @cp -f ${NAME} ${DESTDIR}${PREFIX}/bin
       +        @chmod 755 ${DESTDIR}${PREFIX}/bin/${NAME}
       +        @echo installing manual page to ${DESTDIR}${MANPREFIX}/man1
       +        @mkdir -p ${DESTDIR}${MANPREFIX}/man1
       +        @cp -f ${NAME}.1 ${DESTDIR}${MANPREFIX}/man1
       +        @chmod 644 ${DESTDIR}${MANPREFIX}/man1/${NAME}.1
       +
       +# Remove the files placed by the install target.
       +uninstall:
       +        @echo removing executable file from ${DESTDIR}${PREFIX}/bin
       +        @rm -f ${DESTDIR}${PREFIX}/bin/${NAME}
       +        @echo removing manual page from ${DESTDIR}${MANPREFIX}/man1
       +        @rm -f ${DESTDIR}${MANPREFIX}/man1/${NAME}.1
       +
       +.PHONY: all options clean dist install uninstall
   DIR diff --git a/arg.h b/arg.h
       @@ -0,0 +1,41 @@
       +/*
       + * Copy me if you can.
       + * by 20h
       + */
       +
       +#ifndef __ARG_H__
       +#define __ARG_H__
       +
       +#include <stdlib.h>        /* abort(), used by EARGF */
       +
       +/* Program name; assigned by ARGBEGIN, defined by the including program. */
       +extern char *argv0;
       +
       +/* Mark a variable as intentionally unused. */
       +#define USED(x) ((void)(x))
       +
       +/*
       + * ARGBEGIN { case 'x': ...; break; } ARGEND
       + *
       + * Walks the option arguments of main()'s argc/argv, which must be in
       + * scope under exactly those names.  argv0 is set to the first element
       + * and skipped.  Every following argument that starts with '-' and has
       + * at least one more character is treated as a group of option letters;
       + * each letter is handed to the switch body supplied by the caller.
       + * Parsing stops at the first argument that does not look like that
       + * (including a lone "-" and an empty string), or after a "--", which
       + * is consumed.  If a case body advances argv (as EARGF does), the rest
       + * of the current option group is skipped.
       + *
       + * The first character is tested before the second one so that an empty
       + * argument is never read past its terminating NUL.
       + */
       +#define ARGBEGIN        for (argv0 = *argv, argv++, argc--;\
       +                                        argv[0] && argv[0][0] == '-'\
       +                                        && argv[0][1];\
       +                                        argc--, argv++) {\
       +                                char _argc;\
       +                                char **_argv;\
       +                                if (argv[0][1] == '-' && argv[0][2] == '\0') {\
       +                                        argv++;\
       +                                        argc--;\
       +                                        break;\
       +                                }\
       +                                for (argv[0]++, _argv = argv; argv[0][0];\
       +                                                argv[0]++) {\
       +                                        if (_argv != argv)\
       +                                                break;\
       +                                        _argc = argv[0][0];\
       +                                        switch (_argc)
       +
       +/* Closes the loops opened by ARGBEGIN. */
       +#define ARGEND                        }\
       +                                USED(_argc);\
       +                        }\
       +                        USED(argv);\
       +                        USED(argc);
       +
       +/*
       + * EARGF(x): consume the next element of argv and yield it as the
       + * option's argument.  If there is no next element, evaluate x and then
       + * call abort().
       + */
       +#define EARGF(x)        ((argv[1] == NULL)? ((x), abort(), (char *)0) :\
       +                        (argc--, argv++, argv[0]))
       +
       +#endif
       +
   DIR diff --git a/config.mk b/config.mk
       @@ -0,0 +1,23 @@
       +# utf8expr metadata
       +NAME = utf8expr
       +VERSION = 0.8
       +
       +# Customize below to fit your system
       +
       +# paths
       +PREFIX = /usr/local
       +MANPREFIX = ${PREFIX}/share/man
       +
       +# includes and libs
       +INCS = -I. -I/usr/include
       +LIBS = -L/usr/lib -lc
       +
       +# flags
       +CPPFLAGS = -DVERSION=\"${VERSION}\"
       +CFLAGS = -g -std=c99 -pedantic -Wall -O0 ${INCS} ${CPPFLAGS}
       +LDFLAGS = -static -g ${LIBS}
       +#LDFLAGS = -s ${LIBS}
       +
       +# compiler and linker
       +CC = cc
       +
   DIR diff --git a/utf8expr.1 b/utf8expr.1
       @@ -0,0 +1,41 @@
       +.Dd May 21, 2012 
       +.Dt UTF8EXPR 1
       +.Os
       +.
       +.Sh NAME
       +.Nm utf8expr
       +.Nd evaluate UTF-8 expressions
       +.
       +.Sh SYNOPSIS
       +.Nm
       +.Bk -words
       +EXPRESSION
       +.
       +.Sh DESCRIPTION
       +.Bd -filled
       +.Nm
       +will evaluate a subset of the expr(1) syntax while taking care
       +of UTF-8 characters.
       +.Ed
       +. 
       +.Sh EXPRESSIONS 
       +.Pp
       +.Bl -tag -width ".Fl test Ao Ar string Ac"
       +.
       +.It substr STRING POS LENGTH
       +substring of STRING, POS counted from 1
       +.
       +.It index STRING CHARS
       +index in STRING where any CHARS is found, or 0
       +.
       +.It length STRING
       +length of STRING
       +.El
       +.
       +.Sh AUTHORS
       +See the LICENSE file for the authors of this software.
       +.
       +.Sh LICENSE
       +.Nm
       +is released under the MIT/X Consortium License.
       +.
       +
   DIR diff --git a/utf8expr.c b/utf8expr.c
       @@ -0,0 +1,173 @@
       +/*
       + * Copy me if you can.
       + * by 20h
       + */
       +
       +#include <unistd.h>
       +#include <string.h>
       +#include <stdlib.h>
       +#include <stdio.h>
       +#include <libgen.h>
       +
       +#include "arg.h"
       +
       +char *argv0;
       +
       +/*
       + * Count the UTF-8 characters in the NUL-terminated string s.
       + *
       + * Continuation bytes have the bit pattern 10xxxxxx; every other byte
       + * begins a new character, so only those bytes are counted.  The input
       + * is not validated.
       + *
       + * Idea taken from:
       + *        http://canonical.org/~kragen/strlen-utf8.html
       + */
       +size_t
       +utf8strlen(char *s)
       +{
       +        size_t count = 0;
       +        char *p;
       +
       +        for (p = s; *p != '\0'; p++)
       +                count += ((*p & 0xc0) != 0x80);
       +
       +        return count;
       +}
       +
       +/*
       + * Find the UTF-8 character c in the string s.
       + *
       + * s is walked one character at a time, a character being one byte plus
       + * all continuation bytes (10xxxxxx) that follow it.  A character matches
       + * when it has exactly strlen(c) bytes and those bytes equal c.
       + *
       + * Returns a pointer to the first matching character in s, or NULL if
       + * there is none, if c is empty, or if a sequence longer than 7 bytes is
       + * met before a match.
       + *
       + * The scan only looks forward from s, so no byte in front of the string
       + * is ever read or returned.
       + */
       +char *
       +utf8strchr(char *s, char *c)
       +{
       +        size_t len, cl;
       +
       +        cl = strlen(c);
       +        if (cl == 0)
       +                return NULL;
       +
       +        while (s[0] != '\0') {
       +                /* Byte length of the character starting at s. */
       +                for (len = 1; (s[len] & 0xc0) == 0x80; len++) {
       +                        if (len >= 7)
       +                                return NULL;
       +                }
       +
       +                if (len == cl && !memcmp(s, c, cl))
       +                        return s;
       +
       +                s += len;
       +        }
       +
       +        return NULL;
       +}
       +
       +/*
       + * Locate a substring of s measured in UTF-8 characters.
       + *
       + *   s       NUL-terminated string to search
       + *   pos     position of the first wanted character, counted from 1
       + *   length  in: number of characters wanted
       + *           out: number of bytes the substring occupies
       + *
       + * A character is one byte plus all continuation bytes (10xxxxxx) that
       + * follow it.  If fewer than *length characters remain from pos on, the
       + * substring runs to the end of s.
       + *
       + * Returns a pointer into s at the start of the substring; the result
       + * is not NUL-terminated at *length.  Returns NULL if *length or pos is
       + * 0, if pos lies behind the last character (*length is then set to 0),
       + * or if a sequence longer than 7 bytes is met on the way.
       + *
       + * pos == 0 is rejected up front: there is no character 0, and treating
       + * it as one would yield a pointer in front of s.
       + */
       +char *
       +utf8substr(char *s, size_t pos, size_t *length)
       +{
       +        size_t idx, len, want, bytes;
       +        char *start;
       +
       +        if (*length < 1 || pos < 1)
       +                return NULL;
       +
       +        start = NULL;
       +        bytes = 0;
       +        want = *length;
       +        for (idx = 1; s[0] != '\0' && want > 0; idx++, s += len) {
       +                /* Byte length of the character starting at s. */
       +                for (len = 1; (s[len] & 0xc0) == 0x80; len++) {
       +                        if (len >= 7)
       +                                return NULL;
       +                }
       +
       +                if (idx < pos)
       +                        continue;
       +
       +                if (start == NULL)
       +                        start = s;
       +                bytes += len;
       +                want--;
       +        }
       +
       +        *length = bytes;
       +        return start;
       +}
       +
       +/*
       + * Find the first character of s that also occurs in chars.
       + *
       + * s is walked one UTF-8 character at a time, a character being one
       + * byte plus all continuation bytes (10xxxxxx) that follow it.  Each
       + * character is copied into a NUL-terminated buffer and looked up in
       + * chars with utf8strchr().
       + *
       + * Returns the position of the first such character, counted from 1,
       + * or 0 if there is none or if a sequence longer than 7 bytes is met
       + * before a match.
       + *
       + * The buffer holds 7 bytes plus the terminator, so even the longest
       + * accepted sequence is passed on as a proper string, and only bytes of
       + * s itself are ever copied.
       + */
       +size_t
       +utf8index(char *s, char *chars)
       +{
       +        size_t idx, len;
       +        char c[8];
       +
       +        for (idx = 1; s[0] != '\0'; idx++, s += len) {
       +                /* Byte length of the character starting at s. */
       +                for (len = 1; (s[len] & 0xc0) == 0x80; len++) {
       +                        if (len >= sizeof(c) - 1)
       +                                return 0;
       +                }
       +
       +                memcpy(c, s, len);
       +                c[len] = '\0';
       +                if (utf8strchr(chars, c))
       +                        return idx;
       +        }
       +
       +        return 0;
       +}
       +
       +/*
       + * Write the usage line, with the base name of argv0 as program name,
       + * to stderr and terminate the program with exit status 1.
       + */
       +void
       +usage(void)
       +{
       +        char *prog;
       +
       +        prog = basename(argv0);
       +        fprintf(stderr, "usage: %s [substr|index|length] str [args ...]\n", prog);
       +        exit(1);
       +}
       +
       +/*
       + * Evaluate the expression given on the command line:
       + *
       + *        index STRING CHARS          print the result of utf8index()
       + *        length STRING               print the result of utf8strlen()
       + *        substr STRING POS LENGTH    print the substring found by
       + *                                    utf8substr()
       + *
       + * Only the first letter of the operation name is examined.  Too few
       + * arguments or an unknown operation end in usage().
       + *
       + * Returns 0 on success and -1 if utf8substr() finds no substring.
       + */
       +int
       +main(int argc, char *argv[])
       +{
       +        char *s;
       +        size_t len;
       +
       +        argv0 = argv[0];
       +
       +        if (argc < 3)
       +                usage();
       +
       +        switch (argv[1][0]) {
       +        case 'i':
       +                if (argc < 4)
       +                        usage();
       +                /* size_t results are printed with %zu, not %ld. */
       +                printf("%zu\n", utf8index(argv[2], argv[3]));
       +                break;
       +        case 'l':
       +                printf("%zu\n", utf8strlen(argv[2]));
       +                break;
       +        case 's':
       +                if (argc < 5)
       +                        usage();
       +                /* len goes in as a character count and comes back as a byte count. */
       +                len = atoi(argv[4]);
       +                s = utf8substr(argv[2], atoi(argv[3]), &len);
       +                if (s == NULL)
       +                        return -1;
       +                /* The substring is not NUL-terminated, so bound the output. */
       +                printf("%.*s\n", (int)len, s);
       +                break;
       +        default:
       +                usage();
       +        }
       +
       +        return 0;
       +}
       +