Re: [RFC] New kernel-message logging API - Kernel

This is a discussion on Re: [RFC] New kernel-message logging API - Kernel ; On 9/24/07, Joe Perches wrote: > On Mon, 2007-09-24 at 18:43 +0200, Vegard Nossum wrote: > > Storing the format-string separately allows us to hash THAT instead of > > the formatted (ie. console output) message. Since this will never ...

+ Reply to Thread
Results 1 to 2 of 2

Thread: Re: [RFC] New kernel-message logging API

  1. Re: [RFC] New kernel-message logging API

    On 9/24/07, Joe Perches wrote:
    > On Mon, 2007-09-24 at 18:43 +0200, Vegard Nossum wrote:
    > > Storing the format-string separately allows us to hash THAT instead of
    > > the formatted (ie. console output) message. Since this will never
    > > change from message to message, it can be looked up in a table or
    > > whatever and allow user-space to do translations without for example
    > > regular expressions.

    >
    > That hash will change with each linux version given the
    > inevitable spelling fixes, message reformatting and such.


    But we can keep the old ones too. That shouldn't be much of a problem.
    I mean, it probably wouldn't rely on a hash alone. The format string
    itself can be compared with the translation database.

    > > I will follow up with some code to demonstrate as soon as I can.

    >
    > Looking forward to it.


    Okay, so I have one huge file that does more or less what I want. The
    main problem with this, as I see it, is that it largely duplicates
    vsnprintf() from lib/vsprintf.c (minus the functions I copied verbatim
    from the same file). This is bad because the job of maintaining that
    is now doubled (or worse). Hopefully it won't change much in the
    future either.

    For now, the main() function is most important. This demonstrates that
    what I want is in fact possible. printf() is now split in two --
    args_printf_prepare() and args_snprintf(). The former takes your
    arguments as you would pass them to any *printf() function, but only
    converts your arguments (and stores them in a struct args). The latter
    takes the format string and the stored struct args and puts the whole
    thing in a buffer (just like snprintf()).

    I know Gmail isn't too friendly with code formatting, but this is just
    an example program anyway, so it shouldn't really matter. Also, a
    disclaimer: Almost completely untested.


    Vegard


    #define _GNU_SOURCE
    #include <ctype.h>
    #include <stdarg.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /*
     * do_div(n, base) - divide the 64-bit lvalue n by the 32-bit value base.
     *
     * n is overwritten with the quotient and the whole expression evaluates to
     * the remainder (as a uint32_t).
     *
     * This was shamelessly copied from include/asm-generic/div64.h
     */
    #define do_div(n, base) ({ \
        uint32_t div_by_ = (base); \
        uint64_t div_in_ = (uint64_t)(n); \
        (n) = div_in_ / div_by_; \
        (uint32_t)(div_in_ % div_by_); \
    })

    /* This was shamelessly copied from lib/vsprintf.c */
    /*
     * Write the decimal digits of q into buf, least significant digit first,
     * without leading zeros (one to five digits), and return a pointer just
     * past the last digit written. No NUL terminator is stored.
     *
     * put_dec() only calls this with values below 100000.
     */
    static char *put_dec_trunc(char *buf, unsigned q)
    {
        /* Upper nibbles of q; the lowest nibble is folded in directly below. */
        const unsigned nib1 = (q >> 4) & 0xf;
        const unsigned nib2 = (q >> 8) & 0xf;
        const unsigned nib3 = q >> 12;
        unsigned acc;
        unsigned carry;

        /* least significant digit */
        acc = 6 * (nib3 + nib2 + nib1) + (q & 0xf);
        carry = (acc * 0xcd) >> 11;
        *buf++ = (acc - 10 * carry) + '0';

        /* next digit */
        acc = carry + 9 * nib3 + 5 * nib2 + nib1;
        if (acc == 0)
            return buf;
        carry = (acc * 0xcd) >> 11;
        *buf++ = (acc - 10 * carry) + '0';

        /* next digit */
        acc = carry + 2 * nib2;
        if (acc == 0 && nib3 == 0)
            return buf;
        carry = (acc * 0xd) >> 7;
        *buf++ = (acc - 10 * carry) + '0';

        /* next digit */
        acc = carry + 4 * nib3;
        if (acc == 0)
            return buf;
        carry = (acc * 0xcd) >> 11;
        *buf++ = (acc - 10 * carry) + '0';

        /* most significant digit */
        if (carry != 0)
            *buf++ = carry + '0';

        return buf;
    }
    /*
     * Same digit extraction as put_dec_trunc() but without the early exits:
     * always stores exactly five decimal digits of q into buf, least
     * significant first (so small values are zero-padded), and returns buf + 5.
     *
     * put_dec() only calls this with a remainder below 100000.
     */
    static char *put_dec_full(char *buf, unsigned q)
    {
        const unsigned nib1 = (q >> 4) & 0xf;
        const unsigned nib2 = (q >> 8) & 0xf;
        const unsigned nib3 = q >> 12;
        unsigned acc;
        unsigned carry;

        acc = 6 * (nib3 + nib2 + nib1) + (q & 0xf);
        carry = (acc * 0xcd) >> 11;
        buf[0] = (acc - 10 * carry) + '0';

        acc = carry + 9 * nib3 + 5 * nib2 + nib1;
        carry = (acc * 0xcd) >> 11;
        buf[1] = (acc - 10 * carry) + '0';

        acc = carry + 2 * nib2;
        carry = (acc * 0xd) >> 7;
        buf[2] = (acc - 10 * carry) + '0';

        acc = carry + 4 * nib3;
        carry = (acc * 0xcd) >> 11;
        buf[3] = (acc - 10 * carry) + '0';
        buf[4] = carry + '0';

        return buf + 5;
    }
    /*
     * Write the decimal digits of num into buf, least significant digit first,
     * and return a pointer just past the last digit written. The value is
     * consumed in groups of five digits: every full group goes through
     * put_dec_full(), the final (most significant) group through
     * put_dec_trunc() so that no leading zeros are produced.
     */
    static char *put_dec(char *buf, unsigned long long num)
    {
        while (num >= 100000) {
            /* do_div() leaves the quotient in num and yields the remainder. */
            unsigned low_group = do_div(num, 100000);

            buf = put_dec_full(buf, low_group);
        }

        return put_dec_trunc(buf, num);
    }
    /*
    * Formatting flags. Each one is a distinct bit; args_vprintf_prepare()
    * ORs them together while parsing a conversion specification and passes
    * the result to buffer_write_number() as its "type" argument.
    */
    #define ZEROPAD 1 /* pad with zero ('0' flag); cleared when LEFT is also set */
    #define SIGN 2 /* treat the value as signed (set for %d and %i) */
    #define PLUS 4 /* show plus: emit '+' in front of non-negative signed values */
    #define SPACE 8 /* space if plus is not requested: ' ' in front of non-negative signed values */
    #define LEFT 16 /* left justified: padding goes after the digits */
    #define SPECIAL 32 /* '#' flag: leading 0 for non-decimal bases, 0x for base 16 */
    #define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */
    /*
     * Parse the run of decimal digits at *s and advance *s past it.
     *
     * Returns the parsed value, or 0 when *s does not start with a digit (in
     * which case *s is left unchanged). Values too large for an int saturate at
     * the largest int instead of overflowing; the whole digit run is still
     * consumed.
     */
    static int skip_atoi(const char **s)
    {
        /* Largest int, computed locally so no extra header is required. */
        const int int_max = (int) (~0U >> 1);
        int i = 0;

        /* <ctype.h> functions need an unsigned char value; plain char may be
         * negative, which would be undefined behaviour. */
        while (isdigit((unsigned char) **s)) {
            int digit = *((*s)++) - '0';

            if (i > (int_max - digit) / 10)
                i = int_max;
            else
                i = i * 10 + digit;
        }

        return i;
    }
    /* End of shameless copy. */


    /**
    * Simple string buffer implementation
    *
    * A growable array of characters that is filled one character at a time
    * with buffer_write(). The contents are not NUL-terminated; buffer_dup()
    * produces a terminated copy of the first pos characters.
    */
    struct buffer {
    /* number of bytes currently allocated for data (0 while data is NULL) */
    unsigned int size;

    /* number of characters written so far, i.e. the index of the next write */
    unsigned int pos;
    /* heap storage obtained via realloc(); NULL until the first write */
    char *data;
    };

    void buffer_init(struct buffer *buf)
    {
    buf->size = 0;
    buf->pos = 0;
    buf->data = NULL;
    }

    /*
     * Release the storage owned by buf and return it to the empty state, so
     * the same struct can be written to again afterwards.
     */
    void buffer_deinit(struct buffer *buf)
    {
        char *storage = buf->data;

        buf->data = NULL;
        buf->pos = 0;
        buf->size = 0;

        free(storage);
    }

    /*
     * Append the character c to buf, growing the storage when it is full
     * (8 bytes initially, doubling afterwards).
     *
     * If the storage cannot be grown (allocation failure, or the doubled size
     * no longer fits in an unsigned int) the buffer is left exactly as it was
     * and c is dropped; the previously written characters stay valid.
     */
    void buffer_write(struct buffer *buf, char c)
    {
        if (buf->pos == buf->size) {
            unsigned int grown = buf->size ? 2 * buf->size : 8;
            char *bigger;

            /* 2 * size wrapped around */
            if (grown <= buf->size)
                return;

            /* Keep the old pointer until realloc() is known to have worked;
             * assigning its result directly would leak the old block and
             * crash on the write below when it returns NULL. */
            bigger = realloc(buf->data, grown);
            if (bigger == NULL)
                return;

            buf->data = bigger;
            buf->size = grown;
        }

        buf->data[buf->pos++] = c;
    }

    /*
     * The equivalent of number() in lib/vsprintf.c
     *
     * Formats num and appends the result to buf.
     *
     * buf:       destination buffer
     * num:       value to format; interpreted as signed when type has SIGN
     * base:      8, 10 or 16; any other base writes nothing
     * size:      minimum field width (values <= 0 mean no padding)
     * precision: minimum number of digits (values <= 0 mean no minimum)
     * type:      OR of ZEROPAD, SIGN, PLUS, SPACE, LEFT, SPECIAL, LARGE
     */
    void buffer_write_number(struct buffer *buf, unsigned long long num, int base,
                             int size, int precision, int type)
    {
        static const char small_digits[] = "0123456789abcdefx";
        static const char large_digits[] = "0123456789ABCDEFX";
        const char *digits = (type & LARGE) ? large_digits : small_digits;
        const int need_pfx = ((type & SPECIAL) && base != 10);
        char tmp[66];
        char sign = 0;
        int i = 0;

        /* The non-decimal path below extracts digits with a mask and shift,
         * which is only correct for bases 8 and 16; any other base would also
         * index past the end of the digit tables. */
        if (base != 8 && base != 10 && base != 16)
            return;

        /* Left justification wins over zero padding. */
        if (type & LEFT)
            type &= ~ZEROPAD;

        if (type & SIGN) {
            if ((signed long long) num < 0) {
                sign = '-';
                /* Negate in unsigned arithmetic: negating the most negative
                 * signed value would overflow. */
                num = 0 - num;
                --size;
            } else if (type & PLUS) {
                sign = '+';
                --size;
            } else if (type & SPACE) {
                sign = ' ';
                --size;
            }
        }

        /* Reserve room for the "0" / "0x" prefix. */
        if (need_pfx) {
            --size;
            if (base == 16)
                --size;
        }

        /* Generate the digits in reverse order into tmp. */
        if (num == 0) {
            tmp[i++] = '0';
        } else if (base != 10) {
            const unsigned int mask = base - 1;
            const int shift = (base == 16) ? 4 : 3;

            do {
                tmp[i++] = digits[num & mask];
                num >>= shift;
            } while (num);
        } else {
            i = put_dec(tmp, num) - tmp;
        }

        /* The printed digits never get truncated by the precision. */
        if (i > precision)
            precision = i;

        /* Leading spaces (right justified, no zero padding). */
        size -= precision;
        if (!(type & (ZEROPAD | LEFT))) {
            while (--size >= 0)
                buffer_write(buf, ' ');
        }

        if (sign)
            buffer_write(buf, sign);

        if (need_pfx) {
            buffer_write(buf, '0');
            if (base == 16)
                buffer_write(buf, digits[16]);  /* 'x' or 'X' */
        }

        /* Zero padding goes between the sign/prefix and the digits. When the
         * spaces were already written above, size is negative by now and this
         * loop does nothing. */
        if (!(type & LEFT)) {
            char pad = (type & ZEROPAD) ? '0' : ' ';

            while (--size >= 0)
                buffer_write(buf, pad);
        }

        /* Zeros required by the precision. */
        while (i <= --precision)
            buffer_write(buf, '0');

        /* The digits themselves, most significant first. */
        while (--i >= 0)
            buffer_write(buf, tmp[i]);

        /* Trailing spaces (left justified). */
        while (--size >= 0)
            buffer_write(buf, ' ');
    }

    /*
     * Return a newly allocated, NUL-terminated copy of the characters written
     * to buf so far. The caller owns the result and releases it with free().
     * Returns NULL if the allocation fails.
     *
     * A buffer that was never written to has no storage (data is NULL); it is
     * duplicated as an empty string rather than handing NULL to strndup().
     */
    char *buffer_dup(struct buffer *buf)
    {
        const char *text = buf->data ? buf->data : "";

        return strndup(text, buf->pos);
    }

    /**
    * Expanding args-array
    *
    * A growable list of strings. args_push() appends entries;
    * args_vprintf_prepare() stores one pre-formatted string per conversion in
    * it and args_snprintf() reads them back in order.
    */
    struct args {
    /* capacity tracked by args_push(): argv is grown when argc reaches it */
    unsigned int size;

    /* number of entries stored in argv so far */
    unsigned int argc;
    /* the stored strings; NULL until the first args_push() */
    char **argv;
    };

    /*
     * Append arg to the end of args, growing the array when it is full
     * (8 slots initially, doubling afterwards). Only the pointer is stored;
     * the string is not copied.
     *
     * If the array cannot be grown, args is left unchanged and arg is not
     * stored; callers can detect this by checking whether args->argc changed.
     */
    void args_push(struct args *args, char *arg)
    {
        if (args->argc == args->size) {
            unsigned int grown = args->size ? 2 * args->size : 8;
            char **bigger;

            /* Refuse if the slot count wrapped or the byte count would. */
            if (grown <= args->size || grown > (size_t) -1 / sizeof *bigger)
                return;

            /* realloc() takes a size in bytes, so the slot count has to be
             * scaled by the element size. The result is checked before the
             * old pointer is replaced so nothing is lost on failure. */
            bigger = realloc(args->argv, grown * sizeof *bigger);
            if (bigger == NULL)
                return;

            args->argv = bigger;
            args->size = grown;
        }

        args->argv[args->argc++] = arg;
    }

    /* Append the text collected in str to args as one pre-formatted argument,
     * then release str's storage. */
    static void args_push_buffer(struct args *args, struct buffer *str)
    {
        args_push(args, buffer_dup(str));
        buffer_deinit(str);
    }

    /*
     * First half of the split printf(): walk the format string, fetch every
     * argument from ap and store its formatted text (flags, field width and
     * precision already applied) as one string in args. The literal text of
     * the format string is not copied anywhere; args_snprintf() later merges
     * the format string with the stored strings.
     *
     * args: destination list; one entry is appended per conversion, except
     *       for "%%" and "%n", which append nothing
     * fmt:  printf-style format string
     * ap:   the arguments matching fmt
     *
     * Handled conversions: c, s, p, o, x, X, d, i, u with the flags "-+ #0",
     * a field width, a precision and the qualifiers h, l, ll, L, Z, z and t.
     * An unknown conversion character appends an empty string so that the
     * entries stay in step with args_snprintf().
     */
    void args_vprintf_prepare(struct args *args, const char *fmt, va_list ap)
    {
        const char *p;

        for (p = fmt; *p; ++p) {
            int flags = 0;
            int field_width = -1;
            int precision = -1;
            int qualifier = -1;
            int base = 10;      /* default base */

            int i;
            char c;
            const char *s;
            int len;
            unsigned long long num;

            struct buffer str;

            /* skip to the next format */
            if (*p != '%')
                continue;

            buffer_init(&str);

            /* read flags */
            for (;;) {
                ++p;
                switch (*p) {
                case '-': flags |= LEFT; continue;
                case '+': flags |= PLUS; continue;
                case ' ': flags |= SPACE; continue;
                case '#': flags |= SPECIAL; continue;
                case '0': flags |= ZEROPAD; continue;
                default: break;
                }
                break;
            }

            /* read field width; <ctype.h> needs an unsigned char value */
            if (isdigit((unsigned char) *p)) {
                field_width = skip_atoi(&p);
            } else if (*p == '*') {
                ++p;
                field_width = va_arg(ap, int);
                if (field_width < 0) {
                    /* a negative width means left justification */
                    field_width = -field_width;
                    flags |= LEFT;
                }
            }

            /* read precision */
            if (*p == '.') {
                ++p;
                if (isdigit((unsigned char) *p)) {
                    precision = skip_atoi(&p);
                } else if (*p == '*') {
                    ++p;
                    precision = va_arg(ap, int);
                }

                if (precision < 0)
                    precision = 0;
            }

            /* read conversion qualifier */
            if (*p == 'h' || *p == 'l' || *p == 'L'
                || *p == 'Z' || *p == 'z' || *p == 't') {
                qualifier = *p;
                ++p;
                if (qualifier == 'l' && *p == 'l') {
                    qualifier = 'L';
                    ++p;
                }
            }

            switch (*p) {
            case 'c':
                if (!(flags & LEFT)) {
                    while (--field_width > 0)
                        buffer_write(&str, ' ');
                }

                c = (unsigned char) va_arg(ap, int);
                buffer_write(&str, c);

                while (--field_width > 0)
                    buffer_write(&str, ' ');

                args_push_buffer(args, &str);
                continue;
            case 's':
                /* XXX: see vsnprintf() in lib/vsprintf.c */
                s = va_arg(ap, char *);
                /* A NULL string would crash the length scan below. */
                if (s == NULL)
                    s = "<NULL>";
                /* No precision given means the whole string. */
                if (precision < 0)
                    len = strlen(s);
                else
                    len = strnlen(s, precision);

                if (!(flags & LEFT)) {
                    while (len < field_width--)
                        buffer_write(&str, ' ');
                }
                for (i = 0; i < len; ++i, ++s)
                    buffer_write(&str, *s);
                while (len < field_width--)
                    buffer_write(&str, ' ');

                args_push_buffer(args, &str);
                continue;
            case 'p':
                if (field_width == -1) {
                    field_width = 2 * sizeof(void *);
                    flags |= ZEROPAD;
                }

                buffer_write_number(&str,
                                    (unsigned long) va_arg(ap, void *),
                                    16, field_width, precision, flags);

                args_push_buffer(args, &str);
                continue;
            case 'n':
                /* Nothing is stored (the output length is not known at this
                 * stage), but the pointer argument that belongs to %n still
                 * has to be fetched, otherwise every later conversion would
                 * read the wrong argument. */
                if (qualifier == 'l')
                    (void) va_arg(ap, long *);
                else if (qualifier == 'Z' || qualifier == 'z')
                    (void) va_arg(ap, size_t *);
                else
                    (void) va_arg(ap, int *);
                buffer_deinit(&str);
                continue;
            case '%':
                /* Nothing happens. */
                buffer_deinit(&str);
                continue;
            case 'o':
                base = 8;
                break;
            case 'X':
                flags |= LARGE;
                /* fallthrough */
            case 'x':
                base = 16;
                break;
            case 'd':
            case 'i':
                flags |= SIGN;
                /* fallthrough */
            case 'u':
                break;

            default:
                /* In this case, we need to pop an empty argument in
                 * the real snprintf(). */
                args_push_buffer(args, &str);
                /* The format string ended in the middle of a conversion
                 * (e.g. a trailing '%'); stop here, because the loop's ++p
                 * would otherwise step past the terminating NUL. */
                if (*p == '\0')
                    return;
                continue;
            }

            /* Fetch the integer argument according to its qualifier. */
            if (qualifier == 'L') {
                num = va_arg(ap, long long);
            } else if (qualifier == 'l') {
                num = va_arg(ap, unsigned long);
                if (flags & SIGN)
                    num = (signed long) num;
            } else if (qualifier == 'Z' || qualifier == 'z') {
                num = va_arg(ap, size_t);
            } else if (qualifier == 't') {
                /* The type of a pointer difference, spelled without needing
                 * an extra header. */
                num = va_arg(ap, __typeof__((char *) 0 - (char *) 0));
            } else if (qualifier == 'h') {
                num = (unsigned short) va_arg(ap, int);
                if (flags & SIGN)
                    num = (signed short) num;
            } else {
                num = va_arg(ap, unsigned int);
                if (flags & SIGN)
                    num = (signed int) num;
            }

            buffer_write_number(&str, num, base, field_width,
                                precision, flags);

            args_push_buffer(args, &str);
        }
    }

    /*
    * Variadic front end for args_vprintf_prepare(): gathers the arguments that
    * follow fmt into a va_list and hands them, together with args and fmt, to
    * args_vprintf_prepare().
    */
    void args_printf_prepare(struct args *args, const char *fmt, ...)
    {
    va_list ap;

    va_start(ap, fmt);
    args_vprintf_prepare(args, fmt, ap);
    va_end(ap);
    }

    /*
     * Here we just parse the format string without really caring about the field
     * parameters; they've been formatted already by args_printf_prepare().
     *
     * args: the pre-formatted strings, consumed in order, one per conversion
     *       (none for "%%" and "%n")
     * buf:  destination buffer
     * size: size of buf in bytes, including room for the terminating NUL
     * fmt:  format string; literal text is copied, every conversion is
     *       replaced by the next string from args
     *
     * Returns the number of characters the complete output needs (excluding
     * the NUL), which may exceed size; output beyond the buffer is discarded
     * and the result is NUL-terminated whenever size > 0. A conversion with no
     * stored string left (or a NULL entry) contributes no output.
     */
    int args_snprintf(struct args *args,
                      char *buf, size_t size, const char *fmt)
    {
        const char *p;
        unsigned int argp = 0;

        char *str;
        char *end;

        /* Reject sizes that are negative when viewed as an int. */
        if ((int) size < 0)
            return 0;

        str = buf;
        end = buf + size;

        /* Make sure end is always >= buf */
        if (end < buf) {
            end = ((void *) -1);
            size = end - buf;
        }

        for (p = fmt; *p; ++p) {
            if (*p != '%') {
                if (str < end)
                    *str = *p;
                ++str;
                continue;
            }

            /* skip flags */
            do {
                ++p;
            } while (*p == '-' || *p == '+' || *p == ' '
                     || *p == '#' || *p == '0');

            /* skip field width; <ctype.h> needs an unsigned char value */
            if (*p == '*') {
                ++p;
            } else {
                while (isdigit((unsigned char) *p))
                    ++p;
            }

            /* skip precision */
            if (*p == '.') {
                ++p;

                if (*p == '*') {
                    ++p;
                } else {
                    while (isdigit((unsigned char) *p))
                        ++p;
                }
            }

            /* skip the conversion qualifier */
            if (*p == 'h' || *p == 'l' || *p == 'L'
                || *p == 'Z' || *p == 'z' || *p == 't') {
                if (*p == 'l') {
                    ++p;

                    if (*p == 'l')
                        ++p;
                } else {
                    ++p;
                }
            }

            /* The format string ended in the middle of a conversion (e.g. a
             * trailing '%'); stop before the loop's ++p steps past the NUL. */
            if (*p == '\0')
                break;

            /* skip the conversion specifier */
            if (*p == 'n') {
                /* XXX: (not implemented) */
                continue;
            }
            if (*p == '%') {
                if (str < end)
                    *str = '%';
                ++str;
                continue;
            }

            /* Copy the stored string for this conversion, if there is one.
             * The bound matters when fmt contains more conversions than
             * strings were prepared. */
            if (argp < args->argc && args->argv[argp] != NULL) {
                const char *arg;

                for (arg = args->argv[argp]; *arg; ++arg) {
                    if (str < end)
                        *str = *arg;
                    ++str;
                }
            }

            ++argp;
        }

        if (size > 0) {
            if (str < end)
                *str = '\0';
            else
                end[-1] = '\0';
        }

        return str - buf;
    }

    /*
     * Demonstration: format "Hello %s (%d times)!" in two steps. The
     * arguments are first converted to strings with args_printf_prepare() and
     * dumped, then merged with the format string by args_snprintf() and
     * printed.
     */
    int
    main(int argc, char *argv[])
    {
        const char format[] = "Hello %s (%d times)!";
        struct args args;
        unsigned int i;

        (void) argc;
        (void) argv;

        args.size = 0;
        args.argc = 0;
        args.argv = NULL;

        args_printf_prepare(&args, format, "world", 5);

        /* We can do whatever we want with the arguments now, like dumping
         * them. */
        printf("Argument dump:\n");
        for (i = 0; i < args.argc; i++)
            printf("arg #%u: %s\n", 1 + i, args.argv[i]);  /* i is unsigned */
        printf("\n");

        /* Use the format string and our "prepared" (ie. pre-formatted)
         * argument list to actually format the string. */
        {
            static char out[512];

            printf("Reformatting:\n");

            args_snprintf(&args, out, sizeof(out), format);

            printf("%s\n", out);
            printf("\n");
        }

        /* The prepared strings and the array holding them are heap
         * allocated; release them. */
        for (i = 0; i < args.argc; i++)
            free(args.argv[i]);
        free(args.argv);

        return 0;
    }
    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  2. Re: [RFC] New kernel-message logging API

    On Monday 24 September 2007 3:37:55 pm Vegard Nossum wrote:
    > On 9/24/07, Joe Perches wrote:
    > > On Mon, 2007-09-24 at 18:43 +0200, Vegard Nossum wrote:
    > > > Storing the format-string separately allows us to hash THAT instead of
    > > > the formatted (ie. console output) message. Since this will never
    > > > change from message to message, it can be looked up in a table or
    > > > whatever and allow user-space to do translations without for example
    > > > regular expressions.

    > >
    > > That hash will change with each linux version given the
    > > inevitable spelling fixes, message reformatting and such.

    >
    > But we can keep the old ones too. That shouldn't be much of a problem.
    > I mean, it probably wouldn't rely on a hash alone. The format string
    > itself can be compared with the translation database.


    I point out that the thread started with a comment about how to _reduce_
    bloat.

    Just sayin'.

    Rob
    --
    "One of my most productive days was throwing away 1000 lines of code."
    - Ken Thompson.
    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

+ Reply to Thread