I've recently been writing my own implementation of stdio.h whenever I get a few spare minutes. It's nothing fancy or thorough, and without wide character support, it's hardly conforming. But I was looking at one of my intermediate versions of printf and could only marvel at how ugly it was. So much so that I'm going to share. :)

Have a peek (and keep in mind that I have no intention of keeping it. ugh):

#include "format_string.h"
#include "jstdio.h"
#include "numeric.h"
#include "print_stream.h"
#include "xtype.h"

/* Write `count` copies of `ch` to `out`; returns 0 as soon as _put reports J_EOF */
static int emit_fill ( J_FILE_T *out, int ch, int count, j_size_t *nwrit )
{
  while ( count-- > 0 ) {
    if ( _put ( ch, out, *nwrit ) == J_EOF )
      return 0;
  }

  return 1;
}

/* Write up to `limit` characters of `text`, stopping at '\0'; returns 0 on J_EOF */
static int emit_text ( J_FILE_T *out, const char *text, int limit, j_size_t *nwrit )
{
  while ( limit-- > 0 && *text != '\0' ) {
    if ( _put ( *text, out, *nwrit ) == J_EOF )
      return 0;

    ++text;
  }

  return 1;
}

/*
  Print an already-converted argument with its sign, prefix and
  field padding, as described by the conversion specifier.

  out    - stream handed to _put
  buffer - the converted text; a leading '-' or '+' is treated as the sign
  prefix - alternate-form prefix ("" when there is none)
  spec   - parsed specifier (flags, pad, field_width, precision, type)
  nwrit  - running character count handed to _put

  Returns 1 on success, 0 if _put reported J_EOF.

  The precision only limits how much of the buffer is written for
  strings; a negative precision means "not specified". Numeric
  buffers are always written in full.
*/
static int pad_buffer ( J_FILE_T *out, char buffer[], const char *prefix,
  struct print_convspec *spec, j_size_t *nwrit )
{
  const char *body = buffer;
  int body_len = j_str_len ( buffer );
  int prefix_len = j_str_len ( prefix );
  int sign = '\0';
  int sign_len;
  int fill;

  if ( spec->type == MOD_STRING ) {
    /* Strings have no sign; the precision caps the length */
    if ( spec->precision >= 0 && spec->precision < body_len )
      body_len = spec->precision;
  }
  else if ( *body == '-' || *body == '+' ) {
    /* Separate the sign so that zero padding can go after it */
    sign = *body;
    ++body;
    --body_len;
  }
  else if ( spec->flags & SHOW_SIGN )
    sign = '+';

  sign_len = sign != '\0';

  /* Whatever the field has left over once everything else is counted */
  fill = spec->field_width - prefix_len - body_len - sign_len;

  if ( spec->flags & LEFT_JUSTIFY ) {
    /* Left justified output is always padded with trailing spaces */
    return emit_fill ( out, sign, sign_len, nwrit )
      && emit_text ( out, prefix, prefix_len, nwrit )
      && emit_text ( out, body, body_len, nwrit )
      && emit_fill ( out, ' ', fill, nwrit );
  }

  if ( spec->pad != ' ' ) {
    /* We want the sign and prefix before leading zeros */
    return emit_fill ( out, sign, sign_len, nwrit )
      && emit_text ( out, prefix, prefix_len, nwrit )
      && emit_fill ( out, spec->pad, fill, nwrit )
      && emit_text ( out, body, body_len, nwrit );
  }

  /* We want the sign and prefix after leading spaces */
  return emit_fill ( out, ' ', fill, nwrit )
    && emit_fill ( out, sign, sign_len, nwrit )
    && emit_text ( out, prefix, prefix_len, nwrit )
    && emit_text ( out, body, body_len, nwrit );
}

/*
  Convert a double to a string in the caller's buffer using
  printf-style rules, and return that buffer.

  buffer - destination for the converted text
  val    - value to convert
  flag   - FMT_FLOAT selects fp_tostring; anything else goes through
           fp_normalize, with FMT_NO_FORMAT standing for %g
  prec   - requested precision; negative means "not specified"
  alt    - non-zero when the alternate form ('#') was requested
*/
static char *fp_cvt ( char buffer[], double val, int flag, int prec, int alt )
{
  int show_trailing_zeros = 1;
  int show_trailing_radix = 0;
  int show_float = 0;

  /* A missing precision defaults to 6 */
  if ( prec < 0 )
    prec = 6;

  if ( flag == FMT_NO_FORMAT ) {
    /* %g specifier */
    show_float = 1;

    if ( alt )
      show_trailing_radix = 1;
    else
      show_trailing_zeros = 0;

    /* %g requires treating a 0 precision as 1 */
    if ( prec == 0 )
      prec = 1;
  }
  else {
    /* Assume %e or %f */
    if ( prec == 0 && alt )
      show_trailing_radix = 1;
  }

  if ( flag == FMT_FLOAT ) {
    fp_tostring ( buffer, val, prec, '.',
      show_trailing_zeros, show_trailing_radix, show_float );
  }
  else {
    fp_normalize ( buffer, val, prec, '.', 'e',
      show_trailing_zeros, show_trailing_radix, show_float );
  }

  return buffer;
}

/*
  Internal worker for the printf family.

  Walks the format string, copying ordinary characters to `out` and
  converting one argument from `args` per conversion specifier.

  Returns the number of characters written, or -1 if a specifier
  could not be parsed or a write failed.
*/
int _print_stream ( J_FILE_T *out, const char *fmt, j_va_list args )
{
  struct print_convspec spec = {0};

  j_size_t nwrit = 0;
  int err = 0;

  while ( *fmt != '\0' ) {
    /* Process a conversion specifier */
    if ( *fmt == '%' ) {
      j_size_t count = _get_print_specifier ( &spec, fmt );

      if ( count == 0 ) {
        /* The conversion failed; report it instead of returning a count */
        err = 1;
        goto process_state;
      }

      /* Jump past the conversion specifier */
      fmt += count;

      switch ( spec.type ) {
      case MOD_STRING:
        if ( !pad_buffer ( out,
          j_va_arg ( args, char * ),
          "", &spec, &nwrit ) )
        {
          err = 1;
          goto process_state;
        }

        break;
      case MOD_CHAR:
        {
          /*
            A char argument is promoted to int when passed through
            '...', so it has to be fetched as int. It is read into a
            local first so the argument is only fetched once.
          */
          int ch = (unsigned char)j_va_arg ( args, int );

          if ( _put ( ch, out, nwrit ) == J_EOF ) {
            err = 1;
            goto process_state;
          }
        }

        break;
      case MOD_POINTER:
      case MOD_USHORT:
      case MOD_UINT:
      case MOD_ULONG:
        /* Unsigned conversions never show a '+' */
        spec.flags &= ~SHOW_SIGN;
        /* fallthrough */
      case MOD_SHORT:
      case MOD_INT:
      case MOD_LONG:
        {
          const char *prefix = "";
          long long value = 0;

          if ( spec.flags & ALT_FORMAT && spec.format == FMT_OCTAL )
            prefix = "0";
          else if ( spec.flags & ALT_FORMAT && spec.format == FMT_HEX )
            prefix = "0x";

          /*
            short and unsigned short arguments are promoted to int,
            so they are fetched as int and narrowed afterwards
          */
          switch ( spec.type ) {
          case MOD_SHORT:
            value = (short)j_va_arg ( args, int );
            break;
          case MOD_INT:
            value = j_va_arg ( args, int );
            break;
          case MOD_LONG:
            value = j_va_arg ( args, long );
            break;
          case MOD_USHORT:
            value = (unsigned short)j_va_arg ( args, unsigned int );
            break;
          case MOD_UINT:
            value = j_va_arg ( args, unsigned int );
            break;
          case MOD_POINTER:
            /*
              Fetch the argument as the pointer it is.
              NOTE(review): narrowing to unsigned int loses bits where
              pointers are wider than unsigned int - confirm the target.
            */
            value = (unsigned int)j_va_arg ( args, void * );
            break;
          case MOD_ULONG:
            value = j_va_arg ( args, unsigned long );
            break;
          default:
            break;
          }

          if ( !pad_buffer ( out,
            dtoa_nobuf ( value, spec.format, 0 ),
            prefix, &spec, &nwrit ) )
          {
            err = 1;
            goto process_state;
          }
        }

        break;
      case MOD_FLOAT:
      case MOD_DOUBLE:
      case MOD_LDOUBLE:
        {
          char buffer[JBUFSIZ];

          fp_cvt ( buffer, j_va_arg ( args, double ),
            spec.format, spec.precision, spec.flags & ALT_FORMAT );

          if ( !pad_buffer ( out, buffer, "", &spec, &nwrit ) ) {
            err = 1;
            goto process_state;
          }
        }

        break;
      case MOD_COUNT:
        /* %n: store the number of characters written so far */
        *j_va_arg ( args, int* ) = nwrit;
        break;
      case MOD_LITERAL:
        if ( _put ( '%', out, nwrit ) == J_EOF ) {
          err = 1;
          goto process_state;
        }

        break;
      default:
        /* Any other type is skipped without output */
        break;
      }
    }
    else {
      /* Ordinary character: copy it through unchanged */
      if ( _put ( *fmt, out, nwrit ) == J_EOF ) {
        err = 1;
        goto process_state;
      }

      ++fmt;
    }
  }

process_state:
  return err ? -1 : (int)nwrit;
}

I can only imagine how many bugs are hidden in that mess. The lesson to learn is that writing code without a plan results in ugly code, and adding to ugly code only makes it worse.

Cheers!

Dave Sinkula commented: I'll hold those 7 gotos against you some day. ;) +11
iamthwee commented: I'll hold those 472 variables prefixed with your Christian name some day. :P +12

Recommended Answers

All 5 Replies

Seriously! (Love your avatar!)

One of the major problems I find with people (particularly, newbie programmers) is that they somehow expect the computer to solve their problems.

Natural, because that is how they have used computers so far...

Coding is different. It is important to remember that computers aren't just stupid, they are about as smart as a rock.

When preparing to code anything, take the time to figure out, for yourself, exactly how the problem is solved. Then tell the computer how to solve the problem.

"Just hacking" never works...

Thanks for sharing the cruft. ;)

Alas.

For comparison, here's a more complete version that's been cleaned up quite a bit. There are still signs of missing design, but it's roughly what I had in mind when I started typing:

#include "format_string.h"
#include "jstdio.h"
#include "numeric.h"
#include "print_stream.h"
#include "xtype.h"

/*
  Values stored in error_state. _print_stream returns the negative
  codes to its caller in place of the character count.
  NOTE(review): identifiers that begin with an underscore followed by
  an uppercase letter are reserved for the implementation in C.
*/
#define _NO_ERR      0 /* No error has been flagged */
#define _ENCODE_ERR -1 /* Error parsing or converting a specifier */
#define _WRITE_ERR  -2 /* Error writing the encoded specifier */

/* File-scope state shared by the put_* helpers, pad_buffer and _print_stream */
static int write_count = 0; /* Total number of characters written */
static int error_state = 0; /* The most recently flagged error */

/*
  The next three utility functions are no-ops if
  an error was flagged. This is largely to avoid
  reams of error checking code in pad_buffer
*/
/*
  Write `len` copies of `pad` to `out`. Does nothing if an error is
  already flagged or `len` is not positive; flags _WRITE_ERR and
  stops on the first failed write.
*/
static void put_padding ( J_FILE_T *out, char pad, int len )
{
  int written;

  if ( error_state != _NO_ERR )
    return;

  for ( written = 0; written < len; written++ ) {
    if ( _put ( pad, out, write_count ) == J_EOF ) {
      error_state = _WRITE_ERR;
      return;
    }
  }
}

/*
  Write at most `len` characters of `buffer` to `out`, stopping early
  at the terminating '\0'. Does nothing if an error is already
  flagged; flags _WRITE_ERR and stops on the first failed write.
*/
static void put_buffer ( J_FILE_T *out, char *buffer, int len )
{
  int remaining;

  if ( error_state != _NO_ERR )
    return;

  /* The length is tested first, so a zero-length request never reads buffer */
  for ( remaining = len; remaining > 0 && *buffer != '\0'; --remaining ) {
    if ( _put ( *buffer, out, write_count ) == J_EOF ) {
      error_state = _WRITE_ERR;
      return;
    }

    ++buffer;
  }
}

/*
  If the text at *buffer starts with '-' or '+', write that character
  to `out` and advance *buffer past it. Does nothing if an error is
  already flagged. The pointer is advanced even when the write fails.
*/
static void put_sign ( J_FILE_T *out, char **buffer )
{
  char lead;

  if ( error_state != _NO_ERR )
    return;

  lead = **buffer;

  if ( lead != '-' && lead != '+' )
    return;

  if ( _put ( lead, out, write_count ) == J_EOF )
    error_state = _WRITE_ERR;

  *buffer += 1;
}

/*
  Given a partially converted argument, complete the conversion by
  adding the sign, prefix, precision zeros and field padding that
  the specifier asks for. The result is printed directly to the
  output stream.

  out    - stream the result is written to
  buffer - converted text; numeric text may start with '-' or '+'
  spec   - parsed specifier (type, format, flags, pad, field_width,
           precision, alt_case)

  Returns non-zero if no error is flagged in error_state afterwards.
*/
static int pad_buffer ( J_FILE_T *out,
  char *buffer, struct print_convspec *spec )
{
  int width = spec->field_width;

  /* Default our precision to the length of the string */
  int len = j_str_len ( buffer );

  if ( spec->type == MOD_STRING ) {
    /* Fit the length to a valid precision */
    if ( spec->precision < len && spec->precision != -1 )
      len = spec->precision;

    width -= len;

    /* String padding is *very* straightforward */
    if ( spec->flags & LEFT_JUSTIFY ) {
      put_buffer ( out, buffer, len );
      put_padding ( out, ' ', width );
    }
    else {
      put_padding ( out, ' ', width );
      put_buffer ( out, buffer, len );
    }
  }
  else {
    char *prefix = "";
    int prefix_len = 0;
    char sign[2] = { '\0', '\0' }; /* "-", "+", " " or empty */
    int sign_len;
    int zeros = 0;                 /* Leading zeros owed to the precision */
    int left = ( spec->flags & LEFT_JUSTIFY ) != 0;
    int is_float = spec->type >= MOD_FLOAT && spec->type <= MOD_LDOUBLE;
    char pad = spec->pad;

    /*
      Add a special alternate form prefix for
      octal and hexadecimal if specified
    */
    if ( spec->flags & ALT_FORMAT ) {
      if ( spec->format == FMT_OCTAL ) {
        prefix = "0";
        prefix_len = 1;
      }
      else if ( spec->format == FMT_HEX ) {
        prefix = spec->alt_case ? "0X" : "0x";
        prefix_len = 2;
      }
    }

    /*
      Take the sign off the front of the buffer so that len counts
      digits only and the sign can be placed ahead of any zeros.
      A '+' is only kept if the '+' flag asked for it.
    */
    if ( *buffer == '-' || *buffer == '+' ) {
      if ( *buffer == '-' || ( spec->flags & SHOW_SIGN ) )
        sign[0] = *buffer;

      ++buffer;
      --len;
    }
    else if ( spec->flags & SHOW_SIGN ) {
      /* The converter left the sign off a non-negative value */
      sign[0] = '+';
    }

    /* The ' ' flag fills an empty sign position; '+' and '-' override it */
    if ( sign[0] == '\0' && ( spec->flags & SHOW_SPACE ) )
      sign[0] = ' ';

    sign_len = sign[0] != '\0';

    /*
      For integers the precision is a minimum digit count. It is not
      applied to floating-point: the converter has already used the
      precision, and %g output can be shorter than the precision.
    */
    if ( !is_float ) {
      if ( spec->precision > len )
        zeros = spec->precision - len;

      /* Precision zeros already give octal its leading zero */
      if ( zeros > 0 && spec->format == FMT_OCTAL )
        prefix_len = 0;

      /* An explicit precision overrides zero padding of the field */
      if ( spec->precision >= 0 )
        pad = ' ';
    }

    /* Trailing zeros would change the value, so left justify pads with spaces */
    if ( left )
      pad = ' ';

    width -= sign_len + prefix_len + zeros + len;

    /* Space padding goes before the sign; zero padding goes after the prefix */
    if ( !left && pad == ' ' )
      put_padding ( out, ' ', width );

    put_buffer ( out, sign, sign_len );
    put_buffer ( out, prefix, prefix_len );

    if ( !left && pad != ' ' )
      put_padding ( out, pad, width );

    put_padding ( out, '0', zeros );
    put_buffer ( out, buffer, len );

    if ( left )
      put_padding ( out, ' ', width );
  }

  return error_state == _NO_ERR;
}

/*
  Format a double as text following printf conventions.

  The result lives in a function-local static buffer, so each call
  overwrites the previous result and the function is not re-entrant.

  val      - value to convert
  flag     - FMT_FLOAT uses fp_tostring; every other value uses
             fp_normalize, with FMT_NO_FORMAT standing for %g
  prec     - precision, or NOT_SPECIFIED to get the default of 6
  alt      - non-zero when the alternate form was requested
  alt_case - non-zero selects 'E' over 'e' as the exponent character
*/
static char *fp_cvt_nobuf ( double val,
  int flag, int prec, int alt, int alt_case )
{
  static char buffer[JBUFSIZ];

  /*
    Set only for %g. It is handed to the converters as their final
    argument, where it serves both as the "precision counts
    significant digits" switch and as the float-vs-scientific choice.
  */
  const int general = ( flag == FMT_NO_FORMAT );

  int keep_zeros = 1; /* Fill leftover precision with '0' */
  int keep_radix = 0; /* Show '.' even without precision */

  if ( prec == NOT_SPECIFIED )
    prec = 6;

  if ( !general ) {
    /* %e and %f share one alternate format rule */
    keep_radix = ( prec == 0 && alt ) ? 1 : 0;
  }
  else {
    /* %g: the alternate form keeps the radix, the plain form trims zeros */
    if ( alt )
      keep_radix = 1;
    else
      keep_zeros = 0;

    /* %g treats a precision of 0 as 1 */
    if ( prec == 0 )
      prec = 1;
  }

  if ( flag != FMT_FLOAT ) {
    /* %e and %g both go through the normalized scientific converter */
    fp_normalize ( buffer, val, prec, '.', alt_case ? 'E' : 'e',
      keep_zeros, keep_radix, general );
  }
  else {
    /* %f can always use the straight conversion */
    fp_tostring ( buffer, val, prec, '.',
      keep_zeros, keep_radix, general );
  }

  return buffer;
}

/*
  Internal worker for the printf family.

  Walks the format string, writing ordinary characters to `out` and
  encoding one argument from `args` per conversion specifier.

  Returns the number of characters written by this call, or a
  negative error code (_ENCODE_ERR or _WRITE_ERR) on failure.
*/
int _print_stream ( J_FILE_T *out, const char *fmt, j_va_list args )
{
  struct print_convspec spec = {0};

  /*
    The counters have static storage, so start every call from a
    clean slate. Otherwise the count accumulates across calls and
    one failure turns every later call into a no-op.
  */
  write_count = 0;
  error_state = _NO_ERR;

  while ( *fmt != '\0' ) {
    char *padbuf = J_NULL; /* Full string encoding (buffer used) */

    /*
      Single encoding (no buffer needed). Kept as an int holding an
      unsigned char value so that no real character can compare
      equal to the J_EOF "nothing to write" marker.
    */
    int single = J_EOF;

    /* Process an encoding specifier */
    if ( *fmt == '%' ) {
      j_size_t count = _get_print_specifier ( &spec, fmt );

      /* The specifier was invalid */
      if ( count == 0 ) {
        error_state = _ENCODE_ERR;
        break;
      }

      /* Jump past the encoding specifier now that it's saved */
      fmt += count;

      /* Load any argument-specified widths */
      if ( spec.field_width == ARG_SPECIFIED ) {
        spec.field_width = j_va_arg ( args, int );

        /* A negative width means "left justify" with the positive width */
        if ( spec.field_width < 0 ) {
          spec.flags |= LEFT_JUSTIFY;
          spec.field_width = -spec.field_width;
        }
      }

      if ( spec.precision == ARG_SPECIFIED ) {
        spec.precision = j_va_arg ( args, int );

        /* A negative argument is taken as no precision */
        if ( spec.precision < 0 )
          spec.precision = NOT_SPECIFIED;
      }

      /* Encode or process the argument */
      if ( spec.type >= MOD_SHORT && spec.type <= MOD_POINTER ) {
        /*
          Try to encode any of the integral types (except char)
        */
        long long value = 0; /* Temporary storage for argument type */

        /*
          Disable showing the sign on unsigned types. Ideally,
          we would error out of the specifier parser rather than
          allow the '+' flag for unsigned specifiers
        */
        if ( spec.type >= MOD_USHORT && spec.type <= MOD_POINTER )
          spec.flags &= ~SHOW_SIGN;

        /*
          Load the temporary value with the next provided argument.
          Types narrower than int arrive promoted, so they are
          fetched as int and narrowed afterwards.
        */
        switch ( spec.type ) {
        case MOD_SHORT:
          value = (short)j_va_arg ( args, int );
          break;
        case MOD_INT:
          value = j_va_arg ( args, int );
          break;
        case MOD_LONG:
          value = j_va_arg ( args, long );
          break;
        case MOD_USHORT:
          value = (unsigned short)j_va_arg ( args, unsigned int );
          break;
        case MOD_UINT:
          value = j_va_arg ( args, unsigned int );
          break;
        case MOD_POINTER:
          /*
            NOTE(review): this narrows the pointer to unsigned int,
            which loses bits where pointers are wider - confirm target
          */
          value = (unsigned int)j_va_arg ( args, void* );
          break;
        case MOD_ULONG:
          value = j_va_arg ( args, unsigned long );
          break;
        }

        /* Correct any formatting issues that depend on the value */
        if ( spec.flags & ALT_FORMAT && value == 0 ) {
          if ( spec.format == FMT_OCTAL
            || spec.format == FMT_HEX )
          {
            spec.flags &= ~ALT_FORMAT;
          }
        }

        /* Always show the sign and let the padding algorithm deal with it */
        padbuf = lltoa_nobuf ( value, spec.format, SHOW_SIGN, spec.alt_case );
      }
      else if ( spec.type >= MOD_FLOAT && spec.type <= MOD_LDOUBLE ) {
        /*
          Double and long double equate to the same type on
          this system; we don't need to do anything special
        */
        padbuf = fp_cvt_nobuf (
          j_va_arg ( args, double ), spec.format, spec.precision,
          spec.flags & ALT_FORMAT, spec.alt_case );
      }
      else if ( spec.type == MOD_STRING )
        padbuf = j_va_arg ( args, char* );
      else if ( spec.type == MOD_CHAR )
        single = (unsigned char)j_va_arg ( args, int );
      else if ( spec.type == MOD_LITERAL )
        single = '%';
      else if ( spec.type == MOD_COUNT )
        *j_va_arg ( args, int* ) = write_count;
      else {
        error_state = _ENCODE_ERR;
        break;
      }
    }
    else {
      /*
        No encoding specifier, just use the
        current format string character
      */
      single = (unsigned char)*fmt;
      ++fmt;
    }

    if ( padbuf != J_NULL ) {
      /* We encoded a full buffer, so it needs to be padded */
      if ( !pad_buffer ( out, padbuf, &spec ) )
        break;
    }
    else if ( single != J_EOF ) {
      /* We only encoded one character, so no padding needed */
      if ( _put ( single, out, write_count ) == J_EOF ) {
        error_state = _WRITE_ERR;
        break;
      }
    }
  }

  return error_state == _NO_ERR ? write_count : error_state;
}

p.s. If you want a challenge, try implementing some of the standard C library. printf in particular is a bear.

Yoinks!

p.s. If you want a challenge, try implementing some of the standard C library. printf in particular is a bear.

Eh, it's not that bad. Just one big loop and a lot of branches.

I actually implemented sprintf in Scheme. You wouldn't believe how many compilers get it wrong in spots. It was kind of fun, actually... ;)

>Eh, it's not that bad. Just one big loop and a lot of branches.
By comparison with the rest of stdio, it's very tedious and filled with subtle edge cases.

True. It is all the edge cases that various different compilers are likely to get wrong.

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.