Dynamic string input with memory pooling

Narue 3 Tallied Votes 372 Views Share

In answer to a coworker's question today about memory management today, I wrote the following code. It's an implementation of the gets function that accepts an infinite length string (bounded only by the heap) and does not force the caller to free the resulting pointer.

I figure that it's a useful enough example of memory management and library design to offer as a Daniweb snippet.

I make no claims as to the completeness or correctness of the code, seeing as how it was written in the span of perhaps an hour. Though don't let that stop you from replying if you find any bugs or have comments.

jephthah commented: thanks for a great example of a meaningful way to use "Code Snippets" . +8
kvprajapati commented: Forever. +9
/*
    memory_pool.h
*/
#ifndef MEMORY_POOL
#define MEMORY_POOL

#include <stddef.h>

#define MEM_POOL_STATIC_NODES 8
#define MEM_POOL_NODE_SIZE 16

typedef void (*mem_pool_onexit)(void);
typedef void (*mem_pool_destructor)(void*);
typedef struct mem_pool_node mem_pool_node;
typedef struct mem_pool mem_pool;

struct mem_pool_node
{
    void *mem[MEM_POOL_NODE_SIZE];
    int is_dynamic;
    size_t used;
    mem_pool_node *next;
};

struct mem_pool
{
    mem_pool_node nodes[MEM_POOL_STATIC_NODES];
    mem_pool_onexit action;
    mem_pool_node *head;
    size_t current_static;
    int onexit;
};

mem_pool *mem_pool_create(mem_pool_onexit action);
int mem_pool_add(mem_pool *pool, void *p);
void mem_pool_release(mem_pool *pool, mem_pool_destructor destructor);

#endif


/*
    memory_pool.c
*/
#include <stdlib.h>
#include "memory_pool.h"

void mem_pool_release(mem_pool *pool, mem_pool_destructor destructor)
{
    mem_pool_node *current = pool->head;

    while (current != NULL)
    {
        mem_pool_node *next = current->next;
        size_t i;

        /* Destroy all pooled pointers for this node */
        for (i = 0; i < current->used; i++)
        {
            destructor(current->mem[i]);
        }

        if (current->is_dynamic)
        {
            /* Destroy the node if it was dynamically allocated */
            free(current);
        }
        else
        {
            /* Reset the node if it was statically allocated */
            current->used = 0;
            current->next = NULL;
        }

        current = next;
    }

    /* Restore the pool to an empty state */
    pool->head = &pool->nodes[0];
    pool->current_static = 0;
}

mem_pool *mem_pool_create(mem_pool_onexit action)
{
    mem_pool *pool = malloc(sizeof *pool);
    size_t i;

    if (pool != NULL)
    {
        /* Initialize the static nodes to an empty state */
        for (i = 0; i < MEM_POOL_STATIC_NODES; i++)
        {
            pool->nodes[i].is_dynamic = 0;
            pool->nodes[i].used = 0;
            pool->nodes[i].next = NULL;
        }

        /* Initialize the pool to an empty state */
        pool->current_static = 0;
        pool->action = action;
        pool->head = &pool->nodes[0];

        /* Attempt to register the release action */
        if (atexit(action) == 0)
        {
            pool->onexit = 1;
        }
    }

    return pool;
}

int mem_pool_add(mem_pool *pool, void *p)
{
    mem_pool_node *current = pool->head;

    if (current->used >= MEM_POOL_NODE_SIZE)
    {
        /* Prepend a new head to the pool list */
        if (++pool->current_static < MEM_POOL_STATIC_NODES)
        {
            /* Use a static node since we have one available */
            current = &pool->nodes[pool->current_static];
            current->next = pool->head;
            pool->head = current;
        }
        else
        {
            /* Resort to dynamic memory once static nodes are exhausted */
            current = malloc(sizeof *current);
            
            if (current == NULL)
            {
                return 0;
            }

            current->is_dynamic = 1;
            current->used = 0;
            current->next = pool->head;
            pool->head = current;
        }
    }

    current->mem[current->used++] = p;

    return 1;
}


/*
    io_gets.h
*/
#ifndef IO_GETS
#define IO_GETS

#include <stdio.h>

#define IO_GETS_ALLOC  0x1DE27
#define IO_GETS_NOPOOL 0x1B24A

extern size_t io_gets_threshold;

char *io_gets(FILE *in);

#endif


/*
    io_gets.c
*/
#include <stdio.h>
#include <stdlib.h>
#include "io_gets.h"
#include "memory_pool.h"

size_t io_gets_threshold = 16;

static mem_pool *pool;

static void release(void)
{
    mem_pool_release(pool, free);
}

char *io_gets(FILE *in)
{
    char *mem = NULL; /* Input string returned to caller */
    size_t cap = 0;   /* Current capacity of mem */
    size_t n = 0;     /* Current size of mem */
    int ch;

    while ((ch = getc(in)) != EOF)
    {
        if (n % io_gets_threshold == 0)
        {
            /* Extend the input string */
            char *save = NULL;
            
            cap += io_gets_threshold;
            save = realloc(mem, cap + 1);

            if (save == NULL)
            {
                ungetc(ch, in);
                errno = IO_GETS_ALLOC;
                break;
            }

            mem = save;
        }

        if (ch == '\n')
        {
            /* Storing the newline is unnecessary */
            break;
        }

        mem[n++] = (char)ch;
    }

    if (mem != NULL)
    {
        mem[n] = '\0';

        /* Add to the pool */
        if (pool == NULL)
        {
            pool = mem_pool_create(release);
        }

        if (!mem_pool_add(pool, mem))
        {
            errno = IO_GETS_NOPOOL;
        }
    }

    return mem;
}


/*
    main.c
*/
#include <stdio.h>
#include "io_gets.h"

int main()
{
    // Replace with an fopen call if desired
    FILE *in = stdin;

    if (in != NULL)
    {
        char *p;

        while ((p = io_gets(in)) != NULL)
        {
            printf(">%s<\n", p);
        }

        if (in != stdin)
        {
            fclose(in);
        }
    }

    return 0;
}
nezachem 616 Practically a Posting Shark

Just a few (very random) comments.

1. io_gets.c calls for #include <errno.h> . I realize it gets included through one of the headers anyway, but let's be explicit. BTW, why bother with errno at all? In this particular case it doesn't add anything which would help in recovery.
Besides, I firmly (and humbly) believe that a library function may resort to errno only if it can't possibly flag an error in any other way.

2. > does not force the caller to free the resulting pointer

I'd say, it forces the caller not to free it. I don't think it is right. The library should at least provide a way to reclaim unneeded strings. That said, I seriously doubt the value of deallocation at exit time.

3. > Storing the newline is unnecessary
Again, can't agree with this. It destroys a very valuable piece of information, namely, did io_gets return due to a newline, or due to an allocation failure. The way the code handles it (via errno) forces the caller to check errno after every call to io_gets. I don't think it is right.

Narue 5,707 Bad Cop Team Colleague

Thanks for the comments. Good observations.

io_gets.c calls for #include <errno.h>.

Indeed. It's embarrassing to be anal about including all used headers explicitly and then get caught forgetting one in my own code.

BTW, why bother with errno at all?

I don't disagree. I did consider the various options for error handling, and ultimately decided to keep it simple. In this case, simple meant "write it without error handling, then splice in an errno solution".

I firmly (and humbly) believe that a library function may resort to errno only if it can't possibly flag an error in any other way.

I'm not sure I agree with that. There are few (if any) circumstances where errno is the only option. However, for rare errors other options can be too instrusive. For example, an output parameter for out of range conversions in strtol:

result = strtol(s, &end, 0, &out_of_range);

Is that extra parameter really necessary? Maybe, maybe not. But over the years I don't once recall anyone complaining about the design flaw of setting errno to ERANGE. ;)

I'd say, it forces the caller not to free it.

I suppose it depends on your perspective. If not having to call free feels like a straight jacket then we can word it in a bad way. If not having to call free feels liberating then we can word it in a good way. What's your take on garbage collection? ;)

I don't think it is right.

Right for what? I get the impression that you have unreasonable expectations here. :icon_rolleyes:

The library should at least provide a way to reclaim unneeded strings.

I left that part out because I felt it didn't add anything to the example.

The way the code handles it (via errno)

Keeping in mind that the code does handle that case, whether you agree with the method or not, why add redundancy?

forces the caller to check errno after every call to io_gets. I don't think it is right.

Compared to forcing the caller to check for a newline after every call? Are we to quibble over equivalent implementation alternatives?

jephthah 1,888 Posting Maven

can you explain where these error codes come from?

#define IO_GETS_ALLOC 0x1DE27
#define IO_GETS_NOPOOL 0x1B24A
Narue 5,707 Bad Cop Team Colleague

can you explain where these error codes come from?

#define IO_GETS_ALLOC 0x1DE27
#define IO_GETS_NOPOOL 0x1B24A

My daughter's birthday and my birthday in hexadecimal, respectively. The former is actually a typo. It should be 0x1DE26.

jephthah 1,888 Posting Maven

ah, okay. i'm glad i didnt spend any time searching the standard libraries. :-)

jephthah 1,888 Posting Maven

oh, scorpio. that's hot.

nezachem 616 Practically a Posting Shark

> I suppose it depends on your perspective. If not having to call free feels like a straight jacket then we can word it in a bad way. If not having to call free feels liberating then we can word it in a good way. What's your take on garbage collection?

The announced goal of the io_gets is to handle very long strings. Therefore it feels that the library must be prepared to the out-of-memory condition. Now, what can be done in that situation? The answer is, absolutely nothing. Me the user can't call free, or face some unpleasantness at the atexit time. So, even though the pointers to allocated memory aren't technically lost, the memory itself is unusable. It becomes a well-managed memory leak.

Which brings another point to consider. Correct me if I am wrong, but the memory will be deallocated at the program exit time anyway. The memory pool library -as written - just takes over the system job, nothing more. The pooling would make sense only if it lets the user manage itself.

That said, you already disclaimed that the library is not complete.

kvprajapati commented: Good discussion and a point. +9
Narue 5,707 Bad Cop Team Colleague

The answer is, absolutely nothing.

That's usually the answer to out of memory conditions. Most of the time all you can do is save state and terminate gracefully. The pointer is still usable, it just contains a potentially partial line. If recovery is possibly by freeing memory elsewhere, nothing stops you from calling io_gets again and pasting the two strings together manually (like you would when fgets reads long lines). I don't see this as an unreasonable situation unless out of memory conditions are common, and in that case you probably shouldn't be using something like io_gets in the first place. ;)

I couldn't agree more that some extra management functionality for the pool is warranted if it's to be truly usable in real code. For basic usage in the example, it's not a big deal. I appreciate you bringing it up though. Hopefully now I won't get bug reports that production code is failing because a pointer couldn't be removed from the pool. :D

Correct me if I am wrong, but the memory will be deallocated at the program exit time anyway.

You're not wrong, but you're not entirely right either. While it's common, this behavior is not guaranteed. Strictly portable code will release memory explicitly.

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.