0

This is what I came up with for reading a csv file with multiple types. It seems to get the job done in all cases but 1,2,,"a". Where there is a blank space. Can I please have some ideas on how to fix this?

const char* getfield(char* line, int num)
{
const char* tok;
for (tok = strtok(line, ",");
        tok && *tok;
        tok = strtok(NULL, ",\n"))
{
    if (!--num)
        return tok;
}
return NULL;
}

while(fgets(line, 80, inputfp1) != NULL)
{
    printf(" line is %s \n", line);

    char* tmp1 = strdup(line);
    char* tmp2 = strdup(line);
    char* tmp3 = strdup(line);
    char* tmp4 = strdup(line);
    printf("Field 1 would be %s\n", getfield(tmp1, 1));
    printf("Field 2 would be %s\n", getfield(tmp2, 2));
    printf("Field 3 would be %s\n", getfield(tmp3, 3));
    printf("Field 4 would be %s\n", getfield(tmp4, 4));
    // NOTE strtok clobbers tmp
    free(tmp1);
    free(tmp2);
    free(tmp3);
    free(tmp4);

    //sscanf(line, "%d, %d, %d, %d", &column1[i], &column2[i], &column3[i], &column4[i]);
    //printf(" column1[i] is %d column2[i] is %d column3[i] is %d column4[i] is %d \n", column1[i], column2[i], column3[i], column4[i]);
    i++;
    memset(line, 0, 80);
}
2
Contributors
2
Replies
18
Views
3 Weeks
Discussion Span
Last Post by COKEDUDE
0

Yes, use a library. Writing robust CSV parsers is very difficult because of the difficulty of nicely dealing with "bad" data.

0

Can you tell me what I did wrong here then?

https://ideone.com/RylYp1

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Given a string which might contain unescaped newlines, split it up into
 * lines which do not contain unescaped newlines, returned as a
 * NULL-terminated array of malloc'd strings.
 */
char **split_on_unescaped_newlines(const char *txt) {
    const char *ptr, *lineStart;
    char **buf, **bptr;
    int fQuote, nLines;

    /* First pass: count how many lines we will need */
    for ( nLines = 1, ptr = txt, fQuote = 0; *ptr; ptr++ ) {
        if ( fQuote ) {
            if ( *ptr == '\"' ) {
                if ( ptr[1] == '\"' ) {
                    ptr++;
                    continue;
                }
                fQuote = 0;
            }
        } else if ( *ptr == '\"' ) {
            fQuote = 1;
        } else if ( *ptr == '\n' ) {
            nLines++;
        }
    }

    buf = malloc( sizeof(char*) * (nLines+1) );

    if ( !buf ) {
        return NULL;
    }

    /* Second pass: populate results */
    lineStart = txt;
    for ( bptr = buf, ptr = txt, fQuote = 0; ; ptr++ ) {
        if ( fQuote ) {
            if ( *ptr == '\"' ) {
                if ( ptr[1] == '\"' ) {
                    ptr++;
                    continue;
                }
                fQuote = 0;
                continue;
            } else if ( *ptr ) {
                continue;
            }
        }

        if ( *ptr == '\"' ) {
            fQuote = 1;
        } else if ( *ptr == '\n' || !*ptr ) {
            size_t len = ptr - lineStart;

            if ( len == 0 ) {
                *bptr = NULL;
                return buf;
            }

            *bptr = malloc( len + 1 );

            if ( !*bptr ) {
                for ( bptr--; bptr >= buf; bptr-- ) {
                    free( *bptr );
                }
                free( buf );
                return NULL;
            }

            memcpy( *bptr, lineStart, len );
            (*bptr)[len] = '\0';

            if ( *ptr ) {
                lineStart = ptr + 1;
                bptr++;
            } else {
                bptr[1] = NULL;
                return buf;
            }
        }
    }
}

int main(void) {
    // your code goes here
    char line[80] = "52,243,,542";

    split_on_unescaped_newlines(line);
    printf("line  %s\n", line[0]);
    printf("line  %s\n", line[1]);

    return 0;
}
Have something to contribute to this discussion? Please be thoughtful, detailed and courteous, and be sure to adhere to our posting rules.