Hello everybody.
I have to make "an-OCR-like" programm in C - it gets for example this to stdin:

***         ***   ***       
*   *     *     *     * *   *
*   *     *     *     * *   *
*   *     *     *     * *   *
             ***   ***   *** 
*   *     * *         *     *
*   *     * *         *     *
*   *     * *         *     *
 ***         ***   ***

and it must recognize the numbers. The complication is, that there can be also question marks ('?'):

***   ***  ?????  *** 
?   * ?????     * *   *
?   *  ????     * *   *
?   * ?????     * *   *
 ***   ***   ***  ??*??
*   *     *     * *   *
*   *     *     * *   *
*   *     *     * *   *
 ***   ***   ***   ***

If the number(s) can be uniquely determined, it prints them into stdout (in this case 8338), in the opposite case, it returns '?' instead of the number. If the number can't be identified, it returns X. The "display" has 9 rows, length is not specified. Numbers on the "display" are separated by a single column of gaps.
If the input is invalid (less than 9 rows, each row has different length, numbers aren't divided by a gap, input contains other characters than ' ', '*' and '?', it prints "Nespravny vstup" (it means "invalid input").

My code is:

/*
 * File:   main.c
 * Author: Honza
 *
 * Created on 31. prosinec 2009, 16:11
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_LEN 20000

/*
 *
 */

int getNum (char *rows[], int n) {
    char num[46];
    char mask[10][46] = {" *** *   **   **   *     *   **   **   * *** ", "         *    *    *         *    *    *     ", " ***     *    *    * *** *    *    *     *** ", " ***     *    *    * ***     *    *    * *** ", "     *   **   **   * ***     *    *    *     ", " *** *    *    *     ***     *    *    * *** ", " *** *    *    *     *** *   **   **   * *** ", " ***     *    *    *         *    *    *     ", " *** *   **   **   * *** *   **   **   * *** ", " *** *   **   **   * ***     *    *    * *** "};
    int i, j, c=0, match = 1, number = 11, recognized = 0;

    // Get the number

    for (i = 0; i < 9; i++) {
        for (j = 0; j < 5; j++, c++) {
            num[c] = rows[i][j+(n-1)*6];
        }
    }
    num[46] = '\0';

    // What number are you?

    for (i = 0; i < 10; i++) {
        for (j = 0; j < 45; j++) {
            if (num[j] != mask[i][j]) {
                if (num[j] != '?') {
                    match = 0;
                    break;
                }
            }
        }
        if (match == 1) {
            if (number == 11) {
                number = i;
                recognized = 1;
            }
            else {
                printf("?");
                number = 12;
                break;
            }
        }
        match = 1;
    }

    if (recognized == 0) printf("X");
    else if (number != 12) printf("%d", number);

    return(0);
}

int main(int argc, char** argv) {

    int i=1, j=0, n, len, length;
    char** rows;
    char str_buf[MAX_LEN];

    // Get the length of the first line and store it into memory

    fgets(str_buf, MAX_LEN, stdin);
    len = strlen(str_buf);
    rows = (char**)malloc(9 * sizeof(char*));

    rows[0] = (char*)malloc((len-1) * sizeof(char));
    strcpy(rows[0], str_buf);


    // Get all the remaining lines

    while (fgets(str_buf, MAX_LEN, stdin) != NULL && i<9) {
        rows[i] = (char*)malloc((len-1) * sizeof(char));
        strcpy(rows[i], str_buf);
        i++;
    }


    // Where there enough lines?

    if (i < 8) {
        printf("Nespravny vstup.\n");
        for (j = 0; j < i; j++) {
            free(rows[j]);
        }
        free(rows);
        return(0);
    }

    // If you find a line, which has a different length than others, quit

    for (i = 1; i < 8; i++) {
        length = strlen(rows[i]);
        if (length != len) {
            printf("Nespravny vstup.\n");
            for (j = 0; j <= 8; j++) {
                free(rows[j]);
            }
            free(rows);
            return(0);
        }
    }

    // Check for disallowed characters and missing spaces between numbers

    for (i = 0; i < 9; i++) {
        for (j = 0; j < len-3; j++) {
            if (rows[i][j]!=' ' && rows[i][j]!='*' && rows[i][j]!='?') {
                printf("Nespravny vstup.\n");
                for (n=0; n<9; n++) {
                    free(rows[n]);
                }
                free(rows);
                return(0);
            }

            if ((j+1)%6==0 && rows[i][j]!=' ' && (j+1)<(len-3)) {
                printf("Nespravny vstup.\n");
                for (i = 0; i < 9; i++) {
                    free(rows[i]);
                }
                free(rows);
                return(0);
            }
        }
    }

    // Get the numbers

    for (i = 1; i<= len/6; i++) {
        getNum(rows,i);
    }

    printf("\n");

    // Clean up

    for (i = 0; i < 9; i++) {
        free(rows[i]);
    }
    free(rows);

    return (EXIT_SUCCESS);
}

The sample data are (first 9 rows are input, last line is a required output) following (11 in total):

***         ***   ***       
*   *     *     *     * *   *
*   *     *     *     * *   *
*   *     *     *     * *   *
             ***   ***   *** 
*   *     * *         *     *
*   *     * *         *     *
*   *     * *         *     *
 ***         ***   ***       
01234
***   ***   ***   ***   *** 
*   * *   *     * *     *    
*   * *   *     * *     *    
*   * *   *     * *     *    
 ***   ***         ***   *** 
    * *   *     * *   *     *
    * *   *     * *   *     *
    * *   *     * *   *     *
 ***   ***         ***   *** 
98765
***   ***   ***   *** 
*   *     *     * *   *
*   *     *     * *   *
*   *     *     * *   *
 ***   ***   ***   *** 
*   *     *     * *   *
*   *     *     * *   *
*   *     *     * *   *
 ***   ***   ***   *** 
8338
***   ***  ?????  *** 
?   * ?????     * *   *
?   *  ????     * *   *
?   * ?????     * *   *
 ***   ***   ***  ??*??
*   *     *     * *   *
*   *     *     * *   *
*   *     *     * *   *
 ***   ***   ***   *** 
8338
***   ***   ***   *** 
?????     * ?   * *   *
?????     * ?   * *   *
?????     * ?   * *   *
 ***   ???   ***   *** 
*   *     * ?   * ?   *
*   *     * ?   * ?   *
*   *     * ?   * ?   *
 ***   ???   ***   *** 
????
***   ***   ***   *** 
*   *     * *     *   *
*   *     * ?         *
*   *     * ?     *   *
 ***   ***   ***   *** 
*   ? *     ?   *     *
*     * *   ?   *     *
*   ? *     ?   * ?   *
 ***   ***   ***   *** 
XX?X
***   ***   ***  *** 
*   *     *     **   *
*   *     *     **   *
*   *     *     **   *
 ***   ***   ***  *** 
*   *     *     **   *
*   *     *     **   *
*   *     *     **   *
 ***   ***   ***  *** 
Nespravny vstup.
***   ***   ***   *** 
*   *     *     * *   *
*   *     *     * *   *
*   *     *     * *   *
 ***   ***  ***   *** 
*   *     *     * *   *
*   *     *     * *   *
*   *     *     * *   *
 ***   ***   ***   *** 
Nespravny vstup.
***   ***   ***   *** 
*   *     *     *x*   *
*   *     *     * *   *
*   *     *     * *   *
 ***   ***   ***   *** 
*   *     *     * *   *
*   *     *     * *   *
*   *     *     * *   *
 ***   ***   ***   *** 
Nespravny vstup.
***   ***   ***   *** 
*   *     *     x *   *
*   *     *     x *   *
*   *     *     x *   *
 ***   ***   ***   *** 
*   *     *     * *   *
*   *     *     * *   *
*   *     *     * *   *
 ***   ***   ***   *** 
Nespravny vstup.
*** 
*   *
Nespravny vstup.

My programm works for 7 of 11 (I don't know which ones). In all cases it returns required output, so I guess I forgot to deallocate some memory or something.

I'm really desperate and I would very appreciate your help.

Edited 6 Years Ago by Hofik: n/a

I'm not sure I understand what your problem is. After fixing the immediate bugs, the program works for me. All of the examples you gave produced what I understand to be the correct output. Following are the bugs that caused a crash when I tried to run your code as posted (I suspect that there may be fencepost errors that affect your logic as well, but that's just from eyeballing the code):

>num[46] = '\0';
num is defined as an array of size 46, which means the last valid index that you can access is 45.

>rows[0] = (char*)malloc((len-1) * sizeof(char));
strlen gives you the number of characters in a string, excluding the null character. Thus you need to allocate at least len+1 for the following strcpy not to overflow the buffer.

>rows = (char*)malloc((len-1) * sizeof(char));
Exact same problem here. You can also remove sizeof(char) because it's guaranteed to be 1 in all cases, and the case is unnecessary in C.

Thank you for a quick reply.
I haven't noticed these bugs, thanks for that. However, my problem is different. The programm works for me for all 11 examples I posted above, but when post it to our server, which compiles the programm and tryes it for those inputs, the result is only 63,64% (which means it worked only for 7 examples out of 11). This validator (or how should I call it...) also checks, if my programm deallocated all the memory etc. And there is, i think, the problem of my programm.

Well, a more specific description of which tests failed would be helpful. Do you only get a percentage from the server?

Yes, I only get the percentage.
In my opinion, it works for correct input (first 6 examples) and the bug is somewhere, where the programm gets something else than a gap, * or question mark.

Another obvious bug is at line 101 (length of a last line is not tested).

Also, can you explain the logic of line 126, particularly a len-3 part? I strongly suspect your problem is there.

Edited 6 Years Ago by Nick Evan: Fixed icode tags

Another obvious bug is at line 101 (length of a last line is not tested).

Also, can you explain the logic of line 126, particularly a len-3 part? I strongly suspect your problem is there.

Well, I didn't test the last line, because when I did, it always tested it as an incorrect input... Any solution is welcome :)
About the second problem: when I tested the whole array (so there was len instead of len-3), it also tested it all as incorrect (when I used Windows' ends of line in the text documents). I don't think it's a bug, it just doesn't test last two columns.

when I used Windows' ends of line in the text documents

That's exactly what I had in mind. Your environment may (and most likely does) differ from the server's. Treat end-of-lines correctly. Find out what's wrong with the last line testing (hint: restore it and step through the code with the debugger). I am sure the problems will go away.

That's exactly what I had in mind. Your environment may (and most likely does) differ from the server's. Treat end-of-lines correctly. Find out what's wrong with the last line testing (hint: restore it and step through the code with the debugger). I am sure the problems will go away.

Well, I still don't think it's the main problem...
End of line in windows is represented by two characters: \r and \n , in Solaris (where it's probably tested), it's \n . I saved all 11 examples as .TXT documents and I replaced all \r\n with \n . If it helps, I can upload them.

And if the problem was in this, it would test all the data incorrectly, wouldn't it?

Edited 6 Years Ago by Hofik: n/a

So those 63% were caused by the bugs found by Narue, thank you very much.

Now the last thing I need is to make the programm work for long input (now each line can be up to 10000 characters long) using realloc. Unfortunatelly, I've never worked with realloc before. Could you help me, please?

This article has been dead for over six months. Start a new discussion instead.