Hi. The following calculator will return wrong values for operations which are more complicated than <INTEGER><OPERATION><INTEGER>. I suppose I'm making a mistake in parser.y, but I can't figure out what.

I know there are mistakes, memory leaks and other technical issues I'm doing wrong, but please concentrate on my biggest issue.

The code

//--------------------------------------------------------- ./handcrafted_parser.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "scanner.h"
 
int main(int argc, char **argv) {
	int r;
	scanner_state *state;
	scanner_token *token;
	if(argc>1) {
		state = malloc(sizeof(scanner_state));
		token = malloc(sizeof(scanner_token));
		if(NULL == state  || NULL == token) {
			return EXIT_FAILURE;
		}
		state->start = argv[1];
		//it's here, but not used:
		state->end = state->start;
		while(0 <= (r = scan(state, token))) {
			switch(token->opcode) {
				case T_INTEGER:
					printf("\tscanner: %d\n",token->data.n);
					break;
				case T_OP_ADD:
				case T_OP_SUB:
				case T_OP_MUL:
				case T_OP_DIV:
				case T_OP_ROUND_BRACKET_OPEN:
				case T_OP_ROUND_BRACKET_CLOSE:
					printf("\tscanner: %c\n",token->opcode);
					break;
				default:
					printf("\tscanner: unknown opcode\n");
					break;
			}
			//it's here, but not used:
			state->end = state->start;
		}
		printf("\nend of scanning with code: %d\n",r);
		free(state);
		free(token);
		return SCANNER_RETCODE_EOF==r ? EXIT_SUCCESS:EXIT_FAILURE;
	}
	else {
		return EXIT_FAILURE;
	}
	return EXIT_SUCCESS;
}
 
 
 
 
//--------------------------------------------------------- ./lemon_parser.h
#ifndef _LEMON_PARSER_H
# define _LEMON_PARSER_H
# include "scanner.h"
 
scanner_token parse_math_expr(char* s);
 
#endif
 
 
 
//--------------------------------------------------------- ./lemon_parser.c
/*
 * this is the "main part" of the parser
 * gets concatenated to parser.c, which is generated by lemon parser.y
 */
 
/*
 * include types, we do it here for the sake of obviousness,
 * these headers are actually included by parser.c, which is
 * generated by parser.y
 */
#include "scanner.h"
#include "parser.h"
#include "lemon_parser.h"
 
scanner_token parse_math_expr(char* s) {
	scanner_token *token;
	scanner_state *state;
	int stat;
	//TODO: fix memory leaks
	void* pParser = ParseAlloc(malloc);
	//do similar things to handcrafted_parser.c
	state = malloc(sizeof(scanner_state));
	token = malloc(sizeof(scanner_token));
	if(NULL == state  || NULL == token) {
		return *token;
	}
	state->start = s;
	while(0 <= (stat = scan(state,token))) {
		switch(token->opcode) {
			case TOKEN_INTEGER:
				printf("\tscanner says: %d\n",token->data.n);
				Parse(pParser,OP_INTEGER,token);
				break;
			case TOKEN_ADD:
				printf("\tscanner: %c\n",token->opcode);
				Parse(pParser,OP_ADD,token);
				break;
			case TOKEN_SUB:
				printf("\tscanner: %c\n",token->opcode);
				Parse(pParser,OP_SUB,token);
				break;
			case TOKEN_MUL:
				printf("\tscanner: %c\n",token->opcode);
				Parse(pParser,OP_MUL,token);
				break;
			case TOKEN_DIV:
				printf("\tscanner: %c\n",token->opcode);
				Parse(pParser,OP_DIV,token);
				break;
				/*
			case OP_ROUND_BRACKET_OPEN:
				printf("\tscanner: %c\n",token->opcode);
				break;
			case OP_ROUND_BRACKET_CLOSE:
				printf("\tscanner: %c\n",token->opcode);
				break;
				*/
			default:
				printf("\tscanner: unknown opcode: %c\n",token->opcode);
				break;
		}
		//it's here, but not used:
		state->end = state->start;
	}
	Parse(pParser,0,0);
	return *token;
}
 
 
 
 
//--------------------------------------------------------- ./parser.y
%include {
	#include <stdio.h>
	#include <stdlib.h>
	#include "scanner.h"
	#include "parser.h"
	#include "lemon_parser.h"
}
%token_type {scanner_token*}
%default_type {scanner_token*}
 
%type expr {scanner_token*}
%type OP_INTEGER {scanner_token*}
 
 
 
%left OP_ADD OP_SUB.
%left OP_MUL OP_DIV.
 
 
%syntax_error {
	printf("syntax error\n");
}
 
in ::= expr(A). {
   printf("in expr(A):\n");
}
 
expr(A) ::= expr(B) OP_ADD expr(C). {
	A->data.n = B->data.n + C->data.n;
}
 
expr(A) ::= expr(B) OP_SUB expr(C). {
	A->data.n = B->data.n - C->data.n;
}
 
expr(A) ::= expr(B) OP_MUL expr(C). {
	A->data.n = B->data.n * C->data.n;
}
 
expr(A) ::= expr(B) OP_DIV expr(C). {
	/* TODO: fix division by 0 */
	A->data.n = B->data.n / C->data.n;
}
expr(A) ::= OP_INTEGER(B). { A->data.n = B->data.n; }
 
 
 
 
 
//--------------------------------------------------------- ./Makefile
CC = gcc
RE2C = re2c
CFLAGS = -Wall -ggdb -c
RE2C_FLAGS =
 
all: calc handcrafted_parser parser
 
calc: scanner.o main.o parser.o
	$(CC) -Wall -o $@ $?
 
scanner.o: scanner.c
	$(CC) $(CFLAGS) $?
scanner.c: scanner.re
	$(RE2C) $(RE2C_FLAGS) -o $@ $?
	$(RE2C) -D -o scanner.graphviz $?
main.o: main.c
	$(CC) $(CFLAGS) $?
handcrafted_parser: handcrafted_parser.o
	$(CC) -Wall -o $@ $?
handcrafted_parser.o: handcrafted_parser.c
	$(CC) $(CFLAGS) $?
parser.o: parser.c
	cat lemon_parser.c >> parser.c
	$(CC) $(CFLAGS) $?
 
parser.c: lemon
	./lemon parser.y
 
lemon: lemon.o
	$(CC) -Wall -o $@ $?
lemon.o:
	$(CC) -o $@ $(CFLAGS) lemon.c $<
 
clean:
	rm -f *.o calc handcrafted_parser parser.{c,h,out}
distclean: clean
	rm -f scanner.c scanner.graphviz lemon
 
 
 
//--------------------------------------------------------- ./main.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//defines the parse() function
#include "lemon_parser.h"
 
int main(int argc, char **argv) {
	scanner_token result;
	result = parse_math_expr(argv[1]);
	printf("calc(%s) = %d\n",argv[1],result.data.n);
	return EXIT_SUCCESS;
}
 
 
 
 
//--------------------------------------------------------- ./scanner.h
#ifndef _SCANNER_H
# define _SCANNER_H
 
/*
 * opcodes:
 */
#define SCANNER_RETCODE_EOF -1
#define SCANNER_RETCODE_ERR -2
#define SCANNER_RETCODE_IMPOSSIBLE -3
 
#define TOKEN_INTEGER 257
#define TOKEN_ADD '+'
#define TOKEN_SUB '-'
#define TOKEN_MUL '*'
#define TOKEN_DIV '/'
#define TOKEN_ROUND_BRACKET_OPEN '('
#define TOKEN_ROUND_BRACKET_CLOSE ')'
 
typedef struct _scanner_state {
	char* start;
	char* end;
} scanner_state;
 
typedef struct _scanner_token {
	int opcode;
	union {
		int n;
		char* str;
	} data;
} scanner_token;
 
int scan(scanner_state *state, scanner_token *token);
//TODO: some functions to manipulate scanner states
#endif
 
 
 
 
//--------------------------------------------------------- ./scanner.re
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define _IN_SCANNER
#include "scanner.h"
 
#if 0
#define DEBUG(stmt) stmt
#else
#define DEBUG(stmt)
#endif
 
int scan(scanner_state *s, scanner_token *token) {
//	char *cursor = s->start;
	int r=SCANNER_RETCODE_IMPOSSIBLE;
	char *q=s->start;//keep initial start
#define YYCTYPE		char
#define YYCURSOR	(s->start)
#define YYLIMIT		(s->end)
 
	while(SCANNER_RETCODE_IMPOSSIBLE == r) {
/*!re2c
 	re2c:indent:top = 2;
	re2c:yyfill:enable = 0;
 
	INTEGER		= [0-9]+;
 
 
	INTEGER		{
				char *num;
				int n;
				num = strndup(q,YYCURSOR - q);
				n = atoi(num);
				DEBUG(printf("scanner num: '%s', YYCURSOR: '%s' YYLIMIT: '%s' q: '%s'\n",num,YYCURSOR,YYLIMIT,q));
				free(num);
				q = YYCURSOR;
			//	DEBUG(printf("integer, cursor: %s, number len: %ld, number: %d\n",YYCURSOR,YYCURSOR-s->start,n));
				token->data.n = n;
				token->opcode = TOKEN_INTEGER;
				return 0;
			}
	"+"		{
				token->opcode = TOKEN_ADD;
				return 0;
			}
	"-"		{
				token->opcode = TOKEN_SUB;
				return 0;
			}
	"*"		{
				token->opcode = TOKEN_MUL;
				return 0;
			}
	"/"		{
				token->opcode = TOKEN_DIV;
				return 0;
			}
	"("		{
				token->opcode = TOKEN_ROUND_BRACKET_OPEN;
				return 0;
			}
	")"		{
				token->opcode = TOKEN_ROUND_BRACKET_CLOSE;
				return 0;
			}
 
	"\000"		{ r = SCANNER_RETCODE_EOF; break; }
	[^]		{ r = SCANNER_RETCODE_ERR; break; }
*/
	}
	return r;
}

Hi,
this code is tooooo big to analyse... tell us whats the issue you are facing(be specific) so that we can help you...

I'm almost done with it, the previous errors are fixed now. The only issue I still have is how to get the last token from the last reduction
("in" in parser.y, but in the current code it looks totally different). The problem is it returns the very last number, ie in "1+2+3" Parse(pParser,0,token) returns the token that contains the value 3.

How to get the result in a thread-safe/reentrand way, just as "lemon" recommends me to do it?

Good Job!!!... Please let us know if it was a technical error or logical... if technical, give us a brief note about it...

This article has been dead for over six months. Start a new discussion instead.