DaniWeb IT Discussion Community

DaniWeb IT Discussion Community (http://www.daniweb.com/forums/index.php)
-   C++ (http://www.daniweb.com/forums/forum8.html)
-   -   Code Snippet: A simple interpreter called 'minibas' (http://www.daniweb.com/forums/thread217410.html)

tux4life Jul 3rd, 2009 12:42 pm
A simple interpreter called 'minibas'
 
I didn't know what to do and I thought: why not try writing an interpreter, I've never done this before and it seemed quite challenging, so I started writing code....

The code which you can find below is only a simple base upon which you can start implementing your own simple programming language.
(Note: the code doesn't feature an expression parser or variables, so you'll have to implement those yourself.)

As this is the first time I've done a programming challenge like this one, there are of course likely to be several possible improvements to my code; I'd be glad if you could point out where it can be improved.

For simplicity (and testing) I've only implemented two functions :P: PRINT and END (they're both written in uppercase, and the interpreter will report a syntax error if you don't write them like that)

Description of the two built-in functions:
  • PRINT: Prints some text (or a number) to the screen. Only one token at a time is allowed; supported tokens are STRING and NUMBER, where STRING is anything you can type on your keyboard (as long as it's in between two quotes) and NUMBER can be any valid integer (floating point numbers aren't supported). The output is always ended with a newline character.
  • END: This function signals the end of the program and shuts down the interpreter.

  1. /*****************
  2. @file: minibas.cpp
  3. *****************/
  4.  
  #include <cctype>
  #include <cstdio>
  #include <cstring>
  #include <iostream>
  #include <string>
  #include "minibas.h"
  using namespace std;
  10.  
  11. /* All global variables needed for the interpreter */
  12. tok_t tok_type;
  13. const int MAX_INP = 256, MAX_TOK = 80;
  14.  
  15. char input[MAX_INP] = {0};
  16. char token[MAX_TOK] = {0};
  17. bool new_inp;
  18.  
  19. /* MINIBAS Source Code */
  20.  
  21. int main()
  22. {
  23. for(;;)
  24. {
  25. flush_input();
  26.  
  27. printf("> ");
  28. cin.getline(input, 256);
  29. printf("\n");
  30.  
  31. parse();
  32. }
  33.  
  34. return 0;
  35. }
  36.  
  37. void parse()
  38. {
  39. enum stat_t {PRINT = 0, END, NOSTAT} stat_type; // statements (have to be in exact the same order as the keywords below)
  40. const char *keywords[] = {
  41. "PRINT",
  42. "END",
  43. "NOSTAT", // always one place before the last element
  44. "" // null-terminate the list
  45. };
  46. const char **ptr = keywords;
  47.  
  48. get_token();
  49. if(tok_type == NOTOK) return; // No token present
  50.  
  51. for(int i = 0; *ptr[i]; ++i) { // Start converting to integer token
  52. stat_type = (stat_t) i;
  53. if( !strcmp(token, ptr[i] ) ) break;
  54. }
  55.  
  56. // We assume every line starts with a statement
  57. switch(stat_type)
  58. {
  59. case END:
  60. do_end();
  61. break;
  62. case PRINT:
  63. do_print();
  64. break;
  65. default:
  66. serror(0, string(token));
  67. }
  68. }
  69.  
  70. void get_token()
  71. {
  72. char *tok = token;
  73. static char *iptr = input;
  74. tok_type = NOTOK;
  75.  
  76. if( new_inp ) { // when interpreting a new line, restore the pointer
  77. iptr = input;
  78. new_inp = false;
  79. }
  80.  
  81. if( !*iptr ) { // no tokens left on this line
  82. *tok = '\0';
  83. return;
  84. }
  85.  
  86. while( *iptr && isspace(*iptr) ) iptr++; // skip over spaces
  87.  
  88. if( *iptr && isdelim(*iptr) ) { // found a delimiter
  89. tok_type = DELIMITER;
  90. *tok++ = *iptr++;
  91. }
  92.  
  93. if( tok_type == NOTOK && isdigit( *iptr ) ) { // found a number (integer only)
  94. tok_type = NUMBER;
  95. while( *iptr && isdigit( *iptr ) && !isdelim( *iptr ) ) {
  96. *tok++ = *iptr++;
  97. }
  98. }
  99.  
  100. if( tok_type == NOTOK && *iptr == '\"' ) { // string token, get the whole string
  101. tok_type = STRING;
  102. *tok++ = *iptr++;
  103. while( *iptr && ( *tok++ = *iptr++ ) != '\"' );
  104. }
  105.  
  106. if( tok_type == NOTOK && !isdelim( *iptr ) ) {
  107. tok_type = STATEMENT;
  108. while( *iptr && !isdelim( *iptr ) ) { // token is a statement (or rubbish)
  109. *tok++ = *iptr++;
  110. }
  111. }
  112.  
  113. *tok = '\0'; // add null-terminator to token
  114. }
  115.  
  /*
   * Print an error message followed by a newline to standard output.
   *
   * errnum selects the message prefix:
   *   0 - "Syntax error: "
   *   1 - "Wrong argument: "
   *   2 - "Missing argument."
   *   3 - "No ending quote found.\nInvalid string: "
   * Any other value (including negative ones) selects an empty prefix, so
   * only `custom` is printed instead of reading outside the message table.
   *
   * custom is appended directly after the prefix.
   */
  void serror(int errnum, const std::string &custom)
  {
      static const char *const messages[] = {
          "Syntax error: ",
          "Wrong argument: ",
          "Missing argument.",
          "No ending quote found.\nInvalid string: "
      };
      const int count = static_cast<int>( sizeof(messages) / sizeof(messages[0]) );

      const char *prefix = ( errnum >= 0 && errnum < count ) ? messages[errnum] : "";

      std::cout << prefix << custom << "\n";
  }
  128.  
  129. void flush_input()
  130. {
  131. input[0] = '\0';
  132. new_inp = true;
  133. }
  134.  
  /*
   * Tell whether c is a token delimiter.
   *
   * Returns 1 for a comma, a space and the string terminator '\0';
   * returns 0 for every other character.
   */
  int isdelim(char c)
  {
      switch( c )
      {
      case ',':
      case ' ':
      case '\0': // the end-of-string marker counts as a delimiter too
          return 1;
      default:
          return 0;
      }
  }
  140.  
  141. /***************
  142. @file: minibas.h
  143. ***************/
  144.  
  #ifndef MINIBAS_H
  #define MINIBAS_H

  #include <string> // std::string appears in serror()'s signature

  /* Category of a token produced by get_token(); NOTOK means "no token" */
  enum tok_t {NOTOK, STATEMENT, STRING, DELIMITER, NUMBER};

  /* All the interpreter's functions */
  void parse();                                       // interpret the current input line
  void get_token();                                   // read the next token into the globals token / tok_type
  void flush_input();                                 // reset the input buffer before reading a new line
  void serror(int errnum, const std::string &custom); // print error message number errnum followed by custom
  int isdelim(char c);                                // 1 if c is a token delimiter, 0 otherwise

  /* Handlers for the built-in statements */
  void do_end();
  void do_print();

  #endif /* MINIBAS_H */
  157.  
  158. /***********************
  159. @file: keyword_funcs.cpp
  160. ***********************/
  161.  
  162. /* All functions needed to handle the keywords of minibas */
  163.  
  164. #include <iostream>
  165. #include <cstdio>
  166. #include <string>
  167. #include "minibas.h"
  168. using namespace std;
  169.  
  170. extern tok_t tok_type;
  171. extern char token[];
  172.  
  173. void do_end()
  174. {
  175. get_token();
  176. if( tok_type != NOTOK ) {
  177. serror(0, "END doesn't require any parameters.");
  178. return;
  179. }
  180.  
  181. cout << "Stopped execution.\n";
  182. exit(0);
  183. }
  184.  
  185. void do_print()
  186. {
  187. get_token();
  188.  
  189. if( tok_type == NOTOK ) { // no token present, thus no argument for PRINT
  190. serror(2, "");
  191. return;
  192. }
  193.  
  194. if( tok_type == NUMBER ) {
  195. cout << token;
  196. }
  197. else if( tok_type == STRING ) {
  198. char *ptr = token;
  199.  
  200. if( !strchr( ++ptr, '\"' ) ) { // jump over beginning quote
  201. serror(3, string(token) ); // no ending quote found, thus string is invalid
  202. return;
  203. }
  204.  
  205. while( *ptr != '\"' ) putchar(*ptr++);
  206. }
  207. else {
  208. serror(0, "PRINT can only handle strings and numbers.");
  209. return;
  210. }
  211.  
  212. putchar('\n');
  213. }
  214.  
  215. /**********
  216. @sample run
  217. **********/
  218. /*
  219. > lmekf
  220.  
  221. Syntax error: lmekf
  222. > PRINT
  223.  
  224. Missing argument.
  225. > PRINT "Hello World!!"
  226.  
  227. Hello World!!
  228. > PRINT 35
  229.  
  230. 35
  231. > PRINT error
  232.  
  233. Syntax error: PRINT can only handle strings and numbers.
  234. > END
  235.  
  236. Stopped execution.
  237. */
tux4life Jul 4th, 2009 7:12 am
It might be better to implement a separate function to check whether a string is valid or not.

tux4life Jul 9th, 2009 12:51 pm
Instead of checking for
NOTOK
in each branch of the
get_token()
function, it would have been better to turn those checks into an
else if
chain.

tux4life Jul 28th, 2009 11:48 am
As it now stands, I really have to admit that this code sucks.


All times are GMT -4. The time now is 7:34 am.

Forum system based on vBulletin Copyright ©2000 - 2009, Jelsoft Enterprises Ltd.
©2003 - 2009 DaniWeb® LLC