View Single Post
Join Date: Dec 2006
Posts: 1,089
Reputation: vijayan121 is a name known to all vijayan121 is a name known to all vijayan121 is a name known to all vijayan121 is a name known to all vijayan121 is a name known to all vijayan121 is a name known to all 
Solved Threads: 164
vijayan121 vijayan121 is offline Offline
Veteran Poster

Re: Lexer- Tokenizer problem

 
0
  #10
Aug 30th, 2007
Using Boost.Spirit may be much easier: http://www.boost.org/libs/spirit/doc/quick_start.html
  1. #include <boost/spirit/core.hpp>
  2. #include <iostream>
  3. #include <string>
  4. #include <vector>
  5. #include <algorithm>
  6. #include <boost/assign.hpp>
  7. using namespace std ;
  8. using namespace boost ;
  9. using namespace boost::spirit ;
  10. using namespace boost::assign ;
  11.  
  12. struct parse_it
  13. {
  14. void operator() ( const string& str ) const
  15. {
  16. vector<string> tokens ;
  17. const char* cstr = str.c_str() ;
  18. size_t n = 0 ;
  19. while( n < str.size() )
  20. n += parse( cstr + n,
  21. (+space_p) [ push_back_a( tokens, "SPACE" ) ] |
  22. str_p("daniweb") [ push_back_a( tokens, "WEB" ) ] |
  23. str_p("lexer") [ push_back_a( tokens, "LEX" ) ] |
  24. str_p("tokenizer") [ push_back_a( tokens, "TOK" ) ] |
  25. (+~space_p) [ push_back_a( tokens, "STRING" ) ]
  26. ).length ;
  27. cout << '\n' << "parsed: " << str << "\ntokens: " ;
  28. copy( tokens.begin(), tokens.end(),
  29. ostream_iterator<string>(cout," ") ) ;
  30. cout << '\n' ;
  31. }
  32. };
  33. int main()
  34. {
  35. vector<string> test_cases = list_of
  36. ( "test daniweb lexer xyz tokenizer lexer" )
  37. ( "daniweblexer tokenizerlexer abcd lexerlexer" )
  38. ( "daniwebtest lexerdaniweblexertest tokenizerxxx" ) ;
  39. for_each( test_cases.begin(), test_cases.end(), parse_it() ) ;
  40. }
  41. /**
  42. >g++ -Wall -std=c++98 -I/usr/local/include keyword.cpp && ./a.out
  43.  
  44. parsed: test daniweb lexer xyz tokenizer lexer
  45. tokens: STRING SPACE WEB SPACE LEX SPACE STRING SPACE TOK SPACE LEX
  46.  
  47. parsed: daniweblexer tokenizerlexer abcd lexerlexer
  48. tokens: WEB LEX SPACE TOK LEX SPACE STRING SPACE LEX LEX
  49.  
  50. parsed: daniwebtest lexerdaniweblexertest tokenizerxxx
  51. tokens: WEB STRING SPACE LEX WEB LEX STRING SPACE TOK STRING
  52. */
Last edited by vijayan121; Aug 30th, 2007 at 2:55 pm.
Reply With Quote