I am having some problems with my java code, i am trying to write a C compiler and am currently working on developing a string tokenizer for the C programming language using java regex. my problem is first my program wont recognize multiline comments and second i cant figure out how to implement a correct order of operations (for example if i have the number 5.5 in my C code it stores both 5.5 as a float and stores 5 as an int). Ive been working on this for a couple days and cant seem to figure it out, here is my code for the tokenizer:

import java.io.*;
import java.util.*;
import java.util.regex.*;

public class tokenizer 
{
	static int count;
	
	public static void main(String[] args) 
	{
		Scanner scan = new Scanner(System.in);
		String file;
		
		System.out.println("Enter file name:");
		file = scan.nextLine();
		
		try
		{
			FileInputStream fstream = new FileInputStream(file);
			DataInputStream in = new DataInputStream(fstream);
			BufferedReader br = new BufferedReader(new InputStreamReader(in));
			
			String line;
			Pattern pint = Pattern.compile("int");
			Pattern pd = Pattern.compile("double");
			Pattern pc = Pattern.compile("char");
			Pattern pif = Pattern.compile("if");
			Pattern pw = Pattern.compile("while");
			Pattern pv = Pattern.compile("void");
			Pattern plb = Pattern.compile("\\{");
			Pattern prb = Pattern.compile("\\}");
			Pattern plp = Pattern.compile("\\(");
			Pattern prp = Pattern.compile("\\)");
			Pattern plr = Pattern.compile("\\[");
			Pattern prr = Pattern.compile("\\]");
			Pattern ps = Pattern.compile(";");
			Pattern pp = Pattern.compile("\\+");
			Pattern pm = Pattern.compile("-");
			Pattern pt = Pattern.compile("\\*");
			Pattern pe = Pattern.compile("=");
			Pattern p = Pattern.compile("\\.");
			Pattern pa = Pattern.compile("&&");
			Pattern po = Pattern.compile("\\|\\|");
			Pattern pn = Pattern.compile("!");
			Pattern pg = Pattern.compile(">");
			Pattern pl = Pattern.compile("<");
			Pattern pge = Pattern.compile(">=");
			Pattern ple = Pattern.compile("<=");
			Pattern prs = Pattern.compile(">>");
			Pattern pls = Pattern.compile("<<");
			Pattern ptr = Pattern.compile("->");
			Pattern per = Pattern.compile("%");
			Pattern pne = Pattern.compile("!=");
			Pattern pnu = Pattern.compile("[0-9]+");
			Pattern pfu = Pattern.compile("[-+]?[0-9]*\\.[0-9]+");
			Pattern pcm = Pattern.compile("//.*");
			Pattern pmc = Pattern.compile("/\\*(?:.|[\\n\\r])*?\\*/");
			Pattern pvar = Pattern.compile("[a-z]([a-z]|[0-1]|_)*");

			for(count = 1;(line = br.readLine()) != null; count ++)
			{
				tokenizeProgram(pcm, line);
				tokenizeProgram(pmc, line);
				
				tokenizeProgram(pint, line);
				tokenizeProgram(pd, line);
				tokenizeProgram(pc, line);
				tokenizeProgram(pif, line);
				tokenizeProgram(pw, line);
				tokenizeProgram(pv, line);
				
				tokenizeProgram(ps, line);
				tokenizeProgram(pp, line);
				tokenizeProgram(pm, line);
				tokenizeProgram(pt, line);
				tokenizeProgram(pe, line);
				tokenizeProgram(p, line);
				tokenizeProgram(pa, line);
				tokenizeProgram(po, line);
				tokenizeProgram(pn, line);
				tokenizeProgram(pg, line);
				tokenizeProgram(pl, line);
				tokenizeProgram(pge, line);
				tokenizeProgram(ple, line);
				tokenizeProgram(prs, line);
				tokenizeProgram(pls, line);
				tokenizeProgram(ptr, line);
				tokenizeProgram(per, line);
				tokenizeProgram(pne, line);
				
				tokenizeProgram(pfu, line);
				tokenizeProgram(pnu, line);
				tokenizeProgram(pvar, line);
				
				tokenizeProgram(plb, line);
				tokenizeProgram(prb, line);
				tokenizeProgram(plp, line);
				tokenizeProgram(prp, line);
				tokenizeProgram(plr, line);
				tokenizeProgram(prr, line);
			}
			
			fstream.close();
			in.close();	
			br.close();
		}catch (Exception e)
		{
			System.err.println("Error: " + e.getMessage());
		}
	}

	public static Vector<?> tokenizeProgram(Pattern pattern, String str)
	{
		Vector<Object> tokens = new Vector<Object>();
		Matcher matcher = pattern.matcher(str);
		
		while(matcher.find())
		{
			tokens.add(new Tokens(matcher.group(), pattern, count));
			
			System.out.println(((Tokens) tokens.lastElement()).toString());
		}
			
		return tokens;
	}
}

and Tokens:

import java.util.regex.Pattern;

public class Tokens 
{
	String token;
	Pattern name;
	int line;
	
	public Tokens(String t, Pattern pattern, int l)
	{
		token = t;
		name = pattern;
		line = l;
	}
	
	
	public String getToken()
	{
		return token;
	}

	public String getTokenName()
	{
		if(name.toString().equals("int"))
			return "INT";
		else if(name.toString().equals("double"))
			return "DOUBLE";
		else if(name.toString().equals("char"))
			return "CHAR";
		else if(name.toString().equals("boolean"))
			return "BOOLEAN";
		else if(name.toString().equals("if"))
			return "IF";
		else if(name.toString().equals("while"))
			return "WHILE";
		else if(name.toString().equals("void"))
			return "VOID";
		else if(name.toString().equals("\\{"))
			return "L_BRACE";
		else if(name.toString().equals("\\}"))
			return "R_BRACE";
		else if(name.toString().equals("\\("))
			return "L_PARA";
		else if(name.toString().equals("\\)"))
			return "R_PARA";
		else if(name.toString().equals("\\["))
			return "L_BRACKET";
		else if(name.toString().equals("\\]"))
			return "R_BRACKET";
		else if(name.toString().equals(";"))
			return "SEMI";
		else if(name.toString().equals("\\+"))
			return "PLUS";
		else if(name.toString().equals("-"))
			return "MINUS";
		else if(name.toString().equals("\\*"))
			return "MULT";
		else if(name.toString().equals("="))
			return "EQ";
		else if(name.toString().equals("\\."))
			return "PERIOD";
		else if(name.toString().equals("&&"))
			return "AND";
		else if(name.toString().equals("\\|\\|"))
			return "OR";
		else if(name.toString().equals("!"))
			return "NOT";
		else if(name.toString().equals(">"))
			return "GT";
		else if(name.toString().equals("<"))
			return "LT";
		else if(name.toString().equals(">="))
			return "GT_EQ";
		else if(name.toString().equals("<="))
			return "LT_EQ";
		else if(name.toString().equals(">>"))
			return "RT_SHIFT";
		else if(name.toString().equals("<<"))
			return "LFT_SHIFT";
		else if(name.toString().equals("->"))
			return "PTR";
		else if(name.toString().equals("%"))
			return "PERCENT";
		else if(name.toString().equals("!="))
			return "NOT_EQ";
		else if(name.toString().equals("[0-9]+"))
			return "INT_NUM";
		else if(name.toString().equals("[-+]?[0-9]*\\.[0-9]+"))
			return "FLOAT_NUM";
		else if(name.toString().equals("//.*"))
			return "SL_COMMENT";
		else if(name.toString().equals("/\\*(?:.|[\\n\\r])*?\\*/"))
			return "ML_COMMENT";
		else if(name.toString().equals("[a-z]([a-z]|[0-1]|_)*"))
			return "ID";
		else
			return "Trash";
	}
	
	public int getLineNumber()
	{
		return line;
	}
	
	public String toString()
	{
		return "(" + getToken() + ", " + getTokenName() + ", " + getLineNumber() + ")";
	}
}

use " '\\'{" instead of "\\{"

because if get double slash as what you put in double quote should be palces '' around \\ within " " ..that is " ' \\' "

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.