Hey,

I'm writing an XML parser in C++. Currently it works, but too much of what needs to be done is left up to the end user. I'm trying to figure out a way to have a clean, more encapsulated interface for the parser, but I can't seem to think of one that I like.

This is the declaration of the parser:

// Reads an XML file piece by piece: the caller alternates between ReadTag(),
// ReadAtribute() and ReadData() to walk through tags, their attributes and
// the text between tags.
class XMLParser
{
public:
	XMLParser();
	~XMLParser();

	// Opens `filename` for parsing.
	// NOTE(review): the meaning of the int result is not shown here —
	// presumably non-zero on success like the Read* methods; confirm.
	int OpenFile(std::string filename);
	void CloseFile();

	// Reads the next tag. Per the author's description, returns 1 on success and
	// fills *name with the tag name, *tag_type with the tag's type (Unknown if
	// not yet known) and *attributes with whether attributes remain to be read.
	// When *attributes is true, *tag_type is always Unknown.
	int ReadTag(std::string* name,TagType* tag_type,bool* attributes);
	// Reads one attribute; intended to be called only right after a ReadTag()
	// or ReadAtribute() call that reported more attributes. Fills *name and
	// *value with the attribute, *tag_type with the tag's type (Unknown while
	// *moreAttributes is true) and *moreAttributes with whether more follow.
	// Callers treat a zero result as failure.
	int ReadAtribute(std::string* name,std::string* value,TagType* tag_type,bool* moreAttributes);
	// Reads text into *data until the beginning of the next tag; intended to be
	// called once the attribute flag is false. Callers treat a zero result as
	// failure.
	int ReadData(std::string* data);
private:
	std::string m_filename;	// presumably the name passed to OpenFile() — confirm
	std::ifstream m_fin;	// input stream the parser reads from
};

Definition of TagType:

// Kind of tag reported by the parser's read methods.
enum TagType
{
	Unknown    = 0,	// type not determined (e.g. attributes are still pending)
	Open       = 1,	// opening tag
	Close      = 2,	// closing tag
	StandAlone = 3	// presumably a self-contained tag with no separate close — confirm
};

The parser works with three main methods. The first is ReadTag(). It returns 1 on success, fills name with the name of the tag, sets tag_type to the type of the tag (if not known, it's set to 0 (Unknown)), and sets attributes to indicate whether or not there are any attributes to be read (if attributes is true, tag_type is always Unknown).

ReadAttribute() (spelled ReadAtribute in the declaration above) should only be called directly after a call to ReadTag() that sets attributes to true (or a call to ReadAttribute() that says there are more attributes). It will give you the name of the attribute, its value, the type of tag (again, if known), and whether or not there are more attributes (again, if true, tag_type is always unknown).

ReadData() should be called only after the attribute flag (of either ReadTag() or ReadAttribute()) is false. It fills the string with data from the file until it encounters the beginning of another tag, at which point you should call ReadTag() and start the cycle over. Here is an example of how these methods may be used:

#include <iostream>
#include <string>
#include "ReznebXML.h"
using namespace std;

int main()
{
	cout << "XML test" << "\n\n";

	XMLParser* parse;
	parse = new XMLParser;
	parse->OpenFile("test.xml");

	string str1;
	string str2;
	bool attributes;
	TagType type;

	string root;
	parse->ReadTag(&str1,&type,&attributes);//parse root element...
	root = str1;//...and store it
	cout << "Root: " << root << "\n";
	while(attributes)//while there are still attributes to be read...
	{
		if(!parse->ReadAtribute(&str1,&str2,&type,&attributes))
			return 0;

		cout << "\t" << str1 << ": " << str2 << "\n";//...output them
	}
	if(type != Open)
		return 0;

	if(!parse->ReadData(&str1))
		return 0;

	cout << "Data: " << str1 << "\n";

	str1 = "";
	//Read elements
	while(true)
	{
		if(!parse->ReadTag(&str1,&type,&attributes))
			return 0;
		
		if(str1 == root)
		{
			if(!attributes && type == Close)
				break;
			else
				return 0;
		}
		else
		{
			cout << "Sub element: " << str1 << "\n";
			while(attributes)
			{
				if(!parse->ReadAtribute(&str1,&str2,&type,&attributes))
					return 0;

				cout << "\t" << str1 << ": " << str2 << "\n";
			}
			if(!parse->ReadData(&str1))
				return 0;

			cout << "Data : " << str1 << "\n";
		}
	}

	system("PAUSE");
	return 0;
}

As you can see, there's a lot of user-dependency (the user being the programmer that uses the parser). Can anyone help me think of a better interface, probably one that wraps around these three methods?

Thanks.

You can have a look at my definition for an xml parser. I've stored the contents of each tag/dir in an XmlDir class (which contains the tag name, a list of XmlDirs and a map of attributes to create a type of directory tree). The user interfaces are the public member functions in either class.

An example of usage:

XmlParser doc;

doc.loadFile("c:/colourdefs.xml");

// receives every dir matched by the query below
list<XmlDir*> saturatedRedGreens;

// supply a list to get multiple dirs
doc.getDir("/palette/colors/color<@enabled=\"true\">/red<255>/../green<255>/../name", &saturatedRedGreens);

// saturatedRedGreens now contains the name of all enabled (as per attribute field) colours having red = 255, green = 255
Attachments
// One node ("dir") of the parsed XML tree: a tag name, its attributes, and
// either a text value or a list of child dirs.
class XmlDir{
   XmlDir *parent;      // enclosing dir; see goodChildren()
   // NOTE(review): presumably tests whether `a` may appear in a tag name — confirm
   bool validNameChar(const char a);

   // NOTE(review): presumably tests whether `st` is a well-formed tag name — confirm
   bool validName(const string &st);
   // String-splitting helpers; keepEmpty presumably controls whether empty
   // tokens are kept in the result — confirm against the definitions.
   vector<string> splitAny(const string &st, const string &delim, bool keepEmpty = true);
   // Declared without the `XmlDir::` prefix: qualifying a member's name inside
   // its own class body is ill-formed and rejected by conforming compilers.
   vector<string> split(const string &st, const string &delim, bool keepEmpty = true);
   
   // Return dir if found, NULL if not found, or throw exception if error in input
   // You probably won't call this function directly - use getDir() or changeDirectory()
   // Always finds a dir if it exists (even if duplicate dirs exist)
   // Use dir<value> to match a dir containing "value"
   // Use dir<@attr1="value1"><@attr2="value2"> etc. to match a dir containing the attribute attr1="value1" AND attr2="value2"
   // To match multiple values, use the ../ path; e.g. "/palette/colors/color<@enabled=true>/red<255>/../green<255>/../blue<127>/../name"
   // will return the name of the enabled colour with RGB(255,255,127)
   // If retList is supplied it receives multiple matching dirs.


   XmlDir *findDirs(const vector<string> &path, vector<string>::const_iterator pathIt, list<XmlDir*> *retList = NULL) throw(EXmlDir);
   
public:
   string name;         // tag name; name should not be included
   map<string, string> attributes;   // attribute name -> attribute value

// contains:
   string value;
// OR:
   // dirs should be map<string name, vector<XmlDir> > dirs;
   list<XmlDir> dirs;    // these are xml dirs - i.e. there can be multiple dirs with the same name &/| attributes

   XmlDir();
   // NOTE(review): presumably builds the dir from XML text in `st` (cf. assign()) — confirm
   XmlDir(const string &st) throw(EXmlDir);
   void clear();
   
   string getPathString();

   // path is relative to current directory. ".", ".." && "/" allowed.
   // Supply retList to receive multiple matching dirs.
   XmlDir *getDir(const string &path = ".", list<XmlDir*> *retList = NULL) throw(EXmlDir);
   
   // assign puts an xml directory into this structure
   // returns position just after last character processed
   string::size_type assign(const string &st, string::size_type startPos = 0) throw(EXmlDir);
   void loadFile(const string &filename); // assign file to *this
   bool goodChildren();       // returns true if all children know their parents
   
   // displays this xml doc
   void display(XmlDir *xml = NULL, const string tabspacing = "");
   void displayDir(const string &path = ".") throw(EXmlDir);
   
   friend class XmlParser;
   };

// User-facing wrapper around an XmlDir tree: holds the tree by value (`xml`)
// plus a pointer (`root`) into it, and exposes path-based lookup and display.
class XmlParser{
   XmlDir *root;   // NOTE(review): presumably the dir that paths are resolved against, set via setRoot() — confirm
   XmlDir xml;     // the stored XML tree
   
public:
   XmlParser();
   // NOTE(review): presumably parses XML text from `st` — confirm
   XmlParser(const string &st) throw(EXmlDir);
   // Loads the XML document stored in `filename`.
   void loadFile(const string &filename) throw(EXmlDir);
   // NOTE(review): presumably moves `root` to the dir named by `rel` — confirm
   void setRoot(const string &rel = "/") throw(EXmlDir);
   // Looks up a dir by path; supply retList to receive multiple matching dirs.
   XmlDir *getDir(const string &rel = ".", list<XmlDir*> *retList = NULL) throw(EXmlDir);
   void displayDir(const string &path = ".") throw(EXmlDir);
   void display();
   string getRootString();
   // NOTE(review): when `xml` is NULL this presumably operates on the whole tree — confirm
   void removeEmptyEntries(XmlDir *xml = NULL);
   };      
#endif

Look at TinyXML or something, it's nicely done.

now that makes me want to code an xml parser... perhaps with XPath support...

Um, sorry, what exactly is a dir? A directory?

dir... yes it's a directory. I'm not well versed in XML speak, so I likened each tag to a directory as in a file structure. They're actually called 'elements', which I'll make sure I call them in future...

This article has been dead for over six months. Start a new discussion instead.