Hello All,

First of all, I am a beginner, so please bear with me. :)

I want to extract the data inside a table from HTML code. Let's assume the HTML code is in a file and I have already read its contents into a variable. Now, using this variable, I want to get all the data that is held in table format.

The preferred return type would be a 2D array, an array of vectors, or a delimited string.

I have attached my code.

Your help will be much appreciated. Thanks in advance ;)

Attachments
#include "myHeader.h"


//#define FilePath "D:\\Documents and Settings\\Administrator\\Desktop\\bajaj.txt"


int main (int argc, char* argv[])
{
//string FilePath("D:\\Documents and Settings\\Administrator\\Desktop\\bajaj.txt");
	string FilePath = "C:\\test.html";
string Buffer;
MyCallback pCallback;
URLDownloadToFile(NULL, "http://www.indiaweather.in/mausam/cities/Gulbarga", "c:\\test.html", 0, &pCallback);

MSB msb;
msb.FileToBuffer(FilePath, Buffer);
msb.URLGetTable(Buffer, Buffer);
cout << Buffer << "\n\nEnd of file.\n\n";


getchar();
return 0;
}
#include <iostream>
#include <string>
#include <fstream>
#include <windows.h>
#include <tchar.h>

#include "URLSupport.h"

using namespace std;

/// Helper for loading a text file into a string and cutting the first HTML
/// table fragment out of a string.
class MSB
{
public:
	std::string FileToBuffer(std::string Path, std::string &Buffer);
	std::string URLGetTable(std::string Buffer, std::string &Buf);

};

/// Reads the whole text file at Path into Buffer, one line at a time.
///
/// Every line read is followed by a single '\n' in the result, so a file whose
/// last line has no trailing newline still ends with one.
///
/// @param Path    Path of the file to read.
/// @param Buffer  Receives the file contents; set to an empty string when the
///                file cannot be opened or is empty.
/// @return A copy of the contents stored in Buffer.
std::string MSB::FileToBuffer(std::string Path, std::string &Buffer)
{
	std::ifstream lFile(Path.c_str());
	std::string contents;
	std::string line;

	// Loop on the result of getline itself rather than on eof(): if the file
	// failed to open, getline fails at once and the loop ends, whereas eof()
	// would never become true and the loop would spin forever.
	while (std::getline(lFile, line))
	{
		contents += line;
		contents += '\n';
	}

	Buffer = contents;
	return Buffer;
}

/// Extracts the first table fragment from an HTML string.
///
/// The fragment starts at the first "<table" and ends just before the first
/// "</table>" that follows it; the closing tag itself is not included. Tag
/// matching is case-sensitive and nested tables are not balanced.
///
/// @param Buffer  HTML text to search. Taken by value, so the caller may pass
///                the same string object as Buf.
/// @param Buf     Receives the fragment, or "Table not found.\n" when there is
///                no opening tag followed by a closing tag.
/// @return A copy of the value stored in Buf.
std::string MSB::URLGetTable(std::string Buffer, std::string &Buf)
{
	const std::string openTag = "<table";
	const std::string closeTag = "</table>";

	const std::string::size_type start = Buffer.find(openTag);

	// Search for the closing tag only after the opening one, so a stray
	// "</table>" earlier in the text cannot produce a negative length.
	const std::string::size_type end =
		(start == std::string::npos) ? std::string::npos : Buffer.find(closeTag, start);

	if (end == std::string::npos)
	{
		Buf = "Table not found.\n";
	}
	else
	{
		Buf = Buffer.substr(start, end - start);
	}
	return Buf;
}
#include <urlmon.h>
//#pragma comment(lib, "urlmon.lib")
//#include "stdafx.h"
#pragma comment (lib, "urlmon.lib")

// IID_IUnknown / IID_IBindStatusCallback are defined in uuid.lib.
#pragma comment (lib, "uuid.lib")

/// Minimal IBindStatusCallback implementation meant to be handed to
/// URLDownloadToFile.
///
/// Only OnProgress and QueryInterface do anything; every other notification
/// returns E_NOTIMPL. AddRef/Release do no reference counting, so the object's
/// lifetime is entirely up to whoever creates it.
class MyCallback : public IBindStatusCallback
{
public:
	MyCallback() {}

	~MyCallback() { }

	/// Progress notification (per the original note, the one called by
	/// URLDownloadToFile). Accepts the report and does nothing with it.
	STDMETHOD(OnProgress)(/* [in] */ ULONG ulProgress, /* [in] */ ULONG ulProgressMax, /* [in] */ ULONG ulStatusCode, /* [in] */ LPCWSTR wszStatusText)
	{
		//cout << "Downloaded " << ulProgress << " of " << ulProgressMax << " byte(s), " << " Status Code = " << ulStatusCode << endl;
		return S_OK;
	}

	// The remaining IBindStatusCallback notifications are not implemented.
	STDMETHOD(OnStartBinding)(/* [in] */ DWORD dwReserved, /* [in] */ IBinding __RPC_FAR *pib)
	{ return E_NOTIMPL; }

	STDMETHOD(GetPriority)(/* [out] */ LONG __RPC_FAR *pnPriority)
	{ return E_NOTIMPL; }

	STDMETHOD(OnLowResource)(/* [in] */ DWORD reserved)
	{ return E_NOTIMPL; }

	STDMETHOD(OnStopBinding)(/* [in] */ HRESULT hresult, /* [unique][in] */ LPCWSTR szError)
	{ return E_NOTIMPL; }

	STDMETHOD(GetBindInfo)(/* [out] */ DWORD __RPC_FAR *grfBINDF, /* [unique][out][in] */ BINDINFO __RPC_FAR *pbindinfo)
	{ return E_NOTIMPL; }

	STDMETHOD(OnDataAvailable)(/* [in] */ DWORD grfBSCF, /* [in] */ DWORD dwSize, /* [in] */ FORMATETC __RPC_FAR *pformatetc, /* [in] */ STGMEDIUM __RPC_FAR *pstgmed)
	{ return E_NOTIMPL; }

	STDMETHOD(OnObjectAvailable)(/* [in] */ REFIID riid, /* [iid_is][in] */ IUnknown __RPC_FAR *punk)
	{ return E_NOTIMPL; }

	// IUnknown: no reference counting is performed; both calls return 0.
	STDMETHOD_(ULONG,AddRef)()
	{ return 0; }

	STDMETHOD_(ULONG,Release)()
	{ return 0; }

	/// Hands out this object for IUnknown and IBindStatusCallback.
	///
	/// @param riid       Interface being requested.
	/// @param ppvObject  Receives the interface pointer, or NULL when the
	///                   interface is not supported.
	/// @return S_OK for a supported interface, E_NOINTERFACE for any other,
	///         E_POINTER when ppvObject is NULL.
	STDMETHOD(QueryInterface)(/* [in] */ REFIID riid, /* [iid_is][out] */ void __RPC_FAR *__RPC_FAR *ppvObject)
	{
		if (ppvObject == NULL)
		{
			return E_POINTER;
		}

		if (IsEqualIID(riid, IID_IUnknown) || IsEqualIID(riid, IID_IBindStatusCallback))
		{
			*ppvObject = static_cast<IBindStatusCallback *>(this);
			AddRef();
			return S_OK;
		}

		// The out pointer must be cleared when the interface is refused.
		*ppvObject = NULL;
		return E_NOINTERFACE;
	}
};

Thanks for your time & suggestion AD.

I was basically looking for string functions or some other simple way to get it working. Breaking the HTML data into tables and getting the data out of them was easy using string.find, but arranging the values in the right format is the biggest problem :(

I hope, I made my point clear now. AD or anyone else, any other suggestions please?

I suppose you should post an example HTML file. If you can already extract all the data from the table, then what's the problem? Is the data numeric, strings, or both?

Hi again AD :)

In the code I have used a web URL. Please have a glance.

You can consider any html code which has data in tables. I am not able to sort the data because the method I was using to extract data from tables was not accurate.

I am stuck at this part and do not have a clue what would be the best solution. :S

Similar to URLDownloadToFile, is there any function that returns the values from a table using the table id or tag name?

This article has been dead for over six months. Start a new discussion instead.