0

Hi all. This regards chapter 6 of "Accelerated C++".
The program is intended to scan a document and find all the urls in it.
I am testing it with just one string input "text http://www.page.com more text".
The program is compiling but doesn't run beyond me entering the input.
I can't see what the problem is.
Any help would be, as always, much appreciated.
Thanks.

URL_Searcher_Main.cpp:

#include "find_urls.h"

#include <iostream>
#include <string>
#include <vector>

using std::cout;        using std::cin;         using std::endl;
using std::string;      using std::vector;

int main()
{
    string s;
    getline(cin, s);

    vector<string> urls = find_urls(s);

    for(vector<string>::const_iterator iter = urls.begin(); iter != urls.end(); ++iter)
        cout << *iter << endl;

    return 0;
}

find_urls.cpp:

#include "find_urls.h"
#include "url_beg.h"
#include "url_end.h"

using std::string;      using std::vector;

vector<string> find_urls(const string& s)
{
    vector<string> ret;
    typedef string::const_iterator iter;
    iter b = s.begin(), e = s.end();

    while (b != e);
    {
        b = url_beg(b, e);

        if (b != e)
        {
            iter after = url_end(b, e);

            ret.push_back(string(b, after));

            b = after;
        }
    }

    return ret;
}

url_beg.cpp:

#include "url_beg.h"
#include "not_url_char.h"

#include <algorithm>

using std::search;         using std::string;

string::const_iterator url_beg(string::const_iterator b, string::const_iterator e)
{
    static const string sep = "://";

    typedef string::const_iterator iter;

    iter i = b;

    while ((i = search(i, e, sep.begin(), sep.end())) != e)
    {
        if (i != b && i + sep.size() != e)
        {
            iter beg = i;
            while (beg != b && isalpha(beg[-1]))
                --beg;

            if (beg != i && !not_url_char(i[sep.size()]))
                return beg;
        }
        i += sep.size();
    }
    return e;
}

url_end.cpp:

#include "url_end.h"
#include "not_url_char.h"

#include <algorithm>

using std::find_if;     using std::string;

string::const_iterator url_end(string::const_iterator b, string::const_iterator e)
{
    return find_if(b, e, not_url_char);
}

not_url_char.cpp:

#include "not_url_char.h"

#include <algorithm>
#include <cctype>
#include <string>

using std::isalnum;     using std::string;

bool not_url_char(char c)
{
    static const string url_ch = "~;/?@=&$-_.+!*'(),";

    return !(isalnum(c) || find(url_ch.begin(), url_ch.end(), c) != url_ch.end());
}
3
Contributors
3
Replies
4
Views
5 Years
Discussion Span
Last Post by Tinnin
1

What compiler are you using? Learn to use your compiler's debugger so that you can step through the program and find out for yourself what is causing the problem.

0

In

string::const_iterator url_beg(string::const_iterator b, string::const_iterator e)
{
    static const string sep = "://";
    // ...    
    return e;
}

the value returned is always e (end). Instead, return an iterator to the begin of the url.
Something like:

string::const_iterator url_beg( std::string::const_iterator b,
                                std::string::const_iterator e )
{
    static const string sep = "://";
    string::const_iterator iter = std::search( b, e, sep.begin(), sep.end() ) ;
    if( iter != e ) while( iter != b && std::isalpha( iter[-1] ) ) --iter ;
    return iter ;
}
0

Thanks guys. I do need to learn how to debug. Not done it before.

However. Error found.

find_urls.cpp - line 13 ....epic fail /sigh

This question has already been answered. Start a new discussion instead.
Have something to contribute to this discussion? Please be thoughtful, detailed and courteous, and be sure to adhere to our posting rules.