andreea.mihaela.90475 0 Newbie Poster

I have code written for one domain, and I want to modify it to scrape another domain (http://www.apbspeakers.com/speakers), but I don't know which CSS elements from the new domain to start with. I think it would be something like:
__doPostBack('ctl00$pid-speakers'); and var nodes = __utils__.getElementsByXPath("//a[ 'pid-speakers']"); ????
Here is the code:

 var utils = require('utils');

    var casper = require('casper').create();


    // Load the Speakers.aspx listing page; the steps queued below run against it.
    casper.start("http://www.wmespeakers.com/Speakers.aspx");

    casper.userAgent('Mozilla/5.0 (Windows NT 5.1; rv:16.0) Gecko/20100101 Firefox/16.0');

    // Fire the page's own __doPostBack for the Menu1 control with the
    // 'Alphabetical' argument (presumably switches the listing to the
    // alphabetical view -- confirm against the site).
    casper.thenEvaluate(function()
    {
        __doPostBack('ctl00$ContentPlaceHolder1$Menu1','Alphabetical');
    });

    // Crawl state shared with check(): the collected hrefs and the index of
    // the next one to visit.
    var urls = [], counter;

    // Collect the href of every anchor whose id starts with the speakers
    // DataList prefix.
    casper.then(function()
    {
        var found = this.evaluate(function()
        {
            var anchors = __utils__.getElementsByXPath("//a[starts-with(@id, 'ContentPlaceHolder1_SpeakersAlphabetically1_lstSpeakers_DataList1')]");

            return Array.prototype.map.call(anchors, function(anchor)
            {
                return anchor.getAttribute("href");
            });
        });

        // evaluate() can come back null/undefined when the in-page function
        // fails; keep urls an array so the urls.length reads below and in
        // check() report "0 links" instead of throwing a TypeError.
        urls = found || [];
    });

    // Report how many links were collected and reset the crawl position.
    casper.then(function()
    {
        casper.echo(urls.length + ' links found:');

        counter = 0;
    });

    /**
     * Completion callback that walks the collected speaker links one at a time.
     *
     * Expects `this` to provide run(), echo() and exit() (the casper instance
     * when passed to casper.run). While links remain, it queues the next
     * speaker page through parseLink, advances `counter` and re-runs itself as
     * the completion callback; once every link has been handled it prints
     * "All done." and exits.
     */
    function check()
    {
        var queueDrained = !(counter < urls.length);

        if (queueDrained)
        {
            this.echo("All done.");
            this.exit();
            return;
        }

        // hrefs are appended to the site root to form the speaker page URL.
        var speakerPage = 'http://www.wmespeakers.com/' + urls[counter];

        parseLink.call(this, speakerPage);
        counter++;
        this.run(check);
    }



     var result; // most recently scraped speaker profile; assigned by the step parseLink queues

    /**
     * Queue the scrape of one speaker page on the casper instance bound to `this`.
     *
     * Restarts the instance on `link`, then adds a step that extracts the
     * speaker profile with XPath queries, stores it in `result` and prints it
     * with utils.dump().
     *
     * The extracted object has the keys name, photo_url, twitter, website
     * (strings, '' when the node is missing), description and bio (matching
     * texts joined with ' , '), speeches ({title, description}), videos
     * ({title, url}), reviews ({organization, body}) and books (array of
     * strings).
     *
     * @param {string} link URL of the speaker detail page to load.
     */
    function parseLink(link)
    {
        this.start(link);

        this.then(function()
        {
            result = this.evaluate(function()
            {
                // evaluate() executes this function inside the loaded page (see
                // the CasperJS docs), so every helper and table it needs is
                // declared here rather than in the outer script.
                var speechTitle = "//span[starts-with(@id, 'ContentPlaceHolder1_ctrlSpeeches_lstSpeeches_lblTitle_')]";
                var videoLink = "//a[starts-with(text(), ' VIDEO:')]";
                var reviewOrganization = "//span[starts-with(@id, 'WmeTestimonials1_lstTestimonials_Label2_')]";

                // Fields read from the first matching node only.
                var singleFields = [
                    { key: 'name', xpath: "//span[@id='ContentPlaceHolder1_lblSpeakerDisplayName']" },
                    { key: 'photo_url', xpath: "//img[@id='ContentPlaceHolder1_speakerImage']/@src" },
                    { key: 'twitter', xpath: "//a[text()=' Twitter']/@href" },
                    { key: 'website', xpath: "//a[contains(text(), 'Official Website')]/@href" }
                ];

                // Fields whose matching texts are collapsed into one string.
                var joinedFields = [
                    { key: 'description', xpath: "//span[@id='ContentPlaceHolder1_lblTitle'] | //span[@id='ContentPlaceHolder1_lblPosition']" },
                    { key: 'bio', xpath: "//span[@id='ContentPlaceHolder1_lblBlurb'] | //span[@id='lblLongBlurb']" }
                ];

                // Fields built by pairing two parallel node lists index by index.
                var pairedFields = [
                    {
                        key: 'speeches',
                        labels: ['title', 'description'],
                        xpaths: [speechTitle, speechTitle + "/../following-sibling::span[1]"]
                    },
                    {
                        key: 'videos',
                        labels: ['title', 'url'],
                        xpaths: [videoLink, videoLink + "/@href"]
                    },
                    {
                        key: 'reviews',
                        labels: ['organization', 'body'],
                        xpaths: [reviewOrganization, reviewOrganization + "/../preceding-sibling::text()[string-length() > 10]"]
                    }
                ];

                var booksXPath = "//div[@class='notableWorkWrapper']/a[text() = 'Buy Now']/@href";

                // Fields listed in the original note as not extracted: topics, travels_from, fee

                // Text content of every node matching `xpath` ([] when none match).
                function textsAt(xpath)
                {
                    return Array.prototype.map.call(__utils__.getElementsByXPath(xpath), function(node)
                    {
                        return node.textContent;
                    });
                }

                // One object per entry of the second list, combined with the
                // entry at the same index of the first list.
                function pairUp(field)
                {
                    var firsts = textsAt(field.xpaths[0]);
                    var seconds = textsAt(field.xpaths[1]);
                    var pairs = [];

                    for (var i = 0; i < seconds.length; i++)
                    {
                        var pair = {};
                        pair[field.labels[0]] = firsts[i];
                        pair[field.labels[1]] = seconds[i];
                        pairs.push(pair);
                    }

                    return pairs;
                }

                var profile = {};

                singleFields.forEach(function(field)
                {
                    var node = __utils__.getElementByXPath(field.xpath);

                    // Treat both undefined and null as "no such node" so a
                    // missing element never throws on .textContent.
                    profile[field.key] = (node === undefined || node === null) ? '' : node.textContent;
                });

                joinedFields.forEach(function(field)
                {
                    profile[field.key] = textsAt(field.xpath).join(' , ');
                });

                pairedFields.forEach(function(field)
                {
                    profile[field.key] = pairUp(field);
                });

                profile['books'] = textsAt(booksXPath);

                return profile;
            });

            utils.dump(result);
        });
    }

    // Start executing the queued steps; check is passed as the completion
    // callback, which visits the collected links one by one and exits when done.
    casper.run(check);
Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts learning and sharing knowledge.