0

Hello,

I am using Simpe_html_dom to parse an html table; however, I am having problem getting the content of a <td> that have secondary <table> nested in them. I would like to grab the content of all parent <tr>, each of which contains 3 <td> cells. The middle cell is the one containing a a nested table.

Here is a sample layout of the table:

<table id="tblClubs" border="1" cellpadding="2" width="858">


<tr> <!--first tr--BEGINS->

<td class="clubRow">
<a href="http://www.BITTA.US">Big Island Table Tennis Association</a><br><font size="-1">(11/30/2014)</font>
</td>

        <td align="center" valign="middle" width="500">

                        <table width="98%" border="1" cellpadding="2">
                        <tr>
                        <td width="60%" class="clubRowEven">Boys and Girls Club<br>100 Kamakahonu St.<br>Hilo, HI  96720</td>

                        <td width="40%" class="clubRowEven">Tuesday - 2:30 to 5:30PM, Saturday - 4:00 to 7:00PM, Sunday - 1:00 to 4:00PM</td>

                        </tr>
                        <tr>
                        <td width="60%" class="clubRowOdd">Waimea Community Center<br>Waimea, HI  <br><br><u>Directions</u>:<br>Contact - John Romoa 808-xxx-8443</td>

                        <td width="40%" class="clubRowOdd">Wednesday - 4:30 to 8:00PM</td>

                        </tr>
                        <tr>
                        <td width="60%" class="clubRowEven">Yano Hall (Kealakekua, So. Kona)<br>South Kona, HI  </td>

                        <td width="40%" class="clubRowEven">Â </td>

                        </tr>
                        </table>

</td>

<td class="clubRow">
                        <a href="mailto:xxx.xxxx@hpmhawaii.com">Russel xxx</a><br>808-959-4688
</td>

 </tr> <!--FIRST tr--ENDS->


<tr> <!--second tr--BEGINS->

    <td bgcolor="whitesmoke" class="clubRow">
            <a href="http://sites.google.com/site/hitabletennis/">Hawaii Table Tennis Club</a><br><font size="-1">(2/28/2014)</font>
    </td>

    <td align="center" valign="middle" width="500" bgcolor="whitesmoke">

                <table width="98%" border="1" cellpadding="2">

                            <tr>
                                    <td width="60%" class="clubRowEven">Palama Settlement - Gym<br>810 North Vineyard Blvd.<br>Honolulu, HI  96817</td>

                                    <td width="40%" class="clubRowEven">Sunday - 5:30 - 9:00PM</td>

                            </tr>
                </table>
    </td>

    <td bgcolor="whitesmoke" class="clubRow">
        <a href="mailto:hawaiitabletennisclub@yahoo.com">Rick xxx</a><br>808-222-0420
    </td>

</tr> <!--second tr--ENDS->
</table>

In the sample table above, I would like to grab the content of each <td> for both the parent <tr> in the table and build an array. The content of Each parent <td> of each parent <tr> would be a key =>value. All content inside the nested table of the middle parent <td> needs to be a sub array of that <td>.

Essentially, the array of the table above should look like this:

  Array
(
    [0] => Array
        (
        [cname] => 'Big Island Table Tennis Association
                (2/28/2014)'

        [locale] => 

     array ( 
             [add_1] => 'Boys and Girls Club
            100 Kamakahonu St.
            Hilo, HI  96720' 

             [days_time] => 'Tuesday - 2:30 to 5:30PM, Saturday - 4:00 to 7:00PM, Sunday - 1:00 to 4:00PM' 
             [add_2] => 'Waimea Community Center
            Waimea, HI 
            Directions:<br>Contact - John Romoa 808-345-8443' 
             [days_time_2] => 'Wednesday - 4:30 to 8:00PM' 
             [add_3] => 'Yano Hall (Kealakekua, So. Kona)<br>South Kona, HI' 
             [days_time_3] => 'Â' 

            )


    [contact] => 'Russel xxx 808-959-4688'


)


[1] => Array
    (
        [cName] => 'Hawaii Table Tennis Club
                (2/28/2014)'

        [locale] =>  array ( 
             [address] => 'Palama Settlement - Gym<br>810 North Vineyard Blvd.<br>Honolulu, HI  96817' 

             [days_time] => 'Sunday - 5:30 - 9:00PM' 

            )
        [contact] => 'Rick xxxx
                808-222-0420'
    )


)

I really hope I am making some sense with this request. Any thoughts is really appreciated!
Mossa

Edited by mbarandao

3
Contributors
3
Replies
27
Views
3 Years
Discussion Span
Last Post by diafol
0

Here is what have attempted so far:

include 'simple_html_dom.php';

    $ggleSrchVar=$_GET['state'];

$curl = curl_init();
curl_setopt ($curl, CURLOPT_URL, "http://xxxxx.com/jdjsj.aspx?State=$ggleSrchVar#listTop");
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; MSIE 5.01; Windows NT 5.0)");
curl_setopt($curl, CURLOPT_AUTOREFERER, 1);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($curl, CURLOPT_REFERER, "http://xxxxx.com/jdjsj.aspx?State=$ggleSrchVar#listTop");

$result = curl_exec ($curl);
curl_close ($curl);
//write contents of $result to file

$File = "table.txt";
$fh = fopen($File, 'w') or die("can't open file");
fwrite($fh, $result);
fclose($fh);
//turn file into dom object 
$page = file_get_html("fileClubs.php");

$header = $page->find("#tblClubs");
$html = file_get_html('table.txt');

//$html = file_get_html($url);

//Get all data inside the <tr> of <table class="results">
foreach($html->find('table[id="tblClubs"] tr') as $tr) {
    //get all <td> 
    foreach($tr->find('td[class="clubRow"]',1) as $t) {
        //get the inner HTML
        $data = $t->outertext;//"'$element'"

            $match_strip_raw=strip_tags($data, '<tr>');//strip html from string
            $match_strip=trim(preg_replace('/\s+/',' ', $match_strip_raw));


$pattern = '([<td[^>]*>(.*?)</td>])';
$pattern2='/<[^>]*[^\/]>/i';
$pattern3='/$\n?/m';
$pattern4='/\s?<tr>\s?/';
$match_array=preg_split($pattern4, $match_strip);//turn each newline in string into array of characters

    $match_array_clean_phase1 = array_map('trim', $match_array);//clean phase one, remove empty array



    $clubFount_array=array_filter($match_array_clean_phase1);//clean phase 2

    }
 /*to change the keys from int to name, I am using code below, not working properly because of the nested table  
 */
    $array_two = array();
    $arrayIndex=0;
    $loopCount=0;
    $second_index="";
    foreach ($clubFount_array as $myValue) {
    $loopCount = ($loopCount<5) ? $loopCount : 0;
    switch ($loopCount) {
    case '0':
    $second_index="cName";
    break;
    case '1':
    $second_index="local";
    break;
    case '2':
    $second_index="days_times";
    break;
    case '3':
    $second_index="contact";
    break;

case '4':
$second_index="marker";
break;

}
$array_two[$arrayIndex][$second_index]=$myValue;
 if($loopCount == 1)
    {
    //get zip
    $pattern = '#([A-Z]{0})\s+(\d{5})#s';
        if(preg_match($pattern, $myValue, $match) == 1)
        {
            $array_two[$arrayIndex]['zip'] = trim($match[0]);

        }
        else
        {
            $array_two[$arrayIndex]['zip'] = 0;
        }
        //get state
        if(preg_match("/[A-Z]{2}(?=[\s]{1,}[0-9]{5})/", $myValue, $match) == 1)
{
    $array_two[$arrayIndex]['state'] = "'$match[0]'";
}
else
{
    $array_two[$arrayIndex]['state'] = '0';
}
    }
    //$arrayIndex = ($loopCount>=4) ? $arrayIndex+1 : $arrayIndex;
    //$loopCount++;

$arrayIndex = ($loopCount>=4) ? $arrayIndex+1 : $arrayIndex;
$loopCount++;
}

Edited by mbarandao

This question has already been answered. Start a new discussion instead.
Have something to contribute to this discussion? Please be thoughtful, detailed and courteous, and be sure to adhere to our posting rules.