0

Ok we are working with a script and we get the following error

Warning: Invalid argument supplied for foreach() in /xxx/xxx/xxx/spider.php on line 149

Im not sure what the problem is and I havent had any luck fixing it, here is the line in question

foreach ($array as $value) {

We are attaching a copy of the file this is in incase someone needs to see it.

Any help on this is greatly appreciated.

Edited by valonesal: n/a

Attachments
<?php

/** define path */
if (!defined('SSP_DIR')) {
    define('SSP_DIR', str_replace('\\','/', dirname(__FILE__)));
}

/** session */
session_start();
session_write_close();

/** send header information */
header ("Expires: Mon, 26 Jul 1999 05:00:00 GMT");
header ("Last-Modified: ".gmdate("D, d M Y H:i:s")." GMT");
header ("Cache-Control: no-cache, must-revalidate");
header ("Pragma: no-cache");

@set_time_limit(0);
@ignore_user_abort(true);
ob_implicit_flush();

/** start */
$mtime  = microtime();
$mtime  = explode(" ", $mtime);
$tstart = $mtime[1] + $mtime[0];

/** check another started spider */
$process = false;
if (file_exists(SSP_DIR.'/data/pid.php')) {
    if (is_readable(SSP_DIR.'/data/pid.php')) {
        @include(SSP_DIR.'/data/pid.php');
        if (!isset($pid)) {
            @unlink(SSP_DIR.'/data/pid.php');
            $process = false;
        } else {
            if (time() > $pid + 60*5) {
                @unlink(SSP_DIR.'/data/pid.php');
                $process = false;
            } else {
                $process = true;
            }
        }
    } else {
        $process = true;
    }
    clearstatcache();
}

/** if don't another start spider */
if (!$process) {
    umask(0777);
    $fp = @fopen(SSP_DIR.'/data/pid.php', "w+");
    if ($fp) {
        fwrite($fp, '<'.'?php $pid = '.time().'; ?'.'>');
        fclose($fp);
        chmod(SSP_DIR.'/data/pid.php', 0777);
    }
    clearstatcache();
}

/** define vars */
$debug          = isset($_GET['debug']) ? true : false;
$total_size     = 0;
$total_files    = 0;

if ($debug) {
    error_reporting(E_ALL);
    ini_set("display_errors", 1);
}

/** check start from scheduled */
if (!defined('SSP_SCHEDULE')) {
    /** include files */
    include_once (SSP_DIR.'/data/db.php');
    include_once (SSP_DIR.'/data/config.php');
    include_once (SSP_DIR.'/data/system.php');
    include_once (SSP_DIR.'/data/functions.php');
    include_once (SSP_DIR.'/data/i18n.php');
    include_once (SSP_DIR.'/data/convert.php');
    
    /** connect to DB */
    $db_connect = mysql_connect($db['host'],$db['login'],$db['passwd']);
    $db_select  = mysql_select_db($db['name'],$db_connect);
}

/** define tbl names */
$tbl_name_db    = $system['prefix'].'_search_db';
$tbl_name_tmp   = $system['prefix'].'_search_tmp';

/** define spider urls and params */
$stop_dirs  = pre_preg(unserialize(stripslashes($config['stop_dirs'])), "", "\/");
$stop_files = pre_preg(unserialize(stripslashes($config['stop_files'])));
$allow_ext  = split(",", $config['allow_ext']);
/** indexing **/
if ($config['enable_pdf']) {
    $allow_ext[] = 'pdf';
} if ($config['enable_rtf']) {
    $allow_ext[] = 'rtf';
} if ($config['enable_doc']) {
    $allow_ext[] = 'doc';
} if ($config['enable_xls']) {
    $allow_ext[] = 'xls';
} 
$allow_ext  = pre_preg($allow_ext, "\.", "$");

if (!isset($groups_spider)) {
    if (!isset($_GET['groups'])) {
        $groups_spider = 0;
    } else {
        $groups_spider = $_GET['groups'];
    }
}

$groups = $groups_urls = $start_urls = $allow_urls = array();
$res = mysql_query("SELECT * FROM `{$system['prefix']}_groups`");
while ($row = mysql_fetch_assoc($res)) {
    if ($groups_spider == 0 || in_array($row['id'], split(',', $groups_spider))) {
        $start_urls = array_merge($start_urls, unserialize(stripslashes($row['start_urls'])));
        $allow_urls = array_merge($allow_urls, unserialize(stripslashes($row['allow_urls'])));
    }
    
    $groups[$row['id']] = $row;
    foreach (unserialize(stripslashes($row['allow_urls'])) as $url) {
        $groups_urls[] = array('id'=>$row['id'], 'url'=>$url);
    }
}

$inoutdb    = array();

/**
 * regular transformation
 * @param  array  $array  the array
 * @param  string $start  the start string
 * @param  string $finish the finish string
 * @return string
 */
function pre_preg ($array, $start = "", $finish = "")
{
    $ret = $start."(";
    $count = count($array); $i = 0;
    foreach ($array as $value) {
        $i ++;
        $value = quotemeta($value);
        $value = str_replace('\*', '.*', $value);
        if ($count > $i) $ret .= $value."|";
        else $ret .= $value;
    }
    return $ret.")".$finish;
}

/**
 * @return void
 * @desc start spidering
*/
function start_spidering ()
{
    global $start_urls, $allow_urls, $debug, $i18n;
    
    $to_visit = $visited = array();
    foreach ($start_urls as $url) {
    	$to_visit[$url] = 1;
    }
    $i = 0;
    while (sizeof($to_visit) > 0) {
        list($url,) = each($to_visit);
        
        if ($debug) {
            $fp = fopen(trim($url), "r");
        } else {
            $fp = @fopen(trim($url), "r");
        }
        $visited[$url] = 1;
        if (!$fp) {
            if ($debug) {
                print '<b>'.$i18n['spider']['error_open_url'].': '.$url.'</b><br>'."\n";
            }
            unset($to_visit[$url]);
        } else {
            $location_url   = false;
            $last_modified  = date("Y-m-d");
            $stream_get_meta_data = @stream_get_meta_data($fp);
            if (isset($stream_get_meta_data['wrapper_data'])) {
                foreach ($stream_get_meta_data['wrapper_data'] as $header) {
                    if (strtolower(substr($header, 0, 10)) == "location: ") {
                        $location_url = substr($header, 10);
                    } elseif (strtolower(substr($header, 0, 15)) == "last-modified: ") {
                        $last_modified = strtotime(substr($header, 15));
                        $last_modified = date("Y-m-d", $last_modified);
                    }
                }
            }
            /** if page location */
            if ($location_url) {
                $location_url           = get_absolute_url($url, $location_url);
                $location_url           = preg_replace("/#.*/", "", $location_url);
                $location_url_stripped  = preg_replace("/\?.*/", "", $location_url);
                if (check_url($location_url_stripped)) {
                    if (!isset($visited[$location_url])) {
                        $to_visit[$location_url] = 1;
                    }
                }
                unset($to_visit[$url]);
            } else {
                $data = "";
                while (!feof ($fp)) {
                    $data .= fgets($fp, 8192);
                }
                $base = $url;
                
                if (preg_match_all("/<base\\s+href=([\"']?)([^\\s\"'>]+)\\1/is", $data, $matches, PREG_SET_ORDER)) {
                    $base = $matches[0][2];
                }
                $links = get_links($data);
                foreach ($links as $k => $v) {
                    $link           = get_absolute_url($base, $k);
                    $link           = preg_replace("/#.*/", "", $link);
                    $link_stripped  = preg_replace("/\?.*/", "", $link);
                    
                    if (check_url($link_stripped)) {
                        if (!isset($visited[$link])) {
                            $to_visit[$link] = 1;
                        }
                    }
                }
                
                index_file($data, $url, $last_modified);
                
                unset($to_visit[$url]);
            }
        }
    } /** while */
}

/**
 * Get links
 * @param  string $text the html body
 * @return array
**/
function get_links($text)
{
    $links = array();
    $count = preg_match_all("'<\s*a\s.*?href\s*=\s*([\"\'])?(?(1) (.*?)\\1 | ([^\s\>]+))'isx", $text, $matches, PREG_SET_ORDER);
    for($i=0; $i < count($matches); $i++) {
        $link = !empty($matches[$i][2]) ? $matches[$i][2] : $matches[$i][3];
        $link = preg_replace("/(\r\n|\n|\r)+/", "", $link);
        if (!empty($link)) {
            $links[$link] = 1;
        }
    }

    $count = preg_match_all("/<frame[^>]+src=([\"']?)([^\\s\"'>]+)\\1/is", $text, $matches, PREG_SET_ORDER);
    for($i=0; $i < count($matches); $i++) {
        $links[$matches[$i][2]] = 1;
    }

    $count = preg_match_all("/<area[^>]+href=([\"']?)([^\\s\"'>]+)\\1/is", $text, $matches, PREG_SET_ORDER);
    for($i=0; $i < count($matches); $i++) {
        $links[$matches[$i][2]] = 1;
    }

    return $links;
}

/**
 * Get absolute url
 * @param  string $base the base url
 * @param  string $url the url
 * @return string
**/
function get_absolute_url($base,$url)
{
    /** delete session */
    $session_name = session_name();
    $url = preg_replace("/".$session_name."\=[a-f0-9]{32}/si", $session_name."=", $url);
    
    if (!$url) {
        return "";
    }
    
    $url_arr = @parse_url($url);
    if (isset($url_arr["scheme"])) {
        return $url;
    }

    if ($base != "http://" && $base != "https://") {
        $base_arr = parse_url($base);
    } else {
        return "";
    }
    
    $base_base = strtolower($base_arr["scheme"])."://";
    if (isset($base_arr["user"])) {
        $base_base .= $base_arr["user"].":".$base_arr["pass"]."@";
    }
    
    $base_base .= strtolower($base_arr["host"]);
    
    if (isset($base_arr["port"])) {
        $base_base .= ":".$base_arr["port"];
    }
    
    $base_path = @$base_arr['path'];
    
    if ($base_path == "") {
        $base_path = "/";
    }
    
    $base_path = preg_replace("/(.*\/).*/","\\1",$base_path);

    if (isset($url_arr['path'][0]) && $url_arr['path'][0] == chr(47)) {
        return $base_base.$url;
    }

    if (preg_match("'^\./'",$url)) {
        $url = preg_replace("'^\./'","",$url);
        return $base_base.$base_path.$url;
    }

    while (preg_match("'^\.\./'", $url)) {
        $url = preg_replace("'^\.\./'", "", $url);
        $base_path = preg_replace("/(.*\/).*\//","\\1",$base_path);
    }

    return $base_base.$base_path.$url;
}

/**
 * @desc   Check url
 * @param  string $url the url
 * @return string
**/
function check_url($url)
{
    global $debug, $allow_urls, $stop_dirs, $stop_files, $allow_ext, $i18n
2
Contributors
3
Replies
4
Views
6 Years
Discussion Span
Last Post by nonshatter
0

Don't quote me on this, but I think it's because the array you're passing the foreach loop isn't initialised. I find the following to be interesting in the man page:

foreach works only on arrays, and will issue an error when you try to use it on a variable with a different data type or an uninitialized variable.
0

I really have no idea what the issue is but I have decided to ditch the script and write my own, but thanks for looking and the help.

This question has already been answered. Start a new discussion instead.
Have something to contribute to this discussion? Please be thoughtful, detailed and courteous, and be sure to adhere to our posting rules.