<?php
// Crawler bootstrap: allow a long run and open the database connection that
// every mysql_* call further down uses implicitly.
//
// NOTE(review): the mysql_* extension only exists on PHP 5.x. Moving to
// mysqli/PDO has to be done for the whole file in one change.
set_time_limit(1800);

// Stop right away when the database is unavailable; without a connection
// every later query fails and the crawl would only download pages for nothing.
if (!mysql_connect('localhost', 'root', '')) {
    die('Could not connect to MySQL: ' . mysql_error());
}
if (!mysql_select_db('botsearch')) {
    die('Could not select database "botsearch": ' . mysql_error());
}
/**
 * Cut a URL down to its leading "scheme://host" part (or just the host when
 * the string has no http/https scheme).
 *
 * @param string $ddomain URL or host-with-path string.
 * @return string The input with the rest of the line after the host removed;
 *                a string the pattern does not match is returned unchanged.
 */
function domain($ddomain)
{
    // Group 1 is the optional scheme plus every character up to the next "/".
    $pattern = '/^((http(s)?:\/\/)?([^\/]+))(.*)/';
    $schemeAndHostOnly = '$1';

    return preg_replace($pattern, $schemeAndHostOnly, $ddomain);
}
/**
 * Probe a URL with a header-only request and report whether it answered
 * with an acceptable HTTP status.
 *
 * The decision is made on the numeric status code reported by curl rather
 * than on the reason phrase, so responses without a phrase (HTTP/2) or with
 * a different wording ("302 Found") are recognised as well.
 *
 * @param string $durl URL to probe.
 * @return bool True for status 200, 301 or 302; false for any other status,
 *              for a failed transfer, or when no curl handle could be created.
 */
function url_exists($durl)
{
    $handle = curl_init($durl);
    if (false === $handle) {
        return false;
    }

    curl_setopt($handle, CURLOPT_HEADER, true);
    curl_setopt($handle, CURLOPT_FAILONERROR, true); // HTTP status >= 400 makes curl_exec() return false
    curl_setopt(
        $handle,
        CURLOPT_USERAGENT,
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.15) Gecko/20080623 Firefox/2.0.0.15'
    );
    curl_setopt($handle, CURLOPT_NOBODY, true);         // headers only, no body download
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, true); // keep the response out of the output

    $response = curl_exec($handle);
    $status = 0;
    if (false !== $response) {
        $status = (int) curl_getinfo($handle, CURLINFO_HTTP_CODE);
    }
    curl_close($handle);

    // Redirects are not followed; a 301/302 answer counts as "exists".
    return 200 === $status || 301 === $status || 302 === $status;
}
// Build the noun lookup table: every `word` value in `noun_list` becomes a key
// of $in_wordlist, which generate() imports as a global and probes with isset().
$in_valid_wordlist = array(); // never read in this file; kept in case other code relies on the name
$in_wordlist = array();       // must exist even when `noun_list` is empty or the query fails
$r = mysql_query('SELECT * FROM `noun_list`');
if ($r !== false) {
    while ($row = mysql_fetch_assoc($r)) {
        $in_wordlist[$row['word']] = true;
    }
}
/**
 * Index one forum thread page.
 *
 * The page HTML is taken from the global $f_data (which is rewritten in place
 * while the markup is normalised). Every post is split into sentences; each
 * sentence longer than 16 bytes that is not yet stored for this topic is
 * scored by the number of its words found in the global $in_wordlist and
 * inserted into `faq_data`. Code boxes are stored in `faq_data` with
 * `match` = -1. At the end a `faq_datareference` row holding the ten most
 * frequent matched words is written.
 *
 * Nothing is indexed when `faq_datareference` already has a row for $topicid.
 * A progress line is echoed in every case.
 *
 * @param string $url     Page URL; only used in the echoed progress line.
 * @param string $topicid Thread id, stored as `id` in both tables.
 * @return void
 */
function generate($url, $topicid)
{
    global $f_data;      // HTML of the page to index, set by the caller
    global $in_wordlist; // word => true lookup table of known nouns

    $topicSql = mysql_real_escape_string($topicid);

    $r = mysql_query('SELECT * FROM `faq_datareference` WHERE `id`="' . $topicSql . '"');
    if (mysql_num_rows($r) == 0) {
        // ---- Normalise the markup ------------------------------------------
        $f_data = html_entity_decode($f_data);
        // Break up marker pairs that are literally present in the page text.
        $f_data = str_replace('-|code|--|code|-', '-|code|- -|code|-', $f_data);
        // Remove quoted text. The pattern has no "s" modifier, so only
        // blockquotes whose content contains no newline are removed.
        $f_data = preg_replace('#<blockquote[^>]+>(.*)</blockquote>#U', '', $f_data);

        // Collect the code box contents from a copy in which <br> became "\r".
        $tmp = str_replace(array('<br>', '<br />'), "\r", $f_data);
        preg_match_all('#<div class="codecontent">(.*)</div>#iUm', $tmp, $code);
        unset($tmp);
        $codeid = 0; // index of the next unused entry in $code[1]

        $f_data = str_replace(array('<br/>', '<br />'), '<br>', $f_data);
        $f_data = str_replace('<br><br>', '<br>', $f_data);
        $f_data = str_replace('<br><br>', '<br>', $f_data); // second pass: four <br> in a row
        $f_data = preg_replace('#<pre class="code">(.*)</pre>#iU', '-|code|-', $f_data);
        preg_match_all('#<div id="post_message_[0-9]+">(.*)</div> </div> </div>#isU', $f_data, $a_data);

        // ---- Scoring state --------------------------------------------------
        // All of these are initialised up front: several of them used to be
        // read before their first assignment (notices, arsort() on null).
        $val = 0;                      // score of the sentence being processed
        $val2 = 0;                     // part of $val that came from a preceding code marker
        $totalval = 0;                 // matched words over all stored sentences
        $abortval = 0;                 // sentences that were already stored for this topic
        $totalcodeval = 0;             // code markers seen
        $did_wordcheck = false;        // whether the LAST long sentence handled was new
        $nounverb_result = array();    // word => number of occurrences
        $var = array(0 => '', 1 => 0); // [0] last handled string, [1] its match value

        foreach ($a_data[1] as $adata) {
            // Four already-known sentences: stop looking at further posts.
            if ($abortval >= 4) {
                break;
            }

            $adata = strtolower(str_replace('...', '.', strip_tags($adata, '<br>')));
            $d_data = preg_split('/([\?.!]|<br>)/', $adata);

            foreach ($d_data as $sentence) {
                if (strlen($sentence) > 16 && $abortval < 4) {
                    $sentenceSql = mysql_real_escape_string($sentence);
                    $re = mysql_query('SELECT * FROM `faq_data` WHERE `string`="' . $sentenceSql . '" AND `id`="' . $topicSql . '"');
                    // NOTE(review): reset for every long sentence, so the summary
                    // row below depends only on the last one - confirm intended.
                    $did_wordcheck = false;
                    if (mysql_num_rows($re) == 0) {
                        $did_wordcheck = true;
                        foreach (explode(' ', $sentence) as $word) {
                            if (isset($in_wordlist[$word])) {
                                $val += 1;
                                if (!isset($nounverb_result[$word])) {
                                    $nounverb_result[$word] = 1;
                                } else {
                                    $nounverb_result[$word] += 1;
                                }
                            }
                        }
                        $var[0] = $sentence;
                        $var[1] = $val;
                        // Only the words count towards the total, not the code bonus.
                        $totalval += ($val - $val2);
                        mysql_query('INSERT INTO `faq_data` SET `match`="' . $var[1] . '", `string`="' . $sentenceSql . '", `id`="' . $topicSql . '"');
                    } else {
                        $abortval += 1;
                    }
                    $val = 0;
                    $val2 = 0;
                } else if ($sentence == '-|code|--|code|-') {
                    // A missing code box entry is treated as an empty string.
                    $codeText = isset($code[1][$codeid]) ? $code[1][$codeid] : '';
                    $codeSql = mysql_real_escape_string($codeText);
                    $re = mysql_query('SELECT * FROM `faq_data` WHERE `match`="-1" AND `string`="' . $codeSql . '" AND `id`="' . $topicSql . '"');
                    if ($abortval == 0 || mysql_num_rows($re) == 0) {
                        // NOTE(review): $var[0] is overwritten with the marker itself,
                        // so the UPDATE targets rows whose `string` is the marker -
                        // verify whether the previous sentence was meant instead.
                        $var[0] = $sentence;
                        $var[1] += 3;
                        mysql_query('UPDATE `faq_data` SET `match`="' . $var[1] . '" WHERE `string`="' . mysql_real_escape_string($var[0]) . '" AND `id`="' . $topicSql . '"');
                        mysql_query('INSERT INTO `faq_data` SET `match`="-1", `string`="' . $codeSql . '", `id`="' . $topicSql . '"');
                        $codeid += 1;
                        $val += 2; // bonus carried into the next sentence's score
                    }
                    $val2 = $val;
                    $totalcodeval += 1;
                }
            }
        }

        // ---- Summary row ----------------------------------------------------
        if ($did_wordcheck == true) {
            arsort($nounverb_result); // most frequent words first
            $sql = 'INSERT INTO `faq_datareference` SET `id`="' . $topicSql . '"';
            $i = 1;
            foreach ($nounverb_result as $key => $count) {
                if ($i > 10) {
                    break;
                }
                $sql .= ', `word' . $i . '`="' . mysql_real_escape_string($key) . '", `word' . $i . 'num`=' . $count;
                $i++;
            }
            // Pad the unused word slots up to word10.
            for (; $i < 11; $i++) {
                $sql .= ', `word' . $i . '`=" ", `word' . $i . 'num`=-1';
            }
            $sql .= ', `num_nounsverbs`=' . $totalval . ', `num_codeboxes`=' . $totalcodeval;
            mysql_query($sql);
        }
    }

    // The URL originates from a remote feed, so escape it before printing it as HTML.
    echo '<b>Generated Url: </b>' . htmlspecialchars($url, ENT_QUOTES) . '<br>';
    // Drops only this function's reference; the caller's global $f_data stays set.
    unset($f_data);
}
// Crawl driver: read the forum RSS feed, and for every thread that has no
// `faq_datareference` row yet download its page(s) into the global $f_data and
// hand them to generate().
$rss = file_get_contents('http://www.daniweb.com/forums/rss143.xml');
// Group 1 is the full thread URL, group 2 the value of its "t" parameter.
preg_match_all('#<link><!\[CDATA\[(.*[&?]t=([^\&]++).*)\]\]></link>#U', $rss, $topic_url);
for ($id = 0; isset($topic_url[2][$id]); $id++) {
    $thread_url = $topic_url[1][$id];
    $thread_id = $topic_url[2][$id];

    $rr = mysql_query('SELECT `id` FROM `faq_datareference` WHERE `id`="' . mysql_real_escape_string($thread_id) . '"');
    if (url_exists($thread_url) && mysql_num_rows($rr) == 0) {
        $f_data = file_get_contents($thread_url);
        // Read the page count before generate() rewrites $f_data. "[0-9]+" so
        // that threads with ten or more pages are recognised too.
        preg_match('#<td class="alt1" nowrap="nowrap"><span>Page 1 of ([0-9]+)</span></td>#i', $f_data, $pages_num);
        generate($thread_url, $thread_id);
        flush();

        if (isset($pages_num[1]) && $pages_num[1] > 1) {
            $last_page = (int) $pages_num[1];
            // NOTE(review): generate() skips a topic that already has a
            // `faq_datareference` row, so these pages are only indexed when
            // page 1 did not write one - confirm that this is intended.
            for ($i = 2; $i <= $last_page; $i++) {
                // Download the page that is actually being indexed, not page 1 again.
                $page_url = 'http://www.daniweb.com/forums/showthread.php?t=' . $thread_id . '&page=' . $i;
                if (url_exists($page_url)) {
                    $f_data = file_get_contents($page_url);
                    generate($page_url, $thread_id);
                    flush();
                }
            }
        }
    }
}
?>