// Reference: https://www.php.net/eregi_replace (the ereg* functions are deprecated since PHP 5.3 and removed in PHP 7.0)
/**
 * Wraps every case-insensitive occurrence of the given keywords in a red <b> tag.
 *
 * All keywords are matched in a single pass over the original text, so the
 * markup inserted for one keyword is never searched for the next one. When two
 * keywords match at the same position, the one listed first wins.
 *
 * @param string $x   Text to search. It is returned otherwise unchanged (no escaping is applied).
 * @param string $var Keywords separated by single spaces; empty words are ignored.
 * @return string $x with each match wrapped in <b style='color:red'>…</b>.
 */
function highlight($x,$var) {
    $x = (string) $x;

    // Drop empty words (missing keyword, doubled or trailing spaces): an empty
    // word matches at every offset without consuming any input.
    $alternatives = array();
    foreach (explode(" ", (string) $var) as $word) {
        if ($word !== '') {
            // Keywords are literal text, not regular expressions.
            $alternatives[] = preg_quote($word, '/');
        }
    }

    if (!$alternatives) {
        return $x;
    }

    $marked = preg_replace(
        '/' . implode('|', $alternatives) . '/i',
        '<b style=\'color:red\'>$0</b>',
        $x
    );

    // preg_replace() yields null on a PCRE error; fall back to the plain text.
    return $marked === null ? $x : $marked;
}

/**
 * Downloads a URL with cURL and returns the response body.
 *
 * Redirects are followed, HTTP error pages are returned like any other body,
 * and only the http and https schemes are accepted.
 *
 * @param string $url Address to fetch.
 * @return string Response body; an empty string when nothing could be retrieved.
 */
function get_content($url)
{
    $ch = curl_init();
    if ($ch === false) {
        return '';
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_FAILONERROR, 0);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 6.0)');
    curl_setopt($ch, CURLOPT_TIMEOUT, 5); // the whole transfer is aborted after 5 seconds

    // The URL is supplied by the caller: never let it (or a redirect) reach
    // file://, gopher://, dict:// and the other schemes libcurl understands.
    curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);

    if (stripos((string) $url, 'https://') === 0) {
        // NOTE(review): certificate and host verification are switched off, so
        // the fetched content can be tampered with in transit. Kept as-is so
        // hosts with self-signed certificates still work; confirm this is wanted.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
    }

    // Without CURLOPT_RETURNTRANSFER curl_exec() prints the body as it arrives;
    // capturing that output keeps whatever was received before an error or timeout.
    ob_start();
    curl_exec($ch);
    $string = ob_get_clean();

    // From PHP 8.0 handles are freed automatically and curl_close() does nothing.
    if (PHP_VERSION_ID < 80000) {
        curl_close($ch);
    }

    return $string === false ? '' : $string;
}

/**
 * Makes a text fragment safe for HTML output and highlights the keywords in it.
 *
 * NOTE(review): keywords are matched against the escaped text, so a keyword
 * such as "amp" can also hit inside an entity like &amp;.
 *
 * @param string $text       Text taken from the request or from the fetched page.
 * @param string $keyword    Space-separated keywords; an empty string disables highlighting.
 * @param bool   $fromMarkup True when $text was cut out of HTML source, so entities
 *                           already present in it are not encoded a second time.
 * @return string HTML-safe fragment.
 */
function _processPageEscape($text, $keyword, $fromMarkup = true) {
    $flags = ENT_QUOTES | ENT_SUBSTITUTE;
    $safe = htmlspecialchars((string) $text, $flags, 'UTF-8', !$fromMarkup);

    if ($keyword === '') {
        return $safe;
    }

    // Escape the keyword the same way so "a&b" still matches the escaped text.
    return highlight($safe, htmlspecialchars($keyword, $flags, 'UTF-8'));
}

/**
 * Reads one quoted attribute value out of the attribute part of a tag.
 *
 * @param string $attribute     Attribute name, e.g. "name" or "content".
 * @param string $tagAttributes Everything between "<meta" and ">".
 * @return string Value with leading whitespace removed; '' when the attribute is absent.
 */
function _processPageMetaAttr($attribute, $tagAttributes) {
    // \1 forces the closing quote to be the same character as the opening one,
    // so content="It's here" is not cut off at the apostrophe.
    $pattern = '/(?<![\w-])' . preg_quote($attribute, '/') . '\s*=\s*(["\'])(.*?)\1/is';

    if (preg_match($pattern, $tagAttributes, $match)) {
        return ltrim($match[2]);
    }

    return '';
}

/**
 * Fetches a page and prints a small report about it.
 *
 * The report is a table with the URL, the <title> and every <meta> tag that has
 * both a name and a content attribute, followed by the text of the <body>.
 * Keywords from $_GET['keyword'] are highlighted in all displayed values.
 * Everything that comes from the request or from the remote page is escaped
 * before it is echoed.
 *
 * @param string $url Address of the page to analyse.
 * @return void Output is echoed directly.
 */
function processPage($url) {
    // Normalise the keyword list: a missing or non-string parameter becomes '',
    // and empty words are removed because an empty word matches at every offset.
    $keyword = '';
    if (isset($_GET['keyword']) && is_string($_GET['keyword'])) {
        $keyword = implode(' ', preg_split('/\s+/', $_GET['keyword'], -1, PREG_SPLIT_NO_EMPTY));
    }

    $pagehtml = '';
    $outputHTML = '';

    if ($url) {
        $pagehtml = get_content($url);
        if ($pagehtml) {
            // Lazy match: take the first <title> element only.
            $page_title = '';
            if (preg_match('/<title\b[^>]*>(.*?)<\/title>/is', $pagehtml, $title)) {
                $page_title = trim($title[1]);
            }

            $outputHTML .= '<tr><td valign=top><b>url:</b></td><td><a href="'
                . htmlspecialchars($url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '">'
                . _processPageEscape($url, $keyword, false) . '</a></td></tr>';

            $outputHTML .= '<tr><td valign=top><b>title:</b></td><td>'
                . _processPageEscape($page_title, $keyword) . '</td></tr>';

            if (preg_match_all('/<meta\b(.*?)>/is', $pagehtml, $metaTags)) {
                foreach ($metaTags[1] as $attributes) {
                    $name = _processPageMetaAttr('name', $attributes);
                    $content = _processPageMetaAttr('content', $attributes);

                    // Strict comparison so a value of "0" is still listed.
                    if ($name !== '' && $content !== '') {
                        $outputHTML .= '<tr>'
                            . '<td nowrap valign=top><b>' . _processPageEscape($name, $keyword) . ':</b></td>'
                            . '<td width=200 valign=top>' . _processPageEscape($content, $keyword) . '</td>'
                            . '</tr>';
                    }
                }
            }
        } else {
            $outputHTML = '<tr><td colspan=2>URL can not be found!</td></tr>';
        }
    } else {
        $outputHTML = '<tr><td colspan=2>Invalid URL!</td></tr>';
    }

    echo '<strong>URL, TITLE & META</strong><br><hr>';
    echo '<table>';
    echo $outputHTML;
    echo '</table><br>';

    echo '<strong>PAGE COPY TEXT</strong><br><hr>';

    $finalText = '';
    // The </body> end tag is optional in HTML, so fall back to the end of the document.
    if (preg_match('/<body\b[^>]*>(.*?)(?:<\/body>|\z)/is', $pagehtml, $pageCopyArray)) {
        // Remove script and style elements together with their content;
        // strip_tags() alone would leave the code behind as text.
        $withoutCode = preg_replace('/<(script|style)\b[^>]*>.*?<\/\1\s*>/is', '', $pageCopyArray[1]);
        if ($withoutCode === null) {
            // PCRE gave up (e.g. backtrack limit): use the unfiltered body.
            $withoutCode = $pageCopyArray[1];
        }
        $finalText = strip_tags($withoutCode);
    }

    echo '<div>' . _processPageEscape($finalText, $keyword) . '</div>';
}
// The array key must be a quoted string: a bare `url` is an undefined constant,
// which is an Error on PHP 8. A missing or non-string parameter is passed as ''.
processPage(isset($_GET['url']) && is_string($_GET['url']) ? $_GET['url'] : '');
?>