// Requested article path (expected to look like "/entry/slug"). A request
// without ?article=, or with ?article[]=..., is treated as an empty path so the
// string functions below never receive null or an array.
$urlg = $_GET['article'] ?? '';
if (!is_string($urlg)) {
    $urlg = '';
}
// Collapse doubled slashes (single pass, so "///" becomes "//").
$urlg = str_replace("//", "/", $urlg);
$uri = $_SERVER['REQUEST_URI'];
// Percent-encoded slashes ("%2F", or the double-encoded "%252F") are redirected
// to the same URI with literal slashes. Hex digits in percent-encoding are
// case-insensitive, so "%2f" / "%252f" are normalised as well.
if (preg_match('/%(?:25)?2F/i', $uri) === 1) {
    $uri = preg_replace('/%(?:25)?2F/i', '/', $uri);
    header("HTTP/1.1 301 Moved Permanently");
    // NOTE(review): the target is built from the client-supplied Host header and
    // a hard-coded http:// scheme; confirm this is acceptable for this vhost.
    header('Location: http://' . $_SERVER['HTTP_HOST'] . $uri);
    exit();
}
// The article path comes straight from the query string and is used both to
// build the upstream URL and to build filesystem paths that are later passed to
// unlink(), touch() and mkdir(). Refuse anything that could leave the cache
// root or change the upstream host.
$articlePath = is_string($urlg) ? $urlg : '';
$var = parse_url($articlePath, PHP_URL_PATH);
if (!is_string($var)) {
    // parse_url() yields false/null for malformed input or a missing path
    $var = '';
}
$hasDotDotSegment = preg_match('~(?:^|[/\\\\])\.\.(?:[/\\\\]|$)~', $var) === 1;
$lacksLeadingSlash = $articlePath !== '' && $articlePath[0] !== '/';
$hasNulByte = strpos($articlePath, "\0") !== false;
if ($hasDotDotSegment || $lacksLeadingSlash || $hasNulByte) {
    // Without a leading slash the value would be glued onto the host name
    // (e.g. ".evil.com/x" or "@evil.com/x"); ".." segments would escape $dirname.
    header($_SERVER["SERVER_PROTOCOL"]." 400 Bad Request");
    exit();
}

$url = "https://www.huffpost.com$articlePath";
$urlyt = $url;
#echo $url;

// Optional switches; null when absent (no undefined-index warning).
$refresh = $_GET['refresh'] ?? null;
$diag = $_GET['diag'] ?? null;
if ($diag) {
    echo "diag";
}

// Cache layout: <dirname><url path> for the page, <dirname><url dir> for its directory.
$dirname = "/var/wwwroot/westnet.ca/huffarticles";
$cachefilebase = basename($var);
$cachefile = "$dirname$var";
$var = dirname($var); //strip urlpath
$cachefiledir = "$dirname$var";

// Debug aids, kept commented out (each on a single line so they stay comments):
#echo "cachefilebase: $cachefilebase";
#echo "dirname: $dirname";
#echo "URL Path (var): $var";
#echo "cachefile: $cachefile";
#echo "path for cachefile: $cachefiledir";
// Forced refresh: drop the cached page and its AMP companion. is_file() guards
// keep unlink() from warning when there is nothing to delete.
if ($refresh == "yes") {
    foreach ([$cachefile, "$cachefile.amp"] as $stalePath) {
        if (is_file($stalePath)) {
            unlink($stalePath);
        }
    }
}

// A cache file whose trimmed content is empty (or the single character "0",
// which the original loose comparison also treated as empty) is useless:
// delete it so the page is fetched again below.
if (is_file($cachefile)) {
    $cachedBody = trim((string) file_get_contents($cachefile));
    if ($cachedBody === '' || $cachedBody === '0') {
        unlink($cachefile);
    }
}
if (file_exists($cachefile)) {
    // Cache hit: reuse the stored copy instead of contacting the origin.
    $file = file($cachefile);
    // file() returns false when the path cannot be read (e.g. it is a directory).
    $contents = is_array($file) ? implode($file) : '';
} else {
    $mobileUserAgent = 'Mozilla/5.0 (iPhone; CPU iPhone OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/70.0.3538.75 Mobile/15E148 Safari/605.1';

    // Header-only probe: find out whether the page exists before downloading it.
    // (The original also had a one-argument curl_setopt($ch) call here, which is
    // an ArgumentCountError on PHP 8; it set nothing and has been removed.)
    $ch = curl_init("$url");
    curl_setopt($ch, CURLOPT_HEADER, true); // we want headers
    curl_setopt($ch, CURLOPT_NOBODY, true); // we don't need body
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_TIMEOUT, 2);
    curl_setopt($ch, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
    curl_setopt($ch, CURLOPT_ENCODING, ''); // empty string: accept every encoding curl supports
    curl_setopt($ch, CURLOPT_USERAGENT, $mobileUserAgent);
    $output = curl_exec($ch);
    $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    if ($httpcode !== 200) {
        // Anything but a plain 200 (redirects included, since the probe does not
        // follow them) is reported to the client as 404.
        $notfound = "404 Not Found";
        $msg = "$notfound
";
        $msg .= "Return Home ";
        $title = ($title ?? '') . " $notfound - ";
        header($_SERVER["SERVER_PROTOCOL"]." 404 Not Found");
    } else {
        // Full download of the article. Certificate verification is left at
        // curl's defaults: the probe above already reached the same URL with
        // verification enabled, so disabling it here only weakened the fetch.
        $curl = curl_init($url);
        curl_setopt($curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
        curl_setopt($curl, CURLOPT_ENCODING, '');
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($curl, CURLOPT_TIMEOUT, 2);
        curl_setopt($curl, CURLOPT_USERAGENT, $mobileUserAgent);
        $contents = curl_exec($curl); // false when the transfer fails
        $info = curl_getinfo($curl);
        curl_close($curl);

        if (!file_exists($cachefiledir)) {
            mkdir($cachefiledir, 0777, true);
        }
        // Writing the page into the cache is currently switched off:
        ###2024 file_put_contents($cachefile, $contents);

        libxml_use_internal_errors(true);
        $dateModified = null;
        $datePublished = null;
        $dom = null;
        // DOMDocument::loadHTML() throws on an empty source in PHP 8, so only
        // parse when the download actually produced markup.
        if (is_string($contents) && $contents !== '') {
            $dom = new DOMDocument();
            $dom->loadHTML($contents);
            $xpath = new DOMXpath($dom);
            $jsonScripts = $xpath->query('//script[@type="application/ld+json"]');
            $ldNode = $jsonScripts->item(0); // null when the page has no ld+json block
            $json = $ldNode !== null ? json_decode(trim($ldNode->nodeValue), true) : null;
            if (is_array($json)) {
                $dateModified = $json['dateModified'] ?? null;
                $datePublished = $json['datePublished'] ?? null;
            }
        }
        $datePublishedlinux = is_string($datePublished) ? strtotime($datePublished) : false;
        $dateModifiedlinux = is_string($dateModified) ? strtotime($dateModified) : false;
        // touch() creates the (empty) cache file when it does not exist; the
        // file_exists($cachefile) check further down the script relies on that.
        touch($cachefile, $dateModifiedlinux, $datePublishedlinux);

        //Save images
        if ($dom !== null) {
            foreach ($dom->getElementsByTagName('img') as $tag) {
                $imgurl = $tag->getAttribute('src');
                #$imgurl .= $tag->getAttribute('srcset');
                $path = parse_url($imgurl, PHP_URL_PATH);
                $scheme = strtolower((string) parse_url($imgurl, PHP_URL_SCHEME));
                // Only absolute http(s) URLs can be fetched and mapped to a local
                // path; skip data: URIs, relative paths and anything with "..".
                if (!is_string($path)
                    || ($scheme !== 'http' && $scheme !== 'https')
                    || preg_match('~(?:^|/)\.\.(?:/|$)~', $path) === 1
                ) {
                    continue;
                }
                // Taking the name from the URL path keeps the query string out of it.
                $filename = basename($path);
                if ($filename === '') {
                    continue;
                }
                $dirnameimage = rtrim("$dirname/images" . dirname($path), '/');
                // The original joined directory and file name without a slash,
                // so the existence check never looked at the real location.
                $filenamewithpath = "$dirnameimage/$filename";

                if (!file_exists($filenamewithpath)) {
                    $curl = curl_init($imgurl);
                    curl_setopt($curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
                    curl_setopt($curl, CURLOPT_ENCODING, '');
                    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
                    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
                    curl_setopt($curl, CURLOPT_TIMEOUT, 2);
                    curl_setopt($curl, CURLOPT_USERAGENT, $mobileUserAgent);
                    $image = curl_exec($curl);
                    $info = curl_getinfo($curl);
                    curl_close($curl);

                    if (!empty($image)) {
                        if (!file_exists($dirnameimage)) {
                            mkdir($dirnameimage, 0777, true);
                        }
                        // Storing the image is currently switched off:
                        ### 2024 file_put_contents($filenamewithpath, $image);
                    }
                }
            }
        }
    }//code 200 check
}//filenotfound
//Local Processing
// Path of the AMP companion file that belongs to this cache entry.
$amp = "$cachefile.amp";

// Look for <link rel="amphtml"> in the page and, when no local AMP file exists
// yet, download the AMP version and continue processing with it.
// DOMDocument::loadHTML() throws on an empty source in PHP 8, so skip the
// lookup entirely when there is no markup (failed fetch / 404 path).
if (!empty($contents) && is_string($contents)) {
    $dom = new DOMDocument();
    @$dom->loadHTML($contents);
    $nodes = $dom->getElementsByTagName('link');
    foreach ($nodes as $node) {
        if ($node->getAttribute('rel') !== 'amphtml') {
            continue;
        }
        $amphtml = $node->getAttribute('href');
        if (file_exists($amp)) {
            continue;
        }
        $curl = curl_init($amphtml);
        curl_setopt($curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
        curl_setopt($curl, CURLOPT_ENCODING, ''); // empty string: accept every encoding curl supports
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
        // Certificate verification is left at curl's defaults; the previous
        // VERIFYPEER/VERIFYHOST = false let any host impersonate the origin.
        curl_setopt($curl, CURLOPT_TIMEOUT, 2);
        curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (iPhone; CPU iPhone OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/70.0.3538.75 Mobile/15E148 Safari/605.1');
        $ampcontents = curl_exec($curl);
        $info = curl_getinfo($curl);
        curl_close($curl);
        // Only switch to the AMP markup when the download succeeded; a timeout
        // used to replace the already-fetched page with false.
        if (is_string($ampcontents) && $ampcontents !== '') {
            $contents = $ampcontents;
        }
        // Storing the AMP copy is currently switched off:
        ### 2024 file_put_contents($amp, $contents);
    }
}
// When an AMP companion file exists, pull the first element whose class
// attribute contains "entry__content" out of the current markup.
// The empty-source guard avoids the ValueError DOMDocument::loadHTML() throws
// on PHP 8 when $contents is empty or was never set.
if (file_exists($amp) && !empty($contents) && is_string($contents)) {
    #$contents = file_get_contents($amp);
    $doc = new DOMDocument();
    #libxml_use_internal_errors(true);
    $doc->loadHTML($contents);
    $finder = new DomXPath($doc);
    $node = $finder->query("//*[contains(@class, 'entry__content')]");
    // item(0) is null when nothing matches; saveHTML(null) then serialises the
    // whole document (unchanged from the original behaviour).
    $ampnewbodystory = $doc->saveHTML($node->item(0));
}
// Rewrite the fetched markup so links and images point at this site.
// Rename srcSet attributes to src2.
$contents = preg_replace('/srcSet=/m',"src2=", $contents);
// Route site-relative and absolute huffpost.com links through newstempch.php.
$contents = preg_replace('~href="/~', 'href="/newstempch.php?article=/', $contents);
$contents = preg_replace('~href="http://www.huffpost.com/~', 'href="/newstempch.php?article=/', $contents);
$contents = preg_replace('~href="https://www.huffpost.com/~', 'href="/newstempch.php?article=/', $contents);
// NOTE(review): every href="/ prefix was already rewritten three statements
// above, so this pattern looks like it can no longer match - confirm.
$contents = preg_replace('~href="/news/~', 'href="/newstempch.php?article=', $contents);
// Point i.huffpost.com image URLs at the local mirror under $cdn.
// NOTE(review): $cdn is not assigned anywhere in the visible code - confirm
// it is defined before this point.
$contents = str_replace('src=\'https://i.huffpost.com',"src='$cdn/huffarticles/images", $contents);
$contents = str_replace('src=\'http://i.huffpost.com',"src='$cdn/huffarticles/images", $contents);
$contents = str_replace('src="https://i.huffpost.com',"src=\"$cdn/huffarticles/images", $contents);
$contents = preg_replace('~https://i.huffpost.com~',"$cdn/huffarticles/images", $contents);
$contents = str_replace("| CBC News","", $contents);
// Drops control bytes and every byte from 0x7F upward; this also removes line
// breaks and all non-ASCII (multi-byte) characters.
$contents = preg_replace('/[\x00-\x1F\x7F-\xFF]/', '', $contents);
// Remaining absolute huffpost.com URLs are pointed at action.news.
$contents = preg_replace('~https://www.huffpost.com~', 'https://action.news', $contents);
// NOTE(review): the search string here is empty, so this call appears to
// change nothing; the original search text may have been lost - confirm.
$contents = str_replace('', ' ', $contents);
$contents = str_replace("<a","<", $contents);
$contents = htmlspecialchars_decode($contents);
// NOTE(review): the pattern below is empty apart from its modifiers and the
// statement after it is cut off mid-pattern (its closing quote, the $classname
// assignment and the DOMDocument setup are missing). The text appears to have
// been lost in extraction; restore it from the original file before relying on
// the lines from here to the end of the first div-removal stanza.
$contents = preg_replace('//Usi', "", $contents);
$contents = preg_replace('/
loadHTML($contents);
$xpath = new DomXPath($dom);
$xpath_results = $xpath->query("//div[contains(@class, '$classname')]");
if($div = $xpath_results->item(0)){
//remove the node the same way
$div ->parentNode->removeChild($div);
$contents = $dom->saveHTML();
#echo $dom->saveHTML();
}
// For each class name in turn: re-parse the current markup, drop the first
// <div> whose class attribute contains that name, and re-serialise the page.
// When no such <div> exists the markup is left as it was.
foreach (["img-sized__placeholder", "connatix-wrapper"] as $classname) {
    $dom = new DOMDocument();
    $dom->loadHTML($contents);
    $xpath = new DomXPath($dom);
    $xpath_results = $xpath->query("//div[contains(@class, '$classname')]");
    $div = $xpath_results->item(0);
    if (!$div) {
        continue;
    }
    // detach the node from its parent, then write the document back
    $div->parentNode->removeChild($div);
    $contents = $dom->saveHTML();
    #echo $dom->saveHTML();
}
// Collect the page title plus the description, keywords and og:image metadata
// from the rewritten markup.
$doc = new DOMDocument();
@$doc->loadHTML($contents);
$nodes = $doc->getElementsByTagName('title');
// item(0) is null when the page has no <title>; ?? keeps that from raising a
// "property on null" warning and leaves $title as null, as before.
$title = $nodes->item(0)->nodeValue ?? null;
$metas = $doc->getElementsByTagName('meta');
foreach ($metas as $meta) {
    if ($meta->getAttribute('name') == 'description') {
        $description = $meta->getAttribute('content');
    }
    if ($meta->getAttribute('name') == 'keywords') {
        $keywords = $meta->getAttribute('content');
    }
    if ($meta->getAttribute('property') == 'og:image') {
        $metaimage = $meta->getAttribute('content');
    }
}
if (empty($title)) {
    //Get the article title
    // NOTE(review): the start of this pattern looks truncated (an opening tag
    // matching the closing "titleh" tag seems to be missing) - confirm against
    // the original file. The pattern is kept exactly as found.
    preg_match_all('/
(.*?)<\/titleh>/is', $contents, $matches);
    // With several matches the last one wins.
    foreach ($matches[1] as $title1) {
        $title = "$title1 - ";
    }
}
// No cache entry on disk at this point: answer with a 404 status, prepare the
// not-found message and append the marker to the page title.
if (file_exists($cachefile) === false) {
    $notfound = "404 Not Found";
    $msg = $notfound . "
" . "Return Home ";
    $title = $title . " " . $notfound . " - ";
    header("{$_SERVER["SERVER_PROTOCOL"]} 404 Not Found");
}
// Extract the site header and the article body fragments from the markup.
// The document is parsed once and queried twice; the original parsed the very
// same string a second time between the two queries.
libxml_use_internal_errors(true); // collect parser warnings instead of emitting them
$doc = new DOMDocument();
$doc->loadHTML($contents);
$finder = new DomXPath($doc);

// contains() is a substring test on the class attribute, so the two class
// names must appear in it adjacent and in this order.
$node = $finder->query("//*[contains(@class, 'top-header js-cet-subunit')]");
// item(0) is null when nothing matches; saveHTML(null) then returns the whole
// document rather than an empty string (behaviour unchanged).
$newbodyheader = $doc->saveHTML($node->item(0));

$node = $finder->query("//*[contains(@class, 'entry__content-list js-main-content-list')]");
#$node = $finder->query("//*[contains(@class, 'entry__header entry__header--no-top-media')]");
$newbodystory = $doc->saveHTML($node->item(0));
// Remove the first <div> whose class attribute contains "nav__content" and
// write the result back into $contents.
$classname = "nav__content";
$dom = new DOMDocument();
$dom->loadHTML($contents);
$xpath = new DomXPath($dom);
$xpath_results = $xpath->query("//div[contains(@class, '$classname')]");
if ($div = $xpath_results->item(0)) {
    //remove the node the same way
    $div->parentNode->removeChild($div);
    $contents = $dom->saveHTML();
    #echo $dom->saveHTML();
}

// Fallback lookup for pages without a story body. The guard previously tested
// $newstorybody, a misspelling of $newbodystory that is never assigned, so it
// was always true. The assignment itself is still commented out, so for now
// this only refreshes $finder and $node.
if (empty($newbodystory)) {
    $finder = new DomXPath($doc);
    $node = $finder->query("//*[contains(@class, 'entry-head-container')]");
    #$newbodystory = ($doc->saveHTML($node->item(0)));
}
// Read the publication / modification dates from the page's first ld+json
// block, stamp the cache files with them and hand over to the page header.
$dom = new DOMDocument();
libxml_use_internal_errors(true);
$dom->loadHTML($contents);
$xpath = new DOMXpath($dom);
$jsonScripts = $xpath->query('//script[@type="application/ld+json"]');
// item(0) is null when the markup carries no ld+json block; fall back to an
// empty payload instead of dereferencing null.
$ldNode = $jsonScripts->item(0);
$json = $ldNode !== null ? trim($ldNode->nodeValue) : '';
$data = json_decode($json);
$json = json_decode($json, true);
// The payload may be missing or not an object; only read the keys from an array.
$dateModified = is_array($json) ? ($json['dateModified'] ?? null) : null;
$datePublished = is_array($json) ? ($json['datePublished'] ?? null) : null;
$timestamp = strtotime('11/23/2011 10:59 am EST');
// strtotime() is only called on real strings; false mirrors its own failure value.
$datePublishedlinux = is_string($datePublished) ? strtotime($datePublished) : false;
$dateModifiedlinux = is_string($dateModified) ? strtotime($dateModified) : false;
// touch() creates either file when it does not exist yet.
touch($cachefile, $dateModifiedlinux, $datePublishedlinux);
touch($amp, $dateModifiedlinux, $datePublishedlinux);
include('header.php');
?>
if ($username == "westnet"){
if (file_exists($cachefile)) {
echo "Last accessed: " . date ("F d Y H:i:s", filectime($cachefile));}}
echo "Posted: $datePublished | Updated: $dateModified ";
if (file_exists($amp)) {
#echo "amp exists as $amp";
#echo "
include('rightcol.php');?>
include('westnet-hd-news2018.php');?>
include('footer.htm'); ?>