/**
 * Walks Google Shopping result pages for a query and classifies every result link.
 *
 * For each result page (start offsets 0, 36, 72, ... up to 500) the page is
 * downloaded, all anchors matching ".r a" are collected, and each href is
 * handled in one of two ways:
 *  - links whose host contains "google.com" are treated as sponsored redirects:
 *    the "adurl" query parameter is parsed, and the "ds_dest_url" parameter of
 *    that URL is looked up in the "sproduct" table;
 *  - every other link (typically a relative href) is rebuilt as
 *    http://www.google.com<path> and looked up in the "gproduct" table.
 *
 * Unknown URLs get a new bean dispensed with scraped = FALSE.
 * NOTE(review): the R::store() calls are commented out, so nothing is
 * persisted at the moment — confirm whether that is intentional.
 *
 * Debug output (var_dump / echo) is written to standard output, and the method
 * sleeps 4-8 seconds after every page to throttle requests.
 *
 * @param string $q Raw search terms; they are URL-encoded here before being
 *                  placed into the request URL. A falsy value terminates the
 *                  script via die().
 * @return void
 */
public function search($q) {
    if (!$q) {
        die('death');
    }

    // Encode once so spaces, '&', '#', etc. in the query cannot corrupt the URL.
    $encodedQuery = urlencode($q);

    for ($num = 0; $num <= 500; $num = $num + 36) {
        $gurl = "https://www.google.com/search?q=$encodedQuery&hl=en&tbm=shop&start=$num";
        $html = file_get_contents($gurl);
        if ($html === false) {
            // Request failed (network error, HTTP error, blocked): skip this page
            // but keep throttling before the next attempt.
            echo "Failed to fetch $gurl \n";
            sleep(rand(4, 8));
            continue;
        }

        phpQuery::newDocumentHTML($html);

        // Start from an empty list on every page; otherwise links from earlier
        // pages would be processed again on each subsequent page.
        $urls = array();
        foreach (phpQuery::pq(".r a") as $anchor) {
            $href = phpQuery::pq($anchor)->attr("href");
            if (is_string($href) && $href !== '') {
                $urls[] = $href;
            }
        }

        foreach ($urls as $url) {
            $parsed = parse_url($url);
            var_dump($url);
            var_dump($parsed);
            if ($parsed === false) {
                // Seriously malformed URL: nothing to classify.
                continue;
            }

            if (isset($parsed['host']) && strpos($parsed['host'], 'google.com') !== FALSE) {
                // Sponsored link: the real target is nested inside the redirect's query string.
                // parse_str() must be given a result array; the one-argument form
                // returned nothing and injected variables into the local scope.
                $queryString = array();
                if (isset($parsed['query'])) {
                    parse_str($parsed['query'], $queryString);
                }
                var_dump($queryString);
                if (!isset($queryString['adurl']) || !is_string($queryString['adurl'])) {
                    continue;
                }

                $x = parse_url($queryString['adurl']);
                var_dump($x);
                if (!isset($x['query'])) {
                    continue;
                }

                $y = array();
                parse_str($x['query'], $y);
                var_dump($y);
                if (!isset($y['ds_dest_url']) || !is_string($y['ds_dest_url'])) {
                    continue;
                }

                $destUrl = $y['ds_dest_url'];
                if (!R::find('sproduct', ' url = ?', array($destUrl))) {
                    // sproduct is a sponsored product (which is why adurl is parsed)
                    $sproduct = R::dispense('sproduct');
                    $sproduct->url = $destUrl;
                    $sproduct->scraped = FALSE;
                    //R::store($sproduct);
                    //var_dump($sproduct);
                }
            } else {
                // Google Shopping product page: normalise to host + path, dropping the query.
                if (!isset($parsed['path'])) {
                    continue;
                }
                $productUrl = "http://" . 'www.google.com' . $parsed['path'];
                if (!R::find('gproduct', ' url = ?', array($productUrl))) {
                    $gproduct = R::dispense('gproduct');
                    $gproduct->url = $productUrl;
                    $gproduct->scraped = FALSE;
                    //R::store($gproduct);
                    //var_dump($gproduct);
                }
            }
        }

        echo "Scraped $gurl \n";
        sleep(rand(4, 8));
    }

    // Extra pause after the last page before returning to the caller.
    sleep(rand(4, 8));
}