<?php
namespace App\Http\Controllers;
use Illuminate\Http\Request;
use App\Http\Requests;
use RedBeanPHP\R;
use PhpQuery\PhpQuery as phpQuery;
use GuzzleHttp\Client;
use Cache;
/**
* This class searches google shopping for a given
* keyword and scrapes the page urls for microdata
*/
class GoogleShoppingController extends Controller {
/**
* Markup most recently loaded by getHtml(); loadHtmlDom() passes it to phpQuery.
*
* @var mixed $_html whatever the cache returned for the last requested URL
*/
private $_html;
/**
* Links collected by getGoogleShoppingSearchResults(); the parse*() methods
* iterate over this list.
*
* @var array|null $_urls hrefs returned by getLinks() for the last search
*/
private $_urls;
/**
* Creates new Guzzle http client and caches the response
* Loads response into $_html
* Calls loadHtmlDom() which returns phpQuery object
*
* @param string $url url to scrape for links
*/
private function getHtml($url) {
    // Loads the markup for $url into $this->_html (from the cache when it is
    // there, otherwise over HTTP) and then hands it to phpQuery through
    // loadHtmlDom(). Nothing is returned.

    // One cache read replaces the has()/get() pair: a miss comes back as null.
    $html = Cache::get($url);

    if ($html === null) {
        try {
            $client = new Client();
            $response = $client->get($url);
            // Cast the body to a string before caching so the markup itself is
            // stored rather than the response's stream object.
            $html = (string) $response->getBody();
            // NOTE(review): the unit of this TTL (minutes vs. seconds) depends
            // on the installed Laravel version - confirm.
            Cache::put($url, $html, 3600);
        } catch (\GuzzleHttp\Exception\RequestException $e) {
            // Fully qualified on purpose: a bare RequestException would be
            // looked up inside this namespace and the handler would never match.
            // The message is a plain string, unlike the request/response objects.
            echo $e->getMessage();
        }
    }

    // Remains null when the request failed and nothing was cached for this URL.
    $this->_html = $html;
    $this->loadHtmlDom();
}
/**
* Loads html response into phpQuery
*
* @return phpQuery object
*/
private function loadHtmlDom() {
    // Build a phpQuery document from the markup currently held in $_html
    // and give that document back to the caller.
    $document = phpQuery::newDocumentHTML($this->_html);

    return $document;
}
/**
* Scrapes the href attribute of every element matched by $selector on the page at $url
*
* @param string $url URL to scrape for links
* @param string $selector CSS selector to scrape
* @return Array of scraped urls
*/
private function getLinks($url, $selector) {
    // Fetches the page at $url and returns the href attribute of every element
    // matched by the CSS $selector.

    // Start from an empty list so that a page without any match yields []
    // instead of an undefined variable (null plus a notice).
    $links = [];

    $this->getHtml($url);

    foreach (phpQuery::pq($selector) as $link) {
        $links[] = phpQuery::pq($link)->attr("href");
    }

    return $links;
}
/**
* Search Google shopping for $keyword
*
* @param string $keyword Keyword to search on GoogleShopping
* @return Array of scraped urls containing all results
*/
public function getGoogleShoppingSearchResults($keyword) {
    // Runs a Google Shopping search for $keyword, stores the scraped result
    // links in $this->_urls and returns them.

    // Build the query string with http_build_query() so that spaces, '&', '#'
    // and other reserved characters in the keyword are encoded rather than
    // corrupting the request URL. The separator is passed explicitly so the
    // result does not depend on the arg_separator.output ini setting.
    $query = http_build_query(
        ['q' => $keyword, 'hl' => 'en', 'tbm' => 'shop'],
        '',
        '&'
    );

    $this->_urls = $this->getLinks("https://www.google.com/search?{$query}", ".r a");

    return $this->_urls;
}
/**
* Parses only Google Shopping Product links from the _urls array
*
* @return Array of scraped urls in the format http://www.google.com/shopping/product/
*/
public function parseGoogleShoppingProductLinks() {
    // Returns every link in $this->_urls whose path contains
    // "shopping/product/", rewritten as http://www.google.com<path> with the
    // query string dropped. Returns an empty array when nothing matches or
    // when no search has been run yet.
    $urls = [];

    // The cast turns an unset (null) $_urls into an empty list.
    foreach ((array) $this->_urls as $url) {
        // An anchor without an href does not produce a usable string.
        if (!is_string($url)) {
            continue;
        }

        $parsed = parse_url($url);

        // strpos() yields 0 for a match at the very start of the path, so the
        // result has to be compared against false rather than tested for truth.
        if (!isset($parsed['path']) || strpos($parsed['path'], 'shopping/product/') === false) {
            continue;
        }

        $urls[] = "http://www.google.com" . $parsed['path'];
    }

    // Returned after the loop so that all matches are collected, not just the first.
    return $urls;
}
/**
* Parses only Google Sponsored Product links from the _urls array
*
* @return Array of scraped urls for sponsored google products original store page
*/
public function parseGoogleSponsoredProductLinks() {
    // Returns the destination URL of every sponsored ("/aclk") link found in
    // $this->_urls. The destination is read from the link's "adurl" query
    // parameter: its "ds_dest_url" value when present, otherwise its "u" value.
    // Returns an empty array when nothing matches or no search has been run.
    $urls = [];

    // The cast turns an unset (null) $_urls into an empty list.
    foreach ((array) $this->_urls as $url) {
        // An anchor without an href does not produce a usable string.
        if (!is_string($url)) {
            continue;
        }

        $parsed = parse_url($url);

        // Only sponsored links that actually carry a query string are of interest.
        if (!isset($parsed['path'], $parsed['query']) || $parsed['path'] !== '/aclk') {
            continue;
        }

        // parse_str() fills its second argument and returns nothing.
        parse_str($parsed['query'], $query);
        if (!isset($query['adurl']) || !is_string($query['adurl'])) {
            continue;
        }

        parse_str($query['adurl'], $adurl);

        if (isset($adurl['ds_dest_url'])) {
            $urls[] = $adurl['ds_dest_url'];
        } elseif (!empty($adurl['u'])) {
            // !empty() also covers a missing "u" key without raising a notice.
            $urls[] = $adurl['u'];
        }
    }

    // Returned after the loop so that all sponsored links are collected.
    return $urls;
}