From 0d143235b7f034cff571abc599d3e768b73d8f46 Mon Sep 17 00:00:00 2001 From: raiym Date: Tue, 21 Mar 2017 18:38:07 +0300 Subject: [PATCH] Don't query cache every time when calling Instagram endpoint. Instead call it once when login and at the end of script call $instagram->saveSession() --- src/InstagramScraper/Instagram.php | 267 ++++++++++++----------------- tests/InstagramTest.php | 8 +- 2 files changed, 121 insertions(+), 154 deletions(-) diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index c51afc05..f4e4e2c5 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -19,9 +19,7 @@ class Instagram private static $instanceCache; public $sessionUsername; public $sessionPassword; - public $sessionId; - public $mid; - public $csrfToken; + public $userSession; public function __construct() { @@ -32,11 +30,14 @@ public static function withCredentials($username, $password, $sessionFolder = nu if (is_null($sessionFolder)) { $sessionFolder = __DIR__ . DIRECTORY_SEPARATOR . 'sessions' . DIRECTORY_SEPARATOR; } - // load cache - CacheManager::setDefaultConfig([ - 'path' => $sessionFolder - ]); - self::$instanceCache = CacheManager::getInstance('files'); + if (is_string($sessionFolder)) { + CacheManager::setDefaultConfig([ + 'path' => $sessionFolder + ]); + self::$instanceCache = CacheManager::getInstance('files'); + } else { + self::$instanceCache = $sessionFolder; + } $instance = new self(); $instance->sessionUsername = $username; $instance->sessionPassword = $password; @@ -94,49 +95,6 @@ public static function getMedias($username, $count = 20, $maxId = '') return $medias; } - public static function getPaginateMedias($username, $maxId = '') - { - $hasNextPage = true; - $medias = []; - - $toReturn = [ - 'medias' => $medias, - 'maxId' => $maxId, - 'hasNextPage' => $hasNextPage - ]; - - $response = Request::get(Endpoints::getAccountMediasJsonLink($username, $maxId)); - - if ($response->code !== 200) { - throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); - } - - $arr = json_decode($response->raw_body, true); - - if (!is_array($arr)) { - throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); - } - - if (count($arr['items']) === 0) { - return $toReturn; - } - - foreach ($arr['items'] as $mediaArray) { - $medias[] = Media::fromApi($mediaArray); - } - - $maxId = $arr['items'][count($arr['items']) - 1]['id']; - $hasNextPage = $arr['more_available']; - - $toReturn = [ - 'medias' => $medias, - 'maxId' => $maxId, - 'hasNextPage' => $hasNextPage - ]; - - return $toReturn; - } - public static function getMediaByCode($mediaCode) { return self::getMediaByUrl(Endpoints::getMediaPageLink($mediaCode)); @@ -163,6 +121,7 @@ public static function getMediaByUrl($mediaUrl) public static function searchAccountsByUsername($username) { + // TODO: Add tests and auth $response = Request::get(Endpoints::getGeneralSearchJsonLink($username)); if ($response->code === 404) { throw new InstagramNotFoundException('Account with given username does not exist.'); @@ -188,6 +147,7 @@ public static function searchAccountsByUsername($username) public static function searchTagsByTagName($tag) { + // TODO: Add tests and auth $response = Request::get(Endpoints::getGeneralSearchJsonLink($tag)); if ($response->code === 404) { throw new InstagramNotFoundException('Account with given username does not exist.'); @@ -217,13 +177,67 @@ public static function getMediaById($mediaId) return self::getMediaByUrl($mediaLink); } - public static function getMediaCommentsById($mediaId, $count = 10, $maxId = null) + public function getPaginateMedias($username, $maxId = '') + { + $hasNextPage = true; + $medias = []; + + $toReturn = [ + 'medias' => $medias, + 'maxId' => $maxId, + 'hasNextPage' => $hasNextPage + ]; + + $response = Request::get(Endpoints::getAccountMediasJsonLink($username, $maxId), $this->generateHeaders($this->userSession)); + + if ($response->code !== 200) { + throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); + } + + $arr = json_decode($response->raw_body, true); + + if (!is_array($arr)) { + throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); + } + + if (count($arr['items']) === 0) { + return $toReturn; + } + + foreach ($arr['items'] as $mediaArray) { + $medias[] = Media::fromApi($mediaArray); + } + + $maxId = $arr['items'][count($arr['items']) - 1]['id']; + $hasNextPage = $arr['more_available']; + + $toReturn = [ + 'medias' => $medias, + 'maxId' => $maxId, + 'hasNextPage' => $hasNextPage + ]; + + return $toReturn; + } + + private function generateHeaders($session) + { +// var_dump($session); + $cookies = ''; + foreach ($session as $key => $value) { + $cookies .= "$key=$value; "; + } + $headers = ['cookie' => $cookies, 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $session['csrftoken']]; + return $headers; + } + + public function getMediaCommentsById($mediaId, $count = 10, $maxId = null) { $code = Media::getCodeFromId($mediaId); return self::getMediaCommentsByCode($code, $count, $maxId); } - public static function getMediaCommentsByCode($code, $count = 10, $maxId = null) + public function getMediaCommentsByCode($code, $count = 10, $maxId = null) { $remain = $count; $comments = []; @@ -245,7 +259,13 @@ public static function getMediaCommentsByCode($code, $count = 10, $maxId = null) } else { $parameters = Endpoints::getCommentsBeforeCommentIdByCode($code, $numberOfCommentsToRetreive, $maxId); } - $jsonResponse = json_decode(self::getContentsFromUrl($parameters), true); + $response = Request::post(Endpoints::INSTAGRAM_QUERY_URL, $this->generateHeaders($this->userSession), ['q' => $parameters]); + if ($response->code !== 200) { + throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); + } + $cookies = self::parseCookies($response->headers['Set-Cookie']); + $this->userSession['csrftoken'] = $cookies['csrftoken']; + $jsonResponse = json_decode($response->raw_body, true); $nodes = $jsonResponse['comments']['nodes']; foreach ($nodes as $commentArray) { $comments[] = Comment::fromApi($commentArray); @@ -263,24 +283,17 @@ public static function getMediaCommentsByCode($code, $count = 10, $maxId = null) return $comments; } - private function getContentsFromUrl($parameters) + private static function parseCookies($rawCookies) { - if (!function_exists('curl_init')) { - return false; + $cookies = []; + foreach ($rawCookies as $c) { + $c = explode(';', $c)[0]; + $parts = explode('=', $c); + if (sizeof($parts) >= 2 && !is_null($parts[1])) { + $cookies[$parts[0]] = $parts[1]; + } } - $ch = curl_init(); - curl_setopt($ch, CURLOPT_URL, Endpoints::INSTAGRAM_QUERY_URL); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE); - curl_setopt($ch, CURLOPT_POST, 1); - curl_setopt($ch, CURLOPT_POSTFIELDS, 'q=' . $parameters); - $headers = array(); - $headers[] = "Cookie: csrftoken=$this->csrfToken; sessionid=$this->sessionId"; - $headers[] = "X-Csrftoken: $this->csrfToken"; - $headers[] = "Referer: https://www.instagram.com/"; - curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); - $output = curl_exec($ch); - curl_close($ch); - return $output; + return $cookies; } public function getAccountById($id) @@ -288,22 +301,17 @@ public function getAccountById($id) if (!is_numeric($id)) { throw new \InvalidArgumentException('User id must be integer or integer wrapped in string'); } - $cachedString = self::$instanceCache->getItem($this->sessionUsername); - $session = $cachedString->get(); $parameters = Endpoints::getAccountJsonInfoLinkByAccountId($id); - $response = Request::post(Endpoints::INSTAGRAM_QUERY_URL, $this->generateHeaders($session), ['q' => $parameters]); + $response = Request::post(Endpoints::INSTAGRAM_QUERY_URL, $this->generateHeaders($this->userSession), ['q' => $parameters]); if ($response->code !== 200) { throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); } $cookies = self::parseCookies($response->headers['Set-Cookie']); - $this->csrfToken = $cookies['csrftoken']; - $session['csrftoken'] = $this->csrfToken; - $cachedString->set($session); - self::$instanceCache->save($cachedString); + $this->userSession['csrftoken'] = $cookies['csrftoken']; $userArray = json_decode($response->raw_body, true); if ($userArray['status'] === 'fail') { @@ -315,47 +323,19 @@ public function getAccountById($id) return Account::fromAccountPage($userArray); } - private function generateHeaders($session) - { - $cookies = ''; - foreach ($session as $key => $value) { - $cookies .= "$key=$value; "; - } - $headers = ['cookie' => $cookies, 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken]; - return $headers; - } - - private static function parseCookies($rawCookies) - { - $cookies = []; - foreach ($rawCookies as $c) { - $c = explode(';', $c)[0]; - $parts = explode('=', $c); - if (sizeof($parts) >= 2 && !is_null($parts[1])) { - $cookies[$parts[0]] = $parts[1]; - } - } - return $cookies; - } - public function getMediasByTag($tag, $count = 12, $maxId = '') { - $cachedString = self::$instanceCache->getItem($this->sessionUsername); - $session = $cachedString->get(); $index = 0; $medias = []; $mediaIds = []; $hasNextPage = true; while ($index < $count && $hasNextPage) { - $response = Request::get(Endpoints::getMediasJsonByTagLink($tag, $maxId), $this->generateHeaders($session)); + $response = Request::get(Endpoints::getMediasJsonByTagLink($tag, $maxId), $this->generateHeaders($this->userSession)); if ($response->code !== 200) { throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); } $cookies = self::parseCookies($response->headers['Set-Cookie']); - $this->csrfToken = $cookies['csrftoken']; - $session['csrftoken'] = $this->csrfToken; - $cachedString->set($session); - self::$instanceCache->save($cachedString); + $this->userSession['csrftoken'] = $cookies['csrftoken']; $arr = json_decode($response->raw_body, true); if (!is_array($arr)) { throw new InstagramException('Response decoding failed. Returned data corrupted or this library outdated. Please report issue'); @@ -395,19 +375,15 @@ public function getPaginateMediasByTag($tag, $maxId = '') 'maxId' => $maxId, 'hasNextPage' => $hasNextPage ]; - $cachedString = self::$instanceCache->getItem($this->sessionUsername); - $session = $cachedString->get(); - $response = Request::get(Endpoints::getMediasJsonByTagLink($tag, $maxId), $this->generateHeaders($session)); + + $response = Request::get(Endpoints::getMediasJsonByTagLink($tag, $maxId), $this->generateHeaders($this->userSession)); if ($response->code !== 200) { throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); } $cookies = self::parseCookies($response->headers['Set-Cookie']); - $this->csrfToken = $cookies['csrftoken']; - $session['csrftoken'] = $this->csrfToken; - $cachedString->set($session); - self::$instanceCache->save($cachedString); + $this->userSession['csrftoken'] = $cookies['csrftoken']; $arr = json_decode($response->raw_body, true); @@ -445,9 +421,7 @@ public function getPaginateMediasByTag($tag, $maxId = '') public function getTopMediasByTagName($tagName) { - $cachedString = self::$instanceCache->getItem($this->sessionUsername); - $session = $cachedString->get(); - $response = Request::get(Endpoints::getMediasJsonByTagLink($tagName, ''), $this->generateHeaders($session)); + $response = Request::get(Endpoints::getMediasJsonByTagLink($tagName, ''), $this->generateHeaders($this->userSession)); if ($response->code === 404) { throw new InstagramNotFoundException('Account with given username does not exist.'); } @@ -455,10 +429,7 @@ public function getTopMediasByTagName($tagName) throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); } $cookies = self::parseCookies($response->headers['Set-Cookie']); - $this->csrfToken = $cookies['csrftoken']; - $session['csrftoken'] = $this->csrfToken; - $cachedString->set($session); - self::$instanceCache->save($cachedString); + $this->userSession['csrftoken'] = $cookies['csrftoken']; $jsonResponse = json_decode($response->raw_body, true); $medias = []; foreach ($jsonResponse['tag']['top_posts']['nodes'] as $mediaArray) { @@ -469,9 +440,7 @@ public function getTopMediasByTagName($tagName) public function getLocationTopMediasById($facebookLocationId) { - $cachedString = self::$instanceCache->getItem($this->sessionUsername); - $session = $cachedString->get(); - $response = Request::get(Endpoints::getMediasJsonByLocationIdLink($facebookLocationId), $this->generateHeaders($session)); + $response = Request::get(Endpoints::getMediasJsonByLocationIdLink($facebookLocationId), $this->generateHeaders($this->userSession)); if ($response->code === 404) { throw new InstagramNotFoundException('Location with this id doesn\'t exist'); } @@ -479,10 +448,7 @@ public function getLocationTopMediasById($facebookLocationId) throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); } $cookies = self::parseCookies($response->headers['Set-Cookie']); - $this->csrfToken = $cookies['csrftoken']; - $session['csrftoken'] = $this->csrfToken; - $cachedString->set($session); - self::$instanceCache->save($cachedString); + $this->userSession['csrftoken'] = $cookies['csrftoken']; $jsonResponse = json_decode($response->raw_body, true); $nodes = $jsonResponse['location']['top_posts']['nodes']; $medias = []; @@ -494,22 +460,16 @@ public function getLocationTopMediasById($facebookLocationId) public function getLocationMediasById($facebookLocationId, $quantity = 12, $offset = '') { - $cachedString = self::$instanceCache->getItem($this->sessionUsername); - $session = $cachedString->get(); - $index = 0; $medias = []; $hasNext = true; while ($index < $quantity && $hasNext) { - $response = Request::get(Endpoints::getMediasJsonByLocationIdLink($facebookLocationId, $offset), $this->generateHeaders($session)); + $response = Request::get(Endpoints::getMediasJsonByLocationIdLink($facebookLocationId, $offset), $this->generateHeaders($this->userSession)); if ($response->code !== 200) { throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); } $cookies = self::parseCookies($response->headers['Set-Cookie']); - $this->csrfToken = $cookies['csrftoken']; - $session['csrftoken'] = $this->csrfToken; - $cachedString->set($session); - self::$instanceCache->save($cachedString); + $this->userSession['csrftoken'] = $cookies['csrftoken']; $arr = json_decode($response->raw_body, true); $nodes = $arr['location']['media']['nodes']; foreach ($nodes as $mediaArray) { @@ -530,10 +490,7 @@ public function getLocationMediasById($facebookLocationId, $quantity = 12, $offs public function getLocationById($facebookLocationId) { - $cachedString = self::$instanceCache->getItem($this->sessionUsername); - $session = $cachedString->get(); - - $response = Request::get(Endpoints::getMediasJsonByLocationIdLink($facebookLocationId), $this->generateHeaders($session)); + $response = Request::get(Endpoints::getMediasJsonByLocationIdLink($facebookLocationId), $this->generateHeaders($this->userSession)); if ($response->code === 404) { throw new InstagramNotFoundException('Location with this id doesn\'t exist'); } @@ -541,10 +498,7 @@ public function getLocationById($facebookLocationId) throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); } $cookies = self::parseCookies($response->headers['Set-Cookie']); - $this->csrfToken = $cookies['csrftoken']; - $session['csrftoken'] = $this->csrfToken; - $cachedString->set($session); - self::$instanceCache->save($cachedString); + $this->userSession['csrftoken'] = $cookies['csrftoken']; $jsonResponse = json_decode($response->raw_body, true); return Location::makeLocation($jsonResponse['location']); } @@ -563,19 +517,20 @@ public function login($force = false) throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); } $cookies = self::parseCookies($response->headers['Set-Cookie']); - $this->csrfToken = $cookies['csrftoken']; - $this->mid = $cookies['mid']; - $headers = ['cookie' => "csrftoken=$this->csrfToken; mid=$this->mid;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken]; + $mid = $cookies['mid']; + $csrfToken = $cookies['csrftoken']; + $headers = ['cookie' => "csrftoken=$csrfToken; mid=$mid;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $csrfToken]; $response = Request::post(Endpoints::LOGIN_URL, $headers, ['username' => $this->sessionUsername, 'password' => $this->sessionPassword]); if ($response->code !== 200) { throw new InstagramAuthException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); } $cookies = self::parseCookies($response->headers['Set-Cookie']); - $cookies['mid'] = $this->mid; - $this->csrfToken = $cookies['csrftoken']; - $this->sessionId = $cookies['sessionid']; + $cookies['mid'] = $mid; $cachedString->set($cookies); self::$instanceCache->save($cachedString); + $this->userSession = $cookies; + } else { + $this->userSession = $session; } } @@ -584,9 +539,9 @@ public function isLoggedIn($session) if (is_null($session) || !isset($session['sessionid'])) { return false; } - $this->sessionId = $session['sessionid']; - $this->csrfToken = $session['csrftoken']; - $headers = ['cookie' => "csrftoken=$this->csrfToken; sessionid=$this->sessionId;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken]; + $sessionId = $session['sessionid']; + $csrfToken = $session['csrftoken']; + $headers = ['cookie' => "csrftoken=$csrfToken; sessionid=$sessionId;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $csrfToken]; $response = Request::get(Endpoints::BASE_URL, $headers); if ($response->code !== 200) { return false; @@ -597,4 +552,10 @@ public function isLoggedIn($session) } return true; } + + public function saveSession() + { + $cachedString = self::$instanceCache->getItem($this->sessionUsername); + $cachedString->set($this->userSession); + } } \ No newline at end of file diff --git a/tests/InstagramTest.php b/tests/InstagramTest.php index c0639cb4..16d9b7e3 100644 --- a/tests/InstagramTest.php +++ b/tests/InstagramTest.php @@ -4,6 +4,7 @@ use InstagramScraper\Instagram; use InstagramScraper\Model\Media; +use phpFastCache\CacheManager; use PHPUnit\Framework\TestCase; @@ -13,7 +14,12 @@ class InstagramTest extends TestCase public static function setUpBeforeClass() { - self::$instagram = Instagram::withCredentials('PASTE_LOGIN', 'PASTE_PASSWORD'); + $sessionFolder = __DIR__ . DIRECTORY_SEPARATOR . 'sessions' . DIRECTORY_SEPARATOR; + CacheManager::setDefaultConfig([ + 'path' => $sessionFolder + ]); + $instanceCache = CacheManager::getInstance('files'); + self::$instagram = Instagram::withCredentials('USERNAME', 'PASSWORD', $instanceCache); self::$instagram->login(); }