Skip to content

Commit

Permalink
Merge pull request #348 from mr-molodoy/master
Browse files Browse the repository at this point in the history
Fix for new query_hash + gis signed header requirement.
  • Loading branch information
raiym authored Apr 18, 2018
2 parents 87ddba9 + 549a64f commit c441ac5
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 11 deletions.
13 changes: 8 additions & 5 deletions src/InstagramScraper/Endpoints.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class Endpoints
const LOGIN_URL = 'https://www.instagram.com/accounts/login/ajax/';
const ACCOUNT_PAGE = 'https://www.instagram.com/{username}';
const MEDIA_LINK = 'https://www.instagram.com/p/{code}';
const ACCOUNT_MEDIAS = 'https://instagram.com/graphql/query/?query_id=17888483320059182&id={user_id}&first={count}&after={max_id}';
const ACCOUNT_MEDIAS = 'https://www.instagram.com/graphql/query/?query_hash=42323d64886122307be10013ad2dcc44&variables={variables}';
const ACCOUNT_JSON_INFO = 'https://www.instagram.com/{username}/?__a=1';
const MEDIA_JSON_INFO = 'https://www.instagram.com/p/{code}/?__a=1';
const MEDIA_JSON_BY_LOCATION_ID = 'https://www.instagram.com/explore/locations/{{facebookLocationId}}/?__a=1&max_id={{maxId}}';
Expand Down Expand Up @@ -44,6 +44,11 @@ public static function setAccountMediasRequestCount($count)
static::$requestMediaCount = $count;
}

/**
 * Read back the value most recently stored by setAccountMediasRequestCount().
 *
 * @return mixed the current content of static::$requestMediaCount
 */
public static function getAccountMediasRequestCount()
{
    $currentCount = static::$requestMediaCount;

    return $currentCount;
}

public static function getAccountPageLink($username)
{
return str_replace('{username}', urlencode($username), static::ACCOUNT_PAGE);
Expand All @@ -59,11 +64,9 @@ public static function getAccountJsonInfoLinkByAccountId($id)
return str_replace('{userId}', urlencode($id), static::ACCOUNT_JSON_INFO_BY_ID);
}

public static function getAccountMediasJsonLink($userId, $maxId = '')
public static function getAccountMediasJsonLink($variables)
{
$url = str_replace('{user_id}', urlencode($userId), static::ACCOUNT_MEDIAS);
$url = str_replace('{count}', static::$requestMediaCount, $url);
return str_replace('{max_id}', urlencode($maxId), $url);
return str_replace('{variables}', urlencode($variables), static::ACCOUNT_MEDIAS);
}

public static function getMediaPageLink($code)
Expand Down
54 changes: 48 additions & 6 deletions src/InstagramScraper/Instagram.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ class Instagram
private $sessionUsername;
private $sessionPassword;
private $userSession;
private $userAgent = null;
private $rhxGis = null;
private $userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.106 Safari/537.36';

/**
* @param string $username
Expand Down Expand Up @@ -203,10 +204,11 @@ public function searchAccountsByUsername($username)

/**
* @param $session
* @param $gisToken
*
* @return array
*/
private function generateHeaders($session)
private function generateHeaders($session, $gisToken = null)
{
$headers = [];
if ($session) {
Expand All @@ -223,6 +225,10 @@ private function generateHeaders($session)

if ($this->getUserAgent()) {
$headers['user-agent'] = $this->getUserAgent();

if (!is_null($gisToken)) {
$headers['x-instagram-gis'] = $gisToken;
}
}

return $headers;
Expand Down Expand Up @@ -298,6 +304,35 @@ public function getAccount($username)
return Account::create($userArray['entry_data']['ProfilePage'][0]['graphql']['user']);
}

/**
 * Load a page and decode the `_sharedData` JSON blob embedded in its HTML.
 *
 * The URL is normalised to end with exactly one trailing slash before the
 * GET request is issued with the headers built for the current user session.
 *
 * @param string $url page to load, defaults to Endpoints::BASE_URL
 *
 * @return array decoded `_sharedData` structure (big integers kept as strings)
 * @throws InstagramException when the response code is not 200, or when the
 *                            `_sharedData` blob is missing or is not valid JSON
 * @throws InstagramNotFoundException when the response code is 404
 */
private function getSharedDataFromPage($url = Endpoints::BASE_URL)
{
    $response = Request::get(rtrim($url, '/') . '/', $this->generateHeaders($this->userSession));
    if (static::HTTP_NOT_FOUND === $response->code) {
        throw new InstagramNotFoundException('Account with given username does not exist.');
    }
    if (static::HTTP_OK !== $response->code) {
        throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.');
    }

    // Only the first occurrence is needed; a missing blob must fail loudly
    // instead of reading an undefined offset and decoding null.
    if (1 !== preg_match('#\_sharedData \= (.*?)\;\<\/script\>#', $response->raw_body, $matches)) {
        throw new InstagramException('Unable to locate _sharedData in the page body. Something went wrong. Please report issue.');
    }

    $sharedData = json_decode($matches[1], true, 512, JSON_BIGINT_AS_STRING);
    if (!is_array($sharedData)) {
        // json_decode yields null on malformed input; callers index the result as an array.
        throw new InstagramException('Unable to decode _sharedData as JSON. Something went wrong. Please report issue.');
    }

    return $sharedData;
}

/**
 * Return the `rhx_gis` value, loading it on first use.
 *
 * While $this->rhxGis is still null, the shared data is fetched through
 * getSharedDataFromPage() and its 'rhx_gis' entry is stored on the instance;
 * once a non-null value is stored it is returned without another fetch.
 *
 * @return mixed the stored 'rhx_gis' entry
 */
private function getRhxGis()
{
    if ($this->rhxGis !== null) {
        return $this->rhxGis;
    }

    $pageData = $this->getSharedDataFromPage();
    $this->rhxGis = $pageData['rhx_gis'];

    return $this->rhxGis;
}

/**
 * Build the MD5 hex digest of "<rhx_gis>:<variables>".
 *
 * @param string $variables request variables; the caller visible in this
 *                          change passes a json_encode()d string
 *
 * @return string 32-character hexadecimal MD5 digest
 */
private function generateGisToken($variables)
{
    $payload = $this->getRhxGis() . ':' . $variables;

    return md5($payload);
}

/**
* @param int $id
* @param int $count
Expand All @@ -306,13 +341,20 @@ public function getAccount($username)
* @return Media[]
* @throws InstagramException
*/
public function getMediasByUserId($id, $count = 20, $maxId = '')
public function getMediasByUserId($id, $count = 12, $maxId = '')
{
$index = 0;
$medias = [];
$isMoreAvailable = true;
while ($index < $count && $isMoreAvailable) {
$response = Request::get(Endpoints::getAccountMediasJsonLink($id, $maxId), $this->generateHeaders($this->userSession));
$variables = json_encode([
'id' => (string) $id,
'first' => (string) $count,
'after' => (string) $maxId
]);

$response = Request::get(Endpoints::getAccountMediasJsonLink($variables), $this->generateHeaders($this->userSession, $this->generateGisToken($variables)));

if (static::HTTP_OK !== $response->code) {
throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.');
}
Expand Down Expand Up @@ -340,13 +382,13 @@ public function getMediasByUserId($id, $count = 20, $maxId = '')
}
return $medias;
}

/**
* @param string $username
* @param int $count
*
* @return Media[]
* @throws InstagramException
* @throws InstagramNotFoundException
*/
public function getMediasFromFeed($username, $count = 20)
{
Expand Down

0 comments on commit c441ac5

Please sign in to comment.