Skip to content

Commit

Permalink
Hotfix for Instagram changes
Browse files Browse the repository at this point in the history
  • Loading branch information
aik27 committed Mar 14, 2018
1 parent d924c3b commit 93a13e4
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 82 deletions.
8 changes: 4 additions & 4 deletions src/InstagramScraper/Endpoints.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class Endpoints
const LOGIN_URL = 'https://www.instagram.com/accounts/login/ajax/';
const ACCOUNT_PAGE = 'https://www.instagram.com/{username}';
const MEDIA_LINK = 'https://www.instagram.com/p/{code}';
const ACCOUNT_MEDIAS = 'https://www.instagram.com/{username}/?__a=1&max_id={max_id}';
const ACCOUNT_MEDIAS = 'https://instagram.com/graphql/query/?query_id=17888483320059182&id={user_id}&first=30&after={max_id}';
const ACCOUNT_JSON_INFO = 'https://www.instagram.com/{username}/?__a=1';
const MEDIA_JSON_INFO = 'https://www.instagram.com/p/{code}/?__a=1';
const MEDIA_JSON_BY_LOCATION_ID = 'https://www.instagram.com/explore/locations/{{facebookLocationId}}/?__a=1&max_id={{maxId}}';
Expand Down Expand Up @@ -49,10 +49,10 @@ public static function getAccountJsonInfoLinkByAccountId($id)
return str_replace('{userId}', urlencode($id), static::ACCOUNT_JSON_INFO_BY_ID);
}

/**
 * Build the URL for fetching an account's medias as JSON.
 *
 * Fills the placeholders of static::ACCOUNT_MEDIAS with URL-encoded values.
 *
 * NOTE(review): this span shows two variants of the signature and of the first
 * substitution back to back - one taking $username / replacing '{username}',
 * one taking $userId / replacing '{user_id}'. Only the '{user_id}' placeholder
 * occurs in the GraphQL form of ACCOUNT_MEDIAS; the '{username}' variant
 * matches the older '?__a=1' template.
 *
 * @param string $maxId value substituted for '{max_id}' (the template's "after" cursor
 *                      in the GraphQL variant); defaults to an empty string
 *
 * @return string the template with both placeholders replaced
 */
public static function getAccountMediasJsonLink($username, $maxId = '')
public static function getAccountMediasJsonLink($userId, $maxId = '')
{
// Username-based variant: substitute the account name, then the cursor.
$url = str_replace('{username}', urlencode($username), static::ACCOUNT_MEDIAS);
return str_replace('{max_id}', urlencode($maxId), $url);
// User-id-based variant: substitute the account id, then the cursor.
$url = str_replace('{user_id}', urlencode($userId), static::ACCOUNT_MEDIAS);
return str_replace('{max_id}', urlencode($maxId), $url);
}

public static function getMediaPageLink($code)
Expand Down
157 changes: 79 additions & 78 deletions src/InstagramScraper/Instagram.php
Original file line number Diff line number Diff line change
Expand Up @@ -221,38 +221,38 @@ private function generateHeaders($session)
*/
public function getMedias($username, $count = 20, $maxId = '')
{
    // Collects up to $count medias of $username, following the timeline's
    // pagination cursor starting at $maxId ('' = first page). Returns an array
    // of Media objects; throws InstagramException on a non-OK HTTP status or a
    // body that does not decode to an array.

    // The medias endpoint is addressed by account id, so resolve the username first.
    $account = $this->getAccount($username);

    $index = 0;
    $medias = [];
    $isMoreAvailable = true;

    while ($index < $count && $isMoreAvailable) {
        $response = Request::get(
            Endpoints::getAccountMediasJsonLink($account->getId(), $maxId),
            $this->generateHeaders($this->userSession)
        );
        if (static::HTTP_OK !== $response->code) {
            throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.');
        }

        $arr = json_decode($response->raw_body, true, 512, JSON_BIGINT_AS_STRING);
        if (!is_array($arr)) {
            throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.');
        }

        // Guarded lookups: an unexpected payload shape yields an empty page
        // instead of undefined-index notices.
        $timeline = isset($arr['data']['user']['edge_owner_to_timeline_media'])
            ? $arr['data']['user']['edge_owner_to_timeline_media']
            : [];
        $nodes = isset($timeline['edges']) ? $timeline['edges'] : [];

        // Return what has been gathered so far; returning [] here would throw
        // away medias collected from earlier pages.
        if (!is_array($nodes) || empty($nodes)) {
            return $medias;
        }

        foreach ($nodes as $mediaArray) {
            // >= rather than === so a numeric-string $count still stops the loop.
            if ($index >= $count) {
                return $medias;
            }
            $medias[] = Media::create($mediaArray['node']);
            $index++;
        }

        $maxId = isset($timeline['page_info']['end_cursor'])
            ? $timeline['page_info']['end_cursor']
            : '';
        // Without a cursor the next request would fetch the first page again,
        // so only continue when the server supplied one.
        $isMoreAvailable = !empty($timeline['page_info']['has_next_page']) && $maxId !== '';
    }

    return $medias;
}

/**
Expand Down Expand Up @@ -320,52 +320,53 @@ public function getMediaByCode($mediaCode)
*/
public function getPaginateMedias($username, $maxId = '')
{
    // Fetches a single page of $username's medias starting at cursor $maxId
    // ('' = first page). Returns an array with three keys:
    //   'medias'      => array of Media objects on this page,
    //   'maxId'       => cursor to pass in for the next page,
    //   'hasNextPage' => whether the server reports a further page.
    // Throws InstagramException on a non-OK HTTP status or a body that does
    // not decode to an array.

    // The medias endpoint is addressed by account id, so resolve the username first.
    $account = $this->getAccount($username);

    $response = Request::get(
        Endpoints::getAccountMediasJsonLink($account->getId(), $maxId),
        $this->generateHeaders($this->userSession)
    );

    if (static::HTTP_OK !== $response->code) {
        throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.');
    }

    $arr = json_decode($response->raw_body, true, 512, JSON_BIGINT_AS_STRING);

    if (!is_array($arr)) {
        throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.');
    }

    // Guarded lookups: an unexpected payload shape is treated as an empty page
    // instead of raising undefined-index notices.
    $timeline = isset($arr['data']['user']['edge_owner_to_timeline_media'])
        ? $arr['data']['user']['edge_owner_to_timeline_media']
        : [];
    $nodes = isset($timeline['edges']) ? $timeline['edges'] : [];

    if (!is_array($nodes) || empty($nodes)) {
        // Nothing on this page: hand back the caller's cursor and report no
        // further page, so a "while hasNextPage" loop cannot spin forever on
        // the same cursor.
        return [
            'medias' => [],
            'maxId' => $maxId,
            'hasNextPage' => false,
        ];
    }

    $medias = [];
    foreach ($nodes as $mediaArray) {
        $medias[] = Media::create($mediaArray['node']);
    }

    $pageInfo = isset($timeline['page_info']) ? $timeline['page_info'] : [];

    // hasNextPage must come from the response; previously the value was stored
    // in a differently named variable and the result always reported true.
    return [
        'medias' => $medias,
        'maxId' => isset($pageInfo['end_cursor']) ? $pageInfo['end_cursor'] : null,
        'hasNextPage' => !empty($pageInfo['has_next_page']),
    ];
}

/**
Expand Down

0 comments on commit 93a13e4

Please sign in to comment.