DouglasTaylorSupportGroup · jckli · Nov 16, 2022 · Nov 16, 2022 · Nov 16, 2022 · Nov 16, 2022
diff --git a/Pipfile b/Pipfile
@@ -0,0 +1,13 @@
+[[source]]
+url = "https://pypi.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+beautifulsoup4 = "*"
+requests = "*"
+
+[dev-packages]
+
+[requires]
+python_version = "3.10"
diff --git a/Pipfile.lock b/Pipfile.lock
diff --git a/cheinsteinpy/api.py b/cheinsteinpy/api.py
@@ -18,6 +18,22 @@ def checkLink(url):
     """
     return pageParser.checkLink(url)["isChapter"]
 
+def getId(url):  # public wrapper: takes a Chegg question url and passes it to pageParser.getId unchanged
+    """
+    Gets the id of the question.
+
+    Parameters
+    ----------
+    url : str
+        The url to check.
+
+    Returns
+    -------
+    id : str
+        The id of the question.
+    """
+    return pageParser.getId(url)  # NOTE(review): pageParser.getId in this patch returns None when the url has no "-q<digits>" part, so the result is not always a str
+
 def answer(url, cookie, userAgent):
     """
     Gets answer data from Chegg.
@@ -39,17 +55,31 @@ def answer(url, cookie, userAgent):
     """
     cookieStr = cookieParser.parseCookie(cookie)
     isChapter = pageParser.checkLink(url)["isChapter"]
-    htmlData = requestPage.requestWebsite(url, cookieStr, userAgent)
     if isChapter:
         # await asyncio.sleep(6)
+        htmlData = requestPage.requestWebsite(url, cookieStr, userAgent)
         htmlRaw = requestPage.requestChapter(url, cookieStr, userAgent, htmlData)
     else:
-        htmlRaw = htmlData
+        qid = pageParser.getId(url)
+        data = {
+            "operationName":"QnaPageQuestionByLegacyId",
+            "variables": {
+                "id":int(qid)
+            },
+            "extensions": {
+                "persistedQuery": {
+                    "version":1,
+                    "sha256Hash":"26efed323ef07d1759f67adadd2832ac85d7046b7eca681fe224d7824bab0928"
+                }
+            }
+        }
+        raw = requestPage.requestGraphQl(cookieStr, userAgent, data)
+        htmlRaw = raw["data"]["questionByLegacyId"]["htmlAnswers"][0]["answerData"]["html"]
     dataRaw = pageParser.parsePage(htmlRaw, isChapter)
     if isChapter:
         data = dataRaw[1]
     else:
-        data = dataRaw[1]
+        data = dataRaw
     parsedAnswer = answerParser.getAnswer(data, isChapter)
     if isChapter:
         answer = parsedAnswer[0]
@@ -78,17 +108,31 @@ def question(url, cookie, userAgent):
     """
     cookieStr = cookieParser.parseCookie(cookie)
     isChapter = pageParser.checkLink(url)["isChapter"]
-    htmlData = requestPage.requestWebsite(url, cookieStr, userAgent)
     if isChapter:
         # await asyncio.sleep(6)
+        htmlData = requestPage.requestWebsite(url, cookieStr, userAgent)
         htmlRaw = requestPage.requestChapter(url, cookieStr, userAgent, htmlData)
     else:
-        htmlRaw = htmlData
+        qid = pageParser.getId(url)
+        data = {
+            "operationName":"QnaPageQuestionByLegacyId",
+            "variables": {
+                "id":int(qid)
+            },
+            "extensions": {
+                "persistedQuery": {
+                    "version":1,
+                    "sha256Hash":"26efed323ef07d1759f67adadd2832ac85d7046b7eca681fe224d7824bab0928"
+                }
+            }
+        }
+        raw = requestPage.requestGraphQl(cookieStr, userAgent, data)
+        htmlRaw = raw["data"]["questionByLegacyId"]["content"]["body"]
     dataRaw = pageParser.parsePage(htmlRaw, isChapter)
     if isChapter:
         data = dataRaw[0]
     else:
-        data = dataRaw[0]
+        data = dataRaw
     parsedQuestion = questionParser.getQuestion(data, isChapter)
     if isChapter:
         question = parsedQuestion

diff --git a/cheinsteinpy/parsers/pageParser.py b/cheinsteinpy/parsers/pageParser.py
@@ -9,6 +9,13 @@ def checkLink(link):
     linkCheck = {"isChapter": isChapter}
     return linkCheck
 
+def getId(link):  # pull the numeric question id out of a Chegg questions-and-answers link; None when the link does not match
+    item = re.search(r'chegg.com/homework-help/questions-and-answers/(.*?)-q(\d+)', link)  # group(1): text before the first "-q<digits>", group(2): those digits; NOTE(review): the "." in "chegg.com" is unescaped, so it matches any character
+    if item:
+        return item.group(2)  # returned as a str of digits; api.py converts it with int() before building the GraphQL variables
+    else:
+        return None
+
 def parsePage(data, isChapter):
     if isChapter:
         chapter = data["data"]["textbook_solution"]["chapter"][0]
@@ -20,6 +27,4 @@ def parsePage(data, isChapter):
         return questionjson, solutionjson
     else:
         soup = bs(data, "html.parser")
-        questionhtml = soup.find("div", {"class": "question-body-text"})
-        answerhtml = soup.find("div", {"class": "answer-given-body"})
-        return questionhtml, answerhtml
+        return soup
diff --git a/cheinsteinpy/parsers/questionParser.py b/cheinsteinpy/parsers/questionParser.py
@@ -32,15 +32,23 @@ def getQuestion(dataRaw, isChapter):
             img = dataRaw.find_all("img")
             for i in img:
                 url = i["src"]
-                i.replace_with(url)
+                i.replace_with(" " + url + " ")
+        if "<div>" or "</div>" in str(dataRaw):
+            div = dataRaw.find_all("div")
+            for i in div:
+                i.replaceWithChildren()
+        if "<br/>" in str(dataRaw):
+            br = dataRaw.find_all("br")
+            for i in br:
+                i.replace_with("")
         questionList = []
-        for k in dataRaw.contents[1:-1]:
+        for k in dataRaw.contents:
             txt = k.text
             questionList.append(txt)
         questionList = [x for x in questionList if x]
         if questionList[0] == "\n":
             questionList = questionList[1:]
         if questionList[-1] == "\n":
             questionList = questionList[:-1]      
-        questionList = " ".join(questionList)
+        questionList = (" ".join((" ".join(questionList)).split(" "))).strip()
         return questionList
diff --git a/cheinsteinpy/requestPage.py b/cheinsteinpy/requestPage.py
@@ -57,6 +57,28 @@ def requestChapter(url, cookie, userAgent, html):
     response = requests.post(url=url, headers=headers, json=query, data=None)
     return response.json()
 
+def requestGraphQl(cookie, userAgent, data):  # POST the GraphQL payload `data` to Chegg's one-graph gateway and return the decoded JSON response
+    headers = {
+        'apollographql-client-name': 'chegg-web',
+        'apollographql-client-version': 'main-474a3766-3331951797',  # pinned client build string; presumably copied from a browser session — confirm it is still accepted
+        'content-type': 'application/json',
+        "Accept-Encoding": "gzip, deflate, br",
+        'Accept-Language': 'en-US,en;q=0.5',
+        'authorization': 'Basic TnNZS3dJMGxMdVhBQWQwenFTMHFlak5UVXAwb1l1WDY6R09JZVdFRnVvNndRRFZ4Ug==',  # NOTE(review): hard-coded credential in source; presumably a shared web-client token rather than a user secret — confirm
+        'cookie': cookie,  # caller-supplied cookie header string (api.py passes the cookieParser.parseCookie result)
+        'upgrade-insecure-requests': '1',
+        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
+        'sec-fetch-site': 'cross-site',
+        'sec-fetch-mode': 'navigate',
+        'sec-fetch-user': '?1',
+        'sec-fetch-dest': 'document',
+        'Upgrade-Insecure-Requests': '1',  # NOTE(review): same header name as 'upgrade-insecure-requests' above, differing only in letter case
+        'user-agent': userAgent,
+    }
+    url = "https://gateway.chegg.com/one-graph/graphql"
+    response = requests.post(url=url, headers=headers, json=data, data=None)  # json= sends `data` as the JSON body; no timeout or HTTP status check is applied here
+    return response.json()  # api.py indexes straight into ["data"]["questionByLegacyId"] on this result
+
 def requestEnhancedAnswer(url, cookie, userAgent, html):
     headers = {
         'Host': 'www.chegg.com',

diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="cheinsteinpy",
-    version="0.1.6",
+    version="0.1.7",
     author="jckli & okinahiru",
     description="A Python library to get information from Chegg.",
     long_description=long_description,

diff --git a/test.py b/test.py
@@ -0,0 +1,30 @@
+from cheinsteinpy.parsers import pageParser, cookieParser
+from cheinsteinpy import requestPage, api
+
+link = "https://www.chegg.com/homework-help/questions-and-answers/question-1-die-rolled-10-times-let-x-number-times-six-appears-10-rolls-part-probability-4--q63245414"  # sample (non-chapter) question url used for the manual run below
+userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36"
+with open("cookie.txt", 'r') as f:  # reads the raw cookie text from cookie.txt, resolved against the current working directory
+    cookieTxt = f.read()
+
+# cookieStr = cookieParser.parseCookie(cookieTxt)
+
+answer = api.question(link, cookieTxt, userAgent)  # NOTE(review): this holds the value returned by api.question, so the name "answer" is misleading
+print(answer)  # everything below is a bare string literal, i.e. disabled scratch code for calling requestGraphQl directly
+"""
+payload = {
+    "operationName":"QnaPageQuestionByLegacyId",
+    "variables": {
+        "id":88129188
+    },
+    "extensions": {
+        "persistedQuery": {
+            "version":1,
+            "sha256Hash":"26efed323ef07d1759f67adadd2832ac85d7046b7eca681fe224d7824bab0928"
+        }
+    }
+}
+
+#data = requestPage.requestWebsite(link, cookieStr, userAgent)
+data = requestPage.requestGraphQl(cookieStr, userAgent, payload)
+print(data)
+"""  # NOTE(review): if re-enabled, the block above needs cookieStr, which is only assigned in the commented-out line earlier in this script