From cf86c5b5af931cbd644f7313b9dedfee0dde15e4 Mon Sep 17 00:00:00 2001
From: lina <jack@jackli.dev>
Date: Wed, 16 Nov 2022 08:39:06 -0700
Subject: [PATCH 1/6] feat: getid method

---
 cheinsteinpy/parsers/pageParser.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/cheinsteinpy/parsers/pageParser.py b/cheinsteinpy/parsers/pageParser.py
index d4ab121..8f6b2ec 100644
--- a/cheinsteinpy/parsers/pageParser.py
+++ b/cheinsteinpy/parsers/pageParser.py
@@ -9,6 +9,13 @@ def checkLink(link):
     linkCheck = {"isChapter": isChapter}
     return linkCheck
 
+def getId(link):
+    """Return the digits after "-q" in a Chegg Q&A link as a string, or None if the link does not match."""
+    item = re.search(r'chegg\.com/homework-help/questions-and-answers/(.*?)-q(\d+)', link)
+    if item:
+        return item.group(2)
+    return None
+
 def parsePage(data, isChapter):
     if isChapter:
         chapter = data["data"]["textbook_solution"]["chapter"][0]

From cba9bc2c92c581a8bb1df8c97325bb717e03c696 Mon Sep 17 00:00:00 2001
From: lina <jack@jackli.dev>
Date: Wed, 16 Nov 2022 09:26:39 -0700
Subject: [PATCH 2/6] feat: request graphql method

---
 cheinsteinpy/requestPage.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/cheinsteinpy/requestPage.py b/cheinsteinpy/requestPage.py
index b24210a..85d4e37 100644
--- a/cheinsteinpy/requestPage.py
+++ b/cheinsteinpy/requestPage.py
@@ -57,6 +57,28 @@ def requestChapter(url, cookie, userAgent, html):
     response = requests.post(url=url, headers=headers, json=query, data=None)
     return response.json()
 
+def requestGraphQl(cookie, userAgent, data):
+    """POST a GraphQL payload to Chegg's one-graph gateway and return the response body."""
+    headers = {
+        'apollographql-client-name': 'chegg-web',
+        'apollographql-client-version': 'main-474a3766-3331951797',
+        'content-type': 'application/json',
+        "Accept-Encoding": "gzip, deflate, br",
+        'Accept-Language': 'en-US,en;q=0.5',
+        'authorization': 'Basic TnNZS3dJMGxMdVhBQWQwenFTMHFlak5UVXAwb1l1WDY6R09JZVdFRnVvNndRRFZ4Ug==',
+        'cookie': cookie,
+        'upgrade-insecure-requests': '1',
+        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
+        'sec-fetch-site': 'cross-site',
+        'sec-fetch-mode': 'navigate',
+        'sec-fetch-user': '?1',
+        'sec-fetch-dest': 'document',
+        'user-agent': userAgent,
+    }
+    url = "https://gateway.chegg.com/one-graph/graphql"
+    response = requests.post(url=url, headers=headers, json=data, data=None)
+    return response.text
+
 def requestEnhancedAnswer(url, cookie, userAgent, html):
     headers = {
         'Host': 'www.chegg.com',

From 9544fbfc909ff7912b03a90a86ddef0df397f7de Mon Sep 17 00:00:00 2001
From: lina <jack@jackli.dev>
Date: Wed, 16 Nov 2022 09:43:04 -0700
Subject: [PATCH 3/6] fix: answer grabbing

---
 cheinsteinpy/api.py                | 36 +++++++++++++++++++++++++++---
 cheinsteinpy/parsers/pageParser.py |  4 +---
 cheinsteinpy/requestPage.py        |  2 +-
 3 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/cheinsteinpy/api.py b/cheinsteinpy/api.py
index eff16e8..30493bd 100644
--- a/cheinsteinpy/api.py
+++ b/cheinsteinpy/api.py
@@ -18,6 +18,22 @@ def checkLink(url):
     """
     return pageParser.checkLink(url)["isChapter"]
 
+def getId(url):
+    """
+    Gets the numeric id of a question from its url.
+
+    Parameters
+    ----------
+    url : str
+        The question url to extract the id from.
+
+    Returns
+    -------
+    id : str or None
+        The id digits, or None if the url has no question id.
+    """
+    return pageParser.getId(url)
+
 def answer(url, cookie, userAgent):
     """
     Gets answer data from Chegg.
@@ -39,17 +55,31 @@ def answer(url, cookie, userAgent):
     """
     cookieStr = cookieParser.parseCookie(cookie)
     isChapter = pageParser.checkLink(url)["isChapter"]
-    htmlData = requestPage.requestWebsite(url, cookieStr, userAgent)
     if isChapter:
         # await asyncio.sleep(6)
+        htmlData = requestPage.requestWebsite(url, cookieStr, userAgent)
         htmlRaw = requestPage.requestChapter(url, cookieStr, userAgent, htmlData)
     else:
-        htmlRaw = htmlData
+        qid = pageParser.getId(url)
+        if qid is None:  # getId returns None for links it cannot parse; fail with a clear message
+            raise ValueError("no question id found in url: %r" % (url,))
+        data = {
+            "operationName": "QnaPageQuestionByLegacyId",
+            "variables": {"id": int(qid)},
+            "extensions": {
+                "persistedQuery": {
+                    "version": 1,
+                    "sha256Hash": "26efed323ef07d1759f67adadd2832ac85d7046b7eca681fe224d7824bab0928"
+                }
+            }
+        }
+        raw = requestPage.requestGraphQl(cookieStr, userAgent, data)
+        htmlRaw = raw["data"]["questionByLegacyId"]["htmlAnswers"][0]["answerData"]["html"]
     dataRaw = pageParser.parsePage(htmlRaw, isChapter)
     if isChapter:
         data = dataRaw[1]
     else:
-        data = dataRaw[1]
+        data = dataRaw
     parsedAnswer = answerParser.getAnswer(data, isChapter)
     if isChapter:
         answer = parsedAnswer[0]
diff --git a/cheinsteinpy/parsers/pageParser.py b/cheinsteinpy/parsers/pageParser.py
index 8f6b2ec..ddd9f8b 100644
--- a/cheinsteinpy/parsers/pageParser.py
+++ b/cheinsteinpy/parsers/pageParser.py
@@ -27,6 +27,4 @@ def parsePage(data, isChapter):
         return questionjson, solutionjson
     else:
         soup = bs(data, "html.parser")
-        questionhtml = soup.find("div", {"class": "question-body-text"})
-        answerhtml = soup.find("div", {"class": "answer-given-body"})
-        return questionhtml, answerhtml
\ No newline at end of file
+        return soup
\ No newline at end of file
diff --git a/cheinsteinpy/requestPage.py b/cheinsteinpy/requestPage.py
index 85d4e37..07b79ac 100644
--- a/cheinsteinpy/requestPage.py
+++ b/cheinsteinpy/requestPage.py
@@ -77,7 +77,7 @@ def requestGraphQl(cookie, userAgent, data):
     }
     url = "https://gateway.chegg.com/one-graph/graphql"
     response = requests.post(url=url, headers=headers, json=data, data=None)
-    return response.text
+    return response.json()
 
 def requestEnhancedAnswer(url, cookie, userAgent, html):
     headers = {

From 79c3b616e1d1c3cac43294435bf5f15cc1ae567c Mon Sep 17 00:00:00 2001
From: lina <jack@jackli.dev>
Date: Wed, 16 Nov 2022 10:15:09 -0700
Subject: [PATCH 4/6] fix: question grabbing

---
 cheinsteinpy/api.py                    | 20 +++++++++++++++++---
 cheinsteinpy/parsers/questionParser.py | 14 +++++++++++---
 2 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/cheinsteinpy/api.py b/cheinsteinpy/api.py
index 30493bd..5c8ef2a 100644
--- a/cheinsteinpy/api.py
+++ b/cheinsteinpy/api.py
@@ -108,17 +108,31 @@ def question(url, cookie, userAgent):
     """
     cookieStr = cookieParser.parseCookie(cookie)
     isChapter = pageParser.checkLink(url)["isChapter"]
-    htmlData = requestPage.requestWebsite(url, cookieStr, userAgent)
     if isChapter:
         # await asyncio.sleep(6)
+        htmlData = requestPage.requestWebsite(url, cookieStr, userAgent)
         htmlRaw = requestPage.requestChapter(url, cookieStr, userAgent, htmlData)
     else:
-        htmlRaw = htmlData
+        qid = pageParser.getId(url)
+        if qid is None:  # getId returns None for links it cannot parse; fail with a clear message
+            raise ValueError("no question id found in url: %r" % (url,))
+        data = {
+            "operationName": "QnaPageQuestionByLegacyId",
+            "variables": {"id": int(qid)},
+            "extensions": {
+                "persistedQuery": {
+                    "version": 1,
+                    "sha256Hash": "26efed323ef07d1759f67adadd2832ac85d7046b7eca681fe224d7824bab0928"
+                }
+            }
+        }
+        raw = requestPage.requestGraphQl(cookieStr, userAgent, data)
+        htmlRaw = raw["data"]["questionByLegacyId"]["content"]["body"]
     dataRaw = pageParser.parsePage(htmlRaw, isChapter)
     if isChapter:
         data = dataRaw[0]
     else:
-        data = dataRaw[0]
+        data = dataRaw
     parsedQuestion = questionParser.getQuestion(data, isChapter)
     if isChapter:
         question = parsedQuestion
diff --git a/cheinsteinpy/parsers/questionParser.py b/cheinsteinpy/parsers/questionParser.py
index ecda5cc..ad50ed0 100644
--- a/cheinsteinpy/parsers/questionParser.py
+++ b/cheinsteinpy/parsers/questionParser.py
@@ -32,9 +32,17 @@ def getQuestion(dataRaw, isChapter):
             img = dataRaw.find_all("img")
             for i in img:
                 url = i["src"]
-                i.replace_with(url)
+                i.replace_with(" " + url + " ")
+        if "</div>" in str(dataRaw):  # unwrap every div so its children become part of the parent's contents
+            div = dataRaw.find_all("div")
+            for i in div:
+                i.unwrap()
+        if "<br/>" in str(dataRaw):
+            br = dataRaw.find_all("br")
+            for i in br:
+                i.replace_with("")
         questionList = []
-        for k in dataRaw.contents[1:-1]:
+        for k in dataRaw.contents:
             txt = k.text
             questionList.append(txt)
         questionList = [x for x in questionList if x]
@@ -42,5 +50,5 @@ def getQuestion(dataRaw, isChapter):
             questionList = questionList[1:]
         if questionList[-1] == "\n":
             questionList = questionList[:-1]      
-        questionList = " ".join(questionList)
+        questionList = " ".join(part for part in " ".join(questionList).split(" ") if part).strip()  # collapse runs of spaces
         return questionList
\ No newline at end of file

From 8d095d92e8776af433c9518fa2406c5e16ba73c4 Mon Sep 17 00:00:00 2001
From: lina <jack@jackli.dev>
Date: Wed, 16 Nov 2022 10:21:54 -0700
Subject: [PATCH 5/6] chore: update version

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 2eba3dc..a21b146 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="cheinsteinpy",
-    version="0.1.6",
+    version="0.1.7",
     author="jckli & okinahiru",
     description="A Python library to get information from Chegg.",
     long_description=long_description,

From 7cb789e8288fd936f16b5b112e1cfa0a9851ac1a Mon Sep 17 00:00:00 2001
From: lina <me@jackli.dev>
Date: Fri, 2 Dec 2022 18:07:26 -0700
Subject: [PATCH 6/6] chore: add some files

---
 Pipfile      | 13 +++++++++
 Pipfile.lock | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 test.py      | 30 ++++++++++++++++++++
 3 files changed, 120 insertions(+)
 create mode 100644 Pipfile
 create mode 100644 Pipfile.lock
 create mode 100644 test.py

diff --git a/Pipfile b/Pipfile
new file mode 100644
index 0000000..ee5fd68
--- /dev/null
+++ b/Pipfile
@@ -0,0 +1,13 @@
+[[source]]
+url = "https://pypi.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+beautifulsoup4 = "*"
+requests = "*"
+
+[dev-packages]
+
+[requires]
+python_version = "3.10"
diff --git a/Pipfile.lock b/Pipfile.lock
new file mode 100644
index 0000000..336588c
--- /dev/null
+++ b/Pipfile.lock
@@ -0,0 +1,77 @@
+{
+    "_meta": {
+        "hash": {
+            "sha256": "73352366ebbba8a1b3952ae48124bd44d7414d5f62dd76c10b00d5772e972461"
+        },
+        "pipfile-spec": 6,
+        "requires": {
+            "python_version": "3.10"
+        },
+        "sources": [
+            {
+                "name": "pypi",
+                "url": "https://pypi.org/simple",
+                "verify_ssl": true
+            }
+        ]
+    },
+    "default": {
+        "beautifulsoup4": {
+            "hashes": [
+                "sha256:58d5c3d29f5a36ffeb94f02f0d786cd53014cf9b3b3951d42e0080d8a9498d30",
+                "sha256:ad9aa55b65ef2808eb405f46cf74df7fcb7044d5cbc26487f96eb2ef2e436693"
+            ],
+            "index": "pypi",
+            "version": "==4.11.1"
+        },
+        "certifi": {
+            "hashes": [
+                "sha256:0d9c601124e5a6ba9712dbc60d9c53c21e34f5f641fe83002317394311bdce14",
+                "sha256:90c1a32f1d68f940488354e36370f6cca89f0f106db09518524c88d6ed83f382"
+            ],
+            "markers": "python_version >= '3.6'",
+            "version": "==2022.9.24"
+        },
+        "charset-normalizer": {
+            "hashes": [
+                "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845",
+                "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"
+            ],
+            "markers": "python_full_version >= '3.6.0'",
+            "version": "==2.1.1"
+        },
+        "idna": {
+            "hashes": [
+                "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4",
+                "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"
+            ],
+            "markers": "python_version >= '3.5'",
+            "version": "==3.4"
+        },
+        "requests": {
+            "hashes": [
+                "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983",
+                "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"
+            ],
+            "index": "pypi",
+            "version": "==2.28.1"
+        },
+        "soupsieve": {
+            "hashes": [
+                "sha256:3b2503d3c7084a42b1ebd08116e5f81aadfaea95863628c80a3b774a11b7c759",
+                "sha256:fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d"
+            ],
+            "markers": "python_version >= '3.6'",
+            "version": "==2.3.2.post1"
+        },
+        "urllib3": {
+            "hashes": [
+                "sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e",
+                "sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997"
+            ],
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' and python_version < '4'",
+            "version": "==1.26.12"
+        }
+    },
+    "develop": {}
+}
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..101bacd
--- /dev/null
+++ b/test.py
@@ -0,0 +1,30 @@
+"""Manual smoke test: fetch one Chegg question and print it (reads cookie.txt from the cwd)."""
+from cheinsteinpy.parsers import pageParser, cookieParser
+from cheinsteinpy import requestPage, api
+
+link = "https://www.chegg.com/homework-help/questions-and-answers/question-1-die-rolled-10-times-let-x-number-times-six-appears-10-rolls-part-probability-4--q63245414"
+userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36"
+with open("cookie.txt", 'r', encoding="utf-8") as f:
+    cookieTxt = f.read()
+
+question = api.question(link, cookieTxt, userAgent)
+print(question)
+# Disabled raw-GraphQL debugging snippet; it needs the cookieStr line below uncommented.
+# cookieStr = cookieParser.parseCookie(cookieTxt)
+"""
+payload = {
+    "operationName":"QnaPageQuestionByLegacyId",
+    "variables": {
+        "id":88129188
+    },
+    "extensions": {
+        "persistedQuery": {
+            "version":1,
+            "sha256Hash":"26efed323ef07d1759f67adadd2832ac85d7046b7eca681fe224d7824bab0928"
+        }
+    }
+}
+
+data = requestPage.requestGraphQl(cookieStr, userAgent, payload)
+print(data)
+"""
\ No newline at end of file