Fix empty-document error raised by the html.fromstring function

NaiboWang · Dec 11, 2024 · b4d7ddf · b4d7ddf
1 parent 2031b09
commit b4d7ddf
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 1 deletion.
diff --git a/ExecuteStage/.vscode/launch.json b/ExecuteStage/.vscode/launch.json
@@ -12,7 +12,7 @@
             "justMyCode": false,
             //  "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
             // "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
-            "args": ["--ids", "[5]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
+            "args": ["--ids", "[35]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
         "--read_type", "remote", 
     ]
             // "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"

diff --git a/ExecuteStage/easyspider_executestage.py b/ExecuteStage/easyspider_executestage.py
@@ -1148,6 +1148,14 @@ def loopExecute(self, node, loopValue, clickPath="", index=0):
         self.history["handle"] = thisHandle
         thisHistoryURL = self.browser.current_url
         # 快速提取处理
+        # start = time.time()
+        try:
+            tree = html.fromstring(self.browser.page_source)
+        except Exception as e:
+            self.print_and_log("解析页面时出错，将切换普通提取模式|Error parsing page, will switch to normal extraction mode")
+            node["parameters"]["quickExtractable"] = False
+        # end = time.time()
+        # print("解析页面秒数：", end - start)
         if node["parameters"]["quickExtractable"]:
             self.browser.switch_to.default_content() # 切换到主页面
             tree = html.fromstring(self.browser.page_source)