-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbase64_generator.py
57 lines (47 loc) · 2.03 KB
/
base64_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time as t
import undetected_chromedriver as uc
import pandas as pd
# Repeatedly open the board in a fresh Chrome session, request a captcha and
# record its background/foreground layers as base64-encoded PNG strings.
# The collected pairs are written to OUTPUT_CSV with columns 'bg' and 'fg'.

ATTEMPTS = 500                      # number of captcha capture attempts
BOARD_URL = 'https://boards.4channel.org/o/'
ELEMENT_TIMEOUT_S = 7               # per-element WebDriverWait timeout
# Pause between opening the post form and requesting the captcha
# (just over five minutes; presumably a site-imposed cooldown -- confirm).
CAPTCHA_COOLDOWN_S = 301
OUTPUT_CSV = 'captcha.csv'

# The captcha layers are exposed as inline CSS of the form
#   ... url("data:image/png;base64,<payload>"); ...
_DATA_URI_PREFIX = 'url("data:image/png;base64,'
_DATA_URI_SUFFIX = '");'


def _extract_b64_png(style):
    """Return the base64 payload of the PNG data URI embedded in *style*.

    Args:
        style: Value of an element's inline ``style`` attribute.

    Returns:
        The text between the data-URI prefix and the closing ``");``.

    Raises:
        ValueError: If *style* is empty/None or holds no PNG data URI.
    """
    if not style:
        raise ValueError('element has no inline style attribute')
    pieces = style.split(_DATA_URI_PREFIX)
    if len(pieces) < 2:
        raise ValueError('no base64 PNG data URI found in style attribute')
    return pieces[1].split(_DATA_URI_SUFFIX)[0]


def _layer_b64(wait, element_id):
    """Wait for the div with id *element_id* and return its base64 PNG."""
    locator = (By.XPATH, f'//div[@id="{element_id}"]')
    element = wait.until(EC.element_to_be_clickable(locator))
    return _extract_b64_png(element.get_attribute('style'))


def _capture_once():
    """Run one browser session and return ``{'bg': ..., 'fg': ...}``.

    The browser is always shut down, even when a wait times out or the
    page layout is not what the scraper expects, so failed attempts do not
    leave Chrome processes behind.

    Raises:
        Exception: Whatever the driver, the waits or the style parsing
            raise; the caller decides how to handle a failed attempt.
    """
    options = uc.ChromeOptions()
    options.add_argument("--no-sandbox")
    options.add_argument('--disable-notifications')
    options.add_argument("--window-size=1280,720")
    options.add_argument('--ignore-certificate-errors')
    options.add_argument('--allow-running-insecure-content')
    browser = uc.Chrome(options=options)
    try:
        wait = WebDriverWait(browser, ELEMENT_TIMEOUT_S)
        browser.get(BOARD_URL)
        # Open the new-thread form.
        wait.until(EC.element_to_be_clickable(
            (By.PARTIAL_LINK_TEXT, 'Start a New Thread'))).click()
        t.sleep(CAPTCHA_COOLDOWN_S)
        # Ask the page to load a captcha.
        wait.until(EC.element_to_be_clickable(
            (By.XPATH, '//button[@id="t-load"]'))).click()
        return {
            'bg': _layer_b64(wait, 't-bg'),
            'fg': _layer_b64(wait, 't-fg'),
        }
    finally:
        # quit() closes every window and ends the driver session.
        browser.quit()


captured = []
try:
    for i in range(ATTEMPTS):
        print(f'Now on captcha generating attempt: {i}')
        try:
            captured.append(_capture_once())
        except Exception as e:
            # Best-effort scraping: report the failure and move on to the
            # next attempt.
            print(e)
finally:
    # Runs on normal completion and on interruption (e.g. Ctrl-C), so a
    # long run never loses the captchas gathered so far.
    # Rows are stored newest-first, matching the original output order.
    storage_df = pd.DataFrame(captured[::-1], columns=['bg', 'fg'])
    # check shape
    print(storage_df.shape)
    # results:
    storage_df.to_csv(OUTPUT_CSV, index=False)