-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathscrape.py
executable file
·31 lines (26 loc) · 890 Bytes
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#!/usr/bin/env python3
import os
import subprocess
from urllib.parse import urljoin

import bs4
import requests
# Mirror the page at `url` into ./infodump: download every image the page
# embeds, point absolute image references at the local copies, rewrite
# i.imgur.com links to bare file names, and save the resulting HTML as
# infodump/index.html.
out_dir = "infodump"
os.makedirs(os.path.join(out_dir, "thumbs"), exist_ok=True)

url = "https://moneroinfodump.neocities.org/"
response = requests.get(url, timeout=15)
# Fail loudly rather than mirroring an HTTP error page as index.html.
response.raise_for_status()
contents = response.content
soup = bs4.BeautifulSoup(contents, "html.parser")
images = soup.find_all("img")
links = soup.find_all("a")


def fetch(remote, local):
    """Download the URL *remote* to the file path *local* using wget.

    The command is passed as an argument list (no shell), so characters in
    scraped URLs can never be interpreted as shell syntax.  A non-zero wget
    exit status is ignored on purpose: one broken image must not abort the
    whole mirror.

    Raises:
        FileNotFoundError: if the ``wget`` executable is not installed.
    """
    os.makedirs(os.path.dirname(local), exist_ok=True)
    subprocess.run(
        ["wget", "-q", "--no-clobber", "-O", local, remote], check=False
    )


for image in images:
    img = image.get("src")
    # An <img> without src, or with an inline data: URI, has nothing to fetch.
    if not img or img.startswith("data:"):
        continue

    is_absolute = img.startswith(("http://", "https://"))
    if is_absolute:
        # Absolute URLs are flattened to their file name inside out_dir.
        relative = os.path.basename(img)
        remote = img
    else:
        # Relative references keep their path below out_dir and must be
        # resolved against the page URL before wget can retrieve them.
        relative = img.lstrip("/")
        remote = urljoin(url, img)

    local = os.path.normpath(os.path.join(out_dir, relative))
    # Skip empty file names and anything ("../", ".") that would land
    # outside the mirror directory.
    if not local.startswith(out_dir + os.sep):
        continue

    fetch(remote, local)
    if is_absolute:
        image["src"] = relative

for link in links:
    href = link.get("href")
    if href and href.startswith("https://i.imgur.com"):
        link["href"] = os.path.basename(href)

# Explicit encoding so the output does not depend on the locale default.
with open(os.path.join(out_dir, "index.html"), "w", encoding="utf-8") as f:
    f.write(str(soup))