-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Khemarato Bhikkhu
committed
Jan 19, 2024
1 parent
2f75e1a
commit b44051c
Showing
9 changed files
with
300 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
#!/bin/python3
"""Backfill URL metadata onto single-link Google Docs.

Walks every non-trashed Google Doc owned by the user, picks out the small
ones that contain exactly one hyperlink, and records that link in the
file's ``properties`` under ``url`` (tagged with ``createdBy``).  Docs in
one of the two recognised old layouts are additionally rewritten as an
``<h2>`` title followed by the link.
"""

from html import escape
from urllib.parse import urlparse, parse_qs

import gdrive

gfiles = gdrive.session().files()

query = "mimeType='application/vnd.google-apps.document' and trashed=false and 'me' in owners"
fields = "id,properties,name,size"

# Links longer than this are swapped for a TinyURL before being stored.
# NOTE(review): presumably chosen so the value fits Drive's per-property
# size limit alongside the 'url' key -- confirm against the Drive API docs.
MAX_URL_LENGTH = 121


def _link_target(href):
    """Return the URL an exported anchor really points at, or '' if unknown.

    The target is read from the ``q`` query parameter of *href* when there
    is one (presumably the export wraps links in a redirect URL -- confirm).
    A plain http(s) href without ``q`` is returned unchanged instead of
    being discarded; anything else (missing href, ``#anchor``, ``mailto:``)
    yields the empty string.
    """
    if not href:
        return ''
    parsed = urlparse(href)
    wrapped = parse_qs(parsed.query).get('q', [''])[0]
    if wrapped:
        return wrapped
    return href if parsed.scheme in ('http', 'https') else ''


def _shorten(url):
    """Return a TinyURL for *url*, or None when the service call fails.

    The long URL is passed through ``params`` so that it is percent-encoded;
    appending it raw would let any ``&`` or ``#`` inside it cut the value
    short.
    """
    response = gdrive.requests.get(
        'https://tinyurl.com/api-create.php',
        params={'url': url},
    )
    short = response.text.strip()
    # Don't let an error page end up stored as the document's URL.
    if not response.ok or not short.startswith('http'):
        return None
    return short


for file in gdrive.all_files_matching(query, fields):
    print(f"Analyzing '{file['name']}'...")
    if int(file.get('size') or 1) > 3000:
        print(" File too large to be a link file. Skipping")
        continue
    if file.get('properties', {}).get('url'):
        print(" Has the metadata already. Skipping")
        continue
    doc = gfiles.export(
        fileId=file['id'],
        mimeType='text/html',
    ).execute().decode('utf-8')
    soup = gdrive.BeautifulSoup(doc, features='html.parser')
    links, ps, h2s, h3s = [soup.find_all(tag) or [] for tag in ['a', 'p', 'h2', 'h3']]
    if len(links) != 1:
        # I don't want to handle multi-link docs
        # and no-link docs are other things
        print(" Doesn't appear to be a single-link doc")
        continue
    target = _link_target(links[0].get('href'))
    if not target:
        # Storing an empty url would make this doc get re-analyzed forever.
        print(" Couldn't work out where the link points. Skipping")
        continue

    # Classify the layout before any network call so that docs which end
    # up skipped never cost a shortener request.
    ps_with_text = [p for p in ps if p.get_text() != ""]
    if len(ps) == 1 and len(h2s) == 1 and len(h3s) == 0:
        # new-style doc: only the properties metadata is missing
        needs_reformat = False
        title = None
    elif len(ps_with_text) == 2 and len(h2s) == 0 and len(h3s) == 0:
        # old-style doc: title paragraph followed by the link paragraph
        needs_reformat = True
        title = ps_with_text[0].get_text()
    elif len(ps_with_text) == 1 and ps_with_text[0].get_text() == target:
        # old-style doc holding only the bare link; compared against the
        # full target because the visible text is never the shortened URL
        needs_reformat = True
        title = file['name']
    else:
        print(" Doesn't match any known link doc format. Skipping")
        continue

    link = target
    if len(link) > MAX_URL_LENGTH:
        link = _shorten(target)
        if not link:
            print(" Couldn't shorten the link. Skipping")
            continue

    data = {'properties': {
        'createdBy': 'LibraryUtils.LinkSaver',
        'url': link,
    }}
    if not needs_reformat:
        print(f" Saving '{link}' to document properties...")
        gfiles.update(fileId=file['id'], body=data).execute()
        continue

    # Reformat to the new style and add the metadata in one update.
    data['mimeType'] = 'text/html'
    # Escape both values: titles and URLs routinely contain '&', and a
    # stray '<' or '"' would otherwise break the generated markup.
    markup = f"""<h2>{escape(title)}</h2><a href="{escape(link)}">{escape(link)}</a>"""
    print(" Updating style and adding metadata...")
    gfiles.update(
        fileId=file['id'],
        body=data,
        media_body=gdrive.string_to_media(markup, 'text/html'),
    ).execute()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#!/bin/python3 | ||
|
||
from archivedotorg import archive_urls | ||
import gdrive | ||
|
||
FIELDS = "id,properties,name,size"


def regen_link_doc(docid, title=None, link=None):
    """Rewrite the Drive document *docid* as a freshly generated link doc.

    Whichever of *title* and *link* is missing (falsy) is read from the
    file's metadata: the title from its ``name`` and the link from the
    ``url`` entry of its ``properties``.  The metadata is requested only
    when at least one of the two is missing.  The document content is then
    replaced with the HTML produced by ``gdrive.make_link_doc_html``.
    """
    if not link or not title:
        metadata = gdrive.session().files().get(fileId=docid, fields=FIELDS).execute()
        link = link or metadata['properties']['url']
        title = title or metadata['name']
    markup = gdrive.make_link_doc_html(title, link)
    request = gdrive.session().files().update(
        fileId=docid,
        body={'mimeType': 'text/html'},
        media_body=gdrive.string_to_media(markup, 'text/html'),
    )
    request.execute()
|
||
if __name__ == '__main__':
    # Select the Google Docs tagged as created by LibraryUtils.LinkSaver
    # that are not trashed and that the user can write to.
    QUERY = " and ".join((
        "mimeType='application/vnd.google-apps.document'",
        "trashed=false",
        "'me' in writers",
        "properties has { key='createdBy' and value='LibraryUtils.LinkSaver' }",
    ))
    urls = []
    # NOTE(review): page_size=2 looks like a debugging leftover -- confirm
    # before raising it.
    for doc in gdrive.all_files_matching(QUERY, FIELDS, page_size=2):
        print(f"Regenerating '{doc['name']}'...")
        link = doc['properties']['url']
        regen_link_doc(doc['id'], title=doc['name'], link=link)
        # Links containing 'youtu' are left out of the archive batch.
        if 'youtu' in link:
            continue
        urls.append(link)
    print(f"Ensuring all {len(urls)} (non-YT) URLs are saved to Archive.org...")
    archive_urls(urls)
|
Oops, something went wrong.