forked from LibrariesHacked/openlibrary-search
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathopenlibrary-search.py
79 lines (71 loc) · 3.19 KB
/
openlibrary-search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import csv
import re
import urllib.request
import json
import time
# Base URL of the Open Library site; the API endpoint below is built from it.
openLibraryUrl = 'https://openlibrary.org'
# Books API query prefix. The first ISBN is appended directly after the
# trailing "ISBN:"; any further ISBNs are joined on with ",ISBN:".
books_api = '%s/api/books?jscmd=data&format=json&bibkeys=ISBN:' % openLibraryUrl
# Input files, relative to the working directory. Forward slashes are used
# because they are accepted on Windows as well as on POSIX systems, whereas a
# backslash is a path separator on Windows only.
data_file = 'data/children_isbns.csv'
keywords_file = 'data/keywords.csv'
isbns = []
# Read through the ISBNs in our CSV. Only the first column of each row is
# inspected, and only the first run of digits found in that cell is used.
isbn_digits = re.compile(r'\d+')
# newline='' leaves newline handling to the csv module, as its documentation
# recommends (it matters for quoted fields that contain line breaks).
with open(data_file, 'r', newline='') as isbnfile:
    isbnreader = csv.reader(isbnfile, delimiter=',', quotechar='"')
    for row in isbnreader:
        # Skip blank lines and rows whose first cell is empty.
        if not row or not row[0]:
            continue
        found = isbn_digits.search(row[0])
        ## For now just include numbers of 10 or 13 digits
        # NOTE(review): an ISBN-10 ending in the check character 'X', or an
        # ISBN written with hyphens, gives a shorter digit run and is dropped.
        if found and len(found.group()) in (10, 13):
            isbns.append(found.group())
keywords = []
# Read through the keywords: the first column of every row is one keyword.
# newline='' leaves newline handling to the csv module, as its documentation
# recommends.
with open(keywords_file, 'r', newline='') as keywordsfile:
    keywordreader = csv.reader(keywordsfile, delimiter=',', quotechar='"')
    for row in keywordreader:
        # csv.reader returns a blank line as an empty row, for which row[0]
        # would raise IndexError. A blank keyword is skipped as well: it would
        # be a substring of (nearly) every text and so match every book.
        if row and row[0].strip():
            keywords.append(row[0])
# The Open Library API doesn't really specify how many ISBNs you can throw at it.
# Try 10 at a time and then wait for 10 seconds.
def batcher(seq, size):
    """Split ``seq`` into consecutive slices of ``size`` items.

    Returns a generator of slices taken in order from the start of ``seq``.
    For a positive ``size`` every slice holds ``size`` items except possibly
    the last one, which holds whatever remains.
    """
    # The start offsets are computed up front, so an invalid step (size 0)
    # is reported when batcher() is called rather than on first iteration.
    offsets = range(0, len(seq), size)
    return (seq[start:start + size] for start in offsets)
def _as_text(value):
    """Return ``value`` as a plain string, or '' when it carries no text.

    Strings pass through unchanged. Dicts are accepted because the Books API
    "data" format lists each subject as an object with 'name' and 'url' keys.
    NOTE(review): a 'value' key is honoured too, for text wrapped as
    {'type': ..., 'value': ...}; that shape is handled defensively, confirm
    whether this endpoint ever returns it.
    """
    if isinstance(value, str):
        return value
    if isinstance(value, dict):
        for key in ('name', 'value'):
            inner = value.get(key)
            if isinstance(inner, str):
                return inner
    return ''


def _searchable_texts(book):
    """Return the lower-cased, non-empty strings of ``book`` to search in.

    Fields looked at from Open Library: title, subtitle, description, notes
    and every entry of subjects.
    """
    candidates = [book.get(field)
                  for field in ('title', 'subtitle', 'description', 'notes')]
    subjects = book.get('subjects')
    if isinstance(subjects, list):
        candidates.extend(subjects)
    texts = (_as_text(candidate) for candidate in candidates)
    return [text.lower() for text in texts if text]


def _fetch_books(url):
    """Download ``url`` and return the decoded JSON object it contains.

    The result maps each bibkey to its book record. Raises whatever
    urlopen/decoding/JSON parsing raise, and ValueError when the reply is
    not a JSON object.
    """
    # The with-block closes the HTTP response once it has been read.
    with urllib.request.urlopen(url) as response:
        books = json.loads(response.read().decode('utf-8'))
    if not isinstance(books, dict):
        raise ValueError('unexpected response: %r' % (books,))
    return books


# Matching is case-insensitive, so the keywords are lower-cased once up front
# (the texts are lower-cased per book). Empty keywords are dropped because ''
# is a substring of every string and would match every book.
search_words = [word.lower() for word in keywords if word]

# newline='' stops the csv module's own '\r\n' row endings from being
# translated a second time on Windows, which shows up as blank rows.
# utf-8 lets titles with non-ASCII characters be written under any locale.
with open('data/childrens.csv', 'w', newline='', encoding='utf-8') as outputfile:
    writer = csv.writer(outputfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for batch in batcher(isbns, 10):
        # books_api already ends in "ISBN:", so only the separators between
        # the ISBNs of this batch need the prefix.
        openlibrary_url = books_api + ',ISBN:'.join(batch)
        print(openlibrary_url)
        try:
            booksjson = _fetch_books(openlibrary_url)
        except Exception as err:
            # Best effort: a failed request or an unparsable reply only loses
            # this batch. Ctrl-C is no longer swallowed, and the reason is
            # printed alongside the marker.
            print('Failed', err)
        else:
            # Loop through all the books returned from Open Library
            for bibkey, book in booksjson.items():
                if not isinstance(book, dict):
                    continue
                texts = _searchable_texts(book)
                if any(word in text for word in search_words for text in texts):
                    # Always write this book's own title ('' when it has
                    # none), never one left over from a previous book.
                    writer.writerow([bibkey, _as_text(book.get('title'))])
        # Pause between batches before sending the next request.
        time.sleep(10)