forked from EHRI/rs-aggregator
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
55 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# Import the os module, for the os.walk function | ||
import os | ||
import requests | ||
import sys | ||
from multiprocessing.dummy import Pool as ThreadPool | ||
|
||
# --- Upload configuration -------------------------------------------------
# Directory tree that is walked for JSON documents to upload.
rootDir = 'target/destination'
# Base URL of the search endpoint that receives the documents.
awsurl = "https://search-core-resync-tqjhs3lbpljpp76xbglvpgyrme.us-west-2.es.amazonaws.com"

# --- Mutable script state -------------------------------------------------
# [dirName, fname] pairs queued for upload by the directory walk.
files = []
# Running total of files seen so far during the walk.
count = 0
# Threshold compared against `count`; may be overridden from the command line.
start = 0
# Worker threads used to run the uploads concurrently.
pool = ThreadPool(32)
def run(file):
    """Upload a single JSON document with an HTTP PUT.

    Args:
        file: A ``[dirName, fname]`` pair: the directory that holds the
            document and the document's file name.

    Returns:
        None. Files whose name does not end in "json" are skipped silently.
        For uploaded files the response body is printed.
    """
    dirName, fname = file[0], file[1]
    if not fname.endswith("json"):
        return

    # The document id is everything before the first "." in the file name.
    coreid = fname.split(".")[0]

    # Read the raw bytes and send them untouched. Opening in text mode would
    # decode with the locale's default codec and hand a str to the HTTP
    # layer, so the bytes on the wire would depend on the platform and on
    # how the HTTP library encodes str bodies.
    with open(os.path.join(dirName, fname), 'rb') as f:
        payload = f.read()

    print("Uploading %s" % coreid)
    response = requests.put(
        awsurl + "/articles/articles/" + coreid,
        data=payload,
        headers={"Content-Type": "application/json"},
    )
    print(response.text)
|
||
|
||
# Optional first command-line argument: overrides `start`. Checking the
# length first keeps the default when the script is run without arguments
# (indexing sys.argv[1] directly raises IndexError in that case).
if len(sys.argv) > 1 and sys.argv[1]:
    start = int(sys.argv[1])

# Walk the tree and queue JSON files. `count` accumulates the number of files
# seen per directory; a directory's JSON files are queued only once that
# running total exceeds `start`, so skipping happens at directory granularity.
for dirName, subdirList, fileList in os.walk(rootDir):
    print('Found directory: %s' % dirName)
    count = count + len(fileList)
    if count <= start:
        continue
    for fname in fileList:
        if fname.endswith("json"):
            print(dirName)
            print(fname)
            files.append([dirName, fname])

# Upload everything that was queued, using the thread pool.
results = pool.map(run, files)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from subprocess import STDOUT, check_output | ||
import shutil | ||
|
||
# Repository ids substituted into the capability-list URL below.
sets = ["223", "1124", "221", "1732", "2697", "48", "193", "1997", "2586", "2740", "522", "1501", "721", "2581", "2907", "1562"]
# Values passed to the aggregator's --batch-size option, one run per value.
batch_sizes = ["5000"]

for batch in batch_sizes:
    # Loop variable is not called `set`, so the builtin stays usable.
    for repo_id in sets:
        uri = "https://resourcesync.core.ac.uk/sitemaps/repo" + repo_id + "/metadata/capabilitylist.xml"
        #print("removing target")
        #shutil.rmtree("/Users/mc26486/workspace/KMI/rs-aggregator/target/destination/resourcesync.core.ac.uk")
        #cmd = "java -cp rs-aggregator-jar-with-dependencies.jar uk.ac.core.main.CORESyncApp --uri="+uri+" --measure --max=5000"

        # Pass the command as an argument list so no shell is involved and
        # no quoting of the URI is needed.
        cmd = [
            "java",
            "-cp",
            "rs-aggregator-jar-with-dependencies.jar",
            "uk.ac.core.main.COREBatchSyncApp",
            "--uri=" + uri,
            "--batch-size=" + batch,
            "--measure",
            "--max=5000",
        ]

        # Echo the command line exactly as it would be typed.
        print(" ".join(cmd))
        # Raises CalledProcessError if the aggregator exits with a non-zero
        # status; the captured output is not used.
        check_output(cmd)
|
||
|