Skip to content

Commit

Permalink
Merge branch 'release/2.0.2'
Browse files Browse the repository at this point in the history
  • Loading branch information
mcancellieri committed Feb 6, 2019
2 parents b36d737 + 65708b4 commit ab5f9cf
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 0 deletions.
38 changes: 38 additions & 0 deletions aws_uploader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Import the os module, for the os.walk function
import os
import requests
import sys
from multiprocessing.dummy import Pool as ThreadPool

# Root of the directory tree that is walked looking for files to upload.
rootDir = 'target/destination'

# Base URL of the AWS-hosted search endpoint the documents are PUT to.
awsurl = "https://search-core-resync-tqjhs3lbpljpp76xbglvpgyrme.us-west-2.es.amazonaws.com"

# Thread-backed pool (multiprocessing.dummy) with 32 workers for the uploads.
pool = ThreadPool(32)

# [directory, file name] pairs collected for upload.
files = list()

# `start` is the file-count threshold below which directories are skipped;
# `count` is the running number of files seen so far during the walk.
start = count = 0
def run(file):
    """Upload one JSON document with an HTTP PUT and print the response.

    ``file`` is a two-item sequence ``(directory, file name)``. Entries whose
    file name does not end in "json" are ignored. The id appended to the
    target URL is the part of the file name before the first dot.

    Returns None in every case; the response body is only printed.
    """
    dirName, fname = file[0], file[1]
    if not fname.endswith("json"):
        return
    coreid = fname.split(".")[0]
    # Read raw bytes so the body is sent exactly as stored on disk. A
    # text-mode read decodes with the locale's encoding, and the resulting
    # str may then fail to be re-encoded when used as the request body.
    with open(os.path.join(dirName, fname), 'rb') as f:
        payload = f.read()
    print("Uploading %s" % coreid)
    # NOTE(review): no timeout is set, so a stalled connection blocks this
    # worker indefinitely -- consider passing timeout=... once a sensible
    # value is agreed.
    response = requests.put(
        awsurl + "/articles/articles/" + coreid,
        data=payload,
        headers={"Content-Type": "application/json"},
    )
    print(response.text)


# Optional first command-line argument: the file-count threshold used to skip
# the beginning of the tree. Checking the length first avoids an IndexError
# when the script is started without arguments.
if len(sys.argv) > 1 and sys.argv[1]:
    start = int(sys.argv[1])

for dirName, subdirList, fileList in os.walk(rootDir):
    print('Found directory: %s' % dirName)
    count = count + len(fileList)
    # Skipping works per directory: a directory is taken as a whole as soon
    # as the running file count exceeds `start`.
    if count > start:
        for fname in fileList:
            if fname.endswith("json"):
                print(dirName)
                print(fname)
                files.append([dirName, fname])

# Upload everything that was collected, once, after the walk has finished;
# `files` is never cleared, so mapping inside the loop would re-send entries.
results = pool.map(run, files)

# Release the worker threads now that all uploads have completed.
pool.close()
pool.join()
17 changes: 17 additions & 0 deletions experiment_runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from subprocess import STDOUT, check_output
import shutil

# Repository ids substituted into the capability-list URL, one run per id.
sets = ["223", "1124", "221", "1732", "2697", "48", "193", "1997", "2586", "2740", "522", "1501", "721", "2581", "2907", "1562"]
# Values passed to the --batch-size option; every id is run once per value.
batch_sizes = ["5000"]

for batch in batch_sizes:
    # Loop variable named repo_id so the builtin `set` is not shadowed.
    for repo_id in sets:
        uri = "https://resourcesync.core.ac.uk/sitemaps/repo" + repo_id + "/metadata/capabilitylist.xml"
        # Disabled alternative run (clean the target, then use CORESyncApp),
        # kept for reference:
        #print("removing target")
        #shutil.rmtree("/Users/mc26486/workspace/KMI/rs-aggregator/target/destination/resourcesync.core.ac.uk")
        #cmd = "java -cp rs-aggregator-jar-with-dependencies.jar uk.ac.core.main.CORESyncApp --uri="+uri+" --measure --max=5000"
        # Argument list instead of a shell string: no shell is spawned and
        # no quoting is needed for the interpolated values.
        cmd = [
            "java",
            "-cp", "rs-aggregator-jar-with-dependencies.jar",
            "uk.ac.core.main.COREBatchSyncApp",
            "--uri=" + uri,
            "--batch-size=" + batch,
            "--measure",
            "--max=5000",
        ]

        # Echo the command in the same single-line form as before.
        print(" ".join(cmd))
        # Raises CalledProcessError if java exits with a non-zero status;
        # the captured standard output is not used.
        check_output(cmd)


0 comments on commit ab5f9cf

Please sign in to comment.