-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocess_all_tiles.py
57 lines (49 loc) · 1.96 KB
/
process_all_tiles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import argparse
import json
import os
import pathlib
import subprocess
import sys

from elasticsearch import Elasticsearch, exceptions

# Resolve the directory containing this script so sibling files
# (environment.json, process_tile.py) are found regardless of the CWD
# the script is launched from.
file_path = os.path.realpath(__file__)
directory_path = str(pathlib.Path(file_path).parent)

# Load environment-specific configuration (Elasticsearch endpoint, etc.).
with open(f"{directory_path}/environment.json") as reader:
    environment = json.load(reader)

# Elasticsearch index holding per-tile metadata.
data_host = environment["elasticsearch_url"]
data_index = "fyp-tiles"
data_es = Elasticsearch([
    {"host": data_host, "port": 443, "use_ssl": True, "timeout": 60,
     "max_retries": 10, "retry_on_timeout": True},
])

# Select tiles with at most 15% cloud cover.
query = {
    "query": {
        "bool": {
            "must": [
                {"range": {"cloud_cover": {"lte": 15}}}
            ]
        }
    }
}

# NOTE(review): size=1000 caps the result set; if the index can hold more
# matching tiles than that, switch to the scroll/search_after API.
data = data_es.search(index=data_index, body=query, size=1000, timeout="1m")
def process_all(directory):
    """Submit one LSF batch job per matching tile.

    For every hit in the module-level Elasticsearch result ``data``, create
    a per-tile working directory under ``directory``, write the tile's
    metadata there as ``metadata.json``, and submit a ``bsub`` job that runs
    ``process_tile.py`` against that directory.

    Args:
        directory: Base directory (str or Path) under which per-tile
            folders are created.
    """
    for hit in data["hits"]["hits"]:
        source = hit["_source"]
        metadata = {
            "path": source["path"],
            "datetime": source["datetime"],
        }
        # Tile name is the archive's base name, e.g. ".../T30UVD.zip" -> "T30UVD".
        tile_name = source["path"].split("/")[-1].split(".zip")[0]
        tile_dir = f"{directory}/{tile_name}"
        pathlib.Path(tile_dir).mkdir(parents=True, exist_ok=True)
        with open(f"{tile_dir}/metadata.json", "w") as writer:
            json.dump(metadata, writer)
        # Pass the command as an argument list with shell=False so paths
        # containing spaces or shell metacharacters can neither break the
        # command nor inject into the shell.
        subprocess.call([
            "bsub",
            "-o", f"{tile_dir}/%J.out",
            "-W", "3:00",
            "-q", "short-serial",
            sys.executable,
            f"{directory_path}/process_tile.py",
            "-d", tile_dir,
        ])
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=
        'This script scrolls through data index and processes tiles with low cloud cover')
    # required=True: without it args.directory defaults to None and
    # pathlib.Path(None) below raises an opaque TypeError instead of a
    # clear argparse usage error.
    parser.add_argument('-d', '--dir', dest='directory', required=True,
                        help='dir to create tiles underneath')
    args = parser.parse_args()
    # Resolve to an absolute path so the bsub jobs are CWD-independent.
    absolute_dir = pathlib.Path(args.directory).resolve()
    process_all(absolute_dir)