-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathpipeline-spec.yaml
82 lines (76 loc) · 2.64 KB
/
pipeline-spec.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# Pipeline scheduled with an every-minute crontab; runs a single flow step.
github_pull_covid19_israel:
  schedule:
    crontab: '* * * * *'
  pipeline:
    - flow: avid_covider_pipelines.github_pull_covid19_israel
      parameters:
        dump_to_path: data/github_pull_covid19_israel
        # NOTE(review): nesting was reconstructed from flattened text —
        # confirm that change-run-covid belongs under parameters.
        change-run-covid: true
# Pipeline scheduled with an every-minute crontab; runs a single flow step
# that is given an output path under data/.
covid19_israel_files_list:
  schedule:
    crontab: '* * * * *'
  pipeline:
    - flow: avid_covider_pipelines.covid19_israel_files_list
      parameters:
        dump_to_path: data/covid19_israel_files_list
# Pipeline scheduled once a day (minute 0, hour 4); runs a single flow step
# that is given an output path under data/.
covid19_israel_files_zip:
  schedule:
    crontab: '0 4 * * *'
  pipeline:
    - flow: avid_covider_pipelines.covid19_israel_files_zip
      parameters:
        dump_to_path: data/covid19_israel_files_zip
# Pipeline scheduled once a day (minute 0, hour 2). Steps run in the order
# listed: download Google Drive CSVs, load from the DB, add GPS coordinates,
# then export the bot answers twice (regular and "unsupported" variants).
corona_data_collector:
  schedule:
    crontab: "0 2 * * *"
  pipeline:
    - flow: corona_data_collector.download_gdrive_data
      parameters:
        # limit_rows: 200
        files_dump_to_path: data/corona_data_collector/gdrive_data
        # COVID19 WS > Data > Analysis Raw Data
        google_drive_csv_folder_id: 1pzAyk-uXy__bt1tCX4rpTiPZNmrehTOz
        # The source id (google / hebrew_google ...) should match the sources
        # in load_from_db file_sources.
        file_sources:
          COVID-19-English.csv: google
          COVID-19-Russian.csv: google
          COVID-19-Hebrew.csv: hebrew_google
    - flow: corona_data_collector.load_from_db
      # parameters:
      #   limit_rows: 200
      #   where: "id > 600000 and id < 622250"
    - flow: corona_data_collector.add_gps_coordinates
      parameters:
        # Per source id: input field name -> address component
        # ("street" or "city"). Presumably consumed by the flow to build the
        # address it geocodes — verify against add_gps_coordinates.
        source_fields:
          db:
            street: street
            city_town: city
          google:
            Street: street
            # Russian headers: "Город проживания" = "City of residence",
            # "Улица" = "Street". These two were previously mapped the
            # wrong way round.
            "Город проживания": city
            City: city
            "Улица": street
          hebrew_google:
            # Both spellings of the city header (ישוב / יישוב) are listed.
            "עיר / ישוב מגורים": city
            "עיר / יישוב מגורים": city
            "רחוב מגורים": street
        workplace_source_fields:
          db:
            routine_workplace_street: street
            routine_workplace_city_town: city
        dump_to_path: data/corona_data_collector/with_gps_data
        # gps_data: data/corona_data_collector/gps_data.json
        gps_datapackage_path: data/corona_data_collector/gps_data_cache
    - flow: corona_data_collector.export_corona_bot_answers
      parameters:
        destination_output: data/corona_data_collector/corona_bot_answers
    # Same export flow again, with unsupported: true and a separate output
    # directory.
    - flow: corona_data_collector.export_corona_bot_answers
      parameters:
        unsupported: true
        destination_output: data/corona_data_collector/corona_bot_answers_unsupported