# config.yaml
# Generated from MuckRock/documentcloud-alert-cron-addon
# JSON-Schema-style description of the add-on's options: a top-level object
# whose `properties` are the individual settings, each with a type, a
# human-readable description and a default value.
title: Scraper
description: >-
  This add-on will scrape and optionally crawl a given site for documents to upload
  to DocumentCloud. It can also alert you of given keywords appearing in those
  documents.
type: object
properties:
  # Page the scrape starts from.
  site:
    type: string
    format: uri
    description: The URL of the site to start scraping
    default: https://www.ssa.gov/foia/readingroom.html

  # Destination project for uploaded documents.
  # `integer` rather than `number`: an ID is a whole number, and `number`
  # would also accept fractional values such as 207338.5.
  project:
    type: integer
    description: >-
      The DocumentCloud project ID of the project the documents should be uploaded in to
    default: 207338

  # Terms that trigger a notification when found in scraped documents.
  keywords:
    type: array
    items:
      type: string
    description: Keywords to search and notify on
    default:
      - court
      - foia

  # Extensions are quoted so the leading dot is always read as part of a string.
  filetypes:
    type: array
    items:
      type: string
    description: File extensions to be uploaded to DocumentCloud
    default:
      - ".pdf"
      - ".docx"
      - ".xlsx"
      - ".pptx"
      - ".doc"
      - ".xls"
      - ".ppt"

  # Link depth to follow from the starting page.
  # `integer` rather than `number`: a depth counts whole pages.
  crawl_depth:
    type: integer
    description: How many pages away from the first site to scrape from
    default: 1

  # When true, nothing is uploaded and no scraping data is stored.
  dry_run:
    type: boolean
    description: Do not upload any documents or store any scraping data
    default: false