From b5d0985775baae9b141e2cdc24fbb480588af4b8 Mon Sep 17 00:00:00 2001 From: Matthew Wang Date: Fri, 16 Dec 2016 19:55:30 -0500 Subject: [PATCH 1/4] adds ruby port this was ridiculously hard --- .gitignore | 4 +++ .travis.yml | 21 +++++++++++- Gemfile | 5 +++ forage.py | 32 ++++++------------ forage.rb | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 136 insertions(+), 23 deletions(-) create mode 100644 Gemfile create mode 100644 forage.rb diff --git a/.gitignore b/.gitignore index a7f58b2..ab7a00a 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,10 @@ __pycache__/ *.py[cod] *$py.class +# Ruby Things + +Gemfile.lock + # Built Files output-hydrometric.csv diff --git a/.travis.yml b/.travis.yml index b12098b..f876702 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,3 @@ -language: python matrix: include: - os: linux @@ -25,3 +24,23 @@ matrix: - python forage.py -w csv - python forage.py -w json - jsonlint output-hydrometric.json + - os: linux + language: ruby + rvm: "2.2" + install: "bundle" + script: + - ruby forage.rb + - ruby forage.rb -w csv + - ruby forage.rb -w json + - jsonlint output-hydrometric.json + - csvlint output-hydrometric.csv + - os: linux + language: ruby + rvm: "2.3" + install: "bundle" + script: + - ruby forage.rb + - ruby forage.rb -w csv + - ruby forage.rb -w json + - jsonlint output-hydrometric.json + - csvlint output-hydrometric.csv diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..7314d42 --- /dev/null +++ b/Gemfile @@ -0,0 +1,5 @@ +source "https://rubygems.org" + +gem "json" +gem 'jsonlint' +gem "csvlint" diff --git a/forage.py b/forage.py index 22fd5e0..24cec12 100644 --- a/forage.py +++ b/forage.py @@ -1,4 +1,4 @@ -print " Importing libraries..." 
+# importing libraries import sys # this lets us receive passed arguments in the command line import getopt # this lets us receive arguments and options in the command line @@ -9,8 +9,6 @@ import requests # this helps us make good HTTP requests from contextlib import closing # this helps us make good HTTP requests -print " Importing done!" - # Setting a few variables output = {} @@ -63,30 +61,20 @@ for endpoint in settings["endpoints"]: if (settings["endpoints"][endpoint]["type"] == "hydrometric" and hydrometric == True): endpoint = settings["endpoints"][endpoint] - output["hydrometric"][endpoint["id"]] = [] + data["hydrometric"][endpoint["id"]] = [] url = endpoint["root"] + endpoint["type"] + "/csv/" + endpoint["province"] + "/" + endpoint["timescale"] + "/" + endpoint["province"] + "_" + endpoint["id"] + "_" + endpoint["timescale"] + "_hydrometric.csv" with closing(requests.get(url, stream=True)) as r: - reader = csv.reader(r.iter_lines(), delimiter=',', quotechar='"') + lines = r.iter_lines() + reader = csv.reader(lines, delimiter=',', quotechar='"') + temp = False for row in reader: - output["hydrometric"][endpoint["id"]].append(row) - output["hydrometric"][endpoint["id"]] = output["hydrometric"][endpoint["id"]][1:] + if (temp == False): + temp = True + elif (row): + data["hydrometric"][endpoint["id"]].append([settings["endpoints"][endpoint["id"]]["name"], endpoint["id"], settings["endpoints"][endpoint["id"]]["timescale"], calendar.timegm(dateutil.parser.parse(row[1]).utctimetuple()), row[2], row[6]]) print " Data fetched!" -print " Organzing data..." - -# This organizes the data; this function is also poorly designed, and will be removed sooner or later. 
- -if (hydrometric == True): - for dataset in output["hydrometric"]: - data["hydrometric"][dataset] = [] - for i in range(len(output["hydrometric"][dataset])): - row = output["hydrometric"][dataset][i] - if (row): - data["hydrometric"][dataset].append([settings["endpoints"][row[0]]["timescale"], settings["endpoints"][row[0]]["name"], row[0], calendar.timegm(dateutil.parser.parse(row[1]).utctimetuple()), row[2], row[6]]) - -print " Data Organized!" - # If the write type is CSV, this function outputs to a CSV file called output-hydrometric.csv if (write == "csv" or write == "CSV"): @@ -94,7 +82,7 @@ if (hydrometric == True): with open('output-hydrometric.csv', 'wb') as csvfile: writer = csv.writer(csvfile) - writer.writerow(["Type", "Timescale", "Station Name", "Station ID", "Unix Timestamp", "Water Level (m)", "Discharge (m3/s)"]) + writer.writerow(["Type", "Station Name", "Station ID", "Timescale", "Unix Timestamp", "Water Level (m)", "Discharge (m3/s)"]) for dataset in data["hydrometric"]: for i in range(len(data["hydrometric"][dataset])): row = data["hydrometric"][dataset][i] diff --git a/forage.rb b/forage.rb new file mode 100644 index 0000000..4585b7c --- /dev/null +++ b/forage.rb @@ -0,0 +1,97 @@ +require 'optparse' # we use this to parse arguments/parameters +require 'time' # we need this to deal with dates and times +require 'csv' # we need this to read CSV files +require 'json' # we need this to read JSON files +require "net/http" # we need this to make http requests +require "uri" # we need this to make http requests + +# Setup Variables + +data = Hash.new() +write = false +hydrometric = false + +# Parsing Command Line Arguments + +options = {} +OptionParser.new do |opts| + opts.banner = "Usage: forage.rb [options]" + + opts.on("-w", "--write TYPE", "Add output type, e.g. json or csv") do |w| + write = w + end + + opts.on("-h", "--help", "Get help!") do + puts "Help coming soon!" + exit + end +end.parse! 
+ +# Parsing Settings + +puts " Opening settings from settings.json ..." + +settingsf = File.read('settings.json') +settings = JSON.parse(settingsf) + +# Enabling hydrometrics + +if (settings["global"]["hydrometric"] == "enabled") + data["hydrometric"] = Hash.new() + hydrometric = true +end + +puts " Settings configured!" + +puts " Fetching data..." + +settings["endpoints"].each do |key, value| + temp = false + data["hydrometric"][value["id"]] = [] + if value["type"] == "hydrometric" && hydrometric == true + url = value["root"] + value["type"] + "/csv/" + value["province"] + "/" + value["timescale"] + "/" + value["province"] + "_" + value["id"] + "_" + value["timescale"] + "_hydrometric.csv" + uri = URI.parse(url) + http = Net::HTTP.new(uri.host, uri.port) + request = Net::HTTP::Get.new(uri.request_uri) + response = http.request(request) + CSV.parse(response.body) do |row| + if row + if temp == false + temp = true + else + data["hydrometric"][key].push([value["name"], value["id"], value["timescale"], Time.parse(row[1]).to_i, row[2], row[6]]) + end + end + end + end +end + +puts " Data fetched!" + +if write == "csv" || write == "CSV" + puts " Writing data to CSV..." + if hydrometric == true + CSV.open("output-hydrometric.csv", "wb") do |csv| + csv << ["Type", "Station Name", "Station ID", "Timescale", "Unix Timestamp", "Water Level (m)", "Discharge (m3/s)"] + data["hydrometric"].each do |key, value| + value.each do |k| + csv << k.unshift("Hydrometric") + end + end + end + end + puts " Write to CSV complete!" +end + +if write == "json" || write == "JSON" + puts " Writing data to JSON..." + if hydrometric == true + File.open("output-hydrometric.json","w") do |f| + f.write(data.to_json) + end + end + puts " Write to JSON complete!" +end + +puts " Script complete!" +puts " Shutting down..." 
From 71bb777922a95af3f013c735f2ed134cd7f66d61 Mon Sep 17 00:00:00 2001 From: Matthew Wang Date: Fri, 16 Dec 2016 19:56:58 -0500 Subject: [PATCH 2/4] drops rvm 2.3 from build matrix --- .travis.yml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/.travis.yml b/.travis.yml index f876702..93d69b3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -34,13 +34,3 @@ matrix: - ruby forage.rb -w json - jsonlint output-hydrometric.json - csvlint output-hydrometric.csv - - os: linux - language: ruby - rvm: "2.3" - install: "bundle" - script: - - ruby forage.rb - - ruby forage.rb -w csv - - ruby forage.rb -w json - - jsonlint output-hydrometric.json - - csvlint output-hydrometric.csv From ddf63c4dfc643f47818f40f0cccad433b265821b Mon Sep 17 00:00:00 2001 From: Matthew Wang Date: Fri, 16 Dec 2016 19:58:17 -0500 Subject: [PATCH 3/4] attempts running a mac build matrix --- .travis.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.travis.yml b/.travis.yml index 93d69b3..4cba2a5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -34,3 +34,13 @@ matrix: - ruby forage.rb -w json - jsonlint output-hydrometric.json - csvlint output-hydrometric.csv + - os: osx + language: ruby + rvm: "2.2" + install: "bundle" + script: + - ruby forage.rb + - ruby forage.rb -w csv + - ruby forage.rb -w json + - jsonlint output-hydrometric.json + - csvlint output-hydrometric.csv From 8e7fc7c42a6293a44170bcde20884c532fd87962 Mon Sep 17 00:00:00 2001 From: Matthew Wang Date: Fri, 16 Dec 2016 20:25:17 -0500 Subject: [PATCH 4/4] updates documentation to match ruby scripts --- README.md | 80 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index e520940..38d2da4 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ # Forage [![Build Status](https://travis-ci.org/NorvalLabs/forage.svg?branch=master)](https://travis-ci.org/NorvalLabs/forage) -A set of tools to pull information from the [Canadian 
Government's Weather API](http://dd.weather.gc.ca/), with support for the hydrometric data for now. Support for more data is coming soon! +A set of tools to pull information from the [Canadian Government's Weather API](http://dd.weather.gc.ca/), for use with NorvalLabs. Currently, we support pulling hydrometric data, using Python or Ruby, and outputting to CSV or JSON. -## Settings +## Build Settings -`settings.json` acts as a settings file for all the build scripts, regardless of language. Let's see how it works: +`settings.json` acts as a settings file for all the build scripts, regardless of language. We include a sample `settings.json` in this project. Here's a look at it: ```json { @@ -36,22 +36,34 @@ A set of tools to pull information from the [Canadian Government's Weather API]( ### Global Settings The `global` object defines some global settings. You should not have to add or remove any key/value pairs from `global`, just modifying the values. -* For every type of data you want to get and parse, switch the value for that type to `"enabled"`. If not, switch it to `"disabled"`. e.g. `"hydrometric": "enabled"` gets and parses hydrometric data, while `"hydrometric": "disabled"` does not + +| Property | Description | Accepted Values | +| --- | --- | --- | +| `hydrometric` | Enables processing of hydrometric data. | `enabled` (default), `disabled` | ### Endpoints -Each endpoint has slightly different formats, but the general idea is the same. To add a new endpoint to get/parse, just add a new `endpoint` object. It consists of: +Each endpoint has slightly different formats, but the general idea is the same. To add a new endpoint to get/parse, just add a new `endpoint` object. + +| Property | Description | +|---|---| +| Key Name | This should be the same as `id`, and is normally the station ID | +| `type` | This should be the type of data collected, as present in the URL (e.g. 
`hydrometric`) | +| `id` | This should be the same as the key name, and is normally the station ID | +| `province` | The two-letter abbreviation of the province the station is located in | +| `timescale` | The supported timescales for your data type: `hydrometric` supports `hourly` and `daily` | +| `root` | The root site we pull from, for now it should always be `http://dd.weather.gc.ca/` | + +## Script Parameters -* Key Name: this should be the same as `id`, and is normally the station ID -* `type`: this should be the type of data collected, as present in the URL (e.g. `hydrometric`) -* `id`: this should be the same as the key name, and is normally the station ID -* `province`: the Province the station is located in -* `timescale`: the supported timescales for your data type: `hydrometric` supports `hourly` and `daily` -* `root`: the root site we pull from, for now it should always be `http://dd.weather.gc.ca/` +| Parameter | Description | Accepted Values | Supported Scripts | +|---|---| --- | --- | +| `-w` | Write Parameter: dictates what file type the output is written to. Outputs to `output-DATA-TYPE.FILE-TYPE` | none (default), `csv`, `json` | Python, Ruby | +| `-h` | Help: displays help with the script (WIP) | N/A | Python, Ruby | ## Python -*Note: this installation process requires [pip](https://pip.pypa.io/en/stable/). Please install it if you do not already have it!* +*Note: this script requires [Python 2.7](https://www.python.org/) and [pip](https://pip.pypa.io/en/stable/). Please install these if you do not already have them!* In order to use the python script, we first need to get our dependencies: @@ -65,16 +77,38 @@ Then, just run our script! python forage.py ``` -But, running the script just pulls the data, and doesn't output it. Luckily, the script takes parameters! +You can use the script parameters with `forage.py` (if you don't, it doesn't output anything). 
For example: + +``` +python forage.py -w json +``` + +## Ruby + +*Note: this script requires [Ruby 2+](https://www.ruby-lang.org/en/), and optionally [Bundler](http://bundler.io/) for linting.* + +In order to use the ruby script, simply run it from the command line. + +``` +ruby forage.rb +``` -The script takes two parameters: -* `-w` or `--write` - * Parameter of what file type to write - * Default is none (no file will be written) - * Accepts `csv` or `json` - * The outputted file(s) is at `API-output.FILETYPE` - * Example: `python forage.py -w csv` -* `-h` or `--help` - * Displays help commands (coming soon) +You can use the script parameters with `forage.rb` (if you don't, it doesn't output anything). For example: -And, it also uses `settings.json` to know what to get: look above on documentation on how to use it! +``` +ruby forage.rb -w csv +``` + +To use optional linting gems, first install them with bundle: + +``` +bundle +``` + +Then, use either the `csvlint` or `jsonlint` commands. + +``` +csvlint output-hydrometric.csv + +jsonlint output-hydrometric.json +```