diff --git a/Makefile b/Makefile index de3817c..43690cd 100644 --- a/Makefile +++ b/Makefile @@ -13,18 +13,21 @@ nominatim: tests/nominatim.db geocode-sqlite nominatim $^ innout_test \ --location "{full}, {city}, {state} {postcode}" \ --delay 1 \ + --raw \ --user-agent "geocode-sqlite" .PHONY: mapquest mapquest: tests/mapquest.db geocode-sqlite open-mapquest $^ innout_test \ --location "{full}, {city}, {state} {postcode}" \ + --raw \ --api-key "$(MAPQUEST_API_KEY)" .PHONY: google google: tests/google.db geocode-sqlite googlev3 $^ innout_test \ --location "{full}, {city}, {state} {postcode}" \ + --raw \ --api-key "$(GOOGLE_API_KEY)" \ --bbox 33.030551 -119.787326 34.695341 -115.832248 @@ -33,6 +36,7 @@ bing: tests/bing.db geocode-sqlite bing $^ innout_test \ --location "{full}, {city}, {state} {postcode}" \ --delay 1 \ + --raw \ --api-key "$(BING_API_KEY)" .PHONY: mapbox @@ -40,12 +44,15 @@ mapbox: tests/mapbox.db geocode-sqlite mapbox $^ innout_test \ --location "{full}, {city}, {state} {postcode}" \ --delay 1 \ + --raw \ --api-key "$(MAPBOX_API_KEY)" .PHONY: opencage opencage: tests/opencage.db geocode-sqlite opencage $^ innout_test \ --location "{full}, {city}, {state} {postcode}" \ + --delay '0.1' \ + --raw \ --api-key "$(OPENCAGE_API_KEY)" .PHONY: run diff --git a/README.md b/README.md index c8f3339..f20dddd 100644 --- a/README.md +++ b/README.md @@ -84,9 +84,30 @@ From there, we have a set of options passed to every geocoder: - `longitude`: longitude column name - `geojson`: store results as GeoJSON, instead of in latitude and longitude columns - `spatialite`: store results in a SpatiaLite geometry column, instead of in latitude and longitude columns +- `raw`: store raw geocoding results in a JSON column Each geocoder takes additional, specific arguments beyond these, such as API keys. Again, [geopy's documentation](https://geopy.readthedocs.io/en/latest/#module-geopy.geocoders) is an excellent resource. +## Using SpatiaLite + +The `--spatialite` flag will store results in a [geometry column](https://www.gaia-gis.it/gaia-sins/spatialite-cookbook-5/cookbook_topics.adminstration.html#topic_TABLE_to_SpatialTable), instead of `latitude` and `longitude` columns. This is useful if you're doing other GIS operations, such as using a [spatial index](https://www.gaia-gis.it/fossil/libspatialite/wiki?name=SpatialIndex). See the [SpatiaLite cookbook](https://www.gaia-gis.it/gaia-sins/spatialite-cookbook-5/index.html) and [functions list](https://www.gaia-gis.it/gaia-sins/spatialite-sql-latest.html) for more of what's possible. + +## Capturing additional geocoding data + +Geocoding services typically return more data than just coordinates. This might include accuracy, normalized addresses or other context. This can be captured using the `--raw` flag. By default, this will add a `raw` column and store the full geocoding response as JSON. If you want to rename that column, pass a value, like `--raw custom_raw`. + +The shape of this response object will vary between services. You can query specific values using [SQLite's built-in JSON functions](https://www.sqlite.org/json1.html). For example, this will work with Google's geocoder: + +```sql +select + json_extract(raw, '$.formatted_address') as address, + json_extract(raw, '$.geometry.location_type') as location_type +from + innout_test +``` + +Check each geocoding service's documentation for what's included in the response. + ## Python API The command line interface aims to support the most common options for each geocoder. For more fine-grained control, use the Python API. diff --git a/geocode_sqlite/cli.py b/geocode_sqlite/cli.py index a7eaf8f..c978de4 100644 --- a/geocode_sqlite/cli.py +++ b/geocode_sqlite/cli.py @@ -67,6 +67,14 @@ def common_options(f): default=False, help="""Store results as a SpatiaLite geometry. Using this will add a geometry column instead of latitude and longitude columns.""", + ), + click.option( + "--raw", + is_flag=False, + default="", + flag_value="raw", + help="""Store raw geocoding results as JSON. +This column will be called 'raw' by default. Pass a value to rename it.""", ), click.pass_context, ] @@ -86,6 +94,7 @@ def fill_context( longitude, geojson, spatialite, + raw, **kwargs, ): "Add common options to context" @@ -98,6 +107,7 @@ def fill_context( longitude=longitude, geojson=geojson, spatialite=spatialite, + raw=raw, kwargs=kwargs, ) @@ -113,6 +123,7 @@ def extract_context(ctx): ctx.obj["longitude"], ctx.obj["geojson"], ctx.obj["spatialite"], + ctx.obj["raw"], ctx.obj.get("kwargs", {}), ) @@ -143,7 +154,9 @@ def cli(ctx): # name changed in click 8.0 -result_callback = getattr(cli, "result_callback", None) or getattr(cli, "resultcallback") +result_callback = getattr(cli, "result_callback", None) or getattr( + cli, "resultcallback" +) @result_callback() @@ -159,6 +172,7 @@ def geocode(ctx, geocoder): longitude, geojson, spatialite, + raw, kwargs, ) = extract_context(ctx) @@ -178,6 +192,9 @@ def geocode(ctx, geocoder): if longitude != "longitude": click.echo(f"Using custom longitude field: {longitude}") + if raw and raw != "raw": + click.echo(f"Using custom raw result field: {raw}") + if not (geojson or spatialite) and latitude not in columns: click.echo(f"Adding column: {latitude}") table.add_column(latitude, float) @@ -194,6 +211,10 @@ def geocode(ctx, geocoder): click.echo("Adding geometry column") table.add_geometry_column(GEOMETRY_COLUMN, "POINT") + if raw and raw not in columns: + click.echo(f"Adding {raw} column") + table.add_column(raw, str) + if GEOCODER_COLUMN not in table.columns_dict: click.echo("Adding geocoder column") table.add_column(GEOCODER_COLUMN, str) @@ -220,6 +241,7 @@ def geocode(ctx, geocoder): longitude_column=longitude, geojson=geojson, spatialite=spatialite, + raw=raw, **kwargs, ) @@ -244,6 +266,11 @@ def geocode(ctx, geocoder): click.echo(f"{pk}: {location.format(row)}") +############# +# Geocoders # +############# + + @cli.command("test", hidden=True) @common_options @click.option("-p", "--db-path", type=click.Path(exists=True)) @@ -257,12 +284,22 @@ def use_tester( longitude, geojson, spatialite, + raw, db_path, ): "Only use this for testing" click.echo(f"Using test geocoder with database {db_path}") fill_context( - ctx, database, table, location, delay, latitude, longitude, geojson, spatialite + ctx, + database, + table, + location, + delay, + latitude, + longitude, + geojson, + spatialite, + raw, ) return DummyGeocoder(Database(db_path)) @@ -287,12 +324,22 @@ def bing( longitude, geojson, spatialite, + raw, api_key, ): "Bing" click.echo("Using Bing geocoder") fill_context( - ctx, database, table, location, delay, latitude, longitude, geojson, spatialite + ctx, + database, + table, + location, + delay, + latitude, + longitude, + geojson, + spatialite, + raw, ) return geocoders.Bing(api_key=api_key) @@ -321,6 +368,7 @@ def google( longitude, geojson, spatialite, + raw, api_key, domain, bbox, @@ -337,6 +385,7 @@ def google( longitude, geojson, spatialite, + raw, bounds=bbox, ) return geocoders.GoogleV3(api_key=api_key, domain=domain) @@ -363,6 +412,7 @@ def mapquest( longitude, geojson, spatialite, + raw, api_key, bbox, ): @@ -378,6 +428,7 @@ def mapquest( longitude, geojson, spatialite, + raw, bounds=bbox, ) return geocoders.MapQuest(api_key=api_key) @@ -406,13 +457,23 @@ def nominatim( longitude, geojson, spatialite, + raw, user_agent, domain, ): "Nominatim (OSM)" click.echo(f"Using Nominatim geocoder at {domain}") fill_context( - ctx, database, table, location, delay, latitude, longitude, geojson, spatialite + ctx, + database, + table, + location, + delay, + latitude, + longitude, + geojson, + spatialite, + raw, ) return geocoders.Nominatim(user_agent=user_agent, domain=domain) @@ -437,12 +498,22 @@ def open_mapquest( longitude, geojson, spatialite, + raw, api_key, ): "Open Mapquest" click.echo("Using MapQuest geocoder") fill_context( - ctx, database, table, location, delay, latitude, longitude, geojson, spatialite + ctx, + database, + table, + location, + delay, + latitude, + longitude, + geojson, + spatialite, + raw, ) return geocoders.MapQuest(api_key=api_key) @@ -474,6 +545,7 @@ def mapbox( longitude, geojson, spatialite, + raw, api_key, bbox, proximity, @@ -490,6 +562,7 @@ def mapbox( longitude, geojson, spatialite, + raw, bbox=bbox, proximity=proximity, ) @@ -516,6 +589,7 @@ def opencage( longitude, geojson, spatialite, + raw, api_key, ): "OpenCage" @@ -530,5 +604,6 @@ def opencage( longitude, geojson, spatialite, + raw, ) return geocoders.OpenCage(api_key=api_key) diff --git a/geocode_sqlite/utils.py b/geocode_sqlite/utils.py index 6f6ad23..ca0aa26 100644 --- a/geocode_sqlite/utils.py +++ b/geocode_sqlite/utils.py @@ -22,6 +22,7 @@ def geocode_table( longitude_column="longitude", geojson=False, spatialite=False, + raw="", force=False, **kwargs, ): @@ -124,6 +125,7 @@ def geocode_list( longitude_column="longitude", geojson=False, spatialite=False, + raw="", **kwargs, ): """ @@ -140,7 +142,7 @@ def geocode_list( result = geocode_row(geocode, query_template, row, **kwargs) if result: row = update_row( - row, result, latitude_column, longitude_column, geojson, spatialite + row, result, latitude_column, longitude_column, geojson, spatialite, raw ) row[GEOCODER_COLUMN] = get_geocoder_class(geocode) @@ -162,6 +164,7 @@ def update_row( longitude_column="longitude", geojson=False, spatialite=False, + raw="", ): """ Update a row before saving, either setting latitude and longitude, @@ -180,6 +183,10 @@ def update_row( row[longitude_column] = result.longitude row[latitude_column] = result.latitude + if raw: + # save the raw dictionary, let sqlite-utils turn it into a str + row[raw] = result.raw + return row diff --git a/setup.py b/setup.py index 0c00f66..57224f2 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup import os -VERSION = "0.7.0" +VERSION = "0.8.0" requirements = ["click>=7.0", "sqlite_utils", "geopy"] diff --git a/tests/test_geocode_sqlite.py b/tests/test_geocode_sqlite.py index a2dd44d..828b905 100644 --- a/tests/test_geocode_sqlite.py +++ b/tests/test_geocode_sqlite.py @@ -388,3 +388,82 @@ def test_spatialite_geocode_table(db, geocoder): assert geometry["type"] == expected["type"] assert expected["coordinates"] == pytest.approx(geometry["coordinates"]) + + +def test_capture_raw(db, db_path, geocoder): + table = db[TABLE_NAME] + geo_table = db[GEO_TABLE] + + assert "latitude" not in table.columns_dict + assert "longitude" not in table.columns_dict + assert "raw" not in table.columns_dict + + # run the cli with our test geocoder + runner = CliRunner() + result = runner.invoke( + cli, + [ + "test", # geocoder subcommand + str(db_path), # db + str(TABLE_NAME), # table + "--db-path", # path, for test geocoder + str(db_path), + "--location", # location + "{id}", + "--delay", # delay + "0", + "--raw", # capture raw output + ], + ) + + print(result.stdout) + assert 0 == result.exit_code + + for row in table.rows: + assert type(row.get("raw")) == str + + raw = json.loads(row["raw"]) + result = geo_table.get(row["id"]) + + assert raw == result + + +def test_capture_raw_custom(db, db_path, geocoder): + table = db[TABLE_NAME] + geo_table = db[GEO_TABLE] + + RAW = "raw_custom" + + assert "latitude" not in table.columns_dict + assert "longitude" not in table.columns_dict + assert RAW not in table.columns_dict + + # run the cli with our test geocoder + runner = CliRunner() + result = runner.invoke( + cli, + [ + "test", # geocoder subcommand + str(db_path), # db + str(TABLE_NAME), # table + "--db-path", # path, for test geocoder + str(db_path), + "--location", # location + "{id}", + "--delay", # delay + "0", + "--raw", + RAW, # capture raw output with a custom name + ], + ) + + print(result.stdout) + assert 0 == result.exit_code + + for row in table.rows: + assert type(row.get(RAW)) == str + + raw = json.loads(row[RAW]) + result = geo_table.get(row["id"]) + + assert raw == result