Skip to content

Commit

Permalink
Make it possible to pass bare name blocklist to USCityIndex.
Browse files Browse the repository at this point in the history
  • Loading branch information
davidmcclure committed Aug 26, 2018
1 parent 37c99bb commit c26b9a9
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 10 deletions.
31 changes: 21 additions & 10 deletions litecoder/usa.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,25 +59,32 @@ def __getitem__(self, text):

class AllowBareCityName:
    """Decide whether a city may be indexed under its bare name.

    A bare name is allowed only when it is not on the blocklist and the
    city passes the population-gap check in `large_p1_gap`.
    """

    def __init__(self, min_p1_gap=200000, blocklist=None):
        """
        Args:
            min_p1_gap (int): Threshold the population gap must exceed.
            blocklist (iterable of str or None): Names that must never be
                indexed bare. None is treated as an empty blocklist.
        """
        self.name_pops = CityNamePopulations()
        self.min_p1_gap = min_p1_gap
        # Entries are normalized with keyify so that lookups in `blocked`
        # compare keyified name against keyified name.
        self.blocklist = set(map(keyify, blocklist or []))

    def blocked(self, name):
        """Is the (keyified) name on the blocklist?

        Args:
            name (str)
        Returns: bool
        """
        return keyify(name) in self.blocklist

    def large_p1_gap(self, row, name):
        """Does this city's population exceed the combined population of
        every city with the name except the most populous one by more than
        `min_p1_gap`?

        Args:
            row (models.WOFLocality)
            name (str)
        Returns: bool
        """
        all_pops = sorted(self.name_pops[name], reverse=True)

        # A missing population counts as zero.
        pop = row.population or 0

        # all_pops[1:] drops only the largest entry, so the subtraction is
        # against the sum of all remaining same-named cities.
        return pop - sum(all_pops[1:]) > self.min_p1_gap

    def __call__(self, row, name):
        """Is a name unique enough that it should be indexed independently?

        Args:
            row (models.WOFLocality)
            name (str)
        Returns: bool
        """
        return not self.blocked(name) and self.large_p1_gap(row, name)


class CityKeyIter:
Expand Down Expand Up @@ -231,10 +238,14 @@ class USCityIndex(Index):
def load(cls, path=US_CITY_PATH):
return super().load(path)

def __init__(self, bare_name_blocklist=None):
    """
    Args:
        bare_name_blocklist (iterable of str or None): Names that should
            not be indexed as bare city names.
    """
    super().__init__()

    # Stored unmodified; build() passes it on to CityKeyIter.
    self.bare_name_blocklist = bare_name_blocklist

def build(self):
"""Index all US cities.
"""
iter_keys = CityKeyIter()
iter_keys = CityKeyIter(blocklist=self.bare_name_blocklist)

# Deduped cities.
cities = WOFLocality.clean_us_cities()
Expand Down
27 changes: 27 additions & 0 deletions tests/prod_db/test_bare_name_blocklist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@


import pytest

from litecoder.usa import USCityIndex


def test_bare_name_bloclist():
    """Blocklisted bare names must not resolve, while state-qualified
    queries for the same cities still do.
    """
    # NOTE(review): "bloclist" in the function name looks like a typo for
    # "blocklist"; left unchanged so the test id stays stable.
    index = USCityIndex(bare_name_blocklist=['Washington', 'New York'])
    index.build()

    # Each blocked name is queried as written, lower-cased, and with a
    # ", USA" suffix.
    blocked_queries = [
        'Washington',
        'washington',
        'Washington, USA',
        'New York',
        'new york',
        'New York, USA',
    ]

    for query in blocked_queries:
        assert not index[query]

    # Qualified queries must still return the expected localities.
    assert index['Washington DC'][0].data.wof_id == 85931779
    assert index['New York, NY'][0].data.wof_id == 85977539

0 comments on commit c26b9a9

Please sign in to comment.