Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Use Libpostal service #146

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .jshintrc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"node": true,
"curly": true,
"eqeqeq": true,
"esversion": 6,
"esversion": 8,
"freeze": true,
"immed": true,
"indent": 2,
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# base image
FROM pelias/libpostal_baseimage
FROM pelias/baseimage

# dependencies
RUN apt-get update && \
Expand Down
199 changes: 101 additions & 98 deletions api/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,120 +31,123 @@ function setup( addressDbPath, streetDbPath ){
if( 'string' !== typeof number ){ return cb( 'invalid number' ); }
if( 'string' !== typeof street ){ return cb( 'invalid street' ); }

var normalized = {
number: analyze.housenumber( number ),
street: analyze.street( street )
};
analyze.street(street, function streetAnalyzeCallback(err, street, metadata) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the idea behind this syntax? It seems... verbose

analyze.street(street, function streetAnalyzeCallback(err, street, metadata) {

wouldn't that be simpler as:

analyze.street(street, (err, street, metadata) => {

I see that streetAnalyzeCallback isn't referenced anywhere else in the file so I'm guessing it's for stack traces?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see in another file below that you're using the syntax:

const analyze_street = util.promisify(analyze.street);
var names = await analyze_street();

I think that syntax is much cleaner, and it allows you to leave most of the code untouched and avoids the closure?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was simply because I was hoping to avoid having to use async/await syntax at all in this PR. Despite the verbosity, I'm honestly not a fan of additional language complexity. Functions are well understood by everyone :)

However, because later on we depend on many calls to libpostal returning before proceeding, using Promise.all is basically the only way to get the behavior we want.

Since we're already going to be using the new syntax, I'll update this code to use it as well.


// error checking
if( isNaN( point.lat ) ){ return cb( 'invalid latitude' ); }
if( isNaN( point.lon ) ){ return cb( 'invalid longitude' ); }
if( isNaN( normalized.number ) ){ return cb( 'invalid number' ); }
if( !normalized.street.length ){ return cb( 'invalid street' ); }
var normalized = {
number: analyze.housenumber( number ),
street: street
};

// perform a db lookup for the specified street
// @todo: performance: only query for part of the table
query.search( db, point, normalized.number, normalized.street, function( err, res ){
// error checking
if( isNaN( point.lat ) ){ return cb( 'invalid latitude' ); }
if( isNaN( point.lon ) ){ return cb( 'invalid longitude' ); }
if( isNaN( normalized.number ) ){ return cb( 'invalid number' ); }
if( !normalized.street.length ){ return cb( 'invalid street' ); }

// @note: results can be from multiple different street ids.
// perform a db lookup for the specified street
// @todo: performance: only query for part of the table
query.search( db, point, normalized.number, normalized.street, function( err, res ){

// an error occurred or no results were found
if( err || !res || !res.length ){ return cb( err, null ); }
// @note: results can be from multiple different street ids.

// try to find an exact match
var match = res.find( function( row ){
if( row.source === 'VERTEX' ){ return false; }
return row.housenumber === normalized.number;
});
// an error occurred or no results were found
if( err || !res || !res.length ){ return cb( err, null ); }

// return exact match
if( match ){
return cb( null, {
type: 'exact',
source: match.source,
source_id: match.source_id,
number: analyze.housenumberFloatToString( match.housenumber ),
lat: parseFloat( match.lat.toFixed(7) ),
lon: parseFloat( match.lon.toFixed(7) )
// try to find an exact match
var match = res.find( function( row ){
if( row.source === 'VERTEX' ){ return false; }
return row.housenumber === normalized.number;
});
}

// try to find a close match with the same number (possibly an apartment)
match = res.find( function( row ){
if( row.source === 'VERTEX' ){ return false; }
return Math.floor( row.housenumber ) === Math.floor( normalized.number );
});
// return exact match
if( match ){
return cb( null, {
type: 'exact',
source: match.source,
source_id: match.source_id,
number: analyze.housenumberFloatToString( match.housenumber ),
lat: parseFloat( match.lat.toFixed(7) ),
lon: parseFloat( match.lon.toFixed(7) )
});
}

// return close match
if( match ){
return cb( null, {
type: 'close',
source: match.source,
source_id: match.source_id,
number: analyze.housenumberFloatToString( match.housenumber ),
lat: parseFloat( match.lat.toFixed(7) ),
lon: parseFloat( match.lon.toFixed(7) )
// try to find a close match with the same number (possibly an apartment)
match = res.find( function( row ){
if( row.source === 'VERTEX' ){ return false; }
return Math.floor( row.housenumber ) === Math.floor( normalized.number );
});
}

// attempt to interpolate the position

// find the records before and after the desired number (group by street segment)
var map = {};
res.forEach( function( row ){
if( !map.hasOwnProperty( row.id ) ){ map[row.id] = {}; }
if( row.housenumber < normalized.number ){ map[row.id].before = row; }
if( row.housenumber > normalized.number ){ map[row.id].after = row; }
if( map[row.id].before && map[row.id].after ){
map[row.id].diff = {
before: map[row.id].before.housenumber - normalized.number,
after: map[row.id].after.housenumber - normalized.number
};

// return close match
if( match ){
return cb( null, {
type: 'close',
source: match.source,
source_id: match.source_id,
number: analyze.housenumberFloatToString( match.housenumber ),
lat: parseFloat( match.lat.toFixed(7) ),
lon: parseFloat( match.lon.toFixed(7) )
});
}
});

// remove segments with less than 2 points; convert map to array
var segments = [];
for( var id in map ){
if( map[id].before && map[id].after ){
segments.push( map[id] );
// attempt to interpolate the position

// find the records before and after the desired number (group by street segment)
var map = {};
res.forEach( function( row ){
if( !map.hasOwnProperty( row.id ) ){ map[row.id] = {}; }
if( row.housenumber < normalized.number ){ map[row.id].before = row; }
if( row.housenumber > normalized.number ){ map[row.id].after = row; }
if( map[row.id].before && map[row.id].after ){
map[row.id].diff = {
before: map[row.id].before.housenumber - normalized.number,
after: map[row.id].after.housenumber - normalized.number
};
}
});

// remove segments with less than 2 points; convert map to array
var segments = [];
for( var id in map ){
if( map[id].before && map[id].after ){
segments.push( map[id] );
}
}
}

// could not find two rows to use for interpolation
if( !segments.length ){
return cb( null, null );
}
// could not find two rows to use for interpolation
if( !segments.length ){
return cb( null, null );
}

// sort by minimum housenumber difference from target housenumber ASC
segments.sort( function( a, b ){
return Math.abs( a.diff.before + a.diff.after ) - Math.abs( b.diff.before + b.diff.after );
});
// sort by minimum housenumber difference from target housenumber ASC
segments.sort( function( a, b ){
return Math.abs( a.diff.before + a.diff.after ) - Math.abs( b.diff.before + b.diff.after );
});

// select before/after values to use for the interpolation
var before = segments[0].before;
var after = segments[0].after;

// compute interpolated address
var A = { lat: project.toRad( before.proj_lat ), lon: project.toRad( before.proj_lon ) };
var B = { lat: project.toRad( after.proj_lat ), lon: project.toRad( after.proj_lon ) };
var distance = geodesic.distance( A, B );

// if distance = 0 then we can simply use either A or B (they are the same lat/lon)
// else we interpolate between the two positions
var point = A;
if( distance > 0 ){
var ratio = ((normalized.number - before.housenumber) / (after.housenumber - before.housenumber));
point = geodesic.interpolate( distance, ratio, A, B );
}

// return interpolated address
return cb( null, {
type: 'interpolated',
source: 'mixed',
number: '' + Math.floor( normalized.number ),
lat: parseFloat( project.toDeg( point.lat ).toFixed(7) ),
lon: parseFloat( project.toDeg( point.lon ).toFixed(7) )
// select before/after values to use for the interpolation
var before = segments[0].before;
var after = segments[0].after;

// compute interpolated address
var A = { lat: project.toRad( before.proj_lat ), lon: project.toRad( before.proj_lon ) };
var B = { lat: project.toRad( after.proj_lat ), lon: project.toRad( after.proj_lon ) };
var distance = geodesic.distance( A, B );

// if distance = 0 then we can simply use either A or B (they are the same lat/lon)
// else we interpolate between the two positions
var point = A;
if( distance > 0 ){
var ratio = ((normalized.number - before.housenumber) / (after.housenumber - before.housenumber));
point = geodesic.interpolate( distance, ratio, A, B );
}

// return interpolated address
return cb( null, {
type: 'interpolated',
source: 'mixed',
number: '' + Math.floor( normalized.number ),
lat: parseFloat( project.toDeg( point.lat ).toFixed(7) ),
lon: parseFloat( project.toDeg( point.lon ).toFixed(7) )
});
});
});
};
Expand Down
4 changes: 0 additions & 4 deletions cmd/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,5 @@ app.use('/demo', express.static('demo'));
// app.use('/builds', directory('/data/builds', { hidden: false, icons: false, view: 'details' }));

app.listen( PORT, function() {

// force loading of libpostal
analyze.street( 'test street' );

console.log( 'server listening on port', PORT );
});
50 changes: 17 additions & 33 deletions lib/analyze.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
const libpostal_service = require( './libpostal_wrapper' );
// constants for controlling how we parse ranges, eg: 'α-β'
// some ranges such as '1-7' are ambiguous; it could mean 'apt 7, no 1'; or
// it could mean 'apt 1, no 7'; or could even be a valid range 'one to seven'.
Expand All @@ -7,47 +8,30 @@ var MIN_RANGE = 1; // the miniumum amount β is higher than α
var MAX_RANGE = 6; // the maximum amount β is higher than α
var MIN_RANGE_HOUSENUMBER = 10; // the minimum acceptible value for both α and β

/*
* Return the appropriate version of node-postal
*/

var _nodepostal_module;
function get_libpostal() {
// lazy load this dependency; since it's large (~2GB RAM) and may be
// accidentally required by a process which doesn't use it.
if (!_nodepostal_module) {
// load the mock library if MOCK_LIBPOSTAL env var is set
if (process.env.MOCK_LIBPOSTAL) {
_nodepostal_module = require('../test/lib/mock_libpostal');
// otherwise load the real thing
} else {
_nodepostal_module = require('node-postal');
}
}

return _nodepostal_module;
}

/**
analyze input streetname string and return a list of expansions.
**/
function street( streetName ){
const postal = get_libpostal();
function street( streetName, callback ){
const postal = libpostal_service();

// use libpostal to expand the address
var expansions = postal.expand.expand_address( streetName );
postal.expand.expand_address( streetName, function streetCallback(err, results, metadata) {
if (err) {
return callback(err);
}

// remove ordinals
expansions = expansions.map(function( item ){
return item.replace( /(([0-9]+)(st|nd|rd|th)($|\s))/gi, '$2 ' ).trim();
});
// remove ordinals
let expansions = results.map(function( item ){
return item.replace( /(([0-9]+)(st|nd|rd|th)($|\s))/gi, '$2 ' ).trim();
});

// remove duplicates
expansions = expansions.filter(function(item, pos, self) {
return self.indexOf(item) === pos;
});
// remove duplicates
expansions = expansions.filter(function(item, pos, self) {
return self.indexOf(item) === pos;
});

return expansions;
callback(null, expansions, metadata);
});
}

/**
Expand Down
22 changes: 22 additions & 0 deletions lib/libpostal_wrapper.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
const mock_libpostal = require('../test/lib/mock_libpostal');

// This module is a wrapper around the actual libpostal service library
// and the mock libpostal library
// it allows an environment variable to switch which library is used in application code

let libpostal_module;
function get_libpostal() {
// return the mock library if MOCK_LIBPOSTAL env var is set
if (process.env.MOCK_LIBPOSTAL) {
return mock_libpostal;
// otherwise return the actual service
} else {
// lazy load the libpostal module so that tests can skip configuring the service
if (!libpostal_module) {
libpostal_module = require( '../libpostal/service' );
}
return libpostal_module;
}
}

module.exports = get_libpostal;
47 changes: 47 additions & 0 deletions libpostal/service.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
const microservice_wrapper = require('pelias-microservice-wrapper');
const pelias_config = require('pelias-config').generate();

/**
 * Service configuration for the remote libpostal service, registered with the
 * parent class under the name 'libpostal'.
 */
const LibpostalServiceConfig = class extends microservice_wrapper.ServiceConfiguration {
  /**
   * @param {object} configBlob - config entry passed through to the parent class
   */
  constructor(configBlob) {
    super('libpostal', configBlob);
  }

  /**
   * Build the request URL by appending the endpoint to the base URL.
   * NOTE(review): `this.baseUrl` is presumably set by the parent class — confirm.
   */
  getUrl({ endpoint }) {
    const { baseUrl } = this;
    return baseUrl + endpoint;
  }

  /**
   * Build the request parameters: only `address` is forwarded.
   */
  getParameters({ address }) {
    return { address };
  }
};

// use the 'services.libpostal' config entry if available, otherwise fall back to 'api.services.libpostal'
// (because `||` is used, the fallback is also taken when 'services.libpostal' is present but falsy)
const config_entry = pelias_config.get('services.libpostal') || pelias_config.get('api.services.libpostal');

// fail while this module is being loaded when neither entry is configured,
// rather than later on the first call to the service
if (!config_entry) {
throw new Error('Libpostal configuration not found in `services.libpostal` or `api.services.libpostal`');
}

// create an instance of the libpostal service
// the value returned here is called further down as `libpostal_service(params, callback)`
const libpostal_service = microservice_wrapper.service(
new LibpostalServiceConfig(config_entry)
);

// create an object that looks like the interface to `node-postal` but uses a remote service
module.exports = {
expand: {
expand_address: function(param, callback) {
const params = {
endpoint: 'expand',
address: param
};

// the libpostal service will not handle an empty parameter
// so return empty array immediately
if (!param) {
return callback(null, []);
}
libpostal_service(params, callback);
}
}
};
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@
"jsftp": "^2.0.0",
"lodash": "^4.17.4",
"morgan": "^1.9.0",
"node-postal": "imothee/node-postal#6d0b00f68a",
"pbf2json": "^6.4.0",
"pelias-config": "^4.0.0",
"pelias-logger": "^1.2.1",
"pelias-microservice-wrapper": "^1.8.3",
"quadtree": "^1.1.3",
"require-dir": "^1.0.0",
"serve-index": "^1.8.0",
Expand Down
Loading