diff --git a/lib/streams/overrideLookedUpLocalityAndLocaladmin.js b/lib/streams/overrideLookedUpLocalityAndLocaladmin.js index d3f9509f..38d93887 100644 --- a/lib/streams/overrideLookedUpLocalityAndLocaladmin.js +++ b/lib/streams/overrideLookedUpLocalityAndLocaladmin.js @@ -9,10 +9,20 @@ var through2 = require('through2'); // when adminlookup happens, it's lat/lon is located in the Lancaster, PA // WOF locality. This is self-contradictory because now a city is located within // another city. This logic forces `locality` and `localadmin` records to be -// in agreement since we store the record itself in it's parentage. +// in agreement since we store the record itself in it's parentage. function reassignParent(document, layer) { document.clearParent(layer); + + // primary name document.addParent(layer, document.getName('default'), document.getId()); + + // name aliases + let aliases = document.getNameAliases('default'); + if( aliases.length ){ + aliases.forEach( alias => { + document.addParent(layer, alias, document.getId()); + }); + } } module.exports.create = function create() { diff --git a/lib/streams/peliasDocGenerator.js b/lib/streams/peliasDocGenerator.js index 0fee7e9b..c3c172cf 100644 --- a/lib/streams/peliasDocGenerator.js +++ b/lib/streams/peliasDocGenerator.js @@ -3,20 +3,41 @@ var logger = require( 'pelias-logger' ).get( 'geonames' ); var categoryMapping = require( '../../metadata/category_mapping.json' ); var through2 = require('through2'); +// common name delimiters +const NAME_DELIM_REGEX = /[,#\/]/; + module.exports = {}; module.exports.create = function() { return through2.obj(function(data, enc, next) { var record; try { + + // names + // note: some name fields contain a delimited list + // eg. 'Bern/Berne/Berna' + var names = data.name.trim().split(NAME_DELIM_REGEX).filter(n => n.length); + var layer = data.layer || 'venue'; record = new Document( 'geonames', layer, data._id ) - .setName( 'default', data.name.trim() ) + .setName( 'default', names[0].trim() ) .setCentroid({ lat: data.latitude, lon: data.longitude }); + // altnames + try { + names.forEach((name, i) => { + let trimmed = name.trim(); + if( trimmed.length ){ + if( i > 0 ){ + record.setNameAlias( 'default', trimmed ); + } + } + }); + } catch( err ){} + try { var population = parseInt(data.population, 10); if (population) { @@ -42,10 +63,7 @@ module.exports.create = function() { ); } - // copy 'name' object to 'phrase' in order to allow ES to create - // separate indices with different analysis techniques. if( record !== undefined ){ - record.phrase = record.name; this.push( record ); } next(); diff --git a/package.json b/package.json index dda0375e..2ff7f489 100644 --- a/package.json +++ b/package.json @@ -49,7 +49,7 @@ "pelias-config": "^3.3.0", "pelias-dbclient": "^2.5.6", "pelias-logger": "^1.2.1", - "pelias-model": "^5.5.2", + "pelias-model": "^5.7.1", "pelias-wof-admin-lookup": "^4.6.5", "request": "^2.34.0", "through2": "^2.0.1", diff --git a/test/data/expected.json b/test/data/expected.json index 13d3c060..38f83a14 100644 --- a/test/data/expected.json +++ b/test/data/expected.json @@ -32658,10 +32658,10 @@ "_id": "1885258", "data": { "name": { - "default": "Police National Service Headquarters / Public Affairs Department" + "default": ["Police National Service Headquarters", "Public Affairs Department"] }, "phrase": { - "default": "Police National Service Headquarters / Public Affairs Department" + "default": ["Police National Service Headquarters", "Public Affairs Department"] }, "center_point": { "lon": 103.84111, @@ -33634,14 +33634,15 @@ "_id": "6355174", "data": { "name": { - "default": "Gamat-eMas Network (Singapore) Blk 34, Whampoa West" + "default": ["Gamat-eMas Network (Singapore) Blk 34", "Whampoa West"] }, "phrase": { - "default": "Gamat-eMas Network (Singapore) Blk 34, Whampoa West" + "default": ["Gamat-eMas Network (Singapore) Blk 34", "Whampoa West"] }, "parent": { "locality": [ - "Gamat-eMas Network (Singapore) Blk 34, Whampoa West" + "Gamat-eMas Network (Singapore) Blk 34", + "Whampoa West" ], "locality_a": [ null @@ -35256,10 +35257,10 @@ "_id": "6481476", "data": { "name": { - "default": "Traders Hotel Singapore, by Shangri-la" + "default": ["Traders Hotel Singapore", "by Shangri-la"] }, "phrase": { - "default": "Traders Hotel Singapore, by Shangri-la" + "default": ["Traders Hotel Singapore", "by Shangri-la"] }, "center_point": { "lon": 103.824, @@ -35509,10 +35510,10 @@ "_id": "6487979", "data": { "name": { - "default": "Raffles The Plaza, Singapore" + "default": ["Raffles The Plaza", "Singapore"] }, "phrase": { - "default": "Raffles The Plaza, Singapore" + "default": ["Raffles The Plaza", "Singapore"] }, "center_point": { "lon": 103.8527, @@ -35532,10 +35533,10 @@ "_id": "6488230", "data": { "name": { - "default": "Swissotel The Stamford, Singapore" + "default": ["Swissotel The Stamford", "Singapore"] }, "phrase": { - "default": "Swissotel The Stamford, Singapore" + "default": ["Swissotel The Stamford", "Singapore"] }, "center_point": { "lon": 103.8529, @@ -35808,10 +35809,10 @@ "_id": "6502100", "data": { "name": { - "default": "Bayview Hotel, Singapore" + "default":[ "Bayview Hotel", "Singapore"] }, "phrase": { - "default": "Bayview Hotel, Singapore" + "default":[ "Bayview Hotel", "Singapore"] }, "center_point": { "lon": 103.8498, @@ -36878,10 +36879,10 @@ "_id": "6940488", "data": { "name": { - "default": "Ministry of Information, Communications and the Arts" + "default": ["Ministry of Information", "Communications and the Arts"] }, "phrase": { - "default": "Ministry of Information, Communications and the Arts" + "default": ["Ministry of Information", "Communications and the Arts"] }, "center_point": { "lon": 103.84809, @@ -37998,10 +37999,10 @@ "_id": "9882676", "data": { "name": { - "default": "Pan Pacific Orchard, Singapore" + "default": ["Pan Pacific Orchard", "Singapore"] }, "phrase": { - "default": "Pan Pacific Orchard, Singapore" + "default": ["Pan Pacific Orchard", "Singapore"] }, "center_point": { "lon": 103.83012, diff --git a/test/streams/peliasDocGeneratorTest.js b/test/streams/peliasDocGeneratorTest.js index 5f6e75a7..76d66f29 100644 --- a/test/streams/peliasDocGeneratorTest.js +++ b/test/streams/peliasDocGeneratorTest.js @@ -149,4 +149,70 @@ tape('peliasDocGenerator', function(test) { }); + test.test('add name aliases for forward slash delimited names', function(t) { + var input = { + _id: 12345, + name: ' / Something / Else/Two Words ', + latitude: 1, + longitude: 1 + }; + + var expected = new Document( 'geonames', 'venue', 12345 ) + .setName('default', 'Something') + .setNameAlias('default', 'Else') + .setNameAlias('default', 'Two Words') + .setCentroid({ lat: 1, lon: 1 }); + + var docGenerator = peliasDocGenerator.create(); + + test_stream([input], docGenerator, function(err, actual) { + t.deepEqual(actual, [expected], 'should have returned true'); + t.end(); + }); + }); + + test.test('add name aliases for comma delimited names', function(t) { + var input = { + _id: 12345, + name: ' , Something , Else,Two Words ', + latitude: 1, + longitude: 1 + }; + + var expected = new Document( 'geonames', 'venue', 12345 ) + .setName('default', 'Something') + .setNameAlias('default', 'Else') + .setNameAlias('default', 'Two Words') + .setCentroid({ lat: 1, lon: 1 }); + + var docGenerator = peliasDocGenerator.create(); + + test_stream([input], docGenerator, function(err, actual) { + t.deepEqual(actual, [expected], 'should have returned true'); + t.end(); + }); + }); + + test.test('add name aliases for hash delimited names', function(t) { + var input = { + _id: 12345, + name: ' # Something # Else#Two Words ', + latitude: 1, + longitude: 1 + }; + + var expected = new Document( 'geonames', 'venue', 12345 ) + .setName('default', 'Something') + .setNameAlias('default', 'Else') + .setNameAlias('default', 'Two Words') + .setCentroid({ lat: 1, lon: 1 }); + + var docGenerator = peliasDocGenerator.create(); + + test_stream([input], docGenerator, function(err, actual) { + t.deepEqual(actual, [expected], 'should have returned true'); + t.end(); + }); + }); + });