Skip to content

Commit

Permalink
Merge pull request #342 from pelias/delimited_names
Browse files Browse the repository at this point in the history
handle delimited names
  • Loading branch information
orangejulius authored Oct 31, 2018
2 parents e1b5a21 + a9f2258 commit 5601577
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 23 deletions.
12 changes: 11 additions & 1 deletion lib/streams/overrideLookedUpLocalityAndLocaladmin.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,20 @@ var through2 = require('through2');
// when adminlookup happens, its lat/lon is located in the Lancaster, PA
// WOF locality. This is self-contradictory because now a city is located within
// another city. This logic forces `locality` and `localadmin` records to be
// in agreement since we store the record itself in its parentage.
// in agreement since we store the record itself in its parentage.
/**
 * Replace the parent entries of `document` at `layer` with the document's
 * own names, so the record ends up listed as its own parent for that layer.
 *
 * The existing parents for the layer are cleared first; the default name is
 * then added, followed by each alias of the default name (if any), all
 * paired with the document's own id.
 *
 * @param {Object} document - record exposing clearParent, addParent, getName,
 *   getNameAliases and getId
 * @param {*} layer - passed straight through to clearParent/addParent
 *   (presumably a layer name such as 'locality' -- confirm against callers)
 */
function reassignParent(document, layer) {
  // registers one of the document's own names as a parent entry at `layer`
  const addSelfAsParent = (name) => {
    document.addParent(layer, name, document.getId());
  };

  // discard whatever was previously stored for this layer
  document.clearParent(layer);

  // the primary name goes in first
  addSelfAsParent(document.getName('default'));

  // then every alias of the default name, when there are any
  const alternateNames = document.getNameAliases('default');
  if (!alternateNames.length) {
    return;
  }
  alternateNames.forEach(addSelfAsParent);
}

module.exports.create = function create() {
Expand Down
26 changes: 22 additions & 4 deletions lib/streams/peliasDocGenerator.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,41 @@ var logger = require( 'pelias-logger' ).get( 'geonames' );
var categoryMapping = require( '../../metadata/category_mapping.json' );
var through2 = require('through2');

// common name delimiters: a comma, a hash or a forward slash.
// used further down as the separator when a raw name field is split
// into its individual names.
const NAME_DELIM_REGEX = /[,#\/]/;

// start from an empty exports object; `create` is attached to it below
module.exports = {};

module.exports.create = function() {
return through2.obj(function(data, enc, next) {
var record;
try {

// names
// note: some name fields contain a delimited list
// eg. 'Bern/Berne/Berna'
var names = data.name.trim().split(NAME_DELIM_REGEX).filter(n => n.length);

var layer = data.layer || 'venue';
record = new Document( 'geonames', layer, data._id )
.setName( 'default', data.name.trim() )
.setName( 'default', names[0].trim() )
.setCentroid({
lat: data.latitude,
lon: data.longitude
});

// altnames
try {
names.forEach((name, i) => {
let trimmed = name.trim();
if( trimmed.length ){
if( i > 0 ){
record.setNameAlias( 'default', trimmed );
}
}
});
} catch( err ){}

try {
var population = parseInt(data.population, 10);
if (population) {
Expand All @@ -42,10 +63,7 @@ module.exports.create = function() {
);
}

// copy 'name' object to 'phrase' in order to allow ES to create
// separate indices with different analysis techniques.
if( record !== undefined ){
record.phrase = record.name;
this.push( record );
}
next();
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
"pelias-config": "^3.3.0",
"pelias-dbclient": "^2.5.6",
"pelias-logger": "^1.2.1",
"pelias-model": "^5.5.2",
"pelias-model": "^5.7.1",
"pelias-wof-admin-lookup": "^4.6.5",
"request": "^2.34.0",
"through2": "^2.0.1",
Expand Down
35 changes: 18 additions & 17 deletions test/data/expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -32658,10 +32658,10 @@
"_id": "1885258",
"data": {
"name": {
"default": "Police National Service Headquarters / Public Affairs Department"
"default": ["Police National Service Headquarters", "Public Affairs Department"]
},
"phrase": {
"default": "Police National Service Headquarters / Public Affairs Department"
"default": ["Police National Service Headquarters", "Public Affairs Department"]
},
"center_point": {
"lon": 103.84111,
Expand Down Expand Up @@ -33634,14 +33634,15 @@
"_id": "6355174",
"data": {
"name": {
"default": "Gamat-eMas Network (Singapore) Blk 34, Whampoa West"
"default": ["Gamat-eMas Network (Singapore) Blk 34", "Whampoa West"]
},
"phrase": {
"default": "Gamat-eMas Network (Singapore) Blk 34, Whampoa West"
"default": ["Gamat-eMas Network (Singapore) Blk 34", "Whampoa West"]
},
"parent": {
"locality": [
"Gamat-eMas Network (Singapore) Blk 34, Whampoa West"
"Gamat-eMas Network (Singapore) Blk 34",
"Whampoa West"
],
"locality_a": [
null
Expand Down Expand Up @@ -35256,10 +35257,10 @@
"_id": "6481476",
"data": {
"name": {
"default": "Traders Hotel Singapore, by Shangri-la"
"default": ["Traders Hotel Singapore", "by Shangri-la"]
},
"phrase": {
"default": "Traders Hotel Singapore, by Shangri-la"
"default": ["Traders Hotel Singapore", "by Shangri-la"]
},
"center_point": {
"lon": 103.824,
Expand Down Expand Up @@ -35509,10 +35510,10 @@
"_id": "6487979",
"data": {
"name": {
"default": "Raffles The Plaza, Singapore"
"default": ["Raffles The Plaza", "Singapore"]
},
"phrase": {
"default": "Raffles The Plaza, Singapore"
"default": ["Raffles The Plaza", "Singapore"]
},
"center_point": {
"lon": 103.8527,
Expand All @@ -35532,10 +35533,10 @@
"_id": "6488230",
"data": {
"name": {
"default": "Swissotel The Stamford, Singapore"
"default": ["Swissotel The Stamford", "Singapore"]
},
"phrase": {
"default": "Swissotel The Stamford, Singapore"
"default": ["Swissotel The Stamford", "Singapore"]
},
"center_point": {
"lon": 103.8529,
Expand Down Expand Up @@ -35808,10 +35809,10 @@
"_id": "6502100",
"data": {
"name": {
"default": "Bayview Hotel, Singapore"
"default":[ "Bayview Hotel", "Singapore"]
},
"phrase": {
"default": "Bayview Hotel, Singapore"
"default":[ "Bayview Hotel", "Singapore"]
},
"center_point": {
"lon": 103.8498,
Expand Down Expand Up @@ -36878,10 +36879,10 @@
"_id": "6940488",
"data": {
"name": {
"default": "Ministry of Information, Communications and the Arts"
"default": ["Ministry of Information", "Communications and the Arts"]
},
"phrase": {
"default": "Ministry of Information, Communications and the Arts"
"default": ["Ministry of Information", "Communications and the Arts"]
},
"center_point": {
"lon": 103.84809,
Expand Down Expand Up @@ -37998,10 +37999,10 @@
"_id": "9882676",
"data": {
"name": {
"default": "Pan Pacific Orchard, Singapore"
"default": ["Pan Pacific Orchard", "Singapore"]
},
"phrase": {
"default": "Pan Pacific Orchard, Singapore"
"default": ["Pan Pacific Orchard", "Singapore"]
},
"center_point": {
"lon": 103.83012,
Expand Down
66 changes: 66 additions & 0 deletions test/streams/peliasDocGeneratorTest.js
Original file line number Diff line number Diff line change
Expand Up @@ -149,4 +149,70 @@ tape('peliasDocGenerator', function(test) {

});

test.test('add name aliases for forward slash delimited names', (t) => {
  // the raw name starts with a delimiter and has uneven whitespace
  // around its '/'-separated segments
  const rawRecord = {
    _id: 12345,
    name: ' / Something / Else/Two Words ',
    latitude: 1,
    longitude: 1
  };

  // first non-empty segment is expected as the default name,
  // the remaining segments as aliases of it
  const expectedDoc = new Document( 'geonames', 'venue', 12345 )
    .setName('default', 'Something')
    .setNameAlias('default', 'Else')
    .setNameAlias('default', 'Two Words')
    .setCentroid({ lat: 1, lon: 1 });

  test_stream([rawRecord], peliasDocGenerator.create(), (err, actual) => {
    t.deepEqual(actual, [expectedDoc], 'should have returned true');
    t.end();
  });
});

test.test('add name aliases for comma delimited names', (t) => {
  // the raw name starts with a delimiter and has uneven whitespace
  // around its ','-separated segments
  const rawRecord = {
    _id: 12345,
    name: ' , Something , Else,Two Words ',
    latitude: 1,
    longitude: 1
  };

  // first non-empty segment is expected as the default name,
  // the remaining segments as aliases of it
  const expectedDoc = new Document( 'geonames', 'venue', 12345 )
    .setName('default', 'Something')
    .setNameAlias('default', 'Else')
    .setNameAlias('default', 'Two Words')
    .setCentroid({ lat: 1, lon: 1 });

  test_stream([rawRecord], peliasDocGenerator.create(), (err, actual) => {
    t.deepEqual(actual, [expectedDoc], 'should have returned true');
    t.end();
  });
});

test.test('add name aliases for hash delimited names', (t) => {
  // the raw name starts with a delimiter and has uneven whitespace
  // around its '#'-separated segments
  const rawRecord = {
    _id: 12345,
    name: ' # Something # Else#Two Words ',
    latitude: 1,
    longitude: 1
  };

  // first non-empty segment is expected as the default name,
  // the remaining segments as aliases of it
  const expectedDoc = new Document( 'geonames', 'venue', 12345 )
    .setName('default', 'Something')
    .setNameAlias('default', 'Else')
    .setNameAlias('default', 'Two Words')
    .setCentroid({ lat: 1, lon: 1 });

  test_stream([rawRecord], peliasDocGenerator.create(), (err, actual) => {
    t.deepEqual(actual, [expectedDoc], 'should have returned true');
    t.end();
  });
});

});

0 comments on commit 5601577

Please sign in to comment.