Skip to content
This repository has been archived by the owner on Mar 15, 2024. It is now read-only.

Invalid tag modification v2 #149

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions analysis/invalid-tag-modification/get_feature_details.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from bs4 import BeautifulSoup
import requests
import csv
import json

def parse_changeset_page(changeset_id, timeout=30):
    """Parse a changeset page to extract the URLs of flagged features.

    Args:
        changeset_id: Identifier interpolated into the changeset page URL.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of absolute feature URLs, one per table row whose last
        column (the reasons) contains 'modification'. The list is empty
        when the page has no features table.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with an HTTP error status.
    """
    url = 'http://osmcha.mapbox.com/{}/'.format(changeset_id)
    r = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than scraping an error page.
    r.raise_for_status()

    # Using BeautifulSoup to parse and scrape feature URLs.
    soup = BeautifulSoup(r.text, 'html.parser')

    # Filter by the ID of the collapsible feature section.
    div = soup.find(id='collapseOne')
    # find() returns None when nothing matches, so guard before chaining.
    table = div.find('table') if div is not None else None
    if table is None:
        return []

    feature_urls = []
    # Skip header row of the table.
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        if not cells:
            # Rows without data cells carry neither a link nor reasons.
            continue

        reasons = cells[-1].text.strip()
        # Not interested in features not flagged by the
        # invalid_tag_modification comparator.
        if 'modification' not in reasons:
            continue

        link = cells[0].find('a')
        if link is None or not link.get('href'):
            # No feature link in the first column; nothing to follow.
            continue

        feature_urls.append('http://osmcha.mapbox.com{}'.format(link['href']))
    return feature_urls

def parse_feature_page(feature_url, timeout=30):
    """Parse a feature page to extract which primary tags changed.

    Args:
        feature_url: Absolute URL of the feature page to fetch.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A two-item list ``[created, deleted]``: the primary tag key whose
        row ends in 'ADDED' and the one whose row ends in 'DELETED'. Each
        item is '' when no such row exists. When several rows match, the
        last one in the table wins.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with an HTTP error status.
    """
    # NOTE(review): the OSM key is 'place', not 'places' -- left unchanged
    # in case this list intentionally mirrors the comparator; confirm.
    primary_tags = {
        'aerialway', 'aeroway', 'amenity', 'barrier', 'boundary', 'building', 'craft', 'emergency',
        'geological', 'highway', 'historic', 'landuse', 'leisure', 'man_made', 'military', 'natural',
        'office', 'places', 'power', 'public_transport', 'railway', 'route', 'shop', 'sport', 'tourism', 'waterway',
    }
    details = {'created': '', 'deleted': ''}

    r = requests.get(feature_url, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than scraping an error page.
    r.raise_for_status()

    # Using BeautifulSoup to parse and scrape feature details.
    soup = BeautifulSoup(r.text, 'html.parser')

    # NOTE: Assuming that the page has only one table.
    table = soup.find('table')
    if table is None:
        # No table on the page: nothing was created or deleted.
        return [details['created'], details['deleted']]

    # Skip header row of the table.
    for row in table.find_all('tr')[1:]:
        tds = [item.text.strip() for item in row.find_all('td')]
        # Ignore rows without cells; we only want primary feature tags.
        if not tds or tds[0] not in primary_tags:
            continue

        if tds[-1] == 'ADDED':
            details['created'] = tds[0]
        elif tds[-1] == 'DELETED':
            details['deleted'] = tds[0]
    return [details['created'], details['deleted']]

# Driver: for every changeset listed in sample.csv, print one CSV line per
# flagged feature: changeset id, harmful flag, feature URL, created primary
# tag, deleted primary tag.
with open('sample.csv') as f:
    reader = csv.reader(f)
    for row in reader:
        try:
            changeset_id = str(int(row[0]))
        except (ValueError, IndexError):
            # ValueError: the header row (first cell is not a number).
            # IndexError: a blank line, which csv.reader yields as [].
            continue

        for feature_url in parse_changeset_page(changeset_id):
            # Whether the changeset is harmful is stored at column index 15
            # (zero-based, i.e. the 16th column).
            changeset_details = [changeset_id, row[15], feature_url]
            changeset_details.extend(parse_feature_page(feature_url))
            print(','.join(str(item) for item in changeset_details))
1,731 changes: 1,731 additions & 0 deletions analysis/invalid-tag-modification/invalid-tag-modification-v2.ipynb

Large diffs are not rendered by default.

8 changes: 6 additions & 2 deletions comparators/invalid_tag_modification.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,14 @@ function invalidTagModification(newVersion, oldVersion, callback) {
var result = {};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bkowshik 👋, instead of returning {} for a false result, we now follow the convention of returning the value false itself. Ref commit: 756c5cb

if (!newVersion || !oldVersion) return callback(null, result);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We now assume that newVersion will always be present in the GeoJSON. If a feature is deleted, we check the deleted flag on the new version instead. Ref commit: 2e0e6b6

So (!newVersion || !oldVersion) will become (newVersion.deleted || !oldVersion)


// If all properties of feature are removed, assume feature is being moved into a relation.
if (Object.keys(newVersion.properties).length === 0) return callback(null, {});

var primaryTags = getPrimaryTags(oldVersion.properties);
// Check if all primary tags are retained in newVersion.
for (var i = 0; i < primaryTags.length; i++) {
if (!(primaryTags[i] in newVersion.properties)) return callback(null, {'result:invalid_tag_modification': true});
// Check if all primary tags are retained in newVersion.
// If not retained, check if there were two primary tags to start with.
if (!(primaryTags[i] in newVersion.properties) && (primaryTags.length < 2)) return callback(null, {'result:invalid_tag_modification': true});
}

return callback(null, {});
Expand Down
20 changes: 18 additions & 2 deletions tests/fixtures/invalid_tag_modification.json
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@
}
},
{
"description": "Test deletion of a second primary tag to a feature",
"expectedResult": {"result:invalid_tag_modification":true},
"description": "Test deletion of a second primary tag to a feature. The feature just became more specific",
"expectedResult": {},
"newVersion": {
"type": "Feature",
"properties": {
Expand All @@ -132,6 +132,22 @@
},
"geometry": null
}
},
{
"description": "When all properties of a feature are removed, assuming that the feature is being moved to a relation",
"expectedResult": {},
"newVersion": {
"type": "Feature",
"properties": {},
"geometry": null
},
"oldVersion": {
"type": "Feature",
"properties": {
"railway": "yes"
},
"geometry": null
}
}
]
}