Skip to content

Commit

Permalink
fix swimming pool scraper
Browse files Browse the repository at this point in the history
  • Loading branch information
joergreichert committed Sep 18, 2024
1 parent 4ab3e54 commit a73f4c3
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 49 deletions.
2 changes: 1 addition & 1 deletion public/data/leipzig-swimming-pools.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
[{"title":"Grünauer Welle","address":{"leisure":"Grünauer Welle","house_number":"7","road":"Stuttgarter Allee","neighbourhood":"Wohnkomplex 4","suburb":"Grünau-Siedlung","city_district":"West","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04209","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/gruenauer-welle","type":"indoor_pool","lat":"51.31507295","lon":"12.291681015468672"},{"title":"Sachsen-Therme","address":{"road":"Schongauerstraße","suburb":"Paunsdorf","city_district":"Ost","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04329","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/sachsen-therme","type":"indoor_pool","lat":"51.3497763","lon":"12.4739067"},{"title":"Schwimmhalle Mitte","address":{"leisure":"Schwimmhalle Mitte","house_number":"84","road":"Kirschbergstraße","suburb":"Gohlis-Süd","city_district":"Nord","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04155","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/schwimmhalle-mitte","type":"indoor_pool","lat":"51.36225665","lon":"12.353394654258464"},{"title":"Schwimmhalle Nord","address":{"leisure":"Schwimmhalle Nord","house_number":"54","road":"Kleiststraße","suburb":"Eutritzsch","city_district":"Nord","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04157","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/schwimmhalle-nord","type":"indoor_pool","lat":"51.37231965","lon":"12.38134070842299"},{"title":"Schwimmhalle Nordost","address":{"leisure":"Schwimmhalle Nordost","house_number":"26","road":"Schönefelder 
Allee","neighbourhood":"Schönefeld","suburb":"Schönefeld-Abtnaundorf","city_district":"Nordost","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04347","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/schwimmhalle-nordost","type":"indoor_pool","lat":"51.358358550000005","lon":"12.404955916520343"},{"title":"Schwimmhalle Süd","address":{"leisure":"Schwimmhalle Süd","house_number":"10","road":"Tarostraße","suburb":"Zentrum-Südost","city_district":"Mitte","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04103","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/schwimmhalle-sued","type":"indoor_pool","lat":"51.3239909","lon":"12.385783649999993"},{"title":"Schwimmhalle Südost","address":{"leisure":"Schwimmhalle Südost","house_number":"35","road":"Kolmstraße","suburb":"Probstheida","city_district":"Südost","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04299","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/schwimmhalle-suedost","type":"indoor_pool","lat":"51.31632955","lon":"12.42836830011111"},{"title":"Schwimmhalle Universität Leipzig","address":{"leisure":"Universitätsschwimmhalle","house_number":"4","road":"Mainzer Straße","neighbourhood":"Bachviertel","suburb":"Zentrum-West","city_district":"Mitte","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04109","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/schwimmhalle-universitaet-leipzig","type":"indoor_pool","lat":"51.336847199999994","lon":"12.353675175513555"},{"title":"Schwimmhalle West","address":{"leisure":"Schwimmhalle 
West","house_number":"52a","road":"Hans-Driesch-Straße","suburb":"Leutzsch","city_district":"Altwest","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04179","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/schwimmhalle-west","type":"indoor_pool","lat":"51.34652235","lon":"12.311952829626495"},{"title":"Sportbad an der Elster","address":{"leisure":"Sportbad an der Elster","house_number":"8","road":"Antonienstraße","suburb":"Plagwitz","city_district":"Südwest","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04229","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/sportbad-an-der-elster","type":"indoor_pool","lat":"51.319954300000006","lon":"12.336005139793688"},{"title":"Sprunghalle der Universität Leipzig","address":{"leisure":"Universitätsschwimmhalle","house_number":"4","road":"Mainzer Straße","neighbourhood":"Bachviertel","suburb":"Zentrum-West","city_district":"Mitte","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04109","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/sprunghalle-der-universitaet-leipzig","type":"indoor_pool","lat":"51.336847199999994","lon":"12.353675175513555"},{"title":"Kinderfreibecken \"Robbe\"","address":{"leisure":"Schwimmhalle Nord","house_number":"54","road":"Kleiststraße","suburb":"Eutritzsch","city_district":"Nord","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04157","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/kinderfreibecken-robbe","type":"outdoor_pool","lat":"51.37231965","lon":"12.38134070842299"},{"title":"Ökobad Lindenthal","address":{"house_number":"3","road":"Am 
Freibad","suburb":"Lindenthal","city_district":"Nordwest","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04158","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/oekobad-lindenthal","type":"outdoor_pool","lat":"51.3935438","lon":"12.3269022"},{"title":"Schreberbad","address":{"leisure":"Schreberbad","house_number":"15","road":"Schreberstraße","neighbourhood":"Bachviertel","suburb":"Zentrum-West","city_district":"Mitte","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04109","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/schreberbad","type":"outdoor_pool","lat":"51.338967100000005","lon":"12.358802401994044"},{"title":"Sommerbad Kleinzschocher","address":{"house_number":"75","road":"Küchenholzallee","suburb":"Kleinzschocher","city_district":"Südwest","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04229","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/sommerbad-kleinzschocher","type":"outdoor_pool","lat":"51.3163021","lon":"12.338389"},{"title":"Sommerbad Schönefeld","address":{"house_number":"39","road":"Volbedingstraße","neighbourhood":"Schönefeld","suburb":"Schönefeld-Abtnaundorf","city_district":"Nordost","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04357","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/sommerbad-schoenefeld","type":"outdoor_pool","lat":"51.3621058","lon":"12.4110359"},{"title":"Sommerbad Südost","address":{"house_number":"173","road":"Oststraße","suburb":"Stötteritz","city_district":"Südost","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04299","country":"Deutschland","country_code":"de"},"link":"https://www.leipzig.de//detailansicht-adresse/sommerbad-suedost","type":"outdoor_pool","lat":"51.3262935","lon":"12.4198936"}]
[{"title":"Grünauer Welle","address":{"leisure":"Grünauer Welle","house_number":"7","road":"Stuttgarter Allee","neighbourhood":"Wohnkomplex 4","suburb":"Grünau-Siedlung","city_district":"West","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04209","country":"Deutschland","country_code":"de"},"lat":"51.31507295","lon":"12.291681015468672","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/schwimmhalle-gruenauer-welle"},{"title":"Schwimmhalle Mitte","address":{"leisure":"Schwimmhalle Mitte","house_number":"84","road":"Kirschbergstraße","suburb":"Gohlis-Süd","city_district":"Nord","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04155","country":"Deutschland","country_code":"de"},"lat":"51.36225665","lon":"12.353394654258464","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/schwimmhalle-mitte"},{"title":"Schwimmhalle Nord","address":{"leisure":"Schwimmhalle Nord","house_number":"54","road":"Kleiststraße","suburb":"Eutritzsch","city_district":"Nord","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04157","country":"Deutschland","country_code":"de"},"lat":"51.37231965","lon":"12.38134070842299","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/schwimmhalle-nord"},{"title":"Schwimmhalle Nordost","address":{"leisure":"Schwimmhalle Nordost","house_number":"26","road":"Schönefelder Allee","neighbourhood":"Schönefeld","suburb":"Schönefeld-Abtnaundorf","city_district":"Nordost","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04347","country":"Deutschland","country_code":"de"},"lat":"51.358358550000005","lon":"12.404955916520343","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/schwimmhalle-nordost"},{"title":"Schwimmhalle 
Süd","address":{"leisure":"Schwimmhalle Süd","house_number":"10","road":"Tarostraße","suburb":"Zentrum-Südost","city_district":"Mitte","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04103","country":"Deutschland","country_code":"de"},"lat":"51.3239909","lon":"12.385783649999993","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/schwimmhalle-sued"},{"title":"Schwimmhalle Südost","address":{"leisure":"Schwimmhalle Südost","house_number":"35","road":"Kolmstraße","suburb":"Probstheida","city_district":"Südost","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04299","country":"Deutschland","country_code":"de"},"lat":"51.31632955","lon":"12.42836830011111","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/schwimmhalle-suedost"},{"title":"Schwimmhalle West","address":{"leisure":"Schwimmhalle West","house_number":"52a","road":"Hans-Driesch-Straße","suburb":"Leutzsch","city_district":"Altwest","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04179","country":"Deutschland","country_code":"de"},"lat":"51.34652235","lon":"12.311952829626495","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/schwimmhalle-west"},{"title":"Sportbad an der Elster","address":{"leisure":"Sauna im Sportbad an der Elster","house_number":"8","road":"Antonienstraße","suburb":"Plagwitz","city_district":"Südwest","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04229","country":"Deutschland","country_code":"de"},"lat":"51.3196487","lon":"12.3356885","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/sportbad-an-der-elster"},{"title":"Sommerbad Kleinzschocher","address":{"shop":"Sommerbad 
Kleinzschocher","house_number":"75","road":"Küchenholzallee","suburb":"Kleinzschocher","city_district":"Südwest","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04229","country":"Deutschland","country_code":"de"},"lat":"51.3165758","lon":"12.3384644","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/freibad-kleinzschocher"},{"title":"Schreberbad","address":{"leisure":"Schreberbad","house_number":"15","road":"Schreberstraße","neighbourhood":"Bachviertel","suburb":"Zentrum-West","city_district":"Mitte","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04109","country":"Deutschland","country_code":"de"},"lat":"51.338967100000005","lon":"12.358802401994044","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/freibad-schreberbad"},{"title":"Sommerbad Schönefeld","address":{"leisure":"Sommerbad Schönefeld","house_number":"39","road":"Volbedingstraße","neighbourhood":"Schönefeld","suburb":"Schönefeld-Abtnaundorf","city_district":"Nordost","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04357","country":"Deutschland","country_code":"de"},"lat":"51.3630321","lon":"12.411970819689838","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/freibad-schoenefeld"},{"title":"Sommerbad Südost","address":{"house_number":"173","road":"Oststraße","suburb":"Stötteritz","city_district":"Südost","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04299","country":"Deutschland","country_code":"de"},"lat":"51.3262935","lon":"12.4198936","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/freibad-suedost"},{"title":"Ökobad Lindenthal","address":{"house_number":"3","road":"Am 
Freibad","suburb":"Lindenthal","city_district":"Nordwest","city":"Leipzig","state":"Sachsen","ISO3166-2-lvl4":"DE-SN","postcode":"04158","country":"Deutschland","country_code":"de"},"lat":"51.3930018","lon":"12.3270532","link":"https://www.leipzig.de/freizeit-kultur-und-tourismus/sport/sportstaettenbelegung/detailseite/detailseite/oekobad-lindenthal"}]
101 changes: 53 additions & 48 deletions src/scrapers/leipzig-swimming-pools.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,70 +8,75 @@ const url_outdoor_pool = `${domain}/freizeit-kultur-und-tourismus/sport/sportsta

const __dirname = new URL('.', import.meta.url).pathname;

Promise.all([scrapeIt(url_indoor_pool, {
list: {
listItem: '.address-list-item',
data: {
title: {
selector: '.link_intern',
attr: 'title'
},
address: {
selector: '.list.left',
convert: address => address.split('\n').map(d => d.trim()).filter(d => !!d)
},
link: {
attr: "href",
selector: '.link_intern',
convert: href => `${domain}/${href}`
}
}
}
}), scrapeIt(url_outdoor_pool, {
const scrapeDetailsUrl = {
list: {
listItem: '.address-list-item',
listItem: '.project-attributes',
data: {
title: {
selector: '.link_intern',
attr: 'title'
selector: 'h3 a'
},
address: {
selector: '.list.left',
convert: address => address.split('\n').map(d => d.trim()).filter(d => !!d)
},
link: {
attr: "href",
selector: '.link_intern',
convert: href => `${domain}/${href}`
detailsUrl: {
selector: 'h3 a',
attr: "href"
}
}
}
})]).then(async ([indoor_pool, outdoor_pool]) => {
let data = [
...indoor_pool.data.list.map(sh => ({ ...sh, type: 'indoor_pool' })),
...outdoor_pool.data.list.map(sh => ({ ...sh, type: 'outdoor_pool' })),
]
}

let newList = [];
for (let i = 0; i < data.length; i++) {
let element = data[i];
let q = `${element.address.join(' ').replace(/ \(.*\)/, '').replace('an der Schwimmhalle Nord', '').replace('Vollbedingstraße', 'Volbedingstraße').replace('Kirschbergstraße 84', 'Schwimmhalle Mitte')}`;
console.log(q)
let resp = await search(q)
let search_results = resp.filter(r => ['water_park', 'sports_centre'].includes(r.type));
/**
 * scrape-it options for a single pool detail page.
 *
 * Reads the text of the element at index 0 among the paragraphs inside the
 * booking content container and normalises it into a one-line address string.
 */
const scrapeDetailsData = {
  address: {
    selector: '.t3booking-t3booking-main-content p',
    eq: 0,
    convert(rawAddress) {
      // Insert a comma before the first " 04" (presumably the 04xxx postcode)
      // so street and postcode/city become two comma-separated parts.
      const withComma = rawAddress.replace(' 04', ', 04');
      // Drop the first "Im Stadtplan anzeigen" ("show on city map") text,
      // presumably the caption of a map link inside the same paragraph.
      return withComma.replace('Im Stadtplan anzeigen', '');
    },
  },
};

/**
 * Geocodes a scraped pool entry and attaches the resolved address and coordinates.
 *
 * @param {{address: string, title?: string}} element - entry whose `address` is
 *   passed to `search` as the query.
 * @returns {Promise<object>} a copy of `element` whose `address`, `lat` and `lon`
 *   come from the first search hit of type `water_park` or `sports_centre`;
 *   the unchanged `element` when no such hit exists or the lookup throws.
 */
const enrichWithCoords = async (element) => {
  console.log('search for ' + element.address)
  try {
    const resp = await search(element.address)
    // Take the data from the hit that actually matched the type filter.
    // Using `resp[0]` could pick an unrelated first hit (e.g. a sauna or shop
    // at the same address) whenever a matching hit merely exists further down.
    const match = resp.find(r => ['water_park', 'sports_centre'].includes(r.type));
    if (match) {
      return {
        ...element,
        address: match.address,
        lat: match.lat,
        lon: match.lon
      }
    }
    console.log(element.title, element.address, 'could not be found')
    console.log(resp)
    return element;
  } catch (e) {
    // `resp` is block-scoped to the `try` above and not visible here; logging
    // it would throw a ReferenceError and defeat the fallback. Log the error.
    console.log(element.title, element.address, 'could not be resolved')
    console.log(e)
    return element;
  }
}

// Start scraping both overview pages right away. Each promise resolves to a
// scrape-it result whose `data.list` holds the entries described by `scrapeDetailsUrl`.
const scrapeDetailsUrlIndoor = scrapeIt(url_indoor_pool, scrapeDetailsUrl);
const scrapeDetailsUrlOutdoor = scrapeIt(url_outdoor_pool, scrapeDetailsUrl);

/**
 * Builds the final pool records from the two scraped overview lists and writes
 * them as JSON to `public/data/leipzig-swimming-pools.json`.
 *
 * For every overview entry the detail page is scraped for its address, the
 * address is geocoded via `enrichWithCoords`, and the result is stored together
 * with the entry's title, pool type and detail-page link.
 *
 * @param {Array} results - `[indoor, outdoor]` scrape-it results; each must
 *   expose `data.list` with `title` and `detailsUrl` per entry.
 * @returns {Promise<void>} resolves once the JSON file has been written.
 */
const handleScrapeResponse = async ([indoor_pool, outdoor_pool]) => {
  const data = [
    ...indoor_pool.data.list.map(sh => ({ ...sh, type: 'indoor_pool' })),
    ...outdoor_pool.data.list.map(sh => ({ ...sh, type: 'outdoor_pool' })),
  ]
  const pools = [];
  // Kept sequential, one detail page and one lookup at a time, as before.
  // NOTE(review): presumably this also keeps load on the geocoder low — confirm
  // before parallelising.
  for (const element of data) {
    const detailsUrl = `${domain}${element.detailsUrl}`;
    const details = await scrapeIt(detailsUrl, scrapeDetailsData);
    // Hand the title along so the lookup's log lines can name the pool.
    const result = await enrichWithCoords({ title: element.title, ...details.data });
    pools.push({
      title: element.title,
      // Keep the indoor/outdoor marker in the output; it was computed above but
      // previously never written to the file.
      type: element.type,
      ...result,
      link: detailsUrl
    });
  }
  fs.writeFileSync(`${__dirname}../../public/data/leipzig-swimming-pools.json`, JSON.stringify(pools), 'utf8')
};

fs.writeFileSync(`${__dirname}../../public/data/leipzig-swimming-pools.json`, JSON.stringify(newList, null, 2), 'utf8')
})
// Run the scraper once both overview pages are loaded. Report any failure and
// signal it through the exit code instead of leaving the rejection unhandled.
Promise.all([scrapeDetailsUrlIndoor, scrapeDetailsUrlOutdoor])
  .then(handleScrapeResponse)
  .catch((err) => {
    console.error('scraping swimming pools failed', err);
    process.exitCode = 1;
  })
46 changes: 46 additions & 0 deletions test/scrapers/leipzig-swimming-pools-test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import scrapeIt from "scrape-it";

// Base URL of the city website; scraped detail links are appended to it.
const domain = `https://www.leipzig.de`;
// Overview page listing the indoor pools ("Schwimmhallen").
const url_indoor_pool = `${domain}/freizeit-kultur-und-tourismus/sport/sportstaetten/schwimmhallen`;

// Both fields are read from the same element: the link inside an entry's heading.
const poolHeadingLink = 'h3 a';

/**
 * scrape-it options for an overview page: yields one `list` item per
 * `.project-attributes` element, with the heading link's text as `title` and
 * its `href` attribute as `detailsUrl`.
 */
const scrapeDetailsUrl = {
  list: {
    listItem: '.project-attributes',
    data: {
      title: { selector: poolHeadingLink },
      detailsUrl: { selector: poolHeadingLink, attr: 'href' },
    },
  },
};

/**
 * scrape-it options for a pool detail page: takes the element at index 0 among
 * the paragraphs of the booking content container and cleans its text up into
 * an address string.
 */
const scrapeDetailsData = {
  address: {
    selector: '.t3booking-t3booking-main-content p',
    eq: 0,
    convert(paragraphText) {
      // First " 04" gets a leading comma (presumably the 04xxx postcode start).
      const commaSeparated = paragraphText.replace(' 04', ', 04');
      // Remove the first "Im Stadtplan anzeigen" ("show on city map") text.
      return commaSeparated.replace('Im Stadtplan anzeigen', '');
    },
  },
};

// Start scraping the indoor overview page; resolves to a scrape-it result with `data.list`.
const scrapeDetailsUrlIndoor = scrapeIt(url_indoor_pool, scrapeDetailsUrl);

/**
 * Smoke check: scrapes the detail page of the first indoor pool from the
 * overview list and prints its title and address.
 *
 * @param {Array} results - single-element array holding the scrape-it result of
 *   the indoor overview page (`data.list` with `title` and `detailsUrl`).
 * @returns {Promise<{title: *, address: *}|undefined>} the printed record, or
 *   `undefined` when the overview list is empty.
 */
const handleScrapeResponse = async ([indoor_pool]) => {
  const data = indoor_pool.data.list.map(sh => ({ ...sh, type: 'indoor_pool' }));
  const element = data[0];
  if (!element) {
    // Nothing to check; avoid a TypeError on `element.detailsUrl`.
    console.log('no indoor pools found on the overview page');
    return undefined;
  }
  // Await the detail scrape so a failure rejects this function's promise
  // instead of surfacing as a detached, floating rejection.
  const details = await scrapeIt(`${domain}${element.detailsUrl}`, scrapeDetailsData);
  console.log(details.data);
  const result = {
    title: element.title,
    address: details.data.address,
  };
  console.log(JSON.stringify(result, null, 2));
  return result;
};

// Run the smoke check; report a failure and signal it through the exit code
// instead of leaving the rejection unhandled.
Promise.all([scrapeDetailsUrlIndoor])
  .then(handleScrapeResponse)
  .catch((err) => {
    console.error('swimming pool scraper check failed', err);
    process.exitCode = 1;
  })

0 comments on commit a73f4c3

Please sign in to comment.