Skip to content
This repository has been archived by the owner on Apr 24, 2024. It is now read-only.

Scraper: Error message of missing csv files now contains file name #1303

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Syntax: `- short text describing the change _(Your Name)_`
- _()_
- _()_
- _()_
- _()_
- Scraper: Error message for missing CSV files now contains the file name _(Christoph Schreiner)_
- _()_
- _()_
- _()_
Expand Down
7 changes: 3 additions & 4 deletions scraper/src/apply_overrides.js
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
import fs from "fs";
import path from "path";
import { parse as json2csv } from "json2csv";
import csv from "csvtojson";
import { cleanUpJsonForCsv } from "./helpers/helpers.js";
import { cleanUpJsonForCsv, readCsv } from "./helpers/helpers.js";
import { applyOverride } from "./helpers/override.js";

const deletionsFile = "00_DELETIONS.csv";

async function loadMergedDataset() {
return csv().fromFile("data/mergedDatasets.csv");
return readCsv("data/mergedDatasets.csv");
}

async function applyDeletions(plants) {
console.log(`[INFO] Deleting plants from data/overrides/${deletionsFile}`);

const deletePlants = await csv().fromFile(`data/overrides/${deletionsFile}`);
const deletePlants = await readCsv(`data/overrides/${deletionsFile}`);

deletePlants.forEach((overridePlant) => {
// find the plant
Expand Down
6 changes: 3 additions & 3 deletions scraper/src/compare_datasets.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import fs from "fs";
import { parse as json2csv } from "json2csv";
import csv from "csvtojson";
import columnMapping from "./column_mapping_permapeople.js";
import { readCsv } from "./helpers/helpers.js";

/**
* Sanitize the column names of the csv files.
Expand Down Expand Up @@ -49,8 +49,8 @@ function sanitizeColumnNames(jsonArray) {
async function compareDatabases() {
const allPlants = [];

const practicalPlants = await csv().fromFile("data/detail.csv"); // Practical plants dataset
const permapeople = await csv().fromFile("data/permapeopleRawData.csv"); // Permapeople dataset
const practicalPlants = await readCsv("data/detail.csv"); // Practical plants dataset
const permapeople = await readCsv("data/permapeopleRawData.csv"); // Permapeople dataset

sanitizeColumnNames(practicalPlants);
sanitizeColumnNames(permapeople);
Expand Down
5 changes: 2 additions & 3 deletions scraper/src/fetch_german_names.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ import axios from "axios";
import axiosRetry from "axios-retry";
import fs from "fs";
import { parse as json2csv } from "json2csv";
import csv from "csvtojson";
import { capitalizeWords } from "./helpers/helpers.js";
import { capitalizeWords, readCsv } from "./helpers/helpers.js";

let GermanNamesFound = 0;

Expand Down Expand Up @@ -184,7 +183,7 @@ async function fetchGermanNames() {
fs.mkdirSync("data/overrides");
}

let plants = await csv().fromFile("data/mergedDatasets.csv");
let plants = await readCsv("data/mergedDatasets.csv");

await fetchGermanNamesForPlantsConcurrent(plants);

Expand Down
16 changes: 16 additions & 0 deletions scraper/src/helpers/helpers.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
import csv from "csvtojson";

/**
 * Loads a CSV file through `csv().fromFile(file)` and hands back the parsed result.
 *
 * A failed read is treated as fatal: the file name and the error are written
 * to stderr and the process is terminated with exit code 1, so callers never
 * have to handle a rejection themselves.
 *
 * @param {string} file - Path of the CSV file to load.
 * @returns {Promise<*>} Whatever `csv().fromFile(file)` resolves to
 *   (presumably an array with one object per CSV row).
 */
async function readCsv(file) {
  let parsedRows;

  try {
    parsedRows = await csv().fromFile(file);
  } catch (error) {
    // Include the file name so it is clear which input is missing or broken.
    console.error(`[ERROR] Error reading CSV file '${file}': ${error}`);
    process.exit(1);
  }

  return parsedRows;
}

/**
* Capitalizes the first character of every word in a string.
*
Expand Down Expand Up @@ -133,4 +148,5 @@ export {
capitalizeWords,
cleanUpJsonForCsv,
processMeasurement,
readCsv,
};
4 changes: 2 additions & 2 deletions scraper/src/helpers/override.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import csv from "csvtojson";
import { readCsv } from "./helpers.js";

/**
* Apply the given override file to the plants
Expand All @@ -11,7 +11,7 @@ import csv from "csvtojson";
async function applyOverride(plants, file) {
console.log(`[INFO] Applying override ${file}`);

const overridePlants = await csv().fromFile(file);
const overridePlants = await readCsv(file);

overridePlants.forEach((overridePlant) => {
// find the plant
Expand Down
6 changes: 3 additions & 3 deletions scraper/src/insert_plant_relations.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pgPromise from "pg-promise";
import csv from "csvtojson";

import { config } from "dotenv";
import { readCsv } from "./helpers/helpers.js";

config({
path: ".env.local",
Expand Down Expand Up @@ -97,8 +97,8 @@ async function start(companionsFilePath, antagonistFilePath) {
"[INFO] Starting the insertion of plant relations into database."
);

const companionsJsonArray = await csv().fromFile(companionsFilePath);
const antagonistJsonArray = await csv().fromFile(antagonistFilePath);
const companionsJsonArray = await readCsv(companionsFilePath);
const antagonistJsonArray = await readCsv(antagonistFilePath);
const uniquePlantNameIdMap = new Map();

//fill map with plantnames from the csv
Expand Down
5 changes: 2 additions & 3 deletions scraper/src/insert_plants.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import pgPromise from "pg-promise";
import csv from "csvtojson";
import dbPlantsColumns from "./helpers/dp_plants_columns.js";
import { sanitizeColumnNames } from "./helpers/helpers.js";
import { sanitizeColumnNames, readCsv } from "./helpers/helpers.js";

import { config } from "dotenv";

Expand Down Expand Up @@ -98,7 +97,7 @@ function sanitizeValues(jsonArray) {
async function insertPlants(fileName) {
console.log("[INFO] Starting the insertion of plants into database.");

const jsonArray = await csv().fromFile(fileName);
const jsonArray = await readCsv(fileName);

sanitizeColumnNames(jsonArray);

Expand Down
8 changes: 4 additions & 4 deletions scraper/src/merge_datasets.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import fs from "fs";
import { parse as json2csv } from "json2csv";
import csv from "csvtojson";
import permapeopleColumnMapping from "./helpers/column_mapping_permapeople.js";
import {
sanitizeColumnNames,
processMeasurement,
getSoilPH,
cleanUpJsonForCsv,
readCsv,
} from "./helpers/helpers.js";

/**
Expand Down Expand Up @@ -153,9 +153,9 @@ async function mergeDatasets() {

let allPlants = [];

let practicalPlants = await csv().fromFile("data/detail.csv"); // Practical plants dataset
let permapeople = await csv().fromFile("data/permapeopleRawData.csv"); // Permapeople dataset
let reinsaat = await csv().fromFile("data/reinsaatRawData.csv"); // Reinsaat dataset
let practicalPlants = await readCsv("data/detail.csv"); // Practical plants dataset
let permapeople = await readCsv("data/permapeopleRawData.csv"); // Permapeople dataset
let reinsaat = await readCsv("data/reinsaatRawData.csv"); // Reinsaat dataset

sanitizeColumnNames(practicalPlants, "practicalplants");
sanitizeColumnNames(permapeople, "permapeople");
Expand Down
5 changes: 2 additions & 3 deletions scraper/src/merge_german_names.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import fs from "fs";
import { parse as json2csv } from "json2csv";
import csv from "csvtojson";
import { cleanUpJsonForCsv } from "./helpers/helpers.js";
import { cleanUpJsonForCsv, readCsv } from "./helpers/helpers.js";
import { applyOverride } from "./helpers/override.js";

const germanCommonNames = "data/germanCommonNames.csv";

async function loadMergedDataset() {
return csv().fromFile("data/mergedDatasets.csv");
return readCsv("data/mergedDatasets.csv");
}

async function applyGermanNames(plants) {
Expand Down
6 changes: 3 additions & 3 deletions scraper/src/merge_reinsaat.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import fs from "fs";
import { parse as json2csv } from "json2csv";
import csv from "csvtojson";
import mapping from "./helpers/column_mapping_reinsaat.js";
import { readCsv } from "./helpers/helpers.js";

const renameColumns = (plants) => {
return plants.map((plant) => {
Expand Down Expand Up @@ -98,8 +98,8 @@ const renameCategory = (plants, lang) => {
async function mergeDatasets() {
let allPlants = [];

let reinsaatRawDataEN = await csv().fromFile("data/reinsaatRawDataEN.csv");
let reinsaatRawDataDE = await csv().fromFile("data/reinsaatRawDataDE.csv");
let reinsaatRawDataEN = await readCsv("data/reinsaatRawDataEN.csv");
let reinsaatRawDataDE = await readCsv("data/reinsaatRawDataDE.csv");

reinsaatRawDataEN = renameCategory(reinsaatRawDataEN, "EN");
reinsaatRawDataDE = renameCategory(reinsaatRawDataDE, "DE");
Expand Down