This repository has been archived by the owner on Apr 24, 2024. It is now read-only.

Scraper: Error message of missing csv files now contains file name #1303

Open · wants to merge 5 commits into master
Changes from all commits
3 changes: 2 additions & 1 deletion doc/Dockerfile
@@ -3,7 +3,8 @@
 FROM rust:1.67.1-slim-bookworm AS builder

 ENV MDBOOK_VERSION=0.4.23 \
-    MDBOOK_MERMAID_VERSION=0.12.6
+    MDBOOK_MERMAID_VERSION=0.12.6 \
+    CARGO_NET_RETRY=10

 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
3 changes: 3 additions & 0 deletions doc/changelog.md
@@ -16,6 +16,9 @@ Syntax: `- short text describing the change _(Your Name)_`
 - _()_
 - _()_
 - _()_
+- Scraper: Error message of missing csv files now contains file name _(Christoph Schreiner)_
+- _()_
+- _()_
 - Add tests for hooks in frontend/layers _(Lukas Anton Lakits)_
 - _()_
 - Add warn signs to sizes for multi-selected plant-areas _(Lukas Anton Lakits)_
7 changes: 3 additions & 4 deletions scraper/src/apply_overrides.js
@@ -1,20 +1,19 @@
 import fs from "fs";
 import path from "path";
 import { parse as json2csv } from "json2csv";
-import csv from "csvtojson";
-import { cleanUpJsonForCsv } from "./helpers/helpers.js";
+import { cleanUpJsonForCsv, readCsv } from "./helpers/helpers.js";
 import { applyOverride } from "./helpers/override.js";

 const deletionsFile = "00_DELETIONS.csv";

 async function loadMergedDataset() {
-  return csv().fromFile("data/mergedDatasets.csv");
+  return readCsv("data/mergedDatasets.csv");
 }

 async function applyDeletions(plants) {
   console.log(`[INFO] Deleting plants from data/overrides/${deletionsFile}`);

-  const deletePlants = await csv().fromFile(`data/overrides/${deletionsFile}`);
+  const deletePlants = await readCsv(`data/overrides/${deletionsFile}`);

   deletePlants.forEach((overridePlant) => {
     // find the plant
6 changes: 3 additions & 3 deletions scraper/src/compare_datasets.js
@@ -1,7 +1,7 @@
 import fs from "fs";
 import { parse as json2csv } from "json2csv";
-import csv from "csvtojson";
 import columnMapping from "./column_mapping_permapeople.js";
+import { readCsv } from "./helpers/helpers.js";

 /**
  * Sanitize the column names of the csv files.
@@ -49,8 +49,8 @@ function sanitizeColumnNames(jsonArray)
 async function compareDatabases() {
   const allPlants = [];

-  const practicalPlants = await csv().fromFile("data/detail.csv"); // Practical plants dataset
-  const permapeople = await csv().fromFile("data/permapeopleRawData.csv"); // Permapeople dataset
+  const practicalPlants = await readCsv("data/detail.csv"); // Practical plants dataset
+  const permapeople = await readCsv("data/permapeopleRawData.csv"); // Permapeople dataset

   sanitizeColumnNames(practicalPlants);
   sanitizeColumnNames(permapeople);
5 changes: 2 additions & 3 deletions scraper/src/fetch_german_names.js
@@ -2,8 +2,7 @@ import axios from "axios";
 import axiosRetry from "axios-retry";
 import fs from "fs";
 import { parse as json2csv } from "json2csv";
-import csv from "csvtojson";
-import { capitalizeWords } from "./helpers/helpers.js";
+import { capitalizeWords, readCsv } from "./helpers/helpers.js";

 let GermanNamesFound = 0;

@@ -184,7 +183,7 @@ async function fetchGermanNames() {
     fs.mkdirSync("data/overrides");
   }

-  let plants = await csv().fromFile("data/mergedDatasets.csv");
+  let plants = await readCsv("data/mergedDatasets.csv");

   await fetchGermanNamesForPlantsConcurrent(plants);

16 changes: 16 additions & 0 deletions scraper/src/helpers/helpers.js
@@ -1,3 +1,18 @@
+import csv from "csvtojson";
+
+/**
+ * Reads a CSV file and returns a JSON array.
+ * Logs the file name on error and exits the process.
+ */
+async function readCsv(file) {
+  try {
+    return await csv().fromFile(file);
+  } catch (error) {
+    console.error(`[ERROR] Error reading CSV file '${file}': ${error}`);
+    process.exit(1);
+  }
+}
+
 /**
  * Capitalizes the first character of every word in a string.
  *
@@ -133,4 +148,5 @@ export {
   capitalizeWords,
   cleanUpJsonForCsv,
   processMeasurement,
+  readCsv,
 };
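The helper above is the core of this PR: every scraper entry point now loads its CSV input through `readCsv`, so a missing or unreadable file is reported with the offending path instead of an anonymous csvtojson stack trace. A minimal sketch of what a caller sees with the new helper (hypothetical snippet; the exact underlying error text from csvtojson is only illustrative):

```js
// Hypothetical caller inside scraper/src, using the new helper.
import { readCsv } from "./helpers/helpers.js";

// If data/mergedDatasets.csv is missing, readCsv logs something like:
//   [ERROR] Error reading CSV file 'data/mergedDatasets.csv': Error: File does not exist...
// and stops the scraper via process.exit(1), so the code below only runs on success.
const plants = await readCsv("data/mergedDatasets.csv");
console.log(`[INFO] Loaded ${plants.length} plants`);
```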
4 changes: 2 additions & 2 deletions scraper/src/helpers/override.js
@@ -1,4 +1,4 @@
-import csv from "csvtojson";
+import { readCsv } from "./helpers.js";

 /**
  * Apply the given override file to the plants
@@ -11,7 +11,7 @@ import csv from "csvtojson";
 async function applyOverride(plants, file) {
   console.log(`[INFO] Applying override ${file}`);

-  const overridePlants = await csv().fromFile(file);
+  const overridePlants = await readCsv(file);

   overridePlants.forEach((overridePlant) => {
     // find the plant
6 changes: 3 additions & 3 deletions scraper/src/insert_plant_relations.js
@@ -1,7 +1,7 @@
 import pgPromise from "pg-promise";
-import csv from "csvtojson";

 import { config } from "dotenv";
+import { readCsv } from "./helpers/helpers.js";

 config({
   path: ".env.local",
@@ -97,8 +97,8 @@ async function start(companionsFilePath, antagonistFilePath) {
     "[INFO] Starting the insertion of plant relations into database."
   );

-  const companionsJsonArray = await csv().fromFile(companionsFilePath);
-  const antagonistJsonArray = await csv().fromFile(antagonistFilePath);
+  const companionsJsonArray = await readCsv(companionsFilePath);
+  const antagonistJsonArray = await readCsv(antagonistFilePath);
   const uniquePlantNameIdMap = new Map();

   //fill map with plantnames from the csv
5 changes: 2 additions & 3 deletions scraper/src/insert_plants.js
@@ -1,7 +1,6 @@
 import pgPromise from "pg-promise";
-import csv from "csvtojson";
 import dbPlantsColumns from "./helpers/dp_plants_columns.js";
-import { sanitizeColumnNames } from "./helpers/helpers.js";
+import { sanitizeColumnNames, readCsv } from "./helpers/helpers.js";

 import { config } from "dotenv";

@@ -98,7 +97,7 @@ function sanitizeValues(jsonArray)
 async function insertPlants(fileName) {
   console.log("[INFO] Starting the insertion of plants into database.");

-  const jsonArray = await csv().fromFile(fileName);
+  const jsonArray = await readCsv(fileName);

   sanitizeColumnNames(jsonArray);

8 changes: 4 additions & 4 deletions scraper/src/merge_datasets.js
@@ -1,12 +1,12 @@
 import fs from "fs";
 import { parse as json2csv } from "json2csv";
-import csv from "csvtojson";
 import permapeopleColumnMapping from "./helpers/column_mapping_permapeople.js";
 import {
   sanitizeColumnNames,
   processMeasurement,
   getSoilPH,
   cleanUpJsonForCsv,
+  readCsv,
 } from "./helpers/helpers.js";

 /**
@@ -153,9 +153,9 @@ async function mergeDatasets() {

   let allPlants = [];

-  let practicalPlants = await csv().fromFile("data/detail.csv"); // Practical plants dataset
-  let permapeople = await csv().fromFile("data/permapeopleRawData.csv"); // Permapeople dataset
-  let reinsaat = await csv().fromFile("data/reinsaatRawData.csv"); // Reinsaat dataset
+  let practicalPlants = await readCsv("data/detail.csv"); // Practical plants dataset
+  let permapeople = await readCsv("data/permapeopleRawData.csv"); // Permapeople dataset
+  let reinsaat = await readCsv("data/reinsaatRawData.csv"); // Reinsaat dataset

   sanitizeColumnNames(practicalPlants, "practicalplants");
   sanitizeColumnNames(permapeople, "permapeople");
5 changes: 2 additions & 3 deletions scraper/src/merge_german_names.js
@@ -1,13 +1,12 @@
 import fs from "fs";
 import { parse as json2csv } from "json2csv";
-import csv from "csvtojson";
-import { cleanUpJsonForCsv } from "./helpers/helpers.js";
+import { cleanUpJsonForCsv, readCsv } from "./helpers/helpers.js";
 import { applyOverride } from "./helpers/override.js";

 const germanCommonNames = "data/germanCommonNames.csv";

 async function loadMergedDataset() {
-  return csv().fromFile("data/mergedDatasets.csv");
+  return readCsv("data/mergedDatasets.csv");
 }

 async function applyGermanNames(plants) {
6 changes: 3 additions & 3 deletions scraper/src/merge_reinsaat.js
@@ -1,7 +1,7 @@
 import fs from "fs";
 import { parse as json2csv } from "json2csv";
-import csv from "csvtojson";
 import mapping from "./helpers/column_mapping_reinsaat.js";
+import { readCsv } from "./helpers/helpers.js";

 const renameColumns = (plants) => {
   return plants.map((plant) => {
@@ -98,8 +98,8 @@ const renameCategory = (plants, lang) => {
 async function mergeDatasets() {
   let allPlants = [];

-  let reinsaatRawDataEN = await csv().fromFile("data/reinsaatRawDataEN.csv");
-  let reinsaatRawDataDE = await csv().fromFile("data/reinsaatRawDataDE.csv");
+  let reinsaatRawDataEN = await readCsv("data/reinsaatRawDataEN.csv");
+  let reinsaatRawDataDE = await readCsv("data/reinsaatRawDataDE.csv");

   reinsaatRawDataEN = renameCategory(reinsaatRawDataEN, "EN");
   reinsaatRawDataDE = renameCategory(reinsaatRawDataDE, "DE");