-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreparando_camada_silver.py
61 lines (48 loc) · 1.65 KB
/
preparando_camada_silver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import pandas as pd
import duckdb
csv_path = "./data_lake/raw/raw.csv"
# Criação da camada silver
duckdb.execute("""
CREATE TABLE raw AS
SELECT * FROM read_csv_auto(?)""", [csv_path])
duckdb.execute("ALTER TABLE raw ADD COLUMN currency VARCHAR(5)")
exchanges_to_currency = {
"NYA": "USD",
"IXIC": "USD",
"HSI": "HKD",
"000001.SS": "CNY",
"N225": "JPY",
"N100": "EUR",
"399001.SZ": "CNY",
"GSPTSE": "CAD",
"NSEI": "INR",
"GDAXI": "EUR",
"KS11": "KRW",
"SSMI": "CHF",
"TWII": "TWD",
"J203.JO": "ZAR"
}
for exchange, currency in exchanges_to_currency.items():
duckdb.execute("""
UPDATE raw
SET currency = ?
WHERE "Index" = ?
""", [currency, exchange])
## Procurando valores duplicados ou nulos
duckdb.execute("""
CREATE TABLE silver AS
SELECT * FROM raw
WHERE "Index" IS NOT NULL AND TRIM("Index") NOT IN ('', ' ', 'null') AND
"Date" IS NOT NULL AND
"Open" IS NOT NULL AND TRIM("Open") NOT IN ('', ' ', 'null') AND
"High" IS NOT NULL AND TRIM("High") NOT IN ('', ' ', 'null') AND
"Low" IS NOT NULL AND TRIM("Low") NOT IN ('', ' ', 'null') AND
"Close" IS NOT NULL AND TRIM("Close") NOT IN ('', ' ', 'null') AND
"Adj Close" IS NOT NULL AND TRIM("Adj Close") NOT IN ('', ' ', 'null') AND
"Volume" IS NOT NULL AND TRIM("Volume") NOT IN ('', ' ', 'null') AND
"currency" IS NOT NULL AND TRIM("currency") NOT IN ('', ' ', 'null');
""")
# Como o arquivo não é grande, exportei como csv
duckdb.execute("""
COPY silver TO './data_lake/silver/silver.csv' (HEADER, DELIMITER ',')
""")