-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathIII-Wheat-Yield-Calculation.R
405 lines (274 loc) · 16.3 KB
/
III-Wheat-Yield-Calculation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
################################################################################
# Wheat yield computation from ZA-AUI data #
# date: 18/10/2021 #
# generated file: III-WW_Revenue.csv #
# Hervé D., Philippe M, Niklas M. #
################################################################################
setwd("Y:/Papers and Data/Datasets/ZA AUI Data/data_transformation_HD_PM_NM")
library(tidyverse)
library(DescTools)
#Read-in data for the years 2009 to 2018
#External data from ZA-AUI database (protected data)
d9 <- read.csv2("./Feldkalender/Feldkalender_2009.csv", header = T, stringsAsFactors = F)
d10 <- read.csv2("./Feldkalender/Feldkalender_2010.csv", header = T, stringsAsFactors = F)
d11 <- read.csv2("./Feldkalender/Feldkalender_2011.csv", header = T, stringsAsFactors = F)
d12 <- read.csv2("./Feldkalender/Feldkalender_2012.csv", header = T, stringsAsFactors = F)
d13 <- read.csv2("./Feldkalender/Feldkalender_2013.csv", header = T, stringsAsFactors = F)
d14 <- read.csv2("./Feldkalender/Feldkalender_2014.csv", header = T, stringsAsFactors = F)
d15 <- read.csv2("./Feldkalender/Feldkalender_2015.csv", header = T, stringsAsFactors = F)
d16 <- read.csv2("./Feldkalender/Feldkalender_2016.csv", header = T, stringsAsFactors = F)
d17 <- read.csv2("./Feldkalender/Feldkalender_2017.csv", header = T, stringsAsFactors = F)
d18 <- read.csv2("./Feldkalender/Feldkalender_2018.csv", header = T, stringsAsFactors = F)
ds <- list(d9, d10, d11, d12, d13, d14, d15, d16, d17, d18)
years <- c(2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018)
# Loading the file with correspondent AUI.ID for AUI.Betriebsnr.
ID <- read.csv2("Zuweisung_BetrNr_AUI_ID.csv", header = T, stringsAsFactors = F)
# Replace ZA.Betriebnsr. identification variable by the unique AUI.ID identification variable
IDConverter <- cbind.data.frame(ID$AUI.ID, ID$AUI.Betriebsnummer)
colnames(IDConverter) <- c("AUI.ID", "AUI.Betriebsnummer")
IDConverter <- with(IDConverter, setNames(AUI.ID, AUI.Betriebsnummer))
dfs <- lapply(years, function(y) {
df <- read.csv2(paste0("./Feldkalender/Feldkalender_", y, ".csv"), header = T, stringsAsFactors = F)
within(df, ZA.Betriebsnr. <- unname(IDConverter[as.character(ZA.Betriebsnr.)]))
})
for (i in 1:length(years)) {
ds[[i]] <- dfs[[i]]
colnames(ds[[i]])[colnames(ds[[i]]) == "ZA.Betriebsnr."] <- "AUI.ID"
}
# Use dataset with converted information but no aggregation
dat <- read.csv2("Pesticide_cleaned_nobook_noagg_ind_definiert_09_18.csv")
nec <- c(
"SIdent", "Schlag.ID", "KID", "AUI.ID", "Schlagflaeche", "Flaeche", "PSMBez", "PSMIdent", "Menge.Produkt", "Menge.Produkt.ha", "Menge.Produkt.ha2",
"Massnahmeflaeche2", "Herbizid", "Insektizid", "Fungizid", "Rest", "AIag", "AIA", "AIAha", "FL", "HL", "TL", "STIQuotient", "Kultur", "Datum", "Kulturertrag", "Jahr"
) # Use only neccesary information
table(nec %in% names(dat)) # spelling mistakes?
dat <- dat[nec[nec %in% names(dat)]]
### Check for WW Ext if they use "gebeiztes Saatgut"
# Provide years and AUI.ID and plots with Extenso WW
dat_ids <- dat[, c("AUI.ID", "Schlag.ID", "Jahr", "Kultur")]
Jahre <- list(2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018)
ext_list <-
dat_ids %>%
filter(Kultur == "WinterweizenExt") %>%
select(Schlag.ID, Jahr) %>%
distinct(Schlag.ID, Jahr)
# From original ZA-AUI data: Massnahmentypnr. == 2 (Saat/Pflanzung) und == 5,6 (Ernte) are interesting
# First start with Saat (2)
test <- as.data.frame(cbind(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), c(
"gebeizt", "Gebeizt",
"gebeizt Gebeizt", "hallo",
"Gr?tze ungebeizt",
"unGebeizt", "Galle geb.",
"gebeizte Gr?tze", "Gr?tze gebeizt ist lecker",
"Gr?tze non trait."
))) # Check how to use the string recognition
int1 <- "gebeizt"
int2 <- "geb[:punct:]"
int3 <- "(Gebeizt|gebeizt|geb\\.|trait?es|trait\\.)"
int4 <- "(non|ungebeizt)"
int5 <- "(Winterweizen|Bl? d'automne)"
int6 <- "(combin?|comb\\.|komb\\.|kobiniert)"
checks <- rbind(test[str_detect(test$V2, int1), ], test[str_detect(test$V2, int2), ])
checks2 <- test[str_detect(test$V2, int3), ]
checks3 <- checks2[!(str_detect(checks2$V2, regex(int4, ignore_case = T))), ]
# First use table(o3$Produktbezeichnung) to find all the possible ways of naming a product gebeizt (gebeizt,Gebeizt,geb.,traite?s.....) ... than use str_detect or tidyr (14.4.4 Grouped matches ...on bookmark) to extract those plots
ext_check <- list()
for (i in 1:length(years)) {
ext_nec <- ext_list[ext_list$Jahr == Jahre[[i]], ]
o <- ds[[i]]
o2 <- o[o$Schlag.ID %in% ext_nec$Schlag.ID, ]
o2 <- o2[, c("AUI.ID", "Schlag.ID", "Massnahmentypnr.", "Massnahmentyp", "Massnahme", "Produktbezeichnung")]
o3 <- o2[o2$Massnahmentypnr. %in% c(2), ]
o4 <- o3[str_detect(o3$Produktbezeichnung, int3), ] # Filter those with variations of gebeizt
o5 <- o4[!(str_detect(o4$Produktbezeichnung, regex(int4, ignore_case = T))), ] # Kick those out with non traitees / ungebeizt
o6 <- o5[str_detect(o5$Produktbezeichnung, int5), ] # Only use those which have Winterweizen/Ble de automne in their name
o7 <- o6[str_detect(o6$Produktbezeichnung, regex(int6, ignore_case = T)), ] # Only those which have combined seed treatment are forbidden under Extenso
o8 <- unique(o7$Schlag.ID) # Extract IDs of those which cannot be Extenso
ext_check[[i]] <- o8
}
# Change those plots from WWExt to WW
for (i in 1:length(years)) {
dat$Kultur[dat$Jahr == Jahre[[i]] & dat$Kultur == "WinterweizenExt" & dat$Schlag.ID %in% ext_check[[i]]] <- "Winterweizen"
}
### Identify those which grow WW and WW Ext in the same year
dat_ids <- dat[, c("AUI.ID", "Schlag.ID", "Jahr", "Kultur")]
rel_ids <-
dat_ids %>%
filter(Kultur %in% c("Winterweizen", "WinterweizenExt")) %>%
mutate(ind_cult = ifelse(Kultur == "Winterweizen", 1,
ifelse(Kultur == "WinterweizenExt", 2, 0)
)) %>%
distinct(AUI.ID, Jahr, ind_cult)
rel_ids2 <-
rel_ids %>%
select(AUI.ID, Jahr, ind_cult) %>%
group_by(AUI.ID, Jahr) %>%
summarize(rel = sum(ind_cult, na.rm = T))
table(rel_ids2$rel) # rel=1 WW, rel=2 WWExt, rel=3 both - 3 is only possible if WWExt was breadwheat and WW conv fodderwheat
# Extract those plots where WW conv is cultivated and the farmer grows WW Ext in the same year
WWfodder_ids <-
merge(dat, rel_ids2, by = c("AUI.ID", "Jahr")) %>%
select(AUI.ID, Schlag.ID, Jahr, Kultur, rel) %>%
filter(rel == 3, Kultur == "Winterweizen")
# First look at information regarding harvest: If they are Weizen Top, I,II,II --> cannot be fooder wheat
WWnotfodder_plots <- list()
test <- as.data.frame(cbind(c(1, 2, 3, 4, 5, 6), c("I", "II", "III", "Top", "Weizen Top", "Weizen II"))) # Check how to use the string recognition
int1 <- "(I|Top)"
checks <- test[str_detect(test$V2, int1), ]
for (i in 1:length(years)) {
o <- ds[[i]]
o2 <- o[, c("AUI.ID", "Schlag.ID", "Massnahmentypnr.", "Massnahmentyp", "Massnahme", "Produktbezeichnung")]
o3 <- WWfodder_ids[WWfodder_ids$Jahr == Jahre[[i]], ] # select the right year
o4 <- o2[o2$Schlag.ID %in% o3$Schlag.ID, ] # select those plots where they grow WW conv and farmer grows both in a given year
o5 <- o4[o4$Massnahmentypnr. %in% c(5, 6), ] # select harvest measures
o6 <- o5[str_detect(o5$Produktbezeichnung, int1), ] # select those with Weizen I, II, III or Top
o7 <- ds[[i]]
o8 <- o7[, c("AUI.ID", "Schlag.ID", "Massnahmentypnr.", "Massnahmentyp", "Massnahme", "Produktbezeichnung", "Datum")]
o9 <- o8[o8$Schlag.ID %in% o6$Schlag.ID, ] # to make sure no mistake is done look at events in this plot as a whole again
WWnotfodder_plots[[i]] <- unique(o9$AUI.ID)
}
# The above identified farmers (WWnotfodder_plots) are not allowed to grow extensive breadwheat according to IP-SUisse because they grow intensive breadwheat already --> change those plots from WW Ext to WW int
WWfodderext_ids <- list() # Extenso plots that are in question
for (i in 1:length(years)) {
WWfodderext_ids[[i]] <-
dat %>%
select(AUI.ID, Schlag.ID, Jahr, Kultur) %>%
filter(Jahr == Jahre[[i]], Kultur == "WinterweizenExt", AUI.ID %in% WWnotfodder_plots[[i]])
}
WWnotfodderext_plots <- list() # And those of these Extenso plots where breadwheat is grown
test <- as.data.frame(cbind(c(1, 2, 3, 4, 5, 6), c("I", "II", "III", "Top", "Weizen Top", "Weizen II"))) # Check how to use the string recognition
int1 <- "(I|Top)"
checks <- test[str_detect(test$V2, int1), ]
for (i in 1:length(years)) {
o <- ds[[i]]
o2 <- o[, c("AUI.ID", "Schlag.ID", "Massnahmentypnr.", "Massnahmentyp", "Massnahme", "Produktbezeichnung")]
o3 <- WWfodderext_ids[[i]] # select the right year
o4 <- o2[o2$Schlag.ID %in% o3$Schlag.ID, ] # select those plots where they grow WW conv and farmer grows both in a given year
o5 <- o4[o4$Massnahmentypnr. %in% c(5, 6), ] # select harvest measures
o6 <- o5[str_detect(o5$Produktbezeichnung, int1), ] # select those with Weizen I, II, III or Top
WWnotfodderext_plots[[i]] <- unique(o6$Schlag.ID) # Change those plots from Ext to WW
}
# Change those plots from WWExt to WW
for (i in 1:length(years)) {
dat$Kultur[dat$Jahr == Jahre[[i]] & dat$Kultur == "WinterweizenExt" & dat$Schlag.ID %in% WWnotfodderext_plots[[i]]] <- "Winterweizen"
}
dat$Kultur[dat$Jahr == 2011 & dat$Schlag.ID %in% c("77356E0D-C1C3-4593-9DD2-79B05D", "71F8AABD-2057-469F-B55F-93BD21")] <- "WinterweizenExt" # No indications of intensive were found
# delete plots with yield == 0 in 2013
noyield <-
ds[[5]] %>%
filter(Kultur == "Winterweizen", Kulturertrag == 0) %>%
select(Schlag.ID) %>%
distinct(Schlag.ID) # identify IDs with yield ==0 in 2013 and no hail damage
dat <- dat[!(dat$Jahr == 2013 & dat$Schlag.ID %in% noyield$Schlag.ID), ] # delete the plots
dat <- dat[dat$Kultur %in% c("Kartoffeln", "Winterweizen"), ] # fulfill requirement 1a): cultures
table(dat$Kultur) # check cultures left
############# 2.) Add prices and yields= revenues ##################
# a) Create a Value Variable (yield*price) ####
# Results in avg rev per ha over all years per AUI.ID are saved in revenue_Pot_all and revenue_WW_all
# a - i) Winterwheat ####
cat_WW <- list()
count_WW_prod <- list()
double_WW_prod <- list()
no_WW_prod <- list()
test <- as.data.frame(cbind(c(1, 2, 3, 4, 5, 6), c("I", "II", "III", "Top", "Weizen Top", "Weizen II"))) # Check how to use the string recognition
int1 <- "(I|Top|Weizen|Biscuits)" # wanted expressions
int2 <- "(Weizenstroh)" # unwanted expressions
int3 <- "Arina"
checks <- test[str_detect(test$V2, int1), ]
for (i in 1:length(years)) {
o <- ds[[i]] # raw data
o2 <- o[, c("AUI.ID", "Schlag.ID", "Massnahmentypnr.", "Massnahmentyp", "Massnahme", "Produktbezeichnung")]
o3 <- dat[dat$Kultur == "Winterweizen" & dat$Jahr == Jahre[[i]], ] # plots used for analysis
o4 <- o2[o2$Schlag.ID %in% o3$Schlag.ID & o2$Massnahmentypnr. %in% c(5, 6), ] # only harvest
o5 <- o4[str_detect(o4$Produktbezeichnung, int1), ] # those which have Weizen in their name
o6 <- o5[!(str_detect(o5$Produktbezeichnung, int2)), ] # take those with straw in their name out
cat_WW[[i]] <- o6 # list with relevant products harvested per plot
o7 <- # count if there are multiple products indicated on some plots (indicates mistake)
unique(o6) %>%
group_by(AUI.ID, Schlag.ID, Produktbezeichnung) %>%
summarise(classification = n())
o7b <- # 2 stands for 2 product names per plot
o7 %>%
filter(classification == 2)
double_WW_prod[[i]] <- # identify plots with two product indications --> why? -> hay is indicated differently here: no problem because classes are consistent
o6 %>%
filter(Schlag.ID %in% o7b$Schlag.ID)
o8 <- o7[, c("AUI.ID", "Schlag.ID", "classification")]
o9 <- # join information from counting with all used plots: see which plots do not have product information yet
merge(o3, o8, by = c("AUI.ID", "Schlag.ID"), all.x = T) %>% # plots with no info will receive an NA in classification variable
select(AUI.ID, Schlag.ID, classification) %>%
distinct(AUI.ID, Schlag.ID, classification)
count_WW_prod[[i]] <- o9 # save this information to get an overview
o10 <- # those plots which have no indication of products
o9 %>%
filter(is.na(classification)) %>%
select(Schlag.ID)
o11 <- o2[o2$Schlag.ID %in% o10$Schlag.ID & o2$Massnahmentypnr. %in% c(2), ] # those which have no product indication, sewing information
no_WW_prod[[i]] <- unique(o11) # those with Arina are Weizen I ... the rest not identified
o12 <- o11[str_detect(o11$Produktbezeichnung, int3), ] # those are the unnamed ones that can be identified as Arina
# create dataset
o13 <- unique(o3[, c("AUI.ID", "Schlag.ID")]) # alle IDs used
o14 <- merge(o13, o6, by = c("AUI.ID", "Schlag.ID"), all.x = T) # merge with information on products
o14$Produktbezeichnung <- as.character(o14$Produktbezeichnung)
o14$Produktbezeichnung[o13$Schlag.ID %in% o12$Schlag.ID] <- "Weizen I" # Arina plots are Weizen I
o14$Produktbezeichnung[is.na(o14$Produktbezeichnung)] <- "Average" # fill empty fields with word Average
o15 <- o14[, c("AUI.ID", "Schlag.ID", "Produktbezeichnung")]
cat_WW[[i]] <- unique(o15) # list with relevant products harvested per plot revised: no double entries ... only empty fields have to be filled with averages
}
# Price information from Agridea
Years <- as.numeric(c(
rep(2009, 7), rep(2010, 7), rep(2011, 7), rep(2012, 7), rep(2013, 7),
rep(2014, 7), rep(2015, 7), rep(2016, 7), rep(2017, 7), rep(2018, 7)
))
Klasse <- rep(c("Weizen Top", "Weizen I", "Weizen II", "Weizen III", "Weizen Biscuits", "Weizen", "Average"), 5)
Price <- as.numeric(c(
51, 48, 43, 40, 44.5, 36, 44.5,
51, 48, 43, 40, 44.5, 36, 44.5,
53, 50, 47, 44, 48, 36.5, 48,
53, 50, 47.5, 44, 48, 36.5, 48,
52, 50.5, 49.5, 44.5, 48.5, 36.5, 44.5,
52, 50, 49, 45, 49, 36.5, 49,
52, 50, 49, 45, 49, 36.5, 49,
52, 50, 49, 45, 49, 36.5, 49,
52, 50, 49, 45, 49, 36.5, 49,
52, 50, 49, 45, 49, 36.5, 49
)) # Prices per dt (100kg)
prices_WW <- as.data.frame(cbind(Years, Klasse, Price))
prices_WW$Price <- as.numeric(prices_WW$Price)#as.numeric(levels(prices_WW$Price))[prices_WW$Price]
# Join information: classification, prices per class, yields, plotsize
revenue_WW_yearly <- list()
checks_revenue <- list()
checks_yield <- list()
for (i in 1:length(years)) {
o <- ds[[i]] # Raw data
o2 <- dat[dat$Kultur == "Winterweizen" & dat$Jahr == Jahre[[i]], ] # plots used for analysis
o3 <- o2[o2$Schlag.ID %in% o2$Schlag.ID, ] # plots used in the analysis
o4 <- unique(o3[, c("AUI.ID", "Schlag.ID", "Schlagflaeche", "Kulturertrag")]) # IDs, acreage per plot, yield in 100kg/ha
o4$Kulturertrag <- as.numeric(o4$Kulturertrag)
checks_revenue[[i]] <- dim(o4)[1] - length(unique(o4$Schlag.ID)) # if zero no doubble entries: Yes!!!
o5 <- merge(o4, cat_WW[[i]], by = c("AUI.ID", "Schlag.ID"))
o5$Kulturertrag[o5$Kulturertrag == 980] <- 98 # correct typo in AUI.ID 10
o6 <- prices_WW[prices_WW$Years == Jahre[[i]], ] # Prices in this year
o7 <-
merge(o5, o6, by.x = c("Produktbezeichnung"), by.y = c("Klasse")) %>%
mutate(Rev = Kulturertrag * Price) # Rev per ha
check_ID <- o7$Schlag.ID[o7$Kulturertrag < 40]
checks_yield[[i]] <- o[o$Schlag.ID %in% check_ID, ]
revenue_WW_yearly[[i]] <- o7
}
# Correct Outliers (very obvious)
o <- revenue_WW_yearly[[1]]
o$Kulturertrag[o$AUI.ID == 278] <- o$Kulturertrag[o$AUI.ID == 278] * 10
o$Rev[o$AUI.ID == 278] <- o$Rev[o$AUI.ID == 278] * 10
revenue_WW_yearly[[1]] <- o
##################### 3. Save Files #######################################################
for (i in 1:length(years)) {
revenue_WW_yearly[[i]]$Jahr <- rep(Jahre[[i]], times = dim(revenue_WW_yearly[[i]])[1])
}
revenue_WW <- rbind(
revenue_WW_yearly[[1]], revenue_WW_yearly[[2]], revenue_WW_yearly[[3]], revenue_WW_yearly[[4]], revenue_WW_yearly[[5]],
revenue_WW_yearly[[6]], revenue_WW_yearly[[7]], revenue_WW_yearly[[8]], revenue_WW_yearly[[9]],
revenue_WW_yearly[[10]]
)
summary(revenue_WW)
write.csv2(revenue_WW, "III-WW_Revenue.csv", row.names = F)