-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathprepare_data.R
80 lines (70 loc) · 3.64 KB
/
prepare_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# In this file, write the R-code necessary to load your original data file
# (e.g., an SPSS, Excel, or SAS-file), and convert it to a data.frame. Then,
# use the function open_data(your_data_frame) or closed_data(your_data_frame)
# to store the data.
library(worcs)
library(foreign)
library(haven)
source("scales_list.r")
# ---- Netherlands (nl): SPSS file ---------------------------------------------
# Read the SPSS file as a data.frame; use.value.labels = TRUE turns variables
# that carry value labels into factors.
nl <- read.spss("nl.sav", to.data.frame = TRUE, use.value.labels = TRUE)
names(nl) <- tolower(names(nl))

# Stop, naming the offending variables, if the file lacks any item listed in
# scales_list$nl (previously the check was computed but its result was unused).
items_nl <- unlist(scales_list$nl)
missing_nl <- items_nl[!(items_nl %in% names(nl))]
if (length(missing_nl) > 0) {
  stop(
    "Variables listed in scales_list$nl but missing from nl.sav: ",
    paste(missing_nl, collapse = ", "),
    call. = FALSE
  )
}
# Keep only the scale items, in the order given by scales_list$nl.
nl <- nl[items_nl]

# Rescale variables, because otherwise the observed covariances for some
# variables will be a factor 1000 larger than for others. This complicates model
# convergence. Variables whose observed maximum exceeds 90 are divided by 10;
# which() skips variables for which no maximum is reported (NA).
desc <- descriptives(nl)
rescale_these <- desc$name[which(desc$max > 90)]
nl[, rescale_these] <- nl[, rescale_these] / 10

# Rename the items to <scale name>_<position of the item within its scale>.
# rep()/sequence() build all names in one vectorised step and, unlike
# 1:length(x), produce no names for a scale without items.
names(nl) <- paste0(
  rep(names(scales_list$nl), lengths(scales_list$nl)),
  "_",
  sequence(lengths(scales_list$nl))
)

# Replace factors by their integer level codes.
is_factor <- vapply(nl, is.factor, logical(1))
nl[is_factor] <- lapply(nl[is_factor], as.numeric)

# Reverse-code the flagged items as (observed maximum + 1) - value.
# rev_these holds column positions, so reverse_coded[["nl"]] has to be in the
# same order as the columns of nl.
# NOTE(review): this mirrors the response scale only if the lowest category is
# coded 1 and the highest category was observed at least once -- confirm.
rev_these <- which(unlist(reverse_coded[["nl"]]))
# Guard: with no flagged items the maxima would be an empty list and the
# addition below would fail.
if (length(rev_these) > 0) {
  maxval <- vapply(nl[rev_these], max, numeric(1), na.rm = TRUE) + 1
  nl[rev_these] <- mapply(
    function(top, item) top - item,
    maxval,
    nl[rev_these]
  )
}

# Store the prepared data via worcs::closed_data(), with synthetic = FALSE.
closed_data(nl, synthetic = FALSE)
# ---- Denmark (dk): Stata file ------------------------------------------------
dk <- read_dta("dk.dta")
names(dk) <- tolower(names(dk))

# Stop, naming the offending variables, if the file lacks any item listed in
# scales_list$dk (previously the check was computed but its result was unused).
items_dk <- unlist(scales_list$dk)
missing_dk <- items_dk[!(items_dk %in% names(dk))]
if (length(missing_dk) > 0) {
  stop(
    "Variables listed in scales_list$dk but missing from dk.dta: ",
    paste(missing_dk, collapse = ", "),
    call. = FALSE
  )
}
# Keep only the scale items, in the order given by scales_list$dk.
dk <- dk[items_dk]

# Rename the items to <scale name>_<position of the item within its scale>.
# rep()/sequence() build all names in one vectorised step and, unlike
# 1:length(x), produce no names for a scale without items.
names(dk) <- paste0(
  rep(names(scales_list$dk), lengths(scales_list$dk)),
  "_",
  sequence(lengths(scales_list$dk))
)

# Reverse-code the flagged items as (observed maximum + 1) - value.
# rev_these holds column positions, so reverse_coded[["dk"]] has to be in the
# same order as the columns of dk.
# NOTE(review): this mirrors the response scale only if the lowest category is
# coded 1 and the highest category was observed at least once -- confirm.
rev_these <- which(unlist(reverse_coded[["dk"]]))
# Guard: with no flagged items sapply() returns an empty list and the addition
# below would fail.
if (length(rev_these) > 0) {
  # NOTE(review): sapply() is kept on purpose. The columns returned by
  # read_dta() may carry haven's labelled class; confirm that vapply() yields
  # the same plain numeric maxima before switching.
  maxval <- sapply(dk[rev_these], max, na.rm = TRUE) + 1
  dk[rev_these] <- mapply(
    function(top, item) top - item,
    maxval,
    dk[rev_these]
  )
}

# Store the prepared data via worcs::closed_data(), with synthetic = FALSE.
closed_data(dk, synthetic = FALSE)
# ---- United States (us): CSV file --------------------------------------------
# The first two lines of the file are skipped when reading the values; the
# variable names are taken separately from the first line.
us <- read.csv(
  "us_original.csv",
  stringsAsFactors = FALSE,
  skip = 2,
  header = FALSE
)
# NOTE(review): splitting on "," assumes that no variable name in the header
# line is quoted or contains a comma -- confirm against the raw file.
names_us <- readLines("us_original.csv", n = 1)
names_us <- tolower(strsplit(names_us, ",")[[1]])
names(us) <- names_us

# Stop, naming the offending variables, if the file lacks any item listed in
# scales_list$us (previously the check was computed but its result was unused).
items_us <- unlist(scales_list$us)
missing_us <- items_us[!(items_us %in% names(us))]
if (length(missing_us) > 0) {
  stop(
    "Variables listed in scales_list$us but missing from us_original.csv: ",
    paste(missing_us, collapse = ", "),
    call. = FALSE
  )
}
# Keep only the scale items, in the order given by scales_list$us.
us <- us[items_us]

# Rename the items to <scale name>_<position of the item within its scale>.
# rep()/sequence() build all names in one vectorised step and, unlike
# 1:length(x), produce no names for a scale without items.
names(us) <- paste0(
  rep(names(scales_list$us), lengths(scales_list$us)),
  "_",
  sequence(lengths(scales_list$us))
)

# Reverse-code the flagged items as (observed maximum + 1) - value.
# rev_these holds column positions, so reverse_coded[["us"]] has to be in the
# same order as the columns of us.
# NOTE(review): this mirrors the response scale only if the lowest category is
# coded 1 and the highest category was observed at least once -- confirm.
rev_these <- which(unlist(reverse_coded[["us"]]))
# Guard: with no flagged items the maxima would be an empty list and the
# addition below would fail.
if (length(rev_these) > 0) {
  maxval <- vapply(us[rev_these], max, numeric(1), na.rm = TRUE) + 1
  us[rev_these] <- mapply(
    function(top, item) top - item,
    maxval,
    us[rev_these]
  )
}

# Store the prepared data via worcs::closed_data(), with synthetic = FALSE.
closed_data(us, synthetic = FALSE)
# ---- tris: CSV file ----------------------------------------------------------
tris <- read.csv("aita_mac_secs 16.06.csv", stringsAsFactors = FALSE)
names(tris) <- tolower(names(tris))

# Columns whose name starts with "mac", in file order, and a grouping factor
# that cuts positions 1..21 into 7 consecutive groups of 3. Both are computed
# once here instead of once per group inside the lambda.
# NOTE(review): this relies on the first 21 "mac" columns being ordered scale
# by scale -- confirm against the raw file.
mac_items <- grep("^mac", names(tris), value = TRUE)
mac_group <- cut(1:21, 7)

# Item names per scale: the two SECS scales are spelled out, the seven MAC
# scales are taken as consecutive triples of the "mac" columns.
scales_tris <- c(
  list(
    secs_soc = c(
      "secs_abortion", "secs_security", "secs_religion", "secs_marriage",
      "secs_traditionalvalues", "secs_familyunit", "secs_patriotism"
    ),
    secs_eco = c(
      "secs_limitedgovernment", "secs_welfare", "secs_gun", "secs_fiscal",
      "secs_business"
    )
  ),
  lapply(unique(mac_group), function(i) mac_items[which(mac_group == i)])
)
# The MAC scales take their names from the dk scale list, minus its first two
# entries.
names(scales_tris)[-c(1:2)] <- names(scales_list$dk[-c(1:2)])

# Keep only the scale items, in the order given by scales_tris.
tris <- tris[unlist(scales_tris)]

# Reverse-code the two listed items as (observed maximum + 1) - value. This is
# done by name, so it has to happen before the columns are renamed below.
# NOTE(review): this mirrors the response scale only if the lowest category is
# coded 1 and the highest category was observed at least once -- confirm.
rev_these <- c("secs_abortion", "secs_welfare")
maxval <- vapply(tris[rev_these], max, numeric(1), na.rm = TRUE) + 1
tris[rev_these] <- mapply(
  function(top, item) top - item,
  maxval,
  tris[rev_these]
)

# Rename the items to <scale name>_<position of the item within its scale>.
names(tris) <- paste0(
  rep(names(scales_tris), lengths(scales_tris)),
  "_",
  sequence(lengths(scales_tris))
)

# Store the prepared data via worcs::closed_data(), with synthetic = FALSE.
closed_data(tris, synthetic = FALSE)
# ---- Descriptives and correlations per data set -----------------------------
# For each prepared data set, build a tidy_sem object, let tidySEM create the
# scales from it, and write the resulting descriptives and correlations to
# "descriptives_<name>.csv" and "correlations_<name>.csv".
dats <- list(dk = dk, nl = nl, us = us, tris = tris)
for (sample_name in names(dats)) {
  scaled <- tidySEM::create_scales(tidySEM::tidy_sem(dats[[sample_name]]))
  write.csv(
    scaled$descriptives,
    file = sprintf("descriptives_%s.csv", sample_name),
    row.names = FALSE
  )
  write.csv(
    scaled$correlations,
    file = sprintf("correlations_%s.csv", sample_name),
    row.names = FALSE
  )
}