Skip to content

Commit

Permalink
Merge branch 'dhj-updates'
Browse files Browse the repository at this point in the history
  • Loading branch information
hughjonesd committed May 20, 2024
2 parents 9417c6b + ef02724 commit 55135a9
Show file tree
Hide file tree
Showing 13 changed files with 1,807 additions and 329 deletions.
218 changes: 7 additions & 211 deletions chapters/evaluation_data_input.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ library(formattable)
# others ----
library(here)
#library(aggreCAT)
library(aggreCAT)
library(DescTools)
select <- dplyr::select
Expand All @@ -33,175 +33,19 @@ Below, the evaluation data is input from an Airtable, which itself was largely h

:::

```{r}
#| label: input_at
#| code-summary: "input from airtable"
# Airtable base to read from (the previous base ID is kept below for reference).
#base_id <- "appbPYEw9nURln7Qg"
base_id <- "applDG6ifmUmeEJ7j" # new ID to cover "UJ - research & core members" base
# The API key is expected to come from the AIRTABLE_API_KEY environment
# variable (e.g. set in .Renviron).
# NOTE(review): this call writes the variable back with its own current value,
# so it does not supply a key by itself -- confirm whether it is still needed.
Sys.setenv(AIRTABLE_API_KEY = Sys.getenv("AIRTABLE_API_KEY"))
# Read "output_eval" from the base into `evals`
evals <- air_get(base = base_id, "output_eval")
```{r read-data}
all_pub_records <- data.frame()
pub_records <- air_select(base = base_id, table = "crucial_rsx")
all_papers_p <- readRDS(here("data/all_papers_p.Rdata"))
evals_pub <- readRDS(here("data/evals.Rdata"))
# Append the records to the list
all_pub_records <- bind_rows(all_pub_records, pub_records)
# Page through the "crucial_rsx" table. A page of exactly 100 rows is treated
# as a sign that more records may remain, so keep requesting pages until a
# page with a different row count comes back.
while(nrow(pub_records) == 100) {
# Get the offset marker from the page just fetched
# NOTE(review): presumably Airtable's pagination cursor -- confirm against the airtabler docs
offset <- get_offset(pub_records)
# Fetch the next page of records, starting from that offset
pub_records <- air_select(base = base_id, table = "crucial_rsx", offset = offset)
# Append the new page to the accumulated data frame
all_pub_records <- bind_rows(all_pub_records, pub_records)
}
# housekeeping
rm(pub_records)
```

```{r}
#| label: extract & clean
#| code-summary: "just the useful and publish-able data, clean a bit"
# clean evals names to snakecase
colnames(evals) <- snakecase::to_snake_case(colnames(evals))
evals_pub <- evals %>%
dplyr::rename(stage_of_process = stage_of_process_todo_from_crucial_research_2) %>%
mutate(stage_of_process = unlist(stage_of_process)) %>%
dplyr::filter(stage_of_process == "published") %>%
select(id,
crucial_research,
paper_abbrev,
evaluator_name,
category,
source_main,
author_agreement,
overall,
lb_overall,
ub_overall,
conf_index_overall,
advancing_knowledge_and_practice,
lb_advancing_knowledge_and_practice,
ub_advancing_knowledge_and_practice,
conf_index_advancing_knowledge_and_practice,
methods_justification_reasonableness_validity_robustness,
lb_methods_justification_reasonableness_validity_robustness,
ub_methods_justification_reasonableness_validity_robustness,
conf_index_methods_justification_reasonableness_validity_robustness,
logic_communication, lb_logic_communication, ub_logic_communication,
conf_index_logic_communication,
engaging_with_real_world_impact_quantification_practice_realism_and_relevance,
lb_engaging_with_real_world_impact_quantification_practice_realism_and_relevance,
ub_engaging_with_real_world_impact_quantification_practice_realism_and_relevance,
conf_index_engaging_with_real_world_impact_quantification_practice_realism_and_relevance,
relevance_to_global_priorities,
lb_relevance_to_global_priorities,
ub_relevance_to_global_priorities,
conf_index_relevance_to_global_priorities,
journal_quality_predict,
lb_journal_quality_predict,
ub_journal_quality_predict,
conf_index_journal_quality_predict,
open_collaborative_replicable,
conf_index_open_collaborative_replicable,
lb_open_collaborative_replicable,
ub_open_collaborative_replicable,
merits_journal,
lb_merits_journal,
ub_merits_journal,
conf_index_merits_journal)
# shorten names (before you expand into columns)
new_names <- c(
"eval_name" = "evaluator_name",
"cat" = "category",
"crucial_rsx" = "crucial_research",
"conf_overall" = "conf_index_overall",
"adv_knowledge" = "advancing_knowledge_and_practice",
"lb_adv_knowledge" = "lb_advancing_knowledge_and_practice",
"ub_adv_knowledge" = "ub_advancing_knowledge_and_practice",
"conf_adv_knowledge" = "conf_index_advancing_knowledge_and_practice",
"methods" = "methods_justification_reasonableness_validity_robustness",
"lb_methods" = "lb_methods_justification_reasonableness_validity_robustness",
"ub_methods" = "ub_methods_justification_reasonableness_validity_robustness",
"conf_methods" = "conf_index_methods_justification_reasonableness_validity_robustness",
"logic_comms" = "logic_communication",
"lb_logic_comms" = "lb_logic_communication",
"ub_logic_comms" = "ub_logic_communication",
"conf_logic_comms" = "conf_index_logic_communication",
"real_world" = "engaging_with_real_world_impact_quantification_practice_realism_and_relevance",
"lb_real_world" = "lb_engaging_with_real_world_impact_quantification_practice_realism_and_relevance",
"ub_real_world" = "ub_engaging_with_real_world_impact_quantification_practice_realism_and_relevance",
"conf_real_world" = "conf_index_engaging_with_real_world_impact_quantification_practice_realism_and_relevance",
"gp_relevance" = "relevance_to_global_priorities",
"lb_gp_relevance" = "lb_relevance_to_global_priorities",
"ub_gp_relevance" = "ub_relevance_to_global_priorities",
"conf_gp_relevance" = "conf_index_relevance_to_global_priorities",
"journal_predict" = "journal_quality_predict",
"lb_journal_predict" = "lb_journal_quality_predict",
"ub_journal_predict" = "ub_journal_quality_predict",
"conf_journal_predict" = "conf_index_journal_quality_predict",
"open_sci" = "open_collaborative_replicable",
"conf_open_sci" = "conf_index_open_collaborative_replicable",
"lb_open_sci" = "lb_open_collaborative_replicable",
"ub_open_sci" = "ub_open_collaborative_replicable",
"conf_merits_journal" = "conf_index_merits_journal"
)
evals_pub <- evals_pub %>%
rename(!!!new_names)
# Create a list of labels with the old, longer names
labels <- str_replace_all(new_names, "_", " ") %>% str_to_title()
# Assign labels to the dataframe / tibble
# (maybe this can be done as an attribute, not currently working)
# for(i in seq_along(labels)) {
# col_name <- new_names[names(new_names)[i]]
# label <- labels[i]
# attr(evals_pub[[col_name]], "label") <- label
# }
# Spread the list-column `cat` into separate columns (cat_1, cat_2, ...) and
# flatten every column with unlist(). `%<>%` pipes evals_pub through the chain
# and assigns the result back to evals_pub.
evals_pub %<>%
tidyr::unnest_wider(cat, names_sep = "_") %>% # one column per category entry
mutate(across(everything(), unlist)) # TODO: check why some of these are lists in the first place
# Replace evaluator names that consist of a single word, or that contain the
# word "Anonymous", with "Anonymous_<i>", where <i> is the element's position
# in eval_name (numbering follows row position, so it is not consecutive
# across the replaced names). All other names are kept unchanged.
evals_pub$eval_name <- ifelse(
grepl("^\\b\\w+\\b$|\\bAnonymous\\b", evals_pub$eval_name),
paste0("Anonymous_", seq_along(evals_pub$eval_name)),
evals_pub$eval_name
)
#housekeeping
rm(evals)
#Todo -- check the unlist is not propagating the entry
#Note: category, topic_subfield, and source have multiple meaningful categories. These will need care
```


```{r evals_pub to longer format}
evals_pub_long <- evals_pub %>%
pivot_longer(cols = -c(id, crucial_rsx, paper_abbrev, eval_name,
cat_1,cat_2, cat_3,source_main,author_agreement),
pivot_longer(cols = -c(id, crucial_rsx, crucial_rsx_id,
paper_abbrev, eval_name,
cat_1,cat_2, cat_3, source_main, author_agreement),
names_pattern = "(lb_|ub_|conf_)?(.+)",
names_to = c("value_type", "rating_type")) %>% # one line per rating type
mutate(value_type = if_else(value_type == "", "est_", value_type)) %>% #add main rating id
Expand Down Expand Up @@ -371,32 +215,6 @@ evals_pub_long <- evals_pub_long %>%

We cannot publicly share the 'papers under consideration', but we can share some statistics on these papers. Let's generate an ID (or, later, a salted hash) for each such paper, and keep only the shareable features of interest.

```{r}
#| code-summary: "keep shareable variables from all papers"
# Keep only the shareable columns of the full paper records.
# Each column is listed once (`category` was previously listed twice, which
# select() collapses to a single column at its first position, so the result
# is unchanged). Column names that are not syntactic R names are given as
# double-quoted strings.
all_papers_p <- all_pub_records %>%
  dplyr::select(
    id,
    category,
    cfdc_DR,
    "confidence -- user entered",
    cfdc_assessor,
    avg_cfdc,
    cause_cat_1_text,
    cause_cat_2_text,
    topic_subfield_text,
    eval_manager_text,
    "publication status",
    "Contacted author?",
    "stage of process/todo",
    "source_main",
    "author permission?",
    "Direct Kotahi Prize Submission?",
    "createdTime"
  )
```


```{r ratings_agg, warning=FALSE}
#| code-summary: "Create and add aggregated ratings information to evals_pub_long"
Expand Down Expand Up @@ -441,8 +259,6 @@ evals_pub_long <- evals_pub_long %>%
```




```{r shiny_data_explorer}
#| code-summary: "create a dataset to be used as the input to the shiny app"
Expand All @@ -460,23 +276,3 @@ evals_pub_long %>%
write_rds(file = here("shinyapp/DataExplorer", "shiny_explorer.rds"))
```


```{r save data}
#| label: savedata
#| code-summary: "save data for others' use"
# Write each dataset under data/ twice: once in a serialized R format and once
# as CSV.
# NOTE(review): the first two serialized files are written with saveRDS() but
# carry an ".Rdata" extension, so they must be read back with readRDS(), not
# load().
all_papers_p %>% saveRDS(file = here("data", "all_papers_p.Rdata"))
all_papers_p %>% write_csv(file = here("data", "all_papers_p.csv"))
evals_pub %>% saveRDS(file = here("data", "evals.Rdata"))
evals_pub %>% write_csv(file = here("data", "evals.csv"))
evals_pub_long %>% write_rds(file = here("data", "evals_long.rds"))
evals_pub_long %>% write_csv(file = here("data", "evals_long.csv"))
# Example of reading a saved file back:
#evals_pub %>% readRDS(file = here("data", "evals.Rdata"))
```

6 changes: 3 additions & 3 deletions docs/chapters/aggregation.html
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@
<div class="sidebar-title mb-0 py-0">
<a href="../">The Unjournal evaluations: data and analysis</a>
<div class="sidebar-tools-main">
<a href="https://github.com/daaronr/unjournaldata/" title="Source Code" class="quarto-navigation-tool px-1" aria-label="Source Code"><i class="bi bi-github"></i></a>
<a href="https://github.com/unjournal/unjournaldata/" title="Source Code" class="quarto-navigation-tool px-1" aria-label="Source Code"><i class="bi bi-github"></i></a>
<a href="" class="quarto-reader-toggle quarto-navigation-tool px-1" onclick="window.quartoToggleReader(); return false;" title="Toggle reader mode">
<div class="quarto-reader-toggle-btn">
<i class="bi"></i>
Expand Down Expand Up @@ -162,7 +162,7 @@ <h2 id="toc-title">Table of contents</h2>
<li><a href="#decomposing-variation-dimension-reduction-simple-linear-models" id="toc-decomposing-variation-dimension-reduction-simple-linear-models" class="nav-link" data-scroll-target="#decomposing-variation-dimension-reduction-simple-linear-models"><span class="header-section-number">3.3</span> Decomposing variation, dimension reduction, simple linear models</a></li>
<li><a href="#later-possiblities" id="toc-later-possiblities" class="nav-link" data-scroll-target="#later-possiblities"><span class="header-section-number">3.4</span> Later possiblities</a></li>
</ul>
<div class="toc-actions"><ul><li><a href="https://github.com/daaronr/unjournaldata/edit/main/chapters/aggregation.qmd" class="toc-action"><i class="bi bi-github"></i>Edit this page</a></li><li><a href="https://github.com/daaronr/unjournaldata/issues/new" class="toc-action"><i class="bi empty"></i>Report an issue</a></li></ul></div></nav>
<div class="toc-actions"><ul><li><a href="https://github.com/unjournal/unjournaldata/edit/main/chapters/aggregation.qmd" class="toc-action"><i class="bi bi-github"></i>Edit this page</a></li><li><a href="https://github.com/unjournal/unjournaldata/issues/new" class="toc-action"><i class="bi empty"></i>Report an issue</a></li></ul></div></nav>
</div>
<!-- main -->
<main class="content" id="quarto-document-content">
Expand Down Expand Up @@ -736,4 +736,4 @@ <h2 data-number="3.4" class="anchored" data-anchor-id="later-possiblities"><span



<footer class="footer"><div class="nav-footer"><div class="nav-footer-center"><div class="toc-actions d-sm-block d-md-none"><ul><li><a href="https://github.com/daaronr/unjournaldata/edit/main/chapters/aggregation.qmd" class="toc-action"><i class="bi bi-github"></i>Edit this page</a></li><li><a href="https://github.com/daaronr/unjournaldata/issues/new" class="toc-action"><i class="bi empty"></i>Report an issue</a></li></ul></div></div></div></footer></body></html>
<footer class="footer"><div class="nav-footer"><div class="nav-footer-center"><div class="toc-actions d-sm-block d-md-none"><ul><li><a href="https://github.com/unjournal/unjournaldata/edit/main/chapters/aggregation.qmd" class="toc-action"><i class="bi bi-github"></i>Edit this page</a></li><li><a href="https://github.com/unjournal/unjournaldata/issues/new" class="toc-action"><i class="bi empty"></i>Report an issue</a></li></ul></div></div></div></footer></body></html>
Loading

0 comments on commit 55135a9

Please sign in to comment.