Merge 'import-data-from-coda' branch

unjournal · Jul 17, 2024 · 9d4e11e · 9d4e11e
2 parents 08defb2 + 9671945
commit 9d4e11e
Show file tree

Hide file tree

Showing 23 changed files with 1,455 additions and 811 deletions.
diff --git a/.github/workflows/render-and-publish.yml b/.github/workflows/render-and-publish.yml
@@ -34,21 +34,27 @@ jobs:
           sudo apt-get install jags libcurl4-openssl-dev \
             libharfbuzz-dev libfribidi-dev libsodium-dev
 
+      - name: Set up Python 3.11 and install requirements
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+          cache: 'pip' # caching pip dependencies
+      - run: pip install -r requirements.txt
+
+      - name: Import Unjournal data
+        run: |
+          source .venv/bin/activate
+          python3 code/import-unjournal-data.py
+        env: 
+          CODA_API_KEY: ${{ secrets.CODA_API_KEY }}
+
       - name: Setup dependencies with renv
         uses: r-lib/actions/setup-renv@v2
         with:
           cache-version: 2 # increment this to invalidate the renv cache
         env:
           GITHUB_PAT: ${{ secrets.RENV_GITHUB_PAT }}
 
-      - name: Import Unjournal data
-        run: |
-          source("code/import-unjournal-data.R")
-          save_data()
-        env: 
-          AIRTABLE_API_KEY: ${{ secrets.AIRTABLE_API_KEY }}
-        shell: Rscript {0}
-
       - name: Upload Unjournal data as artifact
         uses: actions/upload-artifact@v4
         with:
@@ -59,7 +65,7 @@ jobs:
         run: |
           git config --global user.email "[email protected]"
           git config --global user.name "Github Actions"
-          git add data/evals.csv data/all_papers_p.csv
+          git add data/*.csv
           git diff-index HEAD data/*.csv || git commit -m "Github actions: auto-update data"
           git push
       

diff --git a/TODO.md b/TODO.md
@@ -7,6 +7,10 @@ the [Unjournal coda.io project management website](https://coda.io/d/Project-Man
 
 [ ] Rewrite import-unjournal-data to use Coda.io (once the database schema is
     nailed down)
+    - rework scripts currently using the R version to read the csv file
+    - wait for column names etc. to settle down, then rewrite scripts using
+      the csv
+    - import for all_papers_p.csv
 [ ] Maybe consider splitting the deploy action up
 [x] Add field descriptions to data folder, or point to documentation elsewhere.
 [x] Put the DataExplorer app deployment into deploy.yml and not in 

diff --git a/chapters/aggregation.qmd b/chapters/aggregation.qmd
@@ -13,7 +13,7 @@ library(irr)
 # add the modified DistributionWAgg function to aggregate our ratings
 source(here("code", "DistAggModified.R"))
 
-evals_pub <- read_rds(file = here("data", "evals.Rdata"))
+ratings <- readr::read_csv(here("data/rsx_evalr_rating.csv"))
 
 # Lists of categories
 rating_cats <- c("overall", "adv_knowledge", "methods", "logic_comms", "real_world", "gp_relevance", "open_sci")
@@ -153,6 +153,7 @@ More information about Krippendorff's alpha and links to further reading can be
 
 ```{r}
 #| echo: false
+#| eval: false
 #| fig-height: 8
 
 # function that returns kripp.alpha
@@ -161,29 +162,31 @@ More information about Krippendorff's alpha and links to further reading can be
 # and converts the nested data into
 # a matrix that kripp.alpha can take as input
 mod_kripp_alpha <- function(dat) {
-  dat = as.matrix.POSIXlt(dat)
-  if(nrow(dat)>1) {
-    a = kripp.alpha(dat, method = "ratio")
-    res = a$value
+  dat <- as.matrix(dat)
+  if (nrow(dat) > 1) {
+    a <- kripp.alpha(dat, method = "ratio")
+    res <- a$value
   } else {
-    res = NA_integer_
+    res <- NA_integer_
   }
   return(res)
 }
 
 # plot
-evals_pub %>% 
-  group_by(paper_abbrev) %>% 
-  select(paper_abbrev, all_of(rating_cats)) %>%
-  nest(data = -paper_abbrev) %>%
+ratings %>% 
+  select(research, criteria, middle_rating)
+  tidyr::pivot_wider(id_cols = research, )
+  group_by(research) %>% 
+  select(research, all_of(rating_cats)) %>%
+  nest(data = -research) %>%
   mutate(KrippAlpha = map_dbl(.x = data, .f = mod_kripp_alpha)) %>% 
   unnest(data) %>% 
   group_by(KrippAlpha, add = T) %>% 
   summarize(Raters = n()) %>%
   ungroup() %>% 
   filter(Raters > 1) %>% 
-  ggplot(aes(x = reorder(paper_abbrev, KrippAlpha), y = KrippAlpha)) +
-  geom_point(aes(color = paper_abbrev, size = Raters), 
+  ggplot(aes(x = reorder(research, KrippAlpha), y = KrippAlpha)) +
+  geom_point(aes(color = research, size = Raters), 
              stat = "identity", shape = 16, stroke = 1) +
   coord_flip() +
   labs(x = "Paper", y = "Krippendorf's Alpha", caption = "Papers with < 2 raters no pictured") +