From 69c4d90c7dda54560b898de27adf80bc707af96a Mon Sep 17 00:00:00 2001 From: Ruben Dries Date: Wed, 3 Jul 2024 09:45:04 -0400 Subject: [PATCH 1/2] add writeChatGPTqueryDEG --- R/general_help.R | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/R/general_help.R b/R/general_help.R index 67e6e7c13..853bfc91d 100644 --- a/R/general_help.R +++ b/R/general_help.R @@ -272,6 +272,50 @@ rank_binarize_wrapper <- function( } +## chatgpt queries #### + +#' @title writeChatGPTqueryDEG +#' @name writeChatGPTqueryDEG +#' @description This function writes a query as a .txt file that can be used with +#' ChatGPT or a similar LLM service to find the most likely cell types based on the +#' top differentially expressed genes (DEGs) between identified clusters. +#' @param DEG_output data.table returned by the differential expression functions; it must contain the columns 'cluster' and 'feats' +#' @param top_n_genes number of genes (first rows per cluster) to keep for each cluster +#' @param tissue_type tissue type that is inserted into the query text +#' @param folder_name path to the folder where you want to save the .txt file +#' @param file_name name of .txt file +#' @returns writes a .txt file to the desired location; the first row is the query and every following row holds one cluster label followed by its genes +#' @details This function does not run any LLM service. It simply creates the .txt +#' file that can then be used with any LLM service (e.g. OpenAI, Gemini, ...) +#' @export +writeChatGPTquery = function(DEG_output, + top_n_genes = 10, + tissue_type = 'human breast cancer', + folder_name = getwd(), + file_name = 'chatgpt_query.txt') { + + chatgpt_query = paste0("Identify cell types of ", tissue_type, " tissue using the following markers. Identify one cell type for each row. 
Only provide the cell type name and the marker genes used for cell type identification.") + + selected_DEG_output = DEG_output[, head(.SD, top_n_genes), by="cluster"] + + finallist = list() + finallist[[1]] = chatgpt_query + + for(clus in unique(selected_DEG_output$cluster)) { + x = selected_DEG_output[cluster == clus][['feats']] + x = c(clus, x) + finallist[[length(finallist)+1]] = x + } + + outputdt = data.table::data.table(finallist) + + cat('\n start writing \n') + data.table::fwrite(x = outputdt, + file = file.path(folder_name, file_name), + sep2 = c(""," ",""), col.names = FALSE) + +} + # IDs #### From a360eb01631816c373968ca8d470dfaace2a39d8 Mon Sep 17 00:00:00 2001 From: Ruben Dries Date: Wed, 3 Jul 2024 09:46:21 -0400 Subject: [PATCH 2/2] add writeChatGPTqueryDEG --- R/general_help.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/general_help.R b/R/general_help.R index 853bfc91d..d1d87323a 100644 --- a/R/general_help.R +++ b/R/general_help.R @@ -288,11 +288,11 @@ rank_binarize_wrapper <- function( #' @details This function does not run any LLM service. It simply creates the .txt #' file that can then be used with any LLM service (e.g. OpenAI, Gemini, ...) #' @export -writeChatGPTquery = function(DEG_output, - top_n_genes = 10, - tissue_type = 'human breast cancer', - folder_name = getwd(), - file_name = 'chatgpt_query.txt') { +writeChatGPTqueryDEG = function(DEG_output, + top_n_genes = 10, + tissue_type = 'human breast cancer', + folder_name = getwd(), + file_name = 'chatgpt_query.txt') { chatgpt_query = paste0("Identify cell types of ", tissue_type, " tissue using the following markers. Identify one cell type for each row. Only provide the cell type name and the marker genes used for cell type identification.")