Questions of gaussian family #56

TBW108 · 2024-08-03T13:32:47Z

Hi, Dongyuan! I have a question about generating scRNA-seq data using gaussian family.
When I generate scRNA-seq data with gaussian family like the code below

example_data <- construct_data(
  sce = example_sce,
  assay_use = "normcounts",
  celltype = "cell_type",
  pseudotime = NULL,
  spatial = NULL,
  other_covariates = NULL,
  corr_by = "1",
  ncell = 2000,
)

example_marginal <- fit_marginal(
  data = example_data,
  predictor = "gene",
  mu_formula = "cell_type",
  sigma_formula = "1",
  family_use = "gaussian",
  n_cores = 50,
  usebam = FALSE
)

set.seed(123)
example_copula <- fit_copula(
  sce = example_sce,
  assay_use = "counts",
  marginal_list = example_marginal,
  family_use = "gaussian",
  copula = "gaussian",
  n_cores = 50,
  input_data = example_data$dat,
  parallelization='pbmcmapply'
)

example_para <- extract_para(
  sce = example_sce,
  marginal_list = example_marginal,
  n_cores = 50,
  family_use = "gaussian",
  new_covariate = example_data$newCovariate,
  data = example_data$dat
)

it reports errors like this

How to solve it? How to generate lognorm scRNA-seq data with gaussian family? Thanks.

The text was updated successfully, but these errors were encountered:

SONGDONGYUAN1994 · 2024-08-04T15:38:49Z

@TBW108 Hi TBW, in your fit_copula() function, you also need to specify the assay = 'normcounts'. Please let me know if it works.

Best,
Dongyuan

TBW108 · 2024-08-09T08:18:20Z

Thanks! I have added assay_use = "normcounts" in fit_copula(). The code has been changed like this:

set.seed(123)
example_data <- construct_data(
  sce = example_sce,
  assay_use = "normcounts",
  celltype = "cell_type",
  pseudotime = NULL,
  spatial = NULL,
  other_covariates = NULL,
  corr_by = "1",
  ncell = 2000,
)

example_marginal <- fit_marginal(
  data = example_data,
  predictor = "gene",
  mu_formula = "cell_type",
  sigma_formula = "1",
  family_use = "gaussian",
  n_cores = 50,
  usebam = FALSE
)

set.seed(123)
example_copula <- fit_copula(
  sce = example_sce,
  assay_use = "normcounts",
  marginal_list = example_marginal,
  family_use = "gaussian",
  copula = "gaussian",
  n_cores = 50,
  input_data = example_data$dat,
)

example_para <- extract_para(
  sce = example_sce,
  assay_use = "normcounts",
  marginal_list = example_marginal,
  n_cores = 50,
  family_use = "gaussian",
  new_covariate = example_data$newCovariate,
  data = example_data$dat
)

However, R reports the error as below.

Is there any problem with my setting? Or the parameter `corr_by' should be set as "ind"? Thanks!

SONGDONGYUAN1994 · 2024-08-09T17:48:09Z

Hi,
Could you provide a reproducible dataset for the testing? I would be happy to check that.

Best,
Dongyuan

TBW108 · 2024-08-11T14:24:41Z

Hi, I just use the default dataset in scDesign3, but I filter out some genes that are not in the H1 pathways. The code is provided as below

library(scDesign3,verbose =FALSE)
library(SingleCellExperiment)
library(ggplot2)
library(DuoClustering2018)
library(tidyverse)
library(msigdbr)
library(zellkonverter)
library(reticulate)
library(scuttle)
library(scran)

# get data
Zhengmix4eq_sce <- get("sce_filteredExpr10_Zhengmix4eq")(metadata = FALSE)
logNormCounts(Zhengmix4eq_sce)


# replace the row name with gene
rownames(Zhengmix4eq_sce)=rowData(Zhengmix4eq_sce)$symbol
example_sce=Zhengmix4eq_sce

# normcounts(example_sce)
# logcounts(example_sce)

# extract genes
genesets=msigdbr("Homo sapiens", "H")
all_filter_genes=c()
groups=list()
for(geneset in unique(genesets$gs_name)){
    genes=genesets$gene_symbol[genesets$gs_name==geneset]
    filter_genes=rowData(example_sce)$symbol[rowData(example_sce)$symbol %in% genes] # nolint
    if(length(filter_genes)>5){
        all_filter_genes<-union(all_filter_genes, filter_genes)
        groups[[geneset]]<-filter_genes
    }
}
all_filter_genes
example_sce=example_sce[rowData(example_sce)$symbol %in% all_filter_genes,]
example_sce

# extract cells and divide them into two types
# selected_cells <- which(colData(example_sce)$phenoid %in% c("b.cells","regulatory.t"))
selected_cells <- which(colData(example_sce)$phenoid == c("regulatory.t"))
example_sce <- example_sce[,selected_cells]
cell_id=seq(1,dim(example_sce)[2])
sampled_cell_id <- sample(cell_id, size = length(cell_id)/2, replace = FALSE)
cell_type=rep(0,dim(example_sce)[2])
cell_type[sampled_cell_id]=1

colData(example_sce)$cell_type <- as.factor(cell_type)
example_sce

# get parameters
set.seed(123)
example_data <- construct_data(
  sce = example_sce,
  assay_use = "normcounts",
  celltype = "cell_type",
  pseudotime = NULL,
  spatial = NULL,
  other_covariates = NULL,
  corr_by = "1",
  ncell = 2000,
)

example_marginal <- fit_marginal(
  data = example_data,
  predictor = "gene",
  mu_formula = "cell_type",
  sigma_formula = "1",
  family_use = "gaussian",
  n_cores = 50,
  usebam = FALSE
)

set.seed(123)
example_copula <- fit_copula(
  sce = example_sce,
  assay_use = "normcounts",
  marginal_list = example_marginal,
  family_use = "gaussian",
  copula = "gaussian",
  n_cores = 50,
  input_data = example_data$dat,
)

example_para <- extract_para(
  sce = example_sce,
  assay_use = "normcounts",
  marginal_list = example_marginal,
  n_cores = 50,
  family_use = "gaussian",
  new_covariate = example_data$newCovariate,
  data = example_data$dat
)

SONGDONGYUAN1994 · 2024-09-25T17:06:52Z

@TBW108 Sorry for the late reply. I copied your code, and it runs Ok on my system. I am not sure about your system; could you set n_cores smaller based on your system and re-run it? In addition, you need reinstall scDesign3 from our Github.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Questions of gaussian family #56

Questions of gaussian family #56

TBW108 commented Aug 3, 2024 •

edited

Loading

SONGDONGYUAN1994 commented Aug 4, 2024

TBW108 commented Aug 9, 2024

SONGDONGYUAN1994 commented Aug 9, 2024

TBW108 commented Aug 11, 2024

SONGDONGYUAN1994 commented Sep 25, 2024

Questions of gaussian family #56

Questions of gaussian family #56

Comments

TBW108 commented Aug 3, 2024 • edited Loading

SONGDONGYUAN1994 commented Aug 4, 2024

TBW108 commented Aug 9, 2024

SONGDONGYUAN1994 commented Aug 9, 2024

TBW108 commented Aug 11, 2024

SONGDONGYUAN1994 commented Sep 25, 2024

TBW108 commented Aug 3, 2024 •

edited

Loading