google-research
111 строк · 2.8 Кб
1#!/usr/bin/env Rscript
2# Example use:
3# time Rscript scran_process.R \
4# --input_path='Zhengmix4eq.rds' \
5# --output_loom='Zhengmix4eq.loom' \
6# --use_ERCC=1 \
7# --use_sum_factors=1 \
8# --n_pcs=10 \
9# --n_tops=500 \
10# --assay_type='logcounts'
11
12suppressPackageStartupMessages({
13library("argparse")
14library("SingleCellExperiment")
15library("scran")
16library("scater")
17library("Seurat")
18library("loomR")
19})
20
21parser <- ArgumentParser()
22
23parser$add_argument("--input_path",
24type = "character",
25help = "Tissue to run on, cleaned SingleCellExperiment"
26)
27parser$add_argument("--output_loom",
28type = "character",
29help = "Output loom"
30)
31parser$add_argument("--use_sum_factors",
32type = "integer",
33help = "Whether to use sum factors for the normalization"
34)
35parser$add_argument("--use_ERCC",
36type = "integer",
37help = "Whether to use the ERCC normalization."
38)
39parser$add_argument("--assay_type",
40type = "character",
41help = "Which assay to run PCA on, must be one of `logcounts` or `counts`"
42)
43parser$add_argument("--n_pcs",
44type = "integer",
45help = "Number of PCs to compute"
46)
47parser$add_argument("--n_tops",
48type = "integer",
49help = "Number of genes to use for PCA"
50)
51
52args <- parser$parse_args()
53
54write_sce_to_loom <- function(sce, out_path) {
55facs_seurat <- as.Seurat(sce, counts = "counts", data = "counts")
56# as.loom expects a layer for variable genes.
57facs_seurat <- FindVariableFeatures(facs_seurat)
58# as.loom does not know how to deal with NA.
59Idents(facs_seurat) <- "None"
60
61# We need to have both normalized and scaled the data with Seurat, otherwise
62# Seurat does not write reducedDim to the loom object.
63facs_seurat <- NormalizeData(facs_seurat)
64facs_seurat <- ScaleData(facs_seurat)
65
66facs.loom <- as.loom(facs_seurat,
67filename = out_path,
68verbose = TRUE, overwrite = TRUE
69)
70facs.loom$close_all()
71}
72
73process_sample <- function(sce,
74use_sum_factors,
75use_ercc,
76assay_type,
77n_pcs,
78n_tops) {
79dat <- sce
80isSpike(dat, "ERCC") <- grepl("^ERCC", rownames(dat))
81
82if (use_sum_factors) {
83dat <- computeSumFactors(dat)
84}
85if (use_ercc) {
86dat <- computeSpikeFactors(dat, type = "ERCC", general.use = FALSE)
87}
88
89if (assay_type == "logcounts") {
90dat <- normalize(dat)
91} else if (assay_type == "counts") {
92dat <- normalize(dat, return_log = FALSE)
93}
94
95dat <- runPCA(dat, exprs_values = assay_type, ncomponents = n_pcs, ntop = n_tops)
96
97dat
98}
99
100sce <- readRDS(args$input_path)
101use_sum_factors <- args$use_sum_factors == 1
102use_ERCC <- args$use_ERCC == 1
103processed.data <- process_sample(
104sce,
105use_sum_factors,
106use_ERCC,
107args$assay_type,
108args$n_pcs,
109args$n_tops
110)
111write_sce_to_loom(processed.data, out_path = args$output_loom)
112