google-research

scran_process.R
111 строк · 2.8 Кб
Перенос по словам
1
#!/usr/bin/env Rscript
2
# Example use:
3
# time Rscript scran_process.R \
4
# --input_path='Zhengmix4eq.rds' \
5
# --output_loom='Zhengmix4eq.loom' \
6
# --use_ERCC=1 \
7
# --use_sum_factors=1 \
8
# --n_pcs=10 \
9
# --n_tops=500 \
10
# --assay_type='logcounts'
11

12
suppressPackageStartupMessages({
13
  library("argparse")
14
  library("SingleCellExperiment")
15
  library("scran")
16
  library("scater")
17
  library("Seurat")
18
  library("loomR")
19
})
20

21
# Command-line interface.
#
# Flags the script cannot run without are marked `required`, so a forgotten
# flag is reported by the argument parser instead of surfacing later as a
# cryptic error on a NULL value (e.g. "argument is of length zero").
# `--n_pcs` and `--n_tops` stay optional and are passed on to runPCA as given.
parser <- ArgumentParser()

parser$add_argument("--input_path",
  type = "character",
  required = TRUE,
  help = "Tissue to run on, cleaned SingleCellExperiment"
)
parser$add_argument("--output_loom",
  type = "character",
  required = TRUE,
  help = "Output loom"
)
# The two switches below are integers; the script treats 1 as "on" and any
# other value as "off".
parser$add_argument("--use_sum_factors",
  type = "integer",
  required = TRUE,
  help = "Whether to use sum factors for the normalization"
)
parser$add_argument("--use_ERCC",
  type = "integer",
  required = TRUE,
  help = "Whether to use the ERCC normalization."
)
# Restrict the value to the two assays named in the help text so that a typo
# is rejected at parse time.
parser$add_argument("--assay_type",
  type = "character",
  required = TRUE,
  choices = c("logcounts", "counts"),
  help = "Which assay to run PCA on, must be one of `logcounts` or `counts`"
)
parser$add_argument("--n_pcs",
  type = "integer",
  help = "Number of PCs to compute"
)
parser$add_argument("--n_tops",
  type = "integer",
  help = "Number of genes to use for PCA"
)

args <- parser$parse_args()
53

54
# Converts `sce` to a Seurat object and writes it to a loom file.
#
# The "counts" assay of `sce` is used for both the `counts` and `data`
# arguments of as.Seurat. An existing file at `out_path` is overwritten
# (overwrite = TRUE), and the loom handle is closed before returning.
#
# Args:
#   sce: Object to convert with as.Seurat; must provide a "counts" assay.
#   out_path: Path of the loom file to write.
#
# Returns:
#   The result of calling `close_all()` on the loom handle.
write_sce_to_loom <- function(sce, out_path) {
  seurat_obj <- as.Seurat(sce, counts = "counts", data = "counts")

  # as.loom expects a layer for variable genes, so compute one.
  seurat_obj <- FindVariableFeatures(seurat_obj)

  # as.loom does not know how to deal with NA; use a constant identity.
  Idents(seurat_obj) <- "None"

  # The data has to be both normalized and scaled by Seurat, otherwise Seurat
  # does not write reducedDim to the loom object.
  seurat_obj <- ScaleData(NormalizeData(seurat_obj))

  loom_handle <- as.loom(
    seurat_obj,
    filename = out_path,
    verbose = TRUE,
    overwrite = TRUE
  )
  loom_handle$close_all()
}
72

73
# Normalizes `sce` and runs PCA on the requested assay.
#
# Steps, in order:
#   1. Validate `assay_type`.
#   2. Flag every row whose name starts with "ERCC" as an ERCC spike-in.
#   3. Optionally compute sum factors and/or ERCC spike factors.
#   4. Normalize: log-transformed for "logcounts", untransformed for "counts".
#   5. Run PCA on the `assay_type` assay.
#
# Args:
#   sce: Input object; its row names are used to detect ERCC spike-ins.
#   use_sum_factors: Logical scalar; if TRUE, computeSumFactors is applied.
#   use_ercc: Logical scalar; if TRUE, computeSpikeFactors is applied for the
#     "ERCC" spike-in set.
#   assay_type: Either "logcounts" or "counts".
#   n_pcs: Number of principal components, passed as `ncomponents`.
#   n_tops: Number of genes used for the PCA, passed as `ntop`.
#
# Returns:
#   The processed object as returned by runPCA.
#
# Raises:
#   An error if `assay_type` is not exactly "logcounts" or "counts".
process_sample <- function(sce,
                           use_sum_factors,
                           use_ercc,
                           assay_type,
                           n_pcs,
                           n_tops) {
  # Fail fast: an unsupported value would otherwise skip normalization
  # silently and only surface (if at all) inside runPCA.
  if (!is.character(assay_type) || length(assay_type) != 1L ||
    !assay_type %in% c("logcounts", "counts")) {
    stop(
      sprintf(
        "`assay_type` must be \"logcounts\" or \"counts\", got %s",
        paste(deparse(assay_type), collapse = "")
      ),
      call. = FALSE
    )
  }

  isSpike(sce, "ERCC") <- grepl("^ERCC", rownames(sce))

  if (use_sum_factors) {
    sce <- computeSumFactors(sce)
  }
  if (use_ercc) {
    # NOTE(review): general.use = FALSE presumably keeps the spike-in factors
    # separate from the factors used for the other genes; confirm against the
    # installed scran version.
    sce <- computeSpikeFactors(sce, type = "ERCC", general.use = FALSE)
  }

  if (assay_type == "logcounts") {
    sce <- normalize(sce)
  } else {
    sce <- normalize(sce, return_log = FALSE)
  }

  runPCA(
    sce,
    exprs_values = assay_type,
    ncomponents = n_pcs,
    ntop = n_tops
  )
}
99

100
# Script entry point: load the input object, run normalization and PCA with
# the parsed command-line options, then export the result as a loom file.
input_sce <- readRDS(args$input_path)

# The integer switches count as enabled only when they equal 1.
sum_factors_enabled <- args$use_sum_factors == 1
ercc_enabled <- args$use_ERCC == 1

processed_sce <- process_sample(
  sce = input_sce,
  use_sum_factors = sum_factors_enabled,
  use_ercc = ercc_enabled,
  assay_type = args$assay_type,
  n_pcs = args$n_pcs,
  n_tops = args$n_tops
)

write_sce_to_loom(processed_sce, out_path = args$output_loom)
112
google-research

Использование cookies