google-research

Форк
0
/
peakVI_process.py 
75 строк · 2.2 Кб
1
# coding=utf-8
2
# Copyright 2024 The Google Research Authors.
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
#     http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15

16
"""Script for running PeakVI on a 10x formatted dataset."""
17

18
import os
19
from typing import Any, Sequence
20

21
from absl import app
22
from absl import flags
23
import anndata
24
import pandas as pd
25
import scipy.io
26
import scipy.sparse
27
import scvi
28
import tensorflow as tf
29

30
# Command-line flags. Both default to None when not passed on the command line.
FLAGS = flags.FLAGS
flags.DEFINE_string(
    name='input_path',
    default=None,
    help='Path to the 10x formatted folder.',
)
flags.DEFINE_string(
    name='output_path',
    default=None,
    help='Path to the output directory.',
)
33

34

35
def create_anndata(path):
  """Builds an AnnData object from a 10x formatted directory.

  Reads `matrix.mtx`, `barcodes.tsv` and `bins.tsv` from `path`. The matrix is
  converted to CSR, wrapped in an AnnData object and transposed. The first
  column of `barcodes.tsv` is assigned as the observation names and the first
  column of `bins.tsv` as the variable names.

  Args:
    path: Path to the 10x formatted input files.

  Returns:
    anndata object for the experiment.
  """

  def _first_column(filename):
    # Both TSV files are read without a header row; only column 0 is used.
    with tf.io.gfile.GFile(os.path.join(path, filename), mode='r') as handle:
      return pd.read_csv(handle, sep='\t', header=None)[0]

  mtx_path = os.path.join(path, 'matrix.mtx')
  with tf.io.gfile.GFile(mtx_path, mode='rb') as handle:
    raw_counts = scipy.io.mmread(handle)

  # Wrap the CSR matrix, then swap its axes before attaching the names.
  experiment = anndata.AnnData(scipy.sparse.csr_matrix(raw_counts)).transpose()

  experiment.obs_names = _first_column('barcodes.tsv')
  experiment.var_names = _first_column('bins.tsv')
  return experiment
56

57

58
def main(argv):
  """Trains PeakVI on the input dataset and saves its latent representation.

  Loads the data found at `--input_path`, trains a `scvi.model.PEAKVI` model on
  it, and writes the latent representation (indexed by observation name) to
  `peakVI.csv` inside `--output_path`, creating that directory first.

  Args:
    argv: Command-line arguments left over after flag parsing.

  Raises:
    app.UsageError: If any positional arguments were supplied.
  """
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')

  dataset = create_anndata(FLAGS.input_path)
  scvi.model.PEAKVI.setup_anndata(dataset)
  model = scvi.model.PEAKVI(dataset)
  model.train()

  embedding = model.get_latent_representation()
  latent = pd.DataFrame(embedding, index=dataset.obs_names)

  out_dir = FLAGS.output_path
  tf.io.gfile.makedirs(out_dir)
  csv_path = os.path.join(out_dir, 'peakVI.csv')
  with tf.io.gfile.GFile(csv_path, 'w') as sink:
    latent.to_csv(sink)
72

73

74
if __name__ == '__main__':
  # Both path flags default to None. Marking them required makes a missing
  # flag fail at parse time with a usage message instead of surfacing later as
  # a TypeError from os.path.join(None, ...). Done inside the guard so that
  # importing this module does not impose the requirement on other programs.
  flags.mark_flags_as_required(['input_path', 'output_path'])
  app.run(main)
76

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.