google-research

Форк
0
99 строк · 3.2 Кб
1
# coding=utf-8
2
# Copyright 2024 The Google Research Authors.
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
#     http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15

16
"""Resize bins for the 10X formatted dataset."""
17

18
import os
19
from typing import Sequence, Any
20

21
from absl import app
22
from absl import flags
23
import anndata
24
import pandas as pd
25
import scipy.io
26
import scipy.sparse
27
import tensorflow as tf
28

29
from schptm_benchmark import resize_bins_lib
30

31
# Command-line flags. `input_path` and `output_dir` locate the 10x formatted
# data; `mode` selects which of `binsize` / `annotation` drives the resizing.
FLAGS = flags.FLAGS
flags.DEFINE_string('input_path', None, 'Path to the 10x formatted folder.')
flags.DEFINE_string('output_dir', None, 'Path to the output directory.')
# The value is given in kbp; it is multiplied by 10**3 before use.
flags.DEFINE_integer('binsize', None, 'Number of bp per bin (in kbp).')
flags.DEFINE_enum('mode', 'bins', ['bins', 'annotation'],
                  'Resizing mode: "bins" merges bins using --binsize, '
                  '"annotation" builds bins from the --annotation file.')
flags.DEFINE_string('annotation', None, 'Path to the annotation.')
38

39

40
def create_anndata(path):
  """Builds an AnnData object from a 10x formatted folder.

  Reads `matrix.mtx`, `barcodes.tsv` and `bins.tsv` from `path`. The matrix is
  converted to CSR and transposed after loading, so that observations are named
  after the first column of `barcodes.tsv` and variables after the first column
  of `bins.tsv`.

  Args:
    path: Path to the 10x formatted input files.

  Returns:
    anndata object for the experiment.
  """

  def read_first_column(filename):
    """Returns column 0 of a headerless tab-separated file under `path`."""
    with tf.io.gfile.GFile(os.path.join(path, filename), mode='r') as handle:
      return pd.read_csv(handle, sep='\t', header=None)[0]

  matrix_path = os.path.join(path, 'matrix.mtx')
  with tf.io.gfile.GFile(matrix_path, mode='rb') as handle:
    counts = scipy.io.mmread(handle)

  # Transpose the loaded matrix so its axes line up with the names below.
  adata = anndata.AnnData(scipy.sparse.csr_matrix(counts)).transpose()
  adata.obs_names = read_first_column('barcodes.tsv')
  adata.var_names = read_first_column('bins.tsv')
  return adata
61

62

63
def save_anndata(adata, output_dir, input_path):
  """Writes an AnnData object to `output_dir` as 10x formatted files.

  Creates `output_dir` if needed, then writes:
    * `matrix.mtx`: the transpose of `adata.X` in MatrixMarket format.
    * `bins.tsv`: `adata.var_names`, repeated in two tab-separated columns,
      without header or index.
    * `barcodes.tsv`: copied as-is from `input_path`, overwriting any
      existing file.

  Args:
    adata: AnnData object to save.
    output_dir: Directory that receives the three output files.
    input_path: 10x formatted input folder that provides `barcodes.tsv`.
  """
  matrix_file = os.path.join(output_dir, 'matrix.mtx')
  bins_file = os.path.join(output_dir, 'bins.tsv')
  barcodes_src = os.path.join(input_path, 'barcodes.tsv')
  barcodes_dst = os.path.join(output_dir, 'barcodes.tsv')

  tf.io.gfile.makedirs(output_dir)

  with tf.io.gfile.GFile(matrix_file, mode='w') as out:
    scipy.io.mmwrite(out, adata.X.transpose())

  bin_names = pd.DataFrame(adata.var_names, columns=['var_names'])
  # Selecting the same column twice emits each bin name in two columns.
  duplicated_columns = ['var_names', 'var_names']
  with tf.io.gfile.GFile(bins_file, mode='w') as out:
    bin_names.to_csv(
        out, sep='\t', index=False, header=False, columns=duplicated_columns)

  tf.io.gfile.copy(barcodes_src, barcodes_dst, overwrite=True)
81

82

83
def main(argv):
  """Resizes the bins of a 10x formatted dataset as configured by the flags.

  Loads the dataset from --input_path, resizes its bins either by merging them
  to --binsize kbp (--mode=bins) or from the --annotation file
  (--mode=annotation), and writes the result to --output_dir.

  Args:
    argv: Positional command-line arguments; unused.

  Raises:
    app.UsageError: If the flag required by the selected mode is missing.
  """
  del argv  # Unused.

  # Each mode needs exactly one of these flags, so validate them per mode and
  # fail with a usage message before any data is read.
  if FLAGS.mode == 'bins' and FLAGS.binsize is None:
    raise app.UsageError('--binsize is required when --mode=bins.')
  if FLAGS.mode == 'annotation' and FLAGS.annotation is None:
    raise app.UsageError('--annotation is required when --mode=annotation.')

  adata = create_anndata(FLAGS.input_path)
  if FLAGS.mode == 'bins':
    # --binsize is given in kbp; the library call receives it in bp.
    adata = resize_bins_lib.merge_bins(adata, FLAGS.binsize * (10**3))
  elif FLAGS.mode == 'annotation':
    adata = resize_bins_lib.bins_from_annotation(adata, FLAGS.annotation)

  save_anndata(adata, FLAGS.output_dir, FLAGS.input_path)


if __name__ == '__main__':
  flags.mark_flag_as_required('input_path')
  flags.mark_flag_as_required('output_dir')
  # --binsize and --annotation are mode-specific and are validated in main().
  app.run(main)
100

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.