google-research
77 строк · 2.3 Кб
1# coding=utf-8
2# Copyright 2024 The Google Research Authors.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16"""Utilities for data loading and preprocessing."""
17
18from typing import Any19from typing import Callable20import numpy as np21import scipy.io22import scipy.sparse as sps23
24
def preprocess_adjacency(
    adjacency: sps.spmatrix,
    *,
    convert_to_csr: bool = True,
    convert_to_unweighted: bool = True,
    remove_self_loops: bool = True,
    remove_isolated_nodes: bool = True,
) -> sps.spmatrix:
    """Pre-processes input adjacency matrix.

    The enabled steps always run in this order: CSR conversion, weight
    removal, self-loop removal, isolated-node removal. The matrix passed in
    by the caller is never modified; a cleaned matrix is returned instead.

    Args:
      adjacency: Input adjacency matrix. Must be two-dimensional and square.
      convert_to_csr: Whether to convert the input matrix to the CSR format
        with fast matrix multiplications.
      convert_to_unweighted: Whether to discard input weights, i.e. set every
        stored entry to one (the dtype of the stored data is kept).
      remove_self_loops: Whether to remove self-loops from the graph by
        subtracting the diagonal.
      remove_isolated_nodes: Whether to remove isolated nodes from the graph.
        A node is kept when its column sum is non-zero, so for a
        non-symmetric matrix a node that has only outgoing edges (non-zero
        row, all-zero column) is removed as well.

    Returns:
      Clean adjacency matrix.

    Raises:
      ValueError: If `adjacency` is not two-dimensional or is not square.
    """
    if adjacency.ndim != 2:
        raise ValueError(
            f'Adjacency matrix should be a 2D tensor, got {adjacency.ndim}'
        )
    if adjacency.shape[0] != adjacency.shape[1]:
        raise ValueError(
            f'Adjacency matrix should be square, got {adjacency.shape}'
        )

    caller_matrix = adjacency
    if convert_to_csr:
        adjacency = adjacency.tocsr()
    if convert_to_unweighted:
        # The conversion above may hand back the very same object (or may
        # have been skipped), so copy before overwriting the weights to
        # avoid clobbering the caller's matrix.
        if adjacency is caller_matrix:
            adjacency = adjacency.copy()
        adjacency.data = np.ones_like(adjacency.data)
    if remove_self_loops:
        adjacency = adjacency - sps.diags(adjacency.diagonal())
    if remove_isolated_nodes:
        # `sum(0)` yields one total per column. np.asarray(...).ravel()
        # flattens it whether the reduction produced an np.matrix or a plain
        # ndarray, unlike the np.matrix-only `.A1` attribute.
        connected = np.asarray(adjacency.sum(0) != 0).ravel()
        adjacency = adjacency[connected, :][:, connected]
    return adjacency
65
def load_matfile(
    filepath: str,
    matfile_variable_name: str = 'network',
    convert_to_unweighted: bool = True,
    open_fn: Callable[..., Any] = open,
) -> sps.csr_matrix:
    """Loads an adjacency matrix stored as a variable in a MATLAB .mat file.

    Args:
      filepath: Location of the .mat file; passed as-is to `open_fn`.
      matfile_variable_name: Name of the variable inside the .mat file that
        holds the adjacency matrix.
      convert_to_unweighted: Whether to discard input weights, i.e. set every
        stored entry to one (the dtype of the stored data is kept).
      open_fn: Callable used to open the file. It is called as
        `open_fn(filepath, 'rb')` and its result is used as a context
        manager yielding a binary file-like object.

    Returns:
      The adjacency matrix in CSR format.

    Raises:
      KeyError: If the file has no variable named `matfile_variable_name`.
    """
    with open_fn(filepath, 'rb') as inf:
        contents = scipy.io.loadmat(inf)

    if matfile_variable_name not in contents:
        # Keys starting with '__' are loadmat metadata, not user variables.
        available = sorted(k for k in contents if not k.startswith('__'))
        raise KeyError(
            f'Variable {matfile_variable_name!r} not found in {filepath!r}; '
            f'available variables: {available}'
        )

    stored = contents[matfile_variable_name]
    # Variables saved densely come back as plain arrays without `.tocsr()`,
    # so build the CSR matrix explicitly in that case.
    if sps.issparse(stored):
        adjacency = stored.tocsr()
    else:
        adjacency = sps.csr_matrix(stored)

    if convert_to_unweighted:
        adjacency.data = np.ones_like(adjacency.data)
    return adjacency