scikit-image
265 lines · 10.3 KB
"""
fisher_vector.py - Implementation of the Fisher vector encoding algorithm.

This module contains the source code for Fisher vector computation. The
computation is separated into two distinct steps, which are called separately
by the user, namely:

learn_gmm: Used to estimate the GMM for all vectors/descriptors computed for
           all examples in the dataset (e.g. estimated using all the SIFT
           vectors computed for all images in the dataset, or at least a
           subset of this).

fisher_vector: Used to compute the Fisher vector representation for a
               single set of descriptors/vectors (e.g. the SIFT
               descriptors for a single image in your dataset, or
               perhaps a test image).

Reference: Perronnin, F. and Dance, C. Fisher kernels on Visual Vocabularies
           for Image Categorization, IEEE Conference on Computer Vision and
           Pattern Recognition, 2007

Original Author: Dan Oneata (author of the original implementation of the
Fisher vector computation using scikit-learn and NumPy; subsequently ported
to scikit-image (here) by other authors).
"""
26
27import numpy as np28
29
class FisherVectorException(Exception):
    """Base exception for errors raised by the Fisher vector routines."""
33
class DescriptorException(FisherVectorException):
    """Raised when the supplied descriptors are malformed or inconsistent."""
37
def learn_gmm(descriptors, *, n_modes=32, gm_args=None):
    """Estimate a Gaussian mixture model (GMM) given a set of descriptors and
    number of modes (i.e. Gaussians). This function is essentially a wrapper
    around the scikit-learn implementation of GMM, namely the
    :class:`sklearn.mixture.GaussianMixture` class.

    Due to the nature of the Fisher vector, the only enforced parameter of the
    underlying scikit-learn class is the covariance_type, which must be 'diag'.

    There is no simple way to know what value to use for `n_modes` a-priori.
    Typically, the value is usually one of ``{16, 32, 64, 128}``. One may train
    a few GMMs and choose the one that maximises the log probability of the
    GMM, or choose `n_modes` such that the downstream classifier trained on
    the resultant Fisher vectors has maximal performance.

    Parameters
    ----------
    descriptors : np.ndarray (N, M) or list [(N1, M), (N2, M), ...]
        List of NumPy arrays, or a single NumPy array, of the descriptors
        used to estimate the GMM. The reason a list of NumPy arrays is
        permissible is because often when using a Fisher vector encoding,
        descriptors/vectors are computed separately for each sample/image in
        the dataset, such as SIFT vectors for each image. If a list is passed
        in, then each element must be a rank-2 NumPy array in which the number
        of rows may differ (e.g. different number of SIFT vectors for each
        image), but the number of columns for each must be the same (i.e. the
        dimensionality must be the same).
    n_modes : int
        The number of modes/Gaussians to estimate during the GMM estimate.
    gm_args : dict
        Keyword arguments that can be passed into the underlying scikit-learn
        :class:`sklearn.mixture.GaussianMixture` class. If it contains
        ``covariance_type``, that value must be ``'diag'``. The dict is not
        mutated by this function.

    Returns
    -------
    gmm : :class:`sklearn.mixture.GaussianMixture`
        The estimated GMM object, which contains the necessary parameters
        needed to compute the Fisher vector.

    Raises
    ------
    ImportError
        If scikit-learn is not installed.
    DescriptorException
        If the descriptors are not a NumPy array or a non-empty list of
        rank-2 NumPy arrays of equal dimensionality.
    FisherVectorException
        If ``n_modes`` is not a positive integer, or if ``gm_args`` requests
        a covariance type other than ``'diag'``.

    References
    ----------
    .. [1] https://scikit-learn.org/stable/modules/generated/sklearn.mixture.GaussianMixture.html

    Examples
    --------
    .. testsetup::
        >>> import pytest; _ = pytest.importorskip('sklearn')

    >>> from skimage.feature import learn_gmm
    >>> rng = np.random.Generator(np.random.PCG64())
    >>> sift_for_images = [rng.standard_normal((10, 128)) for _ in range(10)]
    >>> num_modes = 16
    >>> # Estimate 16-mode GMM with these synthetic SIFT vectors
    >>> gmm = learn_gmm(sift_for_images, n_modes=num_modes)
    """
    try:
        from sklearn.mixture import GaussianMixture
    except ImportError:
        raise ImportError(
            'scikit-learn is not installed. Please ensure it is installed in '
            'order to use the Fisher vector functionality.'
        )

    if not isinstance(descriptors, (list, np.ndarray)):
        raise DescriptorException(
            'Please ensure descriptors are either a NumPy array, '
            'or a list of NumPy arrays.'
        )

    if isinstance(descriptors, list):
        # Validate every element (not just the first) so that a malformed
        # list fails with a clear message rather than a downstream
        # AttributeError/IndexError. An empty list is also rejected here,
        # since there is nothing to fit a GMM to.
        if not descriptors or not all(
            isinstance(d, np.ndarray) for d in descriptors
        ):
            raise DescriptorException(
                'Please ensure descriptors are a list of NumPy arrays.'
            )
        # Enforce rank 2 outright, matching the error message below; the
        # previous check only compared each element's rank to the first
        # element's, so an all-rank-1 list crashed later on `shape[1]`.
        if not all(d.ndim == 2 for d in descriptors):
            raise DescriptorException(
                'Please ensure all elements of your descriptor list '
                'are of rank 2.'
            )
        dimensionality = descriptors[0].shape[1]
        if not all(d.shape[1] == dimensionality for d in descriptors):
            raise DescriptorException(
                'Please ensure all descriptors are of the same dimensionality.'
            )

    if not isinstance(n_modes, int) or n_modes <= 0:
        raise FisherVectorException('Please ensure n_modes is a positive integer.')

    # Fail fast if the caller explicitly requests a non-diagonal covariance;
    # the Fisher vector's analytical normalization assumes 'diag'.
    # NOTE: the previous implementation indexed gm_args['covariance_type']
    # unconditionally, raising KeyError for any gm_args lacking that key.
    if gm_args and gm_args.get('covariance_type', 'diag') != 'diag':
        raise FisherVectorException('Covariance type must be "diag".')

    if isinstance(descriptors, list):
        # Pool all per-sample descriptor matrices into one (sum(Ni), M) array.
        descriptors = np.vstack(descriptors)

    # Copy before inserting the default so the caller's dict is not mutated.
    gm_args = dict(gm_args) if gm_args else {}
    gm_args.setdefault('covariance_type', 'diag')

    gmm = GaussianMixture(n_components=n_modes, **gm_args)
    gmm.fit(descriptors)

    return gmm
154
def fisher_vector(descriptors, gmm, *, improved=False, alpha=0.5):
    """Compute the Fisher vector given some descriptors/vectors,
    and an associated estimated GMM.

    Parameters
    ----------
    descriptors : np.ndarray, shape=(n_descriptors, descriptor_length)
        NumPy array of the descriptors for which the Fisher vector
        representation is to be computed.
    gmm : :class:`sklearn.mixture.GaussianMixture`
        An estimated GMM object, which contains the necessary parameters needed
        to compute the Fisher vector.
    improved : bool, default=False
        Flag denoting whether to compute improved Fisher vectors or not.
        Improved Fisher vectors are L2 and power normalized. Power
        normalization is simply f(z) = sign(z) pow(abs(z), alpha) for some
        0 <= alpha <= 1.
    alpha : float, default=0.5
        The parameter for the power normalization step. Ignored if
        improved=False.

    Returns
    -------
    fisher_vector : np.ndarray
        The computed Fisher vector, which is given by a concatenation of the
        gradients of a GMM with respect to its parameters (mixture weights,
        means, and covariance matrices). For D-dimensional input descriptors or
        vectors, and a K-mode GMM, the Fisher vector dimensionality will be
        2KD + K. Thus, its dimensionality is invariant to the number of
        descriptors/vectors.

    Raises
    ------
    ImportError
        If scikit-learn is not installed.
    DescriptorException
        If ``descriptors`` is not a NumPy array.
    FisherVectorException
        If ``gmm`` is not a :class:`sklearn.mixture.GaussianMixture`, or if
        ``improved`` is True and ``alpha`` is not a float.

    References
    ----------
    .. [1] Perronnin, F. and Dance, C. Fisher kernels on Visual Vocabularies
           for Image Categorization, IEEE Conference on Computer Vision and
           Pattern Recognition, 2007
    .. [2] Perronnin, F. and Sanchez, J. and Mensink T. Improving the Fisher
           Kernel for Large-Scale Image Classification, ECCV, 2010

    Examples
    --------
    .. testsetup::
        >>> import pytest; _ = pytest.importorskip('sklearn')

    >>> from skimage.feature import fisher_vector, learn_gmm
    >>> sift_for_images = [np.random.random((10, 128)) for _ in range(10)]
    >>> num_modes = 16
    >>> # Estimate 16-mode GMM with these synthetic SIFT vectors
    >>> gmm = learn_gmm(sift_for_images, n_modes=num_modes)
    >>> test_image_descriptors = np.random.random((25, 128))
    >>> # Compute the Fisher vector
    >>> fv = fisher_vector(test_image_descriptors, gmm)
    """
    try:
        from sklearn.mixture import GaussianMixture
    except ImportError:
        raise ImportError(
            'scikit-learn is not installed. Please ensure it is installed in '
            'order to use the Fisher vector functionality.'
        )

    if not isinstance(descriptors, np.ndarray):
        raise DescriptorException('Please ensure descriptors is a NumPy array.')

    if not isinstance(gmm, GaussianMixture):
        raise FisherVectorException(
            'Please ensure gmm is a sklearn.mixture.GaussianMixture object.'
        )

    if improved and not isinstance(alpha, float):
        raise FisherVectorException(
            'Please ensure that the alpha parameter is a float.'
        )

    num_descriptors = len(descriptors)

    # GMM parameters fitted with covariance_type='diag':
    # weights_ (K,), means_ (K, D), covariances_ (K, D) for a K-mode GMM
    # over D-dimensional descriptors.
    mixture_weights = gmm.weights_
    means = gmm.means_
    covariances = gmm.covariances_

    # Soft assignment of each descriptor to each mode, shape (N, K).
    posterior_probabilities = gmm.predict_proba(descriptors)

    # Statistics necessary to compute GMM gradients wrt its parameters:
    # pp_sum  (K, 1): mean posterior mass per mode over the N descriptors;
    # pp_x    (K, D): posterior-weighted mean of the descriptors;
    # pp_x_2  (K, D): posterior-weighted mean of the squared descriptors.
    pp_sum = posterior_probabilities.mean(axis=0, keepdims=True).T
    pp_x = posterior_probabilities.T.dot(descriptors) / num_descriptors
    pp_x_2 = posterior_probabilities.T.dot(np.power(descriptors, 2)) / num_descriptors

    # Compute GMM gradients wrt its parameters (see refs [1] and [2]).
    # Gradient wrt the mixture weights, shape (K,).
    d_pi = pp_sum.squeeze() - mixture_weights

    # Gradient wrt the means, shape (K, D).
    d_mu = pp_x - pp_sum * means

    # Gradient wrt the diagonal covariances, shape (K, D); the three terms
    # expand the posterior-weighted sum of ((x - mu)^2 - sigma^2) in
    # moment form (pp_x_2, pp_x, pp_sum) to avoid a per-descriptor loop.
    d_sigma_t1 = pp_sum * np.power(means, 2)
    d_sigma_t2 = pp_sum * covariances
    d_sigma_t3 = 2 * pp_x * means
    d_sigma = -pp_x_2 - d_sigma_t1 + d_sigma_t2 + d_sigma_t3

    # Apply analytical diagonal normalization (closed-form approximation of
    # the Fisher information matrix for a diagonal-covariance GMM, ref [1]).
    sqrt_mixture_weights = np.sqrt(mixture_weights)
    d_pi /= sqrt_mixture_weights
    d_mu /= sqrt_mixture_weights[:, np.newaxis] * np.sqrt(covariances)
    d_sigma /= np.sqrt(2) * sqrt_mixture_weights[:, np.newaxis] * covariances

    # Concatenate GMM gradients to form Fisher vector representation,
    # total length K + 2*K*D.
    fisher_vector = np.hstack((d_pi, d_mu.ravel(), d_sigma.ravel()))

    if improved:
        # "Improved" Fisher vector (ref [2]): power normalization followed
        # by L2 normalization.
        # NOTE(review): if the raw vector is identically zero the L2 norm is
        # 0 and this division yields NaNs — confirm callers never hit that.
        fisher_vector = np.sign(fisher_vector) * np.power(np.abs(fisher_vector), alpha)
        fisher_vector = fisher_vector / np.linalg.norm(fisher_vector)

    return fisher_vector