scikit-image

_fisher_vector.py
265 строк · 10.3 Кб
Перенос по словам
1
"""
2
fisher_vector.py - Implementation of the Fisher vector encoding algorithm
3

4
This module contains the source code for Fisher vector computation. The
5
computation is separated into two distinct steps, which are called separately
6
by the user, namely:
7

8
learn_gmm: Used to estimate the GMM for all vectors/descriptors computed for
9
           all examples in the dataset (e.g. estimated using all the SIFT
10
           vectors computed for all images in the dataset, or at least a subset
11
           of this).
12

13
fisher_vector: Used to compute the Fisher vector representation for a
14
               single set of descriptors/vector (e.g. the SIFT
15
               descriptors for a single image in your dataset, or
16
               perhaps a test image).
17

18
Reference: Perronnin, F. and Dance, C. Fisher kernels on Visual Vocabularies
19
           for Image Categorization, IEEE Conference on Computer Vision and
20
           Pattern Recognition, 2007
21

22
Origin Author: Dan Oneata (Author of the original implementation for the Fisher
23
vector computation using scikit-learn and NumPy. Subsequently ported to
24
scikit-image (here) by other authors.)
25
"""
26

27
import numpy as np
28

29

30
class FisherVectorException(Exception):
    """Base class for the errors raised by the Fisher vector routines."""
32

33

34
class DescriptorException(FisherVectorException):
    """Raised when the supplied descriptors have an invalid type or shape."""
36

37

38
def learn_gmm(descriptors, *, n_modes=32, gm_args=None):
    """Estimate a Gaussian mixture model (GMM) given a set of descriptors and
    number of modes (i.e. Gaussians). This function is essentially a wrapper
    around the scikit-learn implementation of GMM, namely the
    :class:`sklearn.mixture.GaussianMixture` class.

    Due to the nature of the Fisher vector, the only enforced parameter of the
    underlying scikit-learn class is the covariance_type, which must be 'diag'.

    There is no simple way to know what value to use for `n_modes` a-priori.
    Typically, the value is usually one of ``{16, 32, 64, 128}``. One may train
    a few GMMs and choose the one that maximises the log probability of the
    GMM, or choose `n_modes` such that the downstream classifier trained on
    the resultant Fisher vectors has maximal performance.

    Parameters
    ----------
    descriptors : np.ndarray (N, M) or list [(N1, M), (N2, M), ...]
        List of NumPy arrays, or a single NumPy array, of the descriptors
        used to estimate the GMM. The reason a list of NumPy arrays is
        permissible is because often when using a Fisher vector encoding,
        descriptors/vectors are computed separately for each sample/image in
        the dataset, such as SIFT vectors for each image. If a list is passed
        in, then it must be non-empty and each element must be a rank-2 NumPy
        array in which the number of rows may differ (e.g. different number
        of SIFT vectors for each image), but the number of columns for each
        must be the same (i.e. the dimensionality must be the same).
    n_modes : int, default=32
        The number of modes/Gaussians to estimate during the GMM estimate.
    gm_args : dict, optional
        Keyword arguments that can be passed into the underlying scikit-learn
        :class:`sklearn.mixture.GaussianMixture` class. If it contains
        ``'covariance_type'``, the value must be ``'diag'``. The dictionary
        is not modified.

    Returns
    -------
    gmm : :class:`sklearn.mixture.GaussianMixture`
        The estimated GMM object, which contains the necessary parameters
        needed to compute the Fisher vector.

    Raises
    ------
    ImportError
        If scikit-learn is not installed.
    DescriptorException
        If `descriptors` is neither a NumPy array nor a non-empty list of
        rank-2 NumPy arrays sharing the same number of columns.
    FisherVectorException
        If `n_modes` is not a positive integer, or if `gm_args` requests a
        covariance type other than ``'diag'``.

    References
    ----------
    .. [1] https://scikit-learn.org/stable/modules/generated/sklearn.mixture.GaussianMixture.html

    Examples
    --------
    .. testsetup::
        >>> import pytest; _ = pytest.importorskip('sklearn')

    >>> from skimage.feature import learn_gmm
    >>> rng = np.random.Generator(np.random.PCG64())
    >>> sift_for_images = [rng.standard_normal((10, 128)) for _ in range(10)]
    >>> num_modes = 16
    >>> # Estimate 16-mode GMM with these synthetic SIFT vectors
    >>> gmm = learn_gmm(sift_for_images, n_modes=num_modes)
    """

    try:
        from sklearn.mixture import GaussianMixture
    except ImportError as err:
        # Chain the original error so the underlying import failure stays
        # visible in the traceback.
        raise ImportError(
            'scikit-learn is not installed. Please ensure it is installed in '
            'order to use the Fisher vector functionality.'
        ) from err

    if not isinstance(descriptors, (list, np.ndarray)):
        raise DescriptorException(
            'Please ensure descriptors are either a NumPy array, '
            'or a list of NumPy arrays.'
        )

    if isinstance(descriptors, list):
        # Check every element (not only the first) and reject an empty list
        # up front rather than failing with a bare IndexError.
        if not descriptors or not all(
            isinstance(e, np.ndarray) for e in descriptors
        ):
            raise DescriptorException(
                'Please ensure descriptors are a list of NumPy arrays.'
            )
        # Each element must really be rank 2; comparing ranks against the
        # first element alone would let a list of rank-1 arrays through.
        if not all(e.ndim == 2 for e in descriptors):
            raise DescriptorException(
                'Please ensure all elements of your descriptor list are of rank 2.'
            )
        n_features = descriptors[0].shape[1]
        if not all(e.shape[1] == n_features for e in descriptors):
            raise DescriptorException(
                'Please ensure all descriptors are of the same dimensionality.'
            )

    if not isinstance(n_modes, (int, np.integer)) or n_modes <= 0:
        raise FisherVectorException('Please ensure n_modes is a positive integer.')

    # Merge the user's keyword arguments over the enforced default. Building a
    # new dict avoids a KeyError when 'covariance_type' is absent and leaves
    # the caller's dictionary untouched.
    gm_params = {'covariance_type': 'diag', **(gm_args or {})}
    if gm_params['covariance_type'] != 'diag':
        raise FisherVectorException('Covariance type must be "diag".')

    if isinstance(descriptors, list):
        descriptors = np.vstack(descriptors)

    gmm = GaussianMixture(n_components=n_modes, **gm_params)
    gmm.fit(descriptors)

    return gmm
153

154

155
def fisher_vector(descriptors, gmm, *, improved=False, alpha=0.5):
    """Compute the Fisher vector given some descriptors/vectors,
    and an associated estimated GMM.

    Parameters
    ----------
    descriptors : np.ndarray, shape=(n_descriptors, descriptor_length)
        NumPy array of the descriptors for which the Fisher vector
        representation is to be computed.
    gmm : :class:`sklearn.mixture.GaussianMixture`
        An estimated GMM object, which contains the necessary parameters needed
        to compute the Fisher vector. It must have been created with
        ``covariance_type='diag'``.
    improved : bool, default=False
        Flag denoting whether to compute improved Fisher vectors or not.
        Improved Fisher vectors are L2 and power normalized. Power
        normalization is simply f(z) = sign(z) pow(abs(z), alpha) for some
        0 <= alpha <= 1.
    alpha : float, default=0.5
        The parameter for the power normalization step. Ignored if
        improved=False.

    Returns
    -------
    fisher_vector : np.ndarray
        The computed Fisher vector, which is given by a concatenation of the
        gradients of a GMM with respect to its parameters (mixture weights,
        means, and covariance matrices). For D-dimensional input descriptors or
        vectors, and a K-mode GMM, the Fisher vector dimensionality will be
        2KD + K. Thus, its dimensionality is invariant to the number of
        descriptors/vectors.

    Raises
    ------
    ImportError
        If scikit-learn is not installed.
    DescriptorException
        If `descriptors` is not a NumPy array.
    FisherVectorException
        If `gmm` is not a :class:`sklearn.mixture.GaussianMixture`, if its
        covariance type is not ``'diag'``, or if `improved` is set and `alpha`
        is not a real number.

    References
    ----------
    .. [1] Perronnin, F. and Dance, C. Fisher kernels on Visual Vocabularies
           for Image Categorization, IEEE Conference on Computer Vision and
           Pattern Recognition, 2007
    .. [2] Perronnin, F. and Sanchez, J. and Mensink T. Improving the Fisher
           Kernel for Large-Scale Image Classification, ECCV, 2010

    Examples
    --------
    .. testsetup::
        >>> import pytest; _ = pytest.importorskip('sklearn')

    >>> from skimage.feature import fisher_vector, learn_gmm
    >>> sift_for_images = [np.random.random((10, 128)) for _ in range(10)]
    >>> num_modes = 16
    >>> # Estimate 16-mode GMM with these synthetic SIFT vectors
    >>> gmm = learn_gmm(sift_for_images, n_modes=num_modes)
    >>> test_image_descriptors = np.random.random((25, 128))
    >>> # Compute the Fisher vector
    >>> fv = fisher_vector(test_image_descriptors, gmm)
    """
    try:
        from sklearn.mixture import GaussianMixture
    except ImportError as err:
        # Chain the original error so the underlying import failure stays
        # visible in the traceback.
        raise ImportError(
            'scikit-learn is not installed. Please ensure it is installed in '
            'order to use the Fisher vector functionality.'
        ) from err

    if not isinstance(descriptors, np.ndarray):
        raise DescriptorException('Please ensure descriptors is a NumPy array.')

    if not isinstance(gmm, GaussianMixture):
        raise FisherVectorException(
            'Please ensure gmm is a sklearn.mixture.GaussianMixture object.'
        )

    # The gradient formulas below treat the covariances as one variance per
    # mode and dimension; other covariance types store covariances_ with a
    # different shape and would broadcast incorrectly or fail.
    if gmm.covariance_type != 'diag':
        raise FisherVectorException(
            'Please ensure gmm was estimated with covariance_type="diag".'
        )

    # Accept any real number (e.g. the integers 0 and 1, or NumPy floats),
    # but not booleans.
    if improved and (
        isinstance(alpha, bool)
        or not isinstance(alpha, (int, float, np.integer, np.floating))
    ):
        raise FisherVectorException(
            'Please ensure that the alpha parameter is a float.'
        )

    num_descriptors = len(descriptors)

    mixture_weights = gmm.weights_
    means = gmm.means_
    covariances = gmm.covariances_

    posterior_probabilities = gmm.predict_proba(descriptors)

    # Statistics necessary to compute GMM gradients wrt its parameters
    pp_sum = posterior_probabilities.mean(axis=0, keepdims=True).T
    pp_x = posterior_probabilities.T.dot(descriptors) / num_descriptors
    pp_x_2 = posterior_probabilities.T.dot(np.power(descriptors, 2)) / num_descriptors

    # Compute GMM gradients wrt its parameters
    d_pi = pp_sum.squeeze() - mixture_weights

    d_mu = pp_x - pp_sum * means

    d_sigma_t1 = pp_sum * np.power(means, 2)
    d_sigma_t2 = pp_sum * covariances
    d_sigma_t3 = 2 * pp_x * means
    d_sigma = -pp_x_2 - d_sigma_t1 + d_sigma_t2 + d_sigma_t3

    # Apply analytical diagonal normalization
    sqrt_mixture_weights = np.sqrt(mixture_weights)
    d_pi /= sqrt_mixture_weights
    d_mu /= sqrt_mixture_weights[:, np.newaxis] * np.sqrt(covariances)
    d_sigma /= np.sqrt(2) * sqrt_mixture_weights[:, np.newaxis] * covariances

    # Concatenate GMM gradients to form Fisher vector representation
    fv = np.hstack((d_pi, d_mu.ravel(), d_sigma.ravel()))

    if improved:
        # Power normalization followed by L2 normalization.
        fv = np.sign(fv) * np.power(np.abs(fv), alpha)
        norm = np.linalg.norm(fv)
        # Skip the division for an all-zero vector to avoid producing NaNs.
        if norm > 0:
            fv = fv / norm

    return fv
266
scikit-image

Использование cookies