scikit-image

sift.py
771 строка · 28.7 Кб
Перенос по словам
1
import math
2

3
import numpy as np
4
import scipy.ndimage as ndi
5

6
from .._shared.utils import check_nD, _supported_float_type
7
from ..feature.util import DescriptorExtractor, FeatureDetector
8
from .._shared.filters import gaussian
9
from ..transform import rescale
10
from ..util import img_as_float
11
from ._sift import _local_max, _ori_distances, _update_histogram
12

13

14
def _edgeness(hxx, hyy, hxy):
15
    """Compute edgeness (eq. 18 of Otero et. al. IPOL paper)"""
16
    trace = hxx + hyy
17
    determinant = hxx * hyy - hxy * hxy
18
    return (trace * trace) / determinant
19

20

21
def _sparse_gradient(vol, positions):
22
    """Gradient of a 3D volume at the provided `positions`.
23

24
    For SIFT we only need the gradient at specific positions and do not need
25
    the gradient at the edge positions, so can just use this simple
26
    implementation instead of numpy.gradient.
27
    """
28
    p0 = positions[..., 0]
29
    p1 = positions[..., 1]
30
    p2 = positions[..., 2]
31
    g0 = vol[p0 + 1, p1, p2] - vol[p0 - 1, p1, p2]
32
    g0 *= 0.5
33
    g1 = vol[p0, p1 + 1, p2] - vol[p0, p1 - 1, p2]
34
    g1 *= 0.5
35
    g2 = vol[p0, p1, p2 + 1] - vol[p0, p1, p2 - 1]
36
    g2 *= 0.5
37
    return g0, g1, g2
38

39

40
def _hessian(d, positions):
41
    """Compute the non-redundant 3D Hessian terms at the requested positions.
42

43
    Source: "Anatomy of the SIFT Method"  p.380 (13)
44
    """
45
    p0 = positions[..., 0]
46
    p1 = positions[..., 1]
47
    p2 = positions[..., 2]
48
    two_d0 = 2 * d[p0, p1, p2]
49
    # 0 = row, 1 = col, 2 = octave
50
    h00 = d[p0 - 1, p1, p2] + d[p0 + 1, p1, p2] - two_d0
51
    h11 = d[p0, p1 - 1, p2] + d[p0, p1 + 1, p2] - two_d0
52
    h22 = d[p0, p1, p2 - 1] + d[p0, p1, p2 + 1] - two_d0
53
    h01 = 0.25 * (
54
        d[p0 + 1, p1 + 1, p2]
55
        - d[p0 - 1, p1 + 1, p2]
56
        - d[p0 + 1, p1 - 1, p2]
57
        + d[p0 - 1, p1 - 1, p2]
58
    )
59
    h02 = 0.25 * (
60
        d[p0 + 1, p1, p2 + 1]
61
        - d[p0 + 1, p1, p2 - 1]
62
        + d[p0 - 1, p1, p2 - 1]
63
        - d[p0 - 1, p1, p2 + 1]
64
    )
65
    h12 = 0.25 * (
66
        d[p0, p1 + 1, p2 + 1]
67
        - d[p0, p1 + 1, p2 - 1]
68
        + d[p0, p1 - 1, p2 - 1]
69
        - d[p0, p1 - 1, p2 + 1]
70
    )
71
    return (h00, h11, h22, h01, h02, h12)
72

73

74
def _offsets(grad, hess):
75
    """Compute position refinement offsets from gradient and Hessian.
76

77
    This is equivalent to np.linalg.solve(-H, J) where H is the Hessian
78
    matrix and J is the gradient (Jacobian).
79

80
    This analytical solution is adapted from (BSD-licensed) C code by
81
    Otero et. al (see SIFT docstring References).
82
    """
83
    h00, h11, h22, h01, h02, h12 = hess
84
    g0, g1, g2 = grad
85
    det = h00 * h11 * h22
86
    det -= h00 * h12 * h12
87
    det -= h01 * h01 * h22
88
    det += 2 * h01 * h02 * h12
89
    det -= h02 * h02 * h11
90
    aa = (h11 * h22 - h12 * h12) / det
91
    ab = (h02 * h12 - h01 * h22) / det
92
    ac = (h01 * h12 - h02 * h11) / det
93
    bb = (h00 * h22 - h02 * h02) / det
94
    bc = (h01 * h02 - h00 * h12) / det
95
    cc = (h00 * h11 - h01 * h01) / det
96
    offset0 = -aa * g0 - ab * g1 - ac * g2
97
    offset1 = -ab * g0 - bb * g1 - bc * g2
98
    offset2 = -ac * g0 - bc * g1 - cc * g2
99
    return np.stack((offset0, offset1, offset2), axis=-1)
100

101

102
class SIFT(FeatureDetector, DescriptorExtractor):
103
    """SIFT feature detection and descriptor extraction.
104

105
    Parameters
106
    ----------
107
    upsampling : int, optional
108
        Prior to the feature detection the image is upscaled by a factor
109
        of 1 (no upscaling), 2 or 4. Method: Bi-cubic interpolation.
110
    n_octaves : int, optional
111
        Maximum number of octaves. With every octave the image size is
112
        halved and the sigma doubled. The number of octaves will be
113
        reduced as needed to keep at least 12 pixels along each dimension
114
        at the smallest scale.
115
    n_scales : int, optional
116
        Maximum number of scales in every octave.
117
    sigma_min : float, optional
118
        The blur level of the seed image. If upsampling is enabled
119
        sigma_min is scaled by factor 1/upsampling
120
    sigma_in : float, optional
121
        The assumed blur level of the input image.
122
    c_dog : float, optional
123
        Threshold to discard low contrast extrema in the DoG. It's final
124
        value is dependent on n_scales by the relation:
125
        final_c_dog = (2^(1/n_scales)-1) / (2^(1/3)-1) * c_dog
126
    c_edge : float, optional
127
        Threshold to discard extrema that lie in edges. If H is the
128
        Hessian of an extremum, its "edgeness" is described by
129
        tr(H)²/det(H). If the edgeness is higher than
130
        (c_edge + 1)²/c_edge, the extremum is discarded.
131
    n_bins : int, optional
132
        Number of bins in the histogram that describes the gradient
133
        orientations around keypoint.
134
    lambda_ori : float, optional
135
        The window used to find the reference orientation of a keypoint
136
        has a width of 6 * lambda_ori * sigma and is weighted by a
137
        standard deviation of 2 * lambda_ori * sigma.
138
    c_max : float, optional
139
        The threshold at which a secondary peak in the orientation
140
        histogram is accepted as orientation
141
    lambda_descr : float, optional
142
        The window used to define the descriptor of a keypoint has a width
143
        of 2 * lambda_descr * sigma * (n_hist+1)/n_hist and is weighted by
144
        a standard deviation of lambda_descr * sigma.
145
    n_hist : int, optional
146
        The window used to define the descriptor of a keypoint consists of
147
        n_hist * n_hist histograms.
148
    n_ori : int, optional
149
        The number of bins in the histograms of the descriptor patch.
150

151
    Attributes
152
    ----------
153
    delta_min : float
154
        The sampling distance of the first octave. It's final value is
155
        1/upsampling.
156
    float_dtype : type
157
        The datatype of the image.
158
    scalespace_sigmas : (n_octaves, n_scales + 3) array
159
        The sigma value of all scales in all octaves.
160
    keypoints : (N, 2) array
161
        Keypoint coordinates as ``(row, col)``.
162
    positions : (N, 2) array
163
        Subpixel-precision keypoint coordinates as ``(row, col)``.
164
    sigmas : (N,) array
165
        The corresponding sigma (blur) value of a keypoint.
166
    scales : (N,) array
167
        The corresponding scale of a keypoint.
168
    orientations : (N,) array
169
        The orientations of the gradient around every keypoint.
170
    octaves : (N,) array
171
        The corresponding octave of a keypoint.
172
    descriptors : (N, n_hist*n_hist*n_ori) array
173
        The descriptors of a keypoint.
174

175
    Notes
176
    -----
177
    The SIFT algorithm was developed by David Lowe [1]_, [2]_ and later
178
    patented by the University of British Columbia. Since the patent expired in
179
    2020 it's free to use. The implementation here closely follows the
180
    detailed description in [3]_, including use of the same default parameters.
181

182
    References
183
    ----------
184
    .. [1] D.G. Lowe. "Object recognition from local scale-invariant
185
           features", Proceedings of the Seventh IEEE International
186
           Conference on Computer Vision, 1999, vol.2, pp. 1150-1157.
187
           :DOI:`10.1109/ICCV.1999.790410`
188

189
    .. [2] D.G. Lowe. "Distinctive Image Features from Scale-Invariant
190
           Keypoints", International Journal of Computer Vision, 2004,
191
           vol. 60, pp. 91–110.
192
           :DOI:`10.1023/B:VISI.0000029664.99615.94`
193

194
    .. [3] I. R. Otero and M. Delbracio. "Anatomy of the SIFT Method",
195
           Image Processing On Line, 4 (2014), pp. 370–396.
196
           :DOI:`10.5201/ipol.2014.82`
197

198
    Examples
199
    --------
200
    >>> from skimage.feature import SIFT, match_descriptors
201
    >>> from skimage.data import camera
202
    >>> from skimage.transform import rotate
203
    >>> img1 = camera()
204
    >>> img2 = rotate(camera(), 90)
205
    >>> detector_extractor1 = SIFT()
206
    >>> detector_extractor2 = SIFT()
207
    >>> detector_extractor1.detect_and_extract(img1)
208
    >>> detector_extractor2.detect_and_extract(img2)
209
    >>> matches = match_descriptors(detector_extractor1.descriptors,
210
    ...                             detector_extractor2.descriptors,
211
    ...                             max_ratio=0.6)
212
    >>> matches[10:15]
213
    array([[ 10, 412],
214
           [ 11, 417],
215
           [ 12, 407],
216
           [ 13, 411],
217
           [ 14, 406]])
218
    >>> detector_extractor1.keypoints[matches[10:15, 0]]
219
    array([[ 95, 214],
220
           [ 97, 211],
221
           [ 97, 218],
222
           [102, 215],
223
           [104, 218]])
224
    >>> detector_extractor2.keypoints[matches[10:15, 1]]
225
    array([[297,  95],
226
           [301,  97],
227
           [294,  97],
228
           [297, 102],
229
           [293, 104]])
230

231
    """
232

233
    def __init__(
234
        self,
235
        upsampling=2,
236
        n_octaves=8,
237
        n_scales=3,
238
        sigma_min=1.6,
239
        sigma_in=0.5,
240
        c_dog=0.04 / 3,
241
        c_edge=10,
242
        n_bins=36,
243
        lambda_ori=1.5,
244
        c_max=0.8,
245
        lambda_descr=6,
246
        n_hist=4,
247
        n_ori=8,
248
    ):
249
        if upsampling in [1, 2, 4]:
250
            self.upsampling = upsampling
251
        else:
252
            raise ValueError("upsampling must be 1, 2 or 4")
253
        self.n_octaves = n_octaves
254
        self.n_scales = n_scales
255
        self.sigma_min = sigma_min / upsampling
256
        self.sigma_in = sigma_in
257
        self.c_dog = (2 ** (1 / n_scales) - 1) / (2 ** (1 / 3) - 1) * c_dog
258
        self.c_edge = c_edge
259
        self.n_bins = n_bins
260
        self.lambda_ori = lambda_ori
261
        self.c_max = c_max
262
        self.lambda_descr = lambda_descr
263
        self.n_hist = n_hist
264
        self.n_ori = n_ori
265
        self.delta_min = 1 / upsampling
266
        self.float_dtype = None
267
        self.scalespace_sigmas = None
268
        self.keypoints = None
269
        self.positions = None
270
        self.sigmas = None
271
        self.scales = None
272
        self.orientations = None
273
        self.octaves = None
274
        self.descriptors = None
275

276
    @property
277
    def deltas(self):
278
        """The sampling distances of all octaves"""
279
        deltas = self.delta_min * np.power(
280
            2, np.arange(self.n_octaves), dtype=self.float_dtype
281
        )
282
        return deltas
283

284
    def _set_number_of_octaves(self, image_shape):
285
        size_min = 12  # minimum size of last octave
286
        s0 = min(image_shape) * self.upsampling
287
        max_octaves = int(math.log2(s0 / size_min) + 1)
288
        if max_octaves < self.n_octaves:
289
            self.n_octaves = max_octaves
290

291
    def _create_scalespace(self, image):
292
        """Source: "Anatomy of the SIFT Method" Alg. 1
293
        Construction of the scalespace by gradually blurring (scales) and
294
        downscaling (octaves) the image.
295
        """
296
        scalespace = []
297
        if self.upsampling > 1:
298
            image = rescale(image, self.upsampling, order=1)
299

300
        # smooth to sigma_min, assuming sigma_in
301
        image = gaussian(
302
            image,
303
            sigma=self.upsampling * math.sqrt(self.sigma_min**2 - self.sigma_in**2),
304
            mode='reflect',
305
        )
306

307
        # Eq. 10:  sigmas.shape = (n_octaves, n_scales + 3).
308
        # The three extra scales are:
309
        #    One for the differences needed for DoG and two auxiliary
310
        #    images (one at either end) for peak_local_max with exclude
311
        #    border = True (see Fig. 5)
312
        # The smoothing doubles after n_scales steps.
313
        tmp = np.power(2, np.arange(self.n_scales + 3) / self.n_scales)
314
        tmp *= self.sigma_min
315
        # all sigmas for the gaussian scalespace
316
        sigmas = self.deltas[:, np.newaxis] / self.deltas[0] * tmp[np.newaxis, :]
317
        self.scalespace_sigmas = sigmas
318

319
        # Eq. 7: Gaussian smoothing depends on difference with previous sigma
320
        #        gaussian_sigmas.shape = (n_octaves, n_scales + 2)
321
        var_diff = np.diff(sigmas * sigmas, axis=1)
322
        gaussian_sigmas = np.sqrt(var_diff) / self.deltas[:, np.newaxis]
323

324
        # one octave is represented by a 3D image with depth (n_scales+x)
325
        for o in range(self.n_octaves):
326
            # Temporarily put scales axis first so octave[i] is C-contiguous
327
            # (this makes Gaussian filtering faster).
328
            octave = np.empty(
329
                (self.n_scales + 3,) + image.shape, dtype=self.float_dtype, order='C'
330
            )
331
            octave[0] = image
332
            for s in range(1, self.n_scales + 3):
333
                # blur new scale assuming sigma of the last one
334
                gaussian(
335
                    octave[s - 1],
336
                    sigma=gaussian_sigmas[o, s - 1],
337
                    mode='reflect',
338
                    out=octave[s],
339
                )
340
            # move scales to last axis as expected by other methods
341
            scalespace.append(np.moveaxis(octave, 0, -1))
342
            if o < self.n_octaves - 1:
343
                # downscale the image by taking every second pixel
344
                image = octave[self.n_scales][::2, ::2]
345
        return scalespace
346

347
    def _inrange(self, a, dim):
348
        return (
349
            (a[:, 0] > 0)
350
            & (a[:, 0] < dim[0] - 1)
351
            & (a[:, 1] > 0)
352
            & (a[:, 1] < dim[1] - 1)
353
        )
354

355
    def _find_localize_evaluate(self, dogspace, img_shape):
356
        """Source: "Anatomy of the SIFT Method" Alg. 4-9
357
        1) first find all extrema of a (3, 3, 3) neighborhood
358
        2) use second order Taylor development to refine the positions to
359
           sub-pixel precision
360
        3) filter out extrema that have low contrast and lie on edges or close
361
           to the image borders
362
        """
363
        extrema_pos = []
364
        extrema_scales = []
365
        extrema_sigmas = []
366
        threshold = self.c_dog * 0.8
367
        for o, (octave, delta) in enumerate(zip(dogspace, self.deltas)):
368
            # find extrema
369
            keys = _local_max(np.ascontiguousarray(octave), threshold)
370
            if keys.size == 0:
371
                extrema_pos.append(np.empty((0, 2)))
372
                continue
373

374
            # localize extrema
375
            oshape = octave.shape
376
            refinement_iterations = 5
377
            offset_max = 0.6
378
            for i in range(refinement_iterations):
379
                if i > 0:
380
                    # exclude any keys that have moved out of bounds
381
                    keys = keys[self._inrange(keys, oshape), :]
382

383
                # Jacobian and Hessian of all extrema
384
                grad = _sparse_gradient(octave, keys)
385
                hess = _hessian(octave, keys)
386

387
                # solve for offset of the extremum
388
                off = _offsets(grad, hess)
389
                if i == refinement_iterations - 1:
390
                    break
391
                # offset is too big and an increase would not bring us out of
392
                # bounds
393
                wrong_position_pos = np.logical_and(
394
                    off > offset_max, keys + 1 < tuple([a - 1 for a in oshape])
395
                )
396
                wrong_position_neg = np.logical_and(off < -offset_max, keys - 1 > 0)
397
                if not np.any(np.logical_or(wrong_position_neg, wrong_position_pos)):
398
                    break
399
                keys[wrong_position_pos] += 1
400
                keys[wrong_position_neg] -= 1
401

402
            # mask for all extrema that have been localized successfully
403
            finished = np.all(np.abs(off) < offset_max, axis=1)
404
            keys = keys[finished]
405
            off = off[finished]
406
            grad = [g[finished] for g in grad]
407

408
            # value of extremum in octave
409
            vals = octave[keys[:, 0], keys[:, 1], keys[:, 2]]
410
            # values at interpolated point
411
            w = vals
412
            for i in range(3):
413
                w += 0.5 * grad[i] * off[:, i]
414

415
            h00, h11, h01 = hess[0][finished], hess[1][finished], hess[3][finished]
416

417
            sigmaratio = self.scalespace_sigmas[0, 1] / self.scalespace_sigmas[0, 0]
418

419
            # filter for contrast, edgeness and borders
420
            contrast_threshold = self.c_dog
421
            contrast_filter = np.abs(w) > contrast_threshold
422

423
            edge_threshold = np.square(self.c_edge + 1) / self.c_edge
424
            edge_response = _edgeness(
425
                h00[contrast_filter], h11[contrast_filter], h01[contrast_filter]
426
            )
427
            edge_filter = np.abs(edge_response) <= edge_threshold
428

429
            keys = keys[contrast_filter][edge_filter]
430
            off = off[contrast_filter][edge_filter]
431
            yx = ((keys[:, :2] + off[:, :2]) * delta).astype(self.float_dtype)
432

433
            sigmas = self.scalespace_sigmas[o, keys[:, 2]] * np.power(
434
                sigmaratio, off[:, 2]
435
            )
436
            border_filter = np.all(
437
                np.logical_and(
438
                    (yx - sigmas[:, np.newaxis]) > 0.0,
439
                    (yx + sigmas[:, np.newaxis]) < img_shape,
440
                ),
441
                axis=1,
442
            )
443
            extrema_pos.append(yx[border_filter])
444
            extrema_scales.append(keys[border_filter, 2])
445
            extrema_sigmas.append(sigmas[border_filter])
446

447
        octave_indices = np.concatenate(
448
            [np.full(len(p), i) for i, p in enumerate(extrema_pos)]
449
        )
450

451
        if len(octave_indices) == 0:
452
            raise RuntimeError(
453
                "SIFT found no features. Try passing in an image containing "
454
                "greater intensity contrasts between adjacent pixels."
455
            )
456

457
        extrema_pos = np.concatenate(extrema_pos)
458
        extrema_scales = np.concatenate(extrema_scales)
459
        extrema_sigmas = np.concatenate(extrema_sigmas)
460
        return extrema_pos, extrema_scales, extrema_sigmas, octave_indices
461

462
    def _fit(self, h):
463
        """Refine the position of the peak by fitting it to a parabola"""
464
        return (h[0] - h[2]) / (2 * (h[0] + h[2] - 2 * h[1]))
465

466
    def _compute_orientation(
467
        self, positions_oct, scales_oct, sigmas_oct, octaves, gaussian_scalespace
468
    ):
469
        """Source: "Anatomy of the SIFT Method" Alg. 11
470
        Calculates the orientation of the gradient around every keypoint
471
        """
472
        gradient_space = []
473
        # list for keypoints that have more than one reference orientation
474
        keypoint_indices = []
475
        keypoint_angles = []
476
        keypoint_octave = []
477
        orientations = np.zeros_like(sigmas_oct, dtype=self.float_dtype)
478
        key_count = 0
479
        for o, (octave, delta) in enumerate(zip(gaussian_scalespace, self.deltas)):
480
            gradient_space.append(np.gradient(octave))
481

482
            in_oct = octaves == o
483
            if not np.any(in_oct):
484
                continue
485
            positions = positions_oct[in_oct]
486
            scales = scales_oct[in_oct]
487
            sigmas = sigmas_oct[in_oct]
488

489
            oshape = octave.shape[:2]
490
            # convert to octave's dimensions
491
            yx = positions / delta
492
            sigma = sigmas / delta
493

494
            # dimensions of the patch
495
            radius = 3 * self.lambda_ori * sigma
496
            p_min = np.maximum(0, yx - radius[:, np.newaxis] + 0.5).astype(int)
497
            p_max = np.minimum(
498
                yx + radius[:, np.newaxis] + 0.5, (oshape[0] - 1, oshape[1] - 1)
499
            ).astype(int)
500
            # orientation histogram
501
            hist = np.empty(self.n_bins, dtype=self.float_dtype)
502
            avg_kernel = np.full((3,), 1 / 3, dtype=self.float_dtype)
503
            for k in range(len(yx)):
504
                hist[:] = 0
505

506
                # use the patch coordinates to get the gradient and then
507
                # normalize them
508
                r, c = np.meshgrid(
509
                    np.arange(p_min[k, 0], p_max[k, 0] + 1),
510
                    np.arange(p_min[k, 1], p_max[k, 1] + 1),
511
                    indexing='ij',
512
                    sparse=True,
513
                )
514
                gradient_row = gradient_space[o][0][r, c, scales[k]]
515
                gradient_col = gradient_space[o][1][r, c, scales[k]]
516
                r = r.astype(self.float_dtype, copy=False)
517
                c = c.astype(self.float_dtype, copy=False)
518
                r -= yx[k, 0]
519
                c -= yx[k, 1]
520

521
                # gradient magnitude and angles
522
                magnitude = np.sqrt(np.square(gradient_row) + np.square(gradient_col))
523
                theta = np.mod(np.arctan2(gradient_col, gradient_row), 2 * np.pi)
524

525
                # more weight to center values
526
                kernel = np.exp(
527
                    np.divide(r * r + c * c, -2 * (self.lambda_ori * sigma[k]) ** 2)
528
                )
529

530
                # fill the histogram
531
                bins = np.floor(
532
                    (theta / (2 * np.pi) * self.n_bins + 0.5) % self.n_bins
533
                ).astype(int)
534
                np.add.at(hist, bins, kernel * magnitude)
535

536
                # smooth the histogram and find the maximum
537
                hist = np.concatenate((hist[-6:], hist, hist[:6]))
538
                for _ in range(6):  # number of smoothings
539
                    hist = np.convolve(hist, avg_kernel, mode='same')
540
                hist = hist[6:-6]
541
                max_filter = ndi.maximum_filter(hist, [3], mode='wrap')
542

543
                # if an angle is in 80% percent range of the maximum, a
544
                # new keypoint is created for it
545
                maxima = np.nonzero(
546
                    np.logical_and(
547
                        hist >= (self.c_max * np.max(hist)), max_filter == hist
548
                    )
549
                )
550

551
                # save the angles
552
                for c, m in enumerate(maxima[0]):
553
                    neigh = np.arange(m - 1, m + 2) % len(hist)
554
                    # use neighbors to fit a parabola, to get more accurate
555
                    # result
556
                    ori = (m + self._fit(hist[neigh]) + 0.5) * 2 * np.pi / self.n_bins
557
                    if ori > np.pi:
558
                        ori -= 2 * np.pi
559
                    if c == 0:
560
                        orientations[key_count] = ori
561
                    else:
562
                        keypoint_indices.append(key_count)
563
                        keypoint_angles.append(ori)
564
                        keypoint_octave.append(o)
565
                key_count += 1
566
        self.positions = np.concatenate(
567
            (positions_oct, positions_oct[keypoint_indices])
568
        )
569
        self.scales = np.concatenate((scales_oct, scales_oct[keypoint_indices]))
570
        self.sigmas = np.concatenate((sigmas_oct, sigmas_oct[keypoint_indices]))
571
        self.orientations = np.concatenate((orientations, keypoint_angles))
572
        self.octaves = np.concatenate((octaves, keypoint_octave))
573
        # return the gradient_space to reuse it to find the descriptor
574
        return gradient_space
575

576
    def _rotate(self, row, col, angle):
577
        c = math.cos(angle)
578
        s = math.sin(angle)
579
        rot_row = c * row + s * col
580
        rot_col = -s * row + c * col
581
        return rot_row, rot_col
582

583
    def _compute_descriptor(self, gradient_space):
584
        """Source: "Anatomy of the SIFT Method" Alg. 12
585
        Calculates the descriptor for every keypoint
586
        """
587
        n_key = len(self.scales)
588
        self.descriptors = np.empty(
589
            (n_key, self.n_hist**2 * self.n_ori), dtype=np.uint8
590
        )
591

592
        # indices of the histograms
593
        hists = np.arange(1, self.n_hist + 1, dtype=self.float_dtype)
594
        # indices of the bins
595
        bins = np.arange(1, self.n_ori + 1, dtype=self.float_dtype)
596

597
        key_numbers = np.arange(n_key)
598
        for o, (gradient, delta) in enumerate(zip(gradient_space, self.deltas)):
599
            in_oct = self.octaves == o
600
            if not np.any(in_oct):
601
                continue
602
            positions = self.positions[in_oct]
603
            scales = self.scales[in_oct]
604
            sigmas = self.sigmas[in_oct]
605
            orientations = self.orientations[in_oct]
606
            numbers = key_numbers[in_oct]
607

608
            dim = gradient[0].shape[:2]
609
            center_pos = positions / delta
610
            sigma = sigmas / delta
611

612
            # dimensions of the patch
613
            radius = self.lambda_descr * (1 + 1 / self.n_hist) * sigma
614
            radius_patch = math.sqrt(2) * radius
615
            p_min = np.asarray(
616
                np.maximum(0, center_pos - radius_patch[:, np.newaxis] + 0.5), dtype=int
617
            )
618
            p_max = np.asarray(
619
                np.minimum(
620
                    center_pos + radius_patch[:, np.newaxis] + 0.5,
621
                    (dim[0] - 1, dim[1] - 1),
622
                ),
623
                dtype=int,
624
            )
625

626
            for k in range(len(p_max)):
627
                rad_k = float(radius[k])
628
                ori = float(orientations[k])
629
                histograms = np.zeros(
630
                    (self.n_hist, self.n_hist, self.n_ori), dtype=self.float_dtype
631
                )
632
                # the patch
633
                r, c = np.meshgrid(
634
                    np.arange(p_min[k, 0], p_max[k, 0]),
635
                    np.arange(p_min[k, 1], p_max[k, 1]),
636
                    indexing='ij',
637
                    sparse=True,
638
                )
639
                # normalized coordinates
640
                r_norm = np.subtract(r, center_pos[k, 0], dtype=self.float_dtype)
641
                c_norm = np.subtract(c, center_pos[k, 1], dtype=self.float_dtype)
642
                r_norm, c_norm = self._rotate(r_norm, c_norm, ori)
643

644
                # select coordinates and gradient values within the patch
645
                inside = np.maximum(np.abs(r_norm), np.abs(c_norm)) < rad_k
646
                r_norm, c_norm = r_norm[inside], c_norm[inside]
647
                r_idx, c_idx = np.nonzero(inside)
648
                r = r[r_idx, 0]
649
                c = c[0, c_idx]
650
                gradient_row = gradient[0][r, c, scales[k]]
651
                gradient_col = gradient[1][r, c, scales[k]]
652
                # compute the (relative) gradient orientation
653
                theta = np.arctan2(gradient_col, gradient_row) - ori
654
                lam_sig = self.lambda_descr * float(sigma[k])
655
                # Gaussian weighted kernel magnitude
656
                kernel = np.exp((r_norm * r_norm + c_norm * c_norm) / (-2 * lam_sig**2))
657
                magnitude = (
658
                    np.sqrt(gradient_row * gradient_row + gradient_col * gradient_col)
659
                    * kernel
660
                )
661

662
                lam_sig_ratio = 2 * lam_sig / self.n_hist
663
                rc_bins = (hists - (1 + self.n_hist) / 2) * lam_sig_ratio
664
                rc_bin_spacing = lam_sig_ratio
665
                ori_bins = (2 * np.pi * bins) / self.n_ori
666

667
                # distances to the histograms and bins
668
                dist_r = np.abs(np.subtract.outer(rc_bins, r_norm))
669
                dist_c = np.abs(np.subtract.outer(rc_bins, c_norm))
670

671
                # the orientation histograms/bins that get the contribution
672
                near_t, near_t_val = _ori_distances(ori_bins, theta)
673

674
                # create the histogram
675
                _update_histogram(
676
                    histograms,
677
                    near_t,
678
                    near_t_val,
679
                    magnitude,
680
                    dist_r,
681
                    dist_c,
682
                    rc_bin_spacing,
683
                )
684

685
                # convert the histograms to a 1d descriptor
686
                histograms = histograms.reshape(-1)
687
                # saturate the descriptor
688
                histograms = np.minimum(histograms, 0.2 * np.linalg.norm(histograms))
689
                # normalize the descriptor
690
                descriptor = (512 * histograms) / np.linalg.norm(histograms)
691
                # quantize the descriptor
692
                descriptor = np.minimum(np.floor(descriptor), 255)
693
                self.descriptors[numbers[k], :] = descriptor
694

695
    def _preprocess(self, image):
696
        check_nD(image, 2)
697
        image = img_as_float(image)
698
        self.float_dtype = _supported_float_type(image.dtype)
699
        image = image.astype(self.float_dtype, copy=False)
700

701
        self._set_number_of_octaves(image.shape)
702
        return image
703

704
    def detect(self, image):
705
        """Detect the keypoints.
706

707
        Parameters
708
        ----------
709
        image : 2D array
710
            Input image.
711

712
        """
713
        image = self._preprocess(image)
714

715
        gaussian_scalespace = self._create_scalespace(image)
716

717
        dog_scalespace = [np.diff(layer, axis=2) for layer in gaussian_scalespace]
718

719
        positions, scales, sigmas, octaves = self._find_localize_evaluate(
720
            dog_scalespace, image.shape
721
        )
722

723
        self._compute_orientation(
724
            positions, scales, sigmas, octaves, gaussian_scalespace
725
        )
726

727
        self.keypoints = self.positions.round().astype(int)
728

729
    def extract(self, image):
730
        """Extract the descriptors for all keypoints in the image.
731

732
        Parameters
733
        ----------
734
        image : 2D array
735
            Input image.
736

737
        """
738
        image = self._preprocess(image)
739

740
        gaussian_scalespace = self._create_scalespace(image)
741

742
        gradient_space = [np.gradient(octave) for octave in gaussian_scalespace]
743

744
        self._compute_descriptor(gradient_space)
745

746
    def detect_and_extract(self, image):
747
        """Detect the keypoints and extract their descriptors.
748

749
        Parameters
750
        ----------
751
        image : 2D array
752
            Input image.
753

754
        """
755
        image = self._preprocess(image)
756

757
        gaussian_scalespace = self._create_scalespace(image)
758

759
        dog_scalespace = [np.diff(layer, axis=2) for layer in gaussian_scalespace]
760

761
        positions, scales, sigmas, octaves = self._find_localize_evaluate(
762
            dog_scalespace, image.shape
763
        )
764

765
        gradient_space = self._compute_orientation(
766
            positions, scales, sigmas, octaves, gaussian_scalespace
767
        )
768

769
        self._compute_descriptor(gradient_space)
770

771
        self.keypoints = self.positions.round().astype(int)
772
scikit-image

Использование cookies