scikit-image
771 lines · 28.7 KB
1import math
2
3import numpy as np
4import scipy.ndimage as ndi
5
6from .._shared.utils import check_nD, _supported_float_type
7from ..feature.util import DescriptorExtractor, FeatureDetector
8from .._shared.filters import gaussian
9from ..transform import rescale
10from ..util import img_as_float
11from ._sift import _local_max, _ori_distances, _update_histogram
12
13
14def _edgeness(hxx, hyy, hxy):
15"""Compute edgeness (eq. 18 of Otero et. al. IPOL paper)"""
16trace = hxx + hyy
17determinant = hxx * hyy - hxy * hxy
18return (trace * trace) / determinant
19
20
21def _sparse_gradient(vol, positions):
22"""Gradient of a 3D volume at the provided `positions`.
23
24For SIFT we only need the gradient at specific positions and do not need
25the gradient at the edge positions, so can just use this simple
26implementation instead of numpy.gradient.
27"""
28p0 = positions[..., 0]
29p1 = positions[..., 1]
30p2 = positions[..., 2]
31g0 = vol[p0 + 1, p1, p2] - vol[p0 - 1, p1, p2]
32g0 *= 0.5
33g1 = vol[p0, p1 + 1, p2] - vol[p0, p1 - 1, p2]
34g1 *= 0.5
35g2 = vol[p0, p1, p2 + 1] - vol[p0, p1, p2 - 1]
36g2 *= 0.5
37return g0, g1, g2
38
39
40def _hessian(d, positions):
41"""Compute the non-redundant 3D Hessian terms at the requested positions.
42
43Source: "Anatomy of the SIFT Method" p.380 (13)
44"""
45p0 = positions[..., 0]
46p1 = positions[..., 1]
47p2 = positions[..., 2]
48two_d0 = 2 * d[p0, p1, p2]
49# 0 = row, 1 = col, 2 = octave
50h00 = d[p0 - 1, p1, p2] + d[p0 + 1, p1, p2] - two_d0
51h11 = d[p0, p1 - 1, p2] + d[p0, p1 + 1, p2] - two_d0
52h22 = d[p0, p1, p2 - 1] + d[p0, p1, p2 + 1] - two_d0
53h01 = 0.25 * (
54d[p0 + 1, p1 + 1, p2]
55- d[p0 - 1, p1 + 1, p2]
56- d[p0 + 1, p1 - 1, p2]
57+ d[p0 - 1, p1 - 1, p2]
58)
59h02 = 0.25 * (
60d[p0 + 1, p1, p2 + 1]
61- d[p0 + 1, p1, p2 - 1]
62+ d[p0 - 1, p1, p2 - 1]
63- d[p0 - 1, p1, p2 + 1]
64)
65h12 = 0.25 * (
66d[p0, p1 + 1, p2 + 1]
67- d[p0, p1 + 1, p2 - 1]
68+ d[p0, p1 - 1, p2 - 1]
69- d[p0, p1 - 1, p2 + 1]
70)
71return (h00, h11, h22, h01, h02, h12)
72
73
74def _offsets(grad, hess):
75"""Compute position refinement offsets from gradient and Hessian.
76
77This is equivalent to np.linalg.solve(-H, J) where H is the Hessian
78matrix and J is the gradient (Jacobian).
79
80This analytical solution is adapted from (BSD-licensed) C code by
81Otero et. al (see SIFT docstring References).
82"""
83h00, h11, h22, h01, h02, h12 = hess
84g0, g1, g2 = grad
85det = h00 * h11 * h22
86det -= h00 * h12 * h12
87det -= h01 * h01 * h22
88det += 2 * h01 * h02 * h12
89det -= h02 * h02 * h11
90aa = (h11 * h22 - h12 * h12) / det
91ab = (h02 * h12 - h01 * h22) / det
92ac = (h01 * h12 - h02 * h11) / det
93bb = (h00 * h22 - h02 * h02) / det
94bc = (h01 * h02 - h00 * h12) / det
95cc = (h00 * h11 - h01 * h01) / det
96offset0 = -aa * g0 - ab * g1 - ac * g2
97offset1 = -ab * g0 - bb * g1 - bc * g2
98offset2 = -ac * g0 - bc * g1 - cc * g2
99return np.stack((offset0, offset1, offset2), axis=-1)
100
101
class SIFT(FeatureDetector, DescriptorExtractor):
    """SIFT feature detection and descriptor extraction.

    Parameters
    ----------
    upsampling : int, optional
        Prior to the feature detection the image is upscaled by a factor
        of 1 (no upscaling), 2 or 4. Method: Bi-cubic interpolation.
    n_octaves : int, optional
        Maximum number of octaves. With every octave the image size is
        halved and the sigma doubled. The number of octaves will be
        reduced as needed to keep at least 12 pixels along each dimension
        at the smallest scale.
    n_scales : int, optional
        Maximum number of scales in every octave.
    sigma_min : float, optional
        The blur level of the seed image. If upsampling is enabled
        sigma_min is scaled by factor 1/upsampling
    sigma_in : float, optional
        The assumed blur level of the input image.
    c_dog : float, optional
        Threshold to discard low contrast extrema in the DoG. It's final
        value is dependent on n_scales by the relation:
        final_c_dog = (2^(1/n_scales)-1) / (2^(1/3)-1) * c_dog
    c_edge : float, optional
        Threshold to discard extrema that lie in edges. If H is the
        Hessian of an extremum, its "edgeness" is described by
        tr(H)²/det(H). If the edgeness is higher than
        (c_edge + 1)²/c_edge, the extremum is discarded.
    n_bins : int, optional
        Number of bins in the histogram that describes the gradient
        orientations around keypoint.
    lambda_ori : float, optional
        The window used to find the reference orientation of a keypoint
        has a width of 6 * lambda_ori * sigma and is weighted by a
        standard deviation of 2 * lambda_ori * sigma.
    c_max : float, optional
        The threshold at which a secondary peak in the orientation
        histogram is accepted as orientation
    lambda_descr : float, optional
        The window used to define the descriptor of a keypoint has a width
        of 2 * lambda_descr * sigma * (n_hist+1)/n_hist and is weighted by
        a standard deviation of lambda_descr * sigma.
    n_hist : int, optional
        The window used to define the descriptor of a keypoint consists of
        n_hist * n_hist histograms.
    n_ori : int, optional
        The number of bins in the histograms of the descriptor patch.

    Attributes
    ----------
    delta_min : float
        The sampling distance of the first octave. It's final value is
        1/upsampling.
    float_dtype : type
        The datatype of the image.
    scalespace_sigmas : (n_octaves, n_scales + 3) array
        The sigma value of all scales in all octaves.
    keypoints : (N, 2) array
        Keypoint coordinates as ``(row, col)``.
    positions : (N, 2) array
        Subpixel-precision keypoint coordinates as ``(row, col)``.
    sigmas : (N,) array
        The corresponding sigma (blur) value of a keypoint.
    scales : (N,) array
        The corresponding scale of a keypoint.
    orientations : (N,) array
        The orientations of the gradient around every keypoint.
    octaves : (N,) array
        The corresponding octave of a keypoint.
    descriptors : (N, n_hist*n_hist*n_ori) array
        The descriptors of a keypoint.

    Notes
    -----
    The SIFT algorithm was developed by David Lowe [1]_, [2]_ and later
    patented by the University of British Columbia. Since the patent expired in
    2020 it's free to use. The implementation here closely follows the
    detailed description in [3]_, including use of the same default parameters.

    References
    ----------
    .. [1] D.G. Lowe. "Object recognition from local scale-invariant
           features", Proceedings of the Seventh IEEE International
           Conference on Computer Vision, 1999, vol.2, pp. 1150-1157.
           :DOI:`10.1109/ICCV.1999.790410`

    .. [2] D.G. Lowe. "Distinctive Image Features from Scale-Invariant
           Keypoints", International Journal of Computer Vision, 2004,
           vol. 60, pp. 91–110.
           :DOI:`10.1023/B:VISI.0000029664.99615.94`

    .. [3] I. R. Otero and M. Delbracio. "Anatomy of the SIFT Method",
           Image Processing On Line, 4 (2014), pp. 370–396.
           :DOI:`10.5201/ipol.2014.82`

    Examples
    --------
    >>> from skimage.feature import SIFT, match_descriptors
    >>> from skimage.data import camera
    >>> from skimage.transform import rotate
    >>> img1 = camera()
    >>> img2 = rotate(camera(), 90)
    >>> detector_extractor1 = SIFT()
    >>> detector_extractor2 = SIFT()
    >>> detector_extractor1.detect_and_extract(img1)
    >>> detector_extractor2.detect_and_extract(img2)
    >>> matches = match_descriptors(detector_extractor1.descriptors,
    ...                             detector_extractor2.descriptors,
    ...                             max_ratio=0.6)
    >>> matches[10:15]
    array([[ 10, 412],
           [ 11, 417],
           [ 12, 407],
           [ 13, 411],
           [ 14, 406]])
    >>> detector_extractor1.keypoints[matches[10:15, 0]]
    array([[ 95, 214],
           [ 97, 211],
           [ 97, 218],
           [102, 215],
           [104, 218]])
    >>> detector_extractor2.keypoints[matches[10:15, 1]]
    array([[297,  95],
           [301,  97],
           [294,  97],
           [297, 102],
           [293, 104]])

    """

    def __init__(
        self,
        upsampling=2,
        n_octaves=8,
        n_scales=3,
        sigma_min=1.6,
        sigma_in=0.5,
        c_dog=0.04 / 3,
        c_edge=10,
        n_bins=36,
        lambda_ori=1.5,
        c_max=0.8,
        lambda_descr=6,
        n_hist=4,
        n_ori=8,
    ):
        if upsampling in [1, 2, 4]:
            self.upsampling = upsampling
        else:
            raise ValueError("upsampling must be 1, 2 or 4")
        self.n_octaves = n_octaves
        self.n_scales = n_scales
        # sigma_min is defined relative to the (possibly upsampled) seed image
        self.sigma_min = sigma_min / upsampling
        self.sigma_in = sigma_in
        # rescale the contrast threshold to the chosen number of scales
        # (see class docstring: final_c_dog relation)
        self.c_dog = (2 ** (1 / n_scales) - 1) / (2 ** (1 / 3) - 1) * c_dog
        self.c_edge = c_edge
        self.n_bins = n_bins
        self.lambda_ori = lambda_ori
        self.c_max = c_max
        self.lambda_descr = lambda_descr
        self.n_hist = n_hist
        self.n_ori = n_ori
        # sampling distance of the first octave
        self.delta_min = 1 / upsampling
        self.float_dtype = None
        self.scalespace_sigmas = None
        self.keypoints = None
        self.positions = None
        self.sigmas = None
        self.scales = None
        self.orientations = None
        self.octaves = None
        self.descriptors = None

    @property
    def deltas(self):
        """The sampling distances of all octaves (doubles per octave)."""
        deltas = self.delta_min * np.power(
            2, np.arange(self.n_octaves), dtype=self.float_dtype
        )
        return deltas

    def _set_number_of_octaves(self, image_shape):
        # Reduce self.n_octaves so the smallest octave keeps at least
        # `size_min` pixels along its shortest dimension.
        size_min = 12  # minimum size of last octave
        s0 = min(image_shape) * self.upsampling
        max_octaves = int(math.log2(s0 / size_min) + 1)
        if max_octaves < self.n_octaves:
            self.n_octaves = max_octaves

    def _create_scalespace(self, image):
        """Source: "Anatomy of the SIFT Method" Alg. 1
        Construction of the scalespace by gradually blurring (scales) and
        downscaling (octaves) the image.
        """
        scalespace = []
        if self.upsampling > 1:
            image = rescale(image, self.upsampling, order=1)

        # smooth to sigma_min, assuming sigma_in
        image = gaussian(
            image,
            sigma=self.upsampling * math.sqrt(self.sigma_min**2 - self.sigma_in**2),
            mode='reflect',
        )

        # Eq. 10:  sigmas.shape = (n_octaves, n_scales + 3).
        # The three extra scales are:
        #    One for the differences needed for DoG and two auxiliary
        #    images (one at either end) for peak_local_max with exclude
        #    border = True (see Fig. 5)
        # The smoothing doubles after n_scales steps.
        tmp = np.power(2, np.arange(self.n_scales + 3) / self.n_scales)
        tmp *= self.sigma_min
        # all sigmas for the gaussian scalespace
        sigmas = self.deltas[:, np.newaxis] / self.deltas[0] * tmp[np.newaxis, :]
        self.scalespace_sigmas = sigmas

        # Eq. 7: Gaussian smoothing depends on difference with previous sigma
        # gaussian_sigmas.shape = (n_octaves, n_scales + 2)
        var_diff = np.diff(sigmas * sigmas, axis=1)
        gaussian_sigmas = np.sqrt(var_diff) / self.deltas[:, np.newaxis]

        # one octave is represented by a 3D image with depth (n_scales+x)
        for o in range(self.n_octaves):
            # Temporarily put scales axis first so octave[i] is C-contiguous
            # (this makes Gaussian filtering faster).
            octave = np.empty(
                (self.n_scales + 3,) + image.shape, dtype=self.float_dtype, order='C'
            )
            octave[0] = image
            for s in range(1, self.n_scales + 3):
                # blur new scale assuming sigma of the last one
                gaussian(
                    octave[s - 1],
                    sigma=gaussian_sigmas[o, s - 1],
                    mode='reflect',
                    out=octave[s],
                )
            # move scales to last axis as expected by other methods
            scalespace.append(np.moveaxis(octave, 0, -1))
            if o < self.n_octaves - 1:
                # downscale the image by taking every second pixel
                image = octave[self.n_scales][::2, ::2]
        return scalespace

    def _inrange(self, a, dim):
        # Boolean mask: rows of `a` whose first two coordinates lie strictly
        # inside the (row, col) bounds given by `dim` (1-pixel border excluded).
        return (
            (a[:, 0] > 0)
            & (a[:, 0] < dim[0] - 1)
            & (a[:, 1] > 0)
            & (a[:, 1] < dim[1] - 1)
        )

    def _find_localize_evaluate(self, dogspace, img_shape):
        """Source: "Anatomy of the SIFT Method" Alg. 4-9
        1) first find all extrema of a (3, 3, 3) neighborhood
        2) use second order Taylor development to refine the positions to
           sub-pixel precision
        3) filter out extrema that have low contrast and lie on edges or close
           to the image borders
        """
        extrema_pos = []
        extrema_scales = []
        extrema_sigmas = []
        threshold = self.c_dog * 0.8
        for o, (octave, delta) in enumerate(zip(dogspace, self.deltas)):
            # find extrema
            keys = _local_max(np.ascontiguousarray(octave), threshold)
            if keys.size == 0:
                extrema_pos.append(np.empty((0, 2)))
                continue

            # localize extrema
            oshape = octave.shape
            refinement_iterations = 5
            offset_max = 0.6
            for i in range(refinement_iterations):
                if i > 0:
                    # exclude any keys that have moved out of bounds
                    keys = keys[self._inrange(keys, oshape), :]

                # Jacobian and Hessian of all extrema
                grad = _sparse_gradient(octave, keys)
                hess = _hessian(octave, keys)

                # solve for offset of the extremum
                off = _offsets(grad, hess)
                if i == refinement_iterations - 1:
                    break
                # offset is too big and an increase would not bring us out of
                # bounds
                wrong_position_pos = np.logical_and(
                    off > offset_max, keys + 1 < tuple([a - 1 for a in oshape])
                )
                wrong_position_neg = np.logical_and(off < -offset_max, keys - 1 > 0)
                if not np.any(np.logical_or(wrong_position_neg, wrong_position_pos)):
                    break
                keys[wrong_position_pos] += 1
                keys[wrong_position_neg] -= 1

            # mask for all extrema that have been localized successfully
            finished = np.all(np.abs(off) < offset_max, axis=1)
            keys = keys[finished]
            off = off[finished]
            grad = [g[finished] for g in grad]

            # value of extremum in octave
            vals = octave[keys[:, 0], keys[:, 1], keys[:, 2]]
            # values at interpolated point (second-order Taylor expansion)
            w = vals
            for i in range(3):
                w += 0.5 * grad[i] * off[:, i]

            h00, h11, h01 = hess[0][finished], hess[1][finished], hess[3][finished]

            # ratio between the sigmas of two adjacent scales
            sigmaratio = self.scalespace_sigmas[0, 1] / self.scalespace_sigmas[0, 0]

            # filter for contrast, edgeness and borders
            contrast_threshold = self.c_dog
            contrast_filter = np.abs(w) > contrast_threshold

            edge_threshold = np.square(self.c_edge + 1) / self.c_edge
            edge_response = _edgeness(
                h00[contrast_filter], h11[contrast_filter], h01[contrast_filter]
            )
            edge_filter = np.abs(edge_response) <= edge_threshold

            keys = keys[contrast_filter][edge_filter]
            off = off[contrast_filter][edge_filter]
            # subpixel (row, col) positions in input-image coordinates
            yx = ((keys[:, :2] + off[:, :2]) * delta).astype(self.float_dtype)

            sigmas = self.scalespace_sigmas[o, keys[:, 2]] * np.power(
                sigmaratio, off[:, 2]
            )
            # discard keypoints whose support window crosses the image border
            border_filter = np.all(
                np.logical_and(
                    (yx - sigmas[:, np.newaxis]) > 0.0,
                    (yx + sigmas[:, np.newaxis]) < img_shape,
                ),
                axis=1,
            )
            extrema_pos.append(yx[border_filter])
            extrema_scales.append(keys[border_filter, 2])
            extrema_sigmas.append(sigmas[border_filter])

        octave_indices = np.concatenate(
            [np.full(len(p), i) for i, p in enumerate(extrema_pos)]
        )

        if len(octave_indices) == 0:
            raise RuntimeError(
                "SIFT found no features. Try passing in an image containing "
                "greater intensity contrasts between adjacent pixels."
            )

        extrema_pos = np.concatenate(extrema_pos)
        extrema_scales = np.concatenate(extrema_scales)
        extrema_sigmas = np.concatenate(extrema_sigmas)
        return extrema_pos, extrema_scales, extrema_sigmas, octave_indices

    def _fit(self, h):
        """Refine the position of the peak by fitting it to a parabola"""
        return (h[0] - h[2]) / (2 * (h[0] + h[2] - 2 * h[1]))

    def _compute_orientation(
        self, positions_oct, scales_oct, sigmas_oct, octaves, gaussian_scalespace
    ):
        """Source: "Anatomy of the SIFT Method" Alg. 11
        Calculates the orientation of the gradient around every keypoint
        """
        gradient_space = []
        # list for keypoints that have more than one reference orientation
        keypoint_indices = []
        keypoint_angles = []
        keypoint_octave = []
        orientations = np.zeros_like(sigmas_oct, dtype=self.float_dtype)
        key_count = 0
        for o, (octave, delta) in enumerate(zip(gaussian_scalespace, self.deltas)):
            gradient_space.append(np.gradient(octave))

            in_oct = octaves == o
            if not np.any(in_oct):
                continue
            positions = positions_oct[in_oct]
            scales = scales_oct[in_oct]
            sigmas = sigmas_oct[in_oct]

            oshape = octave.shape[:2]
            # convert to octave's dimensions
            yx = positions / delta
            sigma = sigmas / delta

            # dimensions of the patch
            radius = 3 * self.lambda_ori * sigma
            p_min = np.maximum(0, yx - radius[:, np.newaxis] + 0.5).astype(int)
            p_max = np.minimum(
                yx + radius[:, np.newaxis] + 0.5, (oshape[0] - 1, oshape[1] - 1)
            ).astype(int)
            # orientation histogram
            hist = np.empty(self.n_bins, dtype=self.float_dtype)
            avg_kernel = np.full((3,), 1 / 3, dtype=self.float_dtype)
            for k in range(len(yx)):
                hist[:] = 0

                # use the patch coordinates to get the gradient and then
                # normalize them
                r, c = np.meshgrid(
                    np.arange(p_min[k, 0], p_max[k, 0] + 1),
                    np.arange(p_min[k, 1], p_max[k, 1] + 1),
                    indexing='ij',
                    sparse=True,
                )
                gradient_row = gradient_space[o][0][r, c, scales[k]]
                gradient_col = gradient_space[o][1][r, c, scales[k]]
                r = r.astype(self.float_dtype, copy=False)
                c = c.astype(self.float_dtype, copy=False)
                r -= yx[k, 0]
                c -= yx[k, 1]

                # gradient magnitude and angles
                magnitude = np.sqrt(np.square(gradient_row) + np.square(gradient_col))
                theta = np.mod(np.arctan2(gradient_col, gradient_row), 2 * np.pi)

                # more weight to center values
                kernel = np.exp(
                    np.divide(r * r + c * c, -2 * (self.lambda_ori * sigma[k]) ** 2)
                )

                # fill the histogram
                bins = np.floor(
                    (theta / (2 * np.pi) * self.n_bins + 0.5) % self.n_bins
                ).astype(int)
                np.add.at(hist, bins, kernel * magnitude)

                # smooth the histogram and find the maximum
                # (pad circularly so the box smoothing wraps around)
                hist = np.concatenate((hist[-6:], hist, hist[:6]))
                for _ in range(6):  # number of smoothings
                    hist = np.convolve(hist, avg_kernel, mode='same')
                hist = hist[6:-6]
                max_filter = ndi.maximum_filter(hist, [3], mode='wrap')

                # if an angle is in 80% percent range of the maximum, a
                # new keypoint is created for it
                maxima = np.nonzero(
                    np.logical_and(
                        hist >= (self.c_max * np.max(hist)), max_filter == hist
                    )
                )

                # save the angles
                for c, m in enumerate(maxima[0]):
                    neigh = np.arange(m - 1, m + 2) % len(hist)
                    # use neighbors to fit a parabola, to get more accurate
                    # result
                    ori = (m + self._fit(hist[neigh]) + 0.5) * 2 * np.pi / self.n_bins
                    if ori > np.pi:
                        ori -= 2 * np.pi
                    if c == 0:
                        orientations[key_count] = ori
                    else:
                        keypoint_indices.append(key_count)
                        keypoint_angles.append(ori)
                        keypoint_octave.append(o)
                key_count += 1
        # duplicate keypoints that have secondary orientations
        self.positions = np.concatenate(
            (positions_oct, positions_oct[keypoint_indices])
        )
        self.scales = np.concatenate((scales_oct, scales_oct[keypoint_indices]))
        self.sigmas = np.concatenate((sigmas_oct, sigmas_oct[keypoint_indices]))
        self.orientations = np.concatenate((orientations, keypoint_angles))
        self.octaves = np.concatenate((octaves, keypoint_octave))
        # return the gradient_space to reuse it to find the descriptor
        return gradient_space

    def _rotate(self, row, col, angle):
        # Rotate (row, col) coordinates by `angle` radians.
        c = math.cos(angle)
        s = math.sin(angle)
        rot_row = c * row + s * col
        rot_col = -s * row + c * col
        return rot_row, rot_col

    def _compute_descriptor(self, gradient_space):
        """Source: "Anatomy of the SIFT Method" Alg. 12
        Calculates the descriptor for every keypoint
        """
        n_key = len(self.scales)
        self.descriptors = np.empty(
            (n_key, self.n_hist**2 * self.n_ori), dtype=np.uint8
        )

        # indices of the histograms
        hists = np.arange(1, self.n_hist + 1, dtype=self.float_dtype)
        # indices of the bins
        bins = np.arange(1, self.n_ori + 1, dtype=self.float_dtype)

        key_numbers = np.arange(n_key)
        for o, (gradient, delta) in enumerate(zip(gradient_space, self.deltas)):
            in_oct = self.octaves == o
            if not np.any(in_oct):
                continue
            positions = self.positions[in_oct]
            scales = self.scales[in_oct]
            sigmas = self.sigmas[in_oct]
            orientations = self.orientations[in_oct]
            numbers = key_numbers[in_oct]

            dim = gradient[0].shape[:2]
            center_pos = positions / delta
            sigma = sigmas / delta

            # dimensions of the patch
            radius = self.lambda_descr * (1 + 1 / self.n_hist) * sigma
            # sqrt(2) margin so the rotated square patch is fully covered
            radius_patch = math.sqrt(2) * radius
            p_min = np.asarray(
                np.maximum(0, center_pos - radius_patch[:, np.newaxis] + 0.5), dtype=int
            )
            p_max = np.asarray(
                np.minimum(
                    center_pos + radius_patch[:, np.newaxis] + 0.5,
                    (dim[0] - 1, dim[1] - 1),
                ),
                dtype=int,
            )

            for k in range(len(p_max)):
                rad_k = float(radius[k])
                ori = float(orientations[k])
                histograms = np.zeros(
                    (self.n_hist, self.n_hist, self.n_ori), dtype=self.float_dtype
                )
                # the patch
                r, c = np.meshgrid(
                    np.arange(p_min[k, 0], p_max[k, 0]),
                    np.arange(p_min[k, 1], p_max[k, 1]),
                    indexing='ij',
                    sparse=True,
                )
                # normalized coordinates
                r_norm = np.subtract(r, center_pos[k, 0], dtype=self.float_dtype)
                c_norm = np.subtract(c, center_pos[k, 1], dtype=self.float_dtype)
                r_norm, c_norm = self._rotate(r_norm, c_norm, ori)

                # select coordinates and gradient values within the patch
                inside = np.maximum(np.abs(r_norm), np.abs(c_norm)) < rad_k
                r_norm, c_norm = r_norm[inside], c_norm[inside]
                r_idx, c_idx = np.nonzero(inside)
                r = r[r_idx, 0]
                c = c[0, c_idx]
                gradient_row = gradient[0][r, c, scales[k]]
                gradient_col = gradient[1][r, c, scales[k]]
                # compute the (relative) gradient orientation
                theta = np.arctan2(gradient_col, gradient_row) - ori
                lam_sig = self.lambda_descr * float(sigma[k])
                # Gaussian weighted kernel magnitude
                kernel = np.exp((r_norm * r_norm + c_norm * c_norm) / (-2 * lam_sig**2))
                magnitude = (
                    np.sqrt(gradient_row * gradient_row + gradient_col * gradient_col)
                    * kernel
                )

                lam_sig_ratio = 2 * lam_sig / self.n_hist
                rc_bins = (hists - (1 + self.n_hist) / 2) * lam_sig_ratio
                rc_bin_spacing = lam_sig_ratio
                ori_bins = (2 * np.pi * bins) / self.n_ori

                # distances to the histograms and bins
                dist_r = np.abs(np.subtract.outer(rc_bins, r_norm))
                dist_c = np.abs(np.subtract.outer(rc_bins, c_norm))

                # the orientation histograms/bins that get the contribution
                near_t, near_t_val = _ori_distances(ori_bins, theta)

                # create the histogram
                _update_histogram(
                    histograms,
                    near_t,
                    near_t_val,
                    magnitude,
                    dist_r,
                    dist_c,
                    rc_bin_spacing,
                )

                # convert the histograms to a 1d descriptor
                histograms = histograms.reshape(-1)
                # saturate the descriptor
                histograms = np.minimum(histograms, 0.2 * np.linalg.norm(histograms))
                # normalize the descriptor
                descriptor = (512 * histograms) / np.linalg.norm(histograms)
                # quantize the descriptor
                descriptor = np.minimum(np.floor(descriptor), 255)
                self.descriptors[numbers[k], :] = descriptor

    def _preprocess(self, image):
        # Validate, convert to float, and size the octave pyramid.
        check_nD(image, 2)
        image = img_as_float(image)
        self.float_dtype = _supported_float_type(image.dtype)
        image = image.astype(self.float_dtype, copy=False)

        self._set_number_of_octaves(image.shape)
        return image

    def detect(self, image):
        """Detect the keypoints.

        Parameters
        ----------
        image : 2D array
            Input image.

        """
        image = self._preprocess(image)

        gaussian_scalespace = self._create_scalespace(image)

        dog_scalespace = [np.diff(layer, axis=2) for layer in gaussian_scalespace]

        positions, scales, sigmas, octaves = self._find_localize_evaluate(
            dog_scalespace, image.shape
        )

        self._compute_orientation(
            positions, scales, sigmas, octaves, gaussian_scalespace
        )

        self.keypoints = self.positions.round().astype(int)

    def extract(self, image):
        """Extract the descriptors for all keypoints in the image.

        Parameters
        ----------
        image : 2D array
            Input image.

        """
        image = self._preprocess(image)

        gaussian_scalespace = self._create_scalespace(image)

        gradient_space = [np.gradient(octave) for octave in gaussian_scalespace]

        self._compute_descriptor(gradient_space)

    def detect_and_extract(self, image):
        """Detect the keypoints and extract their descriptors.

        Parameters
        ----------
        image : 2D array
            Input image.

        """
        image = self._preprocess(image)

        gaussian_scalespace = self._create_scalespace(image)

        dog_scalespace = [np.diff(layer, axis=2) for layer in gaussian_scalespace]

        positions, scales, sigmas, octaves = self._find_localize_evaluate(
            dog_scalespace, image.shape
        )

        gradient_space = self._compute_orientation(
            positions, scales, sigmas, octaves, gaussian_scalespace
        )

        self._compute_descriptor(gradient_space)

        self.keypoints = self.positions.round().astype(int)