scikit-image
727 строк · 22.8 Кб
1# -*- python -*-
2
3"""
4Cython implementation of a binary min heap.
5"""
6# cython specific imports
7from libc.stdlib cimport malloc, free
8
9cdef extern from "pyport.h":
10cnp.float64_t Py_HUGE_VAL
11
12cdef VALUE_T inf = Py_HUGE_VAL
13
14# this is handy
15cdef inline INDEX_T index_min(INDEX_T a, INDEX_T b) noexcept nogil:
16return a if a <= b else b
17
18
19cdef class BinaryHeap:
20"""BinaryHeap(initial_capacity=128)
21
22A binary heap class that can store values and an integer reference.
23
24A binary heap is an object to store values in, optimized in such a way
25that the minimum (or maximum, but a minimum in this implementation)
26value can be found in O(log2(N)) time. In this implementation, a reference
27value (a single integer) can also be stored with each value.
28
29Use the methods push() and pop() to put in or extract values.
30In C, use the corresponding push_fast() and pop_fast().
31
32Parameters
33----------
34initial_capacity : int
35Estimate of the size of the heap, if known in advance. (In any case,
36the heap will dynamically grow and shrink as required, though never
37below the `initial_capacity`.)
38
39Attributes
40----------
41count : int
42The number of values in the heap
43levels : int
44The number of levels in the binary heap (see Notes below). The values
45are stored in the last level, so 2**levels is the capacity of the
46heap before another resize is required.
47min_levels : int
48The minimum number of levels in the heap (relates to the
49`initial_capacity` parameter.)
50
51Notes
52-----
53This implementation stores the binary heap in an array twice as long as
54the number of elements in the heap. The array is structured in levels,
55starting at level 0 with a single value, doubling the amount of values in
56each level. The final level contains the actual values, the level before
57it contains the pairwise minimum values. The level before that contains
58the pairwise minimum values of that level, etc. Take a look at this
59illustration:
60
61level: 0 11 2222 33333333 4444444444444444
62index: 0 12 3456 78901234 5678901234567890
631 2 3
64
65The actual values are stored in level 4. The minimum value of position 15
66and 16 is stored in position 7. min(17,18)->8, min(7,8)->3, min(3,4)->1.
67When adding a value, only the path to the top has to be updated, which
68takesO(log2(N)) time.
69
70The advantage of this implementation relative to more common
71implementations that swap values when pushing to the array is that data
72only needs to be swapped once when an element is removed. This means that
73keeping an array of references along with the values is very inexpensive.
74Th disadvantage is that if you pop the minimum value, the tree has to be
75traced from top to bottom and back. So if you only want values and no
76references, this implementation will probably be slower. If you need
77references (and maybe cross references to be kept up to date) this
78implementation will be faster.
79
80"""
81
82## Basic methods
83# The following lines are always "inlined", but documented here for
84# clarity:
85#
86# To calculate the start index of a certain level:
87# 2**l-1 # LevelStart
88# Note that in inner loops, this may also be represented as (1<<l)-1,
89# because code of the form x**y goes via the python pow operations and
90# can thus be a bit slower.
91#
92# To calculate the corresponding ABSOLUTE index at the next level:
93# i*2+1 # CalcNextAbs
94#
95# To calculate the corresponding ABSOLUTE index at the previous level:
96# (i-1)/2 # CalcPrevAbs
97#
98# To calculate the capacity at a certain level:
99# 2**l
100
101def __cinit__(self, INDEX_T initial_capacity=128, *args, **kws):
102# calc levels from the default capacity
103cdef LEVELS_T levels = 0
104while 2**levels < initial_capacity:
105levels += 1
106# set levels
107self.min_levels = self.levels = levels
108
109# we start with 0 values
110self.count = 0
111
112# allocate arrays
113cdef INDEX_T number = 2**self.levels
114self._values = <VALUE_T *>malloc(2 * number * sizeof(VALUE_T))
115self._references = <REFERENCE_T *>malloc(number * sizeof(REFERENCE_T))
116
117def __init__(self, INDEX_T initial_capacity=128):
118"""__init__(initial_capacity=128)
119
120Class constructor.
121
122Takes an optional parameter 'initial_capacity' so that
123if the required heap capacity is known or can be estimated in advance,
124there will need to be fewer resize operations on the heap.
125
126"""
127if self._values is NULL or self._references is NULL:
128raise MemoryError()
129self.reset()
130
131def reset(self):
132"""reset()
133
134Reset the heap to default, empty state.
135
136"""
137cdef INDEX_T number = 2**self.levels
138cdef INDEX_T i
139cdef VALUE_T *values = self._values
140for i in range(number * 2):
141values[i] = inf
142
143def __dealloc__(self):
144free(self._values)
145free(self._references)
146
147def __str__(self):
148s = ''
149for level in range(1, self.levels + 1):
150i0 = 2**level - 1 # LevelStart
151s += 'level %i: ' % level
152for i in range(i0, i0 + 2**level):
153s += '%g, ' % self._values[i]
154s = s[:-1] + '\n'
155return s
156
157## C Maintenance methods
158
159cdef void _add_or_remove_level(self, LEVELS_T add_or_remove) noexcept nogil:
160# init indexing ints
161cdef INDEX_T i, i1, i2, n
162
163# new amount of levels
164cdef LEVELS_T new_levels = self.levels + add_or_remove
165
166# allocate new arrays
167cdef INDEX_T number = 2**new_levels
168cdef VALUE_T *values
169cdef REFERENCE_T *references
170values = <VALUE_T *>malloc(number * 2 * sizeof(VALUE_T))
171references = <REFERENCE_T *>malloc(number * sizeof(REFERENCE_T))
172if values is NULL or references is NULL:
173free(values)
174free(references)
175with gil:
176raise MemoryError()
177
178# init arrays
179for i in range(number * 2):
180values[i] = inf
181for i in range(number):
182references[i] = -1
183
184# copy data
185cdef VALUE_T *old_values = self._values
186cdef REFERENCE_T *old_references = self._references
187if self.count:
188i1 = 2**new_levels - 1 # LevelStart
189i2 = 2**self.levels - 1 # LevelStart
190n = index_min(2**new_levels, 2**self.levels)
191for i in range(n):
192values[i1+i] = old_values[i2+i]
193for i in range(n):
194references[i] = old_references[i]
195
196# make current
197free(self._values)
198free(self._references)
199self._values = values
200self._references = references
201
202# we need a full update
203self.levels = new_levels
204self._update()
205
206cdef void _update(self) noexcept nogil:
207"""Update the full tree from the bottom up.
208
209This should be done after resizing.
210
211"""
212# shorter name for values
213cdef VALUE_T *values = self._values
214
215# Note that i represents an absolute index here
216cdef INDEX_T i0, i, ii, n
217cdef LEVELS_T level
218
219# track tree
220for level in range(self.levels, 1, -1):
221i0 = (1 << level) - 1 # 2**level-1 = LevelStart
222n = i0 + 1 # 2**level
223for i in range(i0, i0+n, 2):
224ii = (i-1) // 2 # CalcPrevAbs
225if values[i] < values[i+1]:
226values[ii] = values[i]
227else:
228values[ii] = values[i+1]
229
230cdef void _update_one(self, INDEX_T i) noexcept nogil:
231"""Update the tree for one value."""
232# shorter name for values
233cdef VALUE_T *values = self._values
234
235# make index uneven
236if i % 2 == 0:
237i -= 1
238
239# track tree
240cdef INDEX_T ii
241cdef LEVELS_T level
242for level in range(self.levels, 1, -1):
243ii = (i-1) // 2 # CalcPrevAbs
244
245# test
246if values[i] < values[i+1]:
247values[ii] = values[i]
248else:
249values[ii] = values[i+1]
250# next
251if ii % 2:
252i = ii
253else:
254i = ii - 1
255
256cdef void _remove(self, INDEX_T i1) noexcept nogil:
257"""Remove a value from the heap. By index."""
258cdef LEVELS_T levels = self.levels
259cdef INDEX_T count = self.count
260# get indices
261cdef INDEX_T i0 = (1 << levels) - 1 # 2**self.levels - 1 # LevelStart
262cdef INDEX_T i2 = i0 + count - 1
263
264# get relative indices
265cdef INDEX_T r1 = i1 - i0
266cdef INDEX_T r2 = count - 1
267
268cdef VALUE_T *values = self._values
269cdef REFERENCE_T *references = self._references
270
271# swap with last
272values[i1] = values[i2]
273references[r1] = references[r2]
274
275# make last Null
276values[i2] = inf
277
278# update
279self.count -= 1
280count -= 1
281if (levels > self.min_levels) and (count < (1 << (levels-2))):
282self._add_or_remove_level(-1)
283else:
284self._update_one(i1)
285self._update_one(i2)
286
287## C Public methods
288
289cdef INDEX_T push_fast(self, VALUE_T value, REFERENCE_T reference) noexcept nogil:
290"""The c-method for fast pushing.
291
292Returns the index relative to the start of the last level in the heap.
293
294"""
295# We need to resize if currently it just fits.
296cdef LEVELS_T levels = self.levels
297cdef INDEX_T count = self.count
298if count >= (1 << levels): # 2**self.levels:
299self._add_or_remove_level(1)
300levels += 1
301
302# insert new value
303cdef INDEX_T i = ((1 << levels) - 1) + count # LevelStart + n
304self._values[i] = value
305self._references[count] = reference
306
307# update
308self.count += 1
309self._update_one(i)
310
311# return
312return count
313
314cdef VALUE_T pop_fast(self) noexcept nogil:
315"""The c-method for fast popping.
316
317Returns the minimum value. The reference is put in self._popped_ref.
318
319"""
320# shorter name for values
321cdef VALUE_T *values = self._values
322
323# init index. start at 1 because we start in level 1
324cdef LEVELS_T level
325cdef INDEX_T i = 1
326cdef LEVELS_T levels = self.levels
327# search tree (using absolute indices)
328for level in range(1, levels):
329if values[i] <= values[i+1]:
330i = i * 2 + 1 # CalcNextAbs
331else:
332i = (i+1) * 2 + 1 # CalcNextAbs
333
334# select best one in last level
335if values[i] <= values[i+1]:
336i = i
337else:
338i += 1
339
340# get values
341cdef INDEX_T ir = i - ((1 << levels) - 1) # (2**self.levels-1)
342# LevelStart
343cdef VALUE_T value = values[i]
344self._popped_ref = self._references[ir]
345
346# remove it
347if self.count:
348self._remove(i)
349
350# return
351return value
352
353## Python Public methods (that do not need to be VERY fast)
354
355def push(self, VALUE_T value, REFERENCE_T reference=-1):
356"""push(value, reference=-1)
357
358Append a value to the heap, with optional reference.
359
360Parameters
361----------
362value : float
363Value to push onto the heap
364reference : int, optional
365Reference to associate with the given value.
366
367"""
368self.push_fast(value, reference)
369
370def min_val(self):
371"""min_val()
372
373Get the minimum value on the heap.
374
375Returns only the value, and does not remove it from the heap.
376
377"""
378# shorter name for values
379cdef VALUE_T *values = self._values
380
381# select best one in last level
382if values[1] < values[2]:
383return values[1]
384else:
385return values[2]
386
387def values(self):
388"""values()
389
390Get the values in the heap as a list.
391
392"""
393cdef INDEX_T i0 = 2**self.levels - 1 # LevelStart
394return [self._values[i] for i in range(i0, i0+self.count)]
395
396def references(self):
397"""references()
398
399Get the references in the heap as a list.
400
401"""
402return [self._references[i] for i in range(self.count)]
403
404def pop(self):
405"""pop()
406
407Get the minimum value and remove it from the list.
408
409Returns
410-------
411value : float
412reference : int
413If no reference was provided, -1 is returned here.
414
415Raises
416------
417IndexError
418On attempt to pop from an empty heap
419
420"""
421if self.count == 0:
422raise IndexError('pop from an empty heap')
423value = self.pop_fast()
424ref = self._popped_ref
425return value, ref
426
427
428cdef class FastUpdateBinaryHeap(BinaryHeap):
429"""FastUpdateBinaryHeap(initial_capacity=128, max_reference=None)
430
431Binary heap that allows the value of a reference to be updated quickly.
432
433This heap class keeps cross-references so that the value associated with a
434given reference can be quickly queried (O(1) time) or updated (O(log2(N))
435time). This is ideal for pathfinding algorithms that implement some
436variant of Dijkstra's algorithm.
437
438Parameters
439----------
440initial_capacity : int
441Estimate of the size of the heap, if known in advance. (In any case,
442the heap will dynamically grow and shrink as required, though never
443below the `initial_capacity`.)
444
445max_reference : int, optional
446Largest reference value that might be pushed to the heap. (Pushing a
447larger value will result in an error.) If no value is provided,
448`1-initial_capacity` will be used. For the cross-reference index to
449work, all references must be in the range [0, max_reference];
450references pushed outside of that range will not be added to the heap.
451The cross-references are kept as a 1-d array of length
452`max_reference+1', so memory use of this heap is effectively
453O(max_reference)
454
455Attributes
456----------
457count : int
458The number of values in the heap
459levels : int
460The number of levels in the binary heap (see Notes below). The values
461are stored in the last level, so 2**levels is the capacity of the
462heap before another resize is required.
463min_levels : int
464The minimum number of levels in the heap (relates to the
465`initial_capacity` parameter.)
466max_reference : int
467The provided or calculated maximum allowed reference value.
468
469Notes
470-----
471The cross-references map data[reference]->internalindex, such that the
472value corresponding to a given reference can be found efficiently. This
473can be queried with the value_of() method.
474
475A special method, push_if_lower() is provided that will update the heap if
476the given reference is not in the heap, or if it is and the provided value
477is lower than the current value in the heap. This is again useful for
478pathfinding algorithms.
479
480"""
481
482def __cinit__(self, INDEX_T initial_capacity=128, max_reference=None):
483if max_reference is None:
484max_reference = initial_capacity - 1
485self.max_reference = max_reference
486self._crossref = <INDEX_T *>malloc((self.max_reference + 1) *
487sizeof(INDEX_T))
488
489def __init__(self, INDEX_T initial_capacity=128, max_reference=None):
490"""__init__(initial_capacity=128, max_reference=None)
491
492Class constructor.
493
494"""
495if self._crossref is NULL:
496raise MemoryError()
497# below will call self.reset
498BinaryHeap.__init__(self, initial_capacity)
499
500def __dealloc__(self):
501free(self._crossref)
502
503def reset(self):
504"""reset()
505
506Reset the heap to default, empty state.
507
508"""
509BinaryHeap.reset(self)
510# set default values of crossrefs
511cdef INDEX_T i
512for i in range(self.max_reference + 1):
513self._crossref[i] = -1
514
515cdef void _remove(self, INDEX_T i1) noexcept nogil:
516"""Remove a value from the heap. By index."""
517cdef LEVELS_T levels = self.levels
518cdef INDEX_T count = self.count
519
520# get indices
521cdef INDEX_T i0 = (1 << levels) - 1 # 2**self.levels - 1 # LevelStart
522cdef INDEX_T i2 = i0 + count - 1
523
524# get relative indices
525cdef INDEX_T r1 = i1 - i0
526cdef INDEX_T r2 = count - 1
527
528cdef VALUE_T *values = self._values
529cdef REFERENCE_T *references = self._references
530cdef INDEX_T *crossref = self._crossref
531
532# update cross reference
533crossref[references[r2]] = r1
534crossref[references[r1]] = -1 # disable removed item
535
536# swap with last
537values[i1] = values[i2]
538references[r1] = references[r2]
539
540# make last Null
541values[i2] = inf
542
543# update
544self.count -= 1
545count -= 1
546if (levels > self.min_levels) & (count < (1 << (levels-2))):
547self._add_or_remove_level(-1)
548else:
549self._update_one(i1)
550self._update_one(i2)
551
552cdef INDEX_T push_fast(self, VALUE_T value, REFERENCE_T reference) noexcept nogil:
553"""The c method for fast pushing.
554
555If the reference is already present, will update its value, otherwise
556will append it.
557
558If -1 is returned, the provided reference was out-of-bounds and no
559value was pushed to the heap.
560
561"""
562if not (0 <= reference <= self.max_reference):
563return -1
564
565# init variable to store the index-in-the-heap
566cdef INDEX_T i
567
568# Reference is the index in the array where MCP is applied to.
569# Find the index-in-the-heap using the crossref array.
570cdef INDEX_T ir = self._crossref[reference]
571
572if ir != -1:
573# update
574i = (1 << self.levels) - 1 + ir
575self._values[i] = value
576self._update_one(i)
577return ir
578
579# if not updated: append normally and store reference
580ir = BinaryHeap.push_fast(self, value, reference)
581self._crossref[reference] = ir
582return ir
583
584cdef INDEX_T push_if_lower_fast(self, VALUE_T value,
585REFERENCE_T reference) noexcept nogil:
586"""If the reference is already present, will update its value ONLY if
587the new value is lower than the old one. If the reference is not
588present, this append it. If a value was appended, self._pushed is
589set to 1.
590
591If -1 is returned, the provided reference was out-of-bounds and no
592value was pushed to the heap.
593
594"""
595if not (0 <= reference <= self.max_reference):
596return -1
597
598# init variable to store the index-in-the-heap
599cdef INDEX_T i
600
601# Reference is the index in the array where MCP is applied to.
602# Find the index-in-the-heap using the crossref array.
603cdef INDEX_T ir = self._crossref[reference]
604cdef VALUE_T *values = self._values
605self._pushed = 1
606if ir != -1:
607# update
608i = (1 << self.levels) - 1 + ir
609if values[i] > value:
610values[i] = value
611self._update_one(i)
612else:
613self._pushed = 0
614return ir
615
616# if not updated: append normally and store reference
617ir = BinaryHeap.push_fast(self, value, reference)
618self._crossref[reference] = ir
619return ir
620
621cdef VALUE_T value_of_fast(self, REFERENCE_T reference):
622"""Return the value corresponding to the given reference.
623
624If inf is returned, the reference may be invalid: check the
625_invaild_ref field in this case.
626
627"""
628if not (0 <= reference <= self.max_reference):
629self._invalid_ref = 1
630return inf
631
632# init variable to store the index-in-the-heap
633cdef INDEX_T i
634
635# Reference is the index in the array where MCP is applied to.
636# Find the index-in-the-heap using the crossref array.
637cdef INDEX_T ir = self._crossref[reference]
638self._invalid_ref = 0
639if ir == -1:
640self._invalid_ref = 1
641return inf
642i = (1 << self.levels) - 1 + ir
643return self._values[i]
644
645def push(self, VALUE_T value, int reference):
646"""push(value, reference)
647
648Append/update a value in the heap.
649
650Parameters
651----------
652value : float
653reference : int
654If the reference is already present in the array, the value for
655that reference will be updated, otherwise the (value, reference)
656pair will be added to the heap.
657
658Raises
659------
660ValueError
661On pushing a reference outside the range [0, max_reference].
662
663"""
664if self.push_fast(value, reference) == -1:
665raise ValueError('reference outside of range [0, max_reference]')
666
667def push_if_lower(self, VALUE_T value, int reference):
668"""push_if_lower(value, reference)
669
670Append/update a value in the heap if the extant value is lower.
671
672If the reference is already in the heap, update only of the new value
673is lower than the current one. If the reference is not present, the
674value will always be pushed to the heap.
675
676Parameters
677----------
678value : float
679reference : int
680If the reference is already present in the array, the value for
681that reference will be updated, otherwise the (value, reference)
682pair will be added to the heap.
683
684Returns
685-------
686pushed : bool
687True if an append/update occurred, False if otherwise.
688
689Raises
690------
691ValueError
692On pushing a reference outside the range [0, max_reference].
693
694"""
695if self.push_if_lower_fast(value, reference) == -1:
696raise ValueError('reference outside of range [0, max_reference]')
697return self._pushed == 1
698
699def value_of(self, int reference):
700"""value_of(reference)
701
702Get the value corresponding to a given reference.
703
704Parameters
705----------
706reference : int
707A reference already pushed to the heap.
708
709Returns
710-------
711value : float
712
713Raises
714------
715ValueError
716On querying a reference outside the range [0, max_reference], or
717not already pushed to the heap.
718
719"""
720value = self.value_of_fast(reference)
721if self._invalid_ref:
722raise ValueError('invalid reference')
723return value
724
725def cross_references(self):
726"""Get the cross references in the heap as a list."""
727return [self._crossref[i] for i in range(self.max_reference + 1)]
728