scikit-image

Форк
0
727 строк · 22.8 Кб
1
# -*- python -*-
2

3
"""
4
Cython implementation of a binary min heap.
5
"""
6
# cython specific imports
7
from libc.stdlib cimport malloc, free
8

9
cdef extern from "pyport.h":
10
  cnp.float64_t Py_HUGE_VAL
11

12
cdef VALUE_T inf = Py_HUGE_VAL
13

14
# this is handy
15
cdef inline INDEX_T index_min(INDEX_T a, INDEX_T b) noexcept nogil:
16
    return a if a <= b else b
17

18

19
cdef class BinaryHeap:
20
    """BinaryHeap(initial_capacity=128)
21

22
    A binary heap class that can store values and an integer reference.
23

24
    A binary heap is an object to store values in, optimized in such a way
25
    that the minimum (or maximum, but a minimum in this implementation)
26
    value can be found in O(log2(N)) time. In this implementation, a reference
27
    value (a single integer) can also be stored with each value.
28

29
    Use the methods push() and pop() to put in or extract values.
30
    In C, use the corresponding push_fast() and pop_fast().
31

32
    Parameters
33
    ----------
34
    initial_capacity : int
35
        Estimate of the size of the heap, if known in advance. (In any case,
36
        the heap will dynamically grow and shrink as required, though never
37
        below the `initial_capacity`.)
38

39
    Attributes
40
    ----------
41
    count : int
42
        The number of values in the heap
43
    levels : int
44
        The number of levels in the binary heap (see Notes below). The values
45
        are stored in the last level, so 2**levels is the capacity of the
46
        heap before another resize is required.
47
    min_levels : int
48
        The minimum number of levels in the heap (relates to the
49
        `initial_capacity` parameter.)
50

51
    Notes
52
    -----
53
    This implementation stores the binary heap in an array twice as long as
54
    the number of elements in the heap. The array is structured in levels,
55
    starting at level 0 with a single value, doubling the amount of values in
56
    each level. The final level contains the actual values, the level before
57
    it contains the pairwise minimum values. The level before that contains
58
    the pairwise minimum values of that level, etc. Take a look at this
59
    illustration:
60

61
    level: 0 11 2222 33333333 4444444444444444
62
    index: 0 12 3456 78901234 5678901234567890
63
                        1          2         3
64

65
     The actual values are stored in level 4. The minimum value of position 15
66
    and 16 is stored in position 7. min(17,18)->8, min(7,8)->3, min(3,4)->1.
67
    When adding a value, only the path to the top has to be updated, which
68
    takesO(log2(N)) time.
69

70
     The advantage of this implementation relative to more common
71
    implementations that swap values when pushing to the array is that data
72
    only needs to be swapped once when an element is removed. This means that
73
    keeping an array of references along with the values is very inexpensive.
74
    Th disadvantage is that if you pop the minimum value, the tree has to be
75
    traced from top to bottom and back. So if you only want values and no
76
    references, this implementation will probably be slower. If you need
77
    references (and maybe cross references to be kept up to date) this
78
    implementation will be faster.
79

80
    """
81

82
    ## Basic methods
83
    # The following lines are always "inlined", but documented here for
84
    # clarity:
85
    #
86
    # To calculate the start index of a certain level:
87
    # 2**l-1 # LevelStart
88
    # Note that in inner loops, this may also be represented as (1<<l)-1,
89
    # because code of the form x**y goes via the python pow operations and
90
    # can thus be a bit slower.
91
    #
92
    # To calculate the corresponding ABSOLUTE index at the next level:
93
    # i*2+1 # CalcNextAbs
94
    #
95
    # To calculate the corresponding ABSOLUTE index at the previous level:
96
    # (i-1)/2 # CalcPrevAbs
97
    #
98
    # To calculate the capacity at a certain level:
99
    # 2**l
100

101
    def __cinit__(self, INDEX_T initial_capacity=128, *args, **kws):
102
        # calc levels from the default capacity
103
        cdef LEVELS_T levels = 0
104
        while 2**levels < initial_capacity:
105
            levels += 1
106
        # set levels
107
        self.min_levels = self.levels = levels
108

109
        # we start with 0 values
110
        self.count = 0
111

112
        # allocate arrays
113
        cdef INDEX_T number = 2**self.levels
114
        self._values = <VALUE_T *>malloc(2 * number * sizeof(VALUE_T))
115
        self._references = <REFERENCE_T *>malloc(number * sizeof(REFERENCE_T))
116

117
    def __init__(self, INDEX_T initial_capacity=128):
118
        """__init__(initial_capacity=128)
119

120
        Class constructor.
121

122
        Takes an optional parameter 'initial_capacity' so that
123
        if the required heap capacity is known or can be estimated in advance,
124
        there will need to be fewer resize operations on the heap.
125

126
        """
127
        if self._values is NULL or self._references is NULL:
128
            raise MemoryError()
129
        self.reset()
130

131
    def reset(self):
132
        """reset()
133

134
        Reset the heap to default, empty state.
135

136
        """
137
        cdef INDEX_T number = 2**self.levels
138
        cdef INDEX_T i
139
        cdef VALUE_T *values = self._values
140
        for i in range(number * 2):
141
            values[i] = inf
142

143
    def __dealloc__(self):
144
        free(self._values)
145
        free(self._references)
146

147
    def __str__(self):
148
        s = ''
149
        for level in range(1, self.levels + 1):
150
            i0 = 2**level - 1  # LevelStart
151
            s += 'level %i: ' % level
152
            for i in range(i0, i0 + 2**level):
153
                s += '%g, ' % self._values[i]
154
            s = s[:-1] + '\n'
155
        return s
156

157
    ## C Maintenance methods
158

159
    cdef void _add_or_remove_level(self, LEVELS_T add_or_remove) noexcept nogil:
160
        # init indexing ints
161
        cdef INDEX_T i, i1, i2, n
162

163
        # new amount of levels
164
        cdef LEVELS_T new_levels = self.levels + add_or_remove
165

166
        # allocate new arrays
167
        cdef INDEX_T number = 2**new_levels
168
        cdef VALUE_T *values
169
        cdef REFERENCE_T *references
170
        values = <VALUE_T *>malloc(number * 2 * sizeof(VALUE_T))
171
        references = <REFERENCE_T *>malloc(number * sizeof(REFERENCE_T))
172
        if values is NULL or references is NULL:
173
            free(values)
174
            free(references)
175
            with gil:
176
                raise MemoryError()
177

178
        # init arrays
179
        for i in range(number * 2):
180
            values[i] = inf
181
        for i in range(number):
182
            references[i] = -1
183

184
        # copy data
185
        cdef VALUE_T *old_values = self._values
186
        cdef REFERENCE_T *old_references = self._references
187
        if self.count:
188
            i1 = 2**new_levels - 1  # LevelStart
189
            i2 = 2**self.levels - 1  # LevelStart
190
            n = index_min(2**new_levels, 2**self.levels)
191
            for i in range(n):
192
                values[i1+i] = old_values[i2+i]
193
            for i in range(n):
194
                references[i] = old_references[i]
195

196
        # make current
197
        free(self._values)
198
        free(self._references)
199
        self._values = values
200
        self._references = references
201

202
        # we need a full update
203
        self.levels = new_levels
204
        self._update()
205

206
    cdef void _update(self) noexcept nogil:
207
        """Update the full tree from the bottom up.
208

209
        This should be done after resizing.
210

211
        """
212
        # shorter name for values
213
        cdef VALUE_T *values = self._values
214

215
        # Note that i represents an absolute index here
216
        cdef INDEX_T i0, i, ii, n
217
        cdef LEVELS_T level
218

219
        # track tree
220
        for level in range(self.levels, 1, -1):
221
            i0 = (1 << level) - 1  # 2**level-1 = LevelStart
222
            n = i0 + 1  # 2**level
223
            for i in range(i0, i0+n, 2):
224
                ii = (i-1) // 2  # CalcPrevAbs
225
                if values[i] < values[i+1]:
226
                    values[ii] = values[i]
227
                else:
228
                    values[ii] = values[i+1]
229

230
    cdef void _update_one(self, INDEX_T i) noexcept nogil:
231
        """Update the tree for one value."""
232
        # shorter name for values
233
        cdef VALUE_T *values = self._values
234

235
        # make index uneven
236
        if i % 2 == 0:
237
            i -= 1
238

239
        # track tree
240
        cdef INDEX_T ii
241
        cdef LEVELS_T level
242
        for level in range(self.levels, 1, -1):
243
            ii = (i-1) // 2  # CalcPrevAbs
244

245
            # test
246
            if values[i] < values[i+1]:
247
                values[ii] = values[i]
248
            else:
249
                values[ii] = values[i+1]
250
            # next
251
            if ii % 2:
252
                i = ii
253
            else:
254
                i = ii - 1
255

256
    cdef void _remove(self, INDEX_T i1) noexcept nogil:
257
        """Remove a value from the heap. By index."""
258
        cdef LEVELS_T levels = self.levels
259
        cdef INDEX_T count = self.count
260
        # get indices
261
        cdef INDEX_T i0 = (1 << levels) - 1  # 2**self.levels - 1 # LevelStart
262
        cdef INDEX_T i2 = i0 + count - 1
263

264
        # get relative indices
265
        cdef INDEX_T r1 = i1 - i0
266
        cdef INDEX_T r2 = count - 1
267

268
        cdef VALUE_T *values = self._values
269
        cdef REFERENCE_T *references = self._references
270

271
        # swap with last
272
        values[i1] = values[i2]
273
        references[r1] = references[r2]
274

275
        # make last Null
276
        values[i2] = inf
277

278
        # update
279
        self.count -= 1
280
        count -= 1
281
        if (levels > self.min_levels) and (count < (1 << (levels-2))):
282
            self._add_or_remove_level(-1)
283
        else:
284
            self._update_one(i1)
285
            self._update_one(i2)
286

287
    ## C Public methods
288

289
    cdef INDEX_T push_fast(self, VALUE_T value, REFERENCE_T reference) noexcept nogil:
290
        """The c-method for fast pushing.
291

292
        Returns the index relative to the start of the last level in the heap.
293

294
        """
295
        # We need to resize if currently it just fits.
296
        cdef LEVELS_T levels = self.levels
297
        cdef INDEX_T count = self.count
298
        if count >= (1 << levels):  # 2**self.levels:
299
            self._add_or_remove_level(1)
300
            levels += 1
301

302
        # insert new value
303
        cdef INDEX_T i = ((1 << levels) - 1) + count  # LevelStart + n
304
        self._values[i] = value
305
        self._references[count] = reference
306

307
        # update
308
        self.count += 1
309
        self._update_one(i)
310

311
        # return
312
        return count
313

314
    cdef VALUE_T pop_fast(self) noexcept nogil:
315
        """The c-method for fast popping.
316

317
        Returns the minimum value. The reference is put in self._popped_ref.
318

319
        """
320
        # shorter name for values
321
        cdef VALUE_T *values = self._values
322

323
        # init index. start at 1 because we start in level 1
324
        cdef LEVELS_T level
325
        cdef INDEX_T i = 1
326
        cdef LEVELS_T levels = self.levels
327
        # search tree (using absolute indices)
328
        for level in range(1, levels):
329
            if values[i] <= values[i+1]:
330
                i = i * 2 + 1  # CalcNextAbs
331
            else:
332
                i = (i+1) * 2 + 1  # CalcNextAbs
333

334
        # select best one in last level
335
        if values[i] <= values[i+1]:
336
            i = i
337
        else:
338
            i += 1
339

340
        # get values
341
        cdef INDEX_T ir = i - ((1 << levels) - 1) # (2**self.levels-1)
342
                                                  # LevelStart
343
        cdef VALUE_T value = values[i]
344
        self._popped_ref = self._references[ir]
345

346
        # remove it
347
        if self.count:
348
            self._remove(i)
349

350
        # return
351
        return value
352

353
    ## Python Public methods (that do not need to be VERY fast)
354

355
    def push(self, VALUE_T value, REFERENCE_T reference=-1):
356
        """push(value, reference=-1)
357

358
        Append a value to the heap, with optional reference.
359

360
        Parameters
361
        ----------
362
        value : float
363
            Value to push onto the heap
364
        reference : int, optional
365
            Reference to associate with the given value.
366

367
        """
368
        self.push_fast(value, reference)
369

370
    def min_val(self):
371
        """min_val()
372

373
        Get the minimum value on the heap.
374

375
        Returns only the value, and does not remove it from the heap.
376

377
        """
378
        # shorter name for values
379
        cdef VALUE_T *values = self._values
380

381
        # select best one in last level
382
        if values[1] < values[2]:
383
            return values[1]
384
        else:
385
            return values[2]
386

387
    def values(self):
388
        """values()
389

390
        Get the values in the heap as a list.
391

392
        """
393
        cdef INDEX_T i0 = 2**self.levels - 1  # LevelStart
394
        return [self._values[i] for i in range(i0, i0+self.count)]
395

396
    def references(self):
397
        """references()
398

399
        Get the references in the heap as a list.
400

401
        """
402
        return [self._references[i] for i in range(self.count)]
403

404
    def pop(self):
405
        """pop()
406

407
        Get the minimum value and remove it from the list.
408

409
        Returns
410
        -------
411
        value : float
412
        reference : int
413
            If no reference was provided, -1 is returned here.
414

415
        Raises
416
        ------
417
        IndexError
418
            On attempt to pop from an empty heap
419

420
        """
421
        if self.count == 0:
422
            raise IndexError('pop from an empty heap')
423
        value = self.pop_fast()
424
        ref = self._popped_ref
425
        return value, ref
426

427

428
cdef class FastUpdateBinaryHeap(BinaryHeap):
429
    """FastUpdateBinaryHeap(initial_capacity=128, max_reference=None)
430

431
    Binary heap that allows the value of a reference to be updated quickly.
432

433
    This heap class keeps cross-references so that the value associated with a
434
    given reference can be quickly queried (O(1) time) or updated (O(log2(N))
435
    time). This is ideal for pathfinding algorithms that implement some
436
    variant of Dijkstra's algorithm.
437

438
    Parameters
439
    ----------
440
    initial_capacity : int
441
        Estimate of the size of the heap, if known in advance. (In any case,
442
        the heap will dynamically grow and shrink as required, though never
443
        below the `initial_capacity`.)
444

445
    max_reference : int, optional
446
        Largest reference value that might be pushed to the heap. (Pushing a
447
        larger value will result in an error.) If no value is provided,
448
        `1-initial_capacity` will be used. For the cross-reference index to
449
        work, all references must be in the range [0, max_reference];
450
        references pushed outside of that range will not be added to the heap.
451
        The cross-references are kept as a 1-d array of length
452
        `max_reference+1', so memory use of this heap is effectively
453
        O(max_reference)
454

455
    Attributes
456
    ----------
457
    count : int
458
        The number of values in the heap
459
    levels : int
460
        The number of levels in the binary heap (see Notes below). The values
461
        are stored in the last level, so 2**levels is the capacity of the
462
        heap before another resize is required.
463
    min_levels : int
464
        The minimum number of levels in the heap (relates to the
465
        `initial_capacity` parameter.)
466
    max_reference : int
467
        The provided or calculated maximum allowed reference value.
468

469
    Notes
470
    -----
471
    The cross-references map data[reference]->internalindex, such that the
472
    value corresponding to a given reference can be found efficiently. This
473
    can be queried with the value_of() method.
474

475
    A special method, push_if_lower() is provided that will update the heap if
476
    the given reference is not in the heap, or if it is and the provided value
477
    is lower than the current value in the heap. This is again useful for
478
    pathfinding algorithms.
479

480
    """
481

482
    def __cinit__(self, INDEX_T initial_capacity=128, max_reference=None):
483
        if max_reference is None:
484
            max_reference = initial_capacity - 1
485
        self.max_reference = max_reference
486
        self._crossref = <INDEX_T *>malloc((self.max_reference + 1) *
487
                                           sizeof(INDEX_T))
488

489
    def __init__(self, INDEX_T initial_capacity=128, max_reference=None):
490
        """__init__(initial_capacity=128, max_reference=None)
491

492
        Class constructor.
493

494
        """
495
        if self._crossref is NULL:
496
            raise MemoryError()
497
        # below will call self.reset
498
        BinaryHeap.__init__(self, initial_capacity)
499

500
    def __dealloc__(self):
501
        free(self._crossref)
502

503
    def reset(self):
504
        """reset()
505

506
        Reset the heap to default, empty state.
507

508
        """
509
        BinaryHeap.reset(self)
510
        # set default values of crossrefs
511
        cdef INDEX_T i
512
        for i in range(self.max_reference + 1):
513
            self._crossref[i] = -1
514

515
    cdef void _remove(self, INDEX_T i1) noexcept nogil:
516
        """Remove a value from the heap. By index."""
517
        cdef LEVELS_T levels = self.levels
518
        cdef INDEX_T count = self.count
519

520
        # get indices
521
        cdef INDEX_T i0 = (1 << levels) - 1  # 2**self.levels - 1 # LevelStart
522
        cdef INDEX_T i2 = i0 + count - 1
523

524
        # get relative indices
525
        cdef INDEX_T r1 = i1 - i0
526
        cdef INDEX_T r2 = count - 1
527

528
        cdef VALUE_T *values = self._values
529
        cdef REFERENCE_T *references = self._references
530
        cdef INDEX_T *crossref = self._crossref
531

532
        # update cross reference
533
        crossref[references[r2]] = r1
534
        crossref[references[r1]] = -1  # disable removed item
535

536
        # swap with last
537
        values[i1] = values[i2]
538
        references[r1] = references[r2]
539

540
        # make last Null
541
        values[i2] = inf
542

543
        # update
544
        self.count -= 1
545
        count -= 1
546
        if (levels > self.min_levels) & (count < (1 << (levels-2))):
547
            self._add_or_remove_level(-1)
548
        else:
549
            self._update_one(i1)
550
            self._update_one(i2)
551

552
    cdef INDEX_T push_fast(self, VALUE_T value, REFERENCE_T reference) noexcept nogil:
553
        """The c method for fast pushing.
554

555
        If the reference is already present, will update its value, otherwise
556
        will append it.
557

558
        If -1 is returned, the provided reference was out-of-bounds and no
559
        value was pushed to the heap.
560

561
        """
562
        if not (0 <= reference <= self.max_reference):
563
            return -1
564

565
        # init variable to store the index-in-the-heap
566
        cdef INDEX_T i
567

568
        # Reference is the index in the array where MCP is applied to.
569
        # Find the index-in-the-heap using the crossref array.
570
        cdef INDEX_T ir = self._crossref[reference]
571

572
        if ir != -1:
573
            # update
574
            i = (1 << self.levels) - 1 + ir
575
            self._values[i] = value
576
            self._update_one(i)
577
            return ir
578

579
        # if not updated: append normally and store reference
580
        ir = BinaryHeap.push_fast(self, value, reference)
581
        self._crossref[reference] = ir
582
        return ir
583

584
    cdef INDEX_T push_if_lower_fast(self, VALUE_T value,
585
                                    REFERENCE_T reference) noexcept nogil:
586
        """If the reference is already present, will update its value ONLY if
587
        the new value is lower than the old one. If the reference is not
588
        present, this append it. If a value was appended, self._pushed is
589
        set to 1.
590

591
        If -1 is returned, the provided reference was out-of-bounds and no
592
        value was pushed to the heap.
593

594
        """
595
        if not (0 <= reference <= self.max_reference):
596
            return -1
597

598
        # init variable to store the index-in-the-heap
599
        cdef INDEX_T i
600

601
        # Reference is the index in the array where MCP is applied to.
602
        # Find the index-in-the-heap using the crossref array.
603
        cdef INDEX_T ir = self._crossref[reference]
604
        cdef VALUE_T *values = self._values
605
        self._pushed = 1
606
        if ir != -1:
607
            # update
608
            i = (1 << self.levels) - 1 + ir
609
            if values[i] > value:
610
                values[i] = value
611
                self._update_one(i)
612
            else:
613
                self._pushed = 0
614
            return ir
615

616
        # if not updated: append normally and store reference
617
        ir = BinaryHeap.push_fast(self, value, reference)
618
        self._crossref[reference] = ir
619
        return ir
620

621
    cdef VALUE_T value_of_fast(self, REFERENCE_T reference):
622
        """Return the value corresponding to the given reference.
623

624
        If inf is returned, the reference may be invalid: check the
625
        _invaild_ref field in this case.
626

627
        """
628
        if not (0 <= reference <= self.max_reference):
629
            self._invalid_ref = 1
630
            return inf
631

632
        # init variable to store the index-in-the-heap
633
        cdef INDEX_T i
634

635
        # Reference is the index in the array where MCP is applied to.
636
        # Find the index-in-the-heap using the crossref array.
637
        cdef INDEX_T ir = self._crossref[reference]
638
        self._invalid_ref = 0
639
        if ir == -1:
640
            self._invalid_ref = 1
641
            return inf
642
        i = (1 << self.levels) - 1 + ir
643
        return self._values[i]
644

645
    def push(self, VALUE_T value, int reference):
646
        """push(value, reference)
647

648
        Append/update a value in the heap.
649

650
        Parameters
651
        ----------
652
        value : float
653
        reference : int
654
            If the reference is already present in the array, the value for
655
            that reference will be updated, otherwise the (value, reference)
656
            pair will be added to the heap.
657

658
        Raises
659
        ------
660
        ValueError
661
            On pushing a reference outside the range [0, max_reference].
662

663
        """
664
        if self.push_fast(value, reference) == -1:
665
            raise ValueError('reference outside of range [0, max_reference]')
666

667
    def push_if_lower(self, VALUE_T value, int reference):
668
        """push_if_lower(value, reference)
669

670
        Append/update a value in the heap if the extant value is lower.
671

672
        If the reference is already in the heap, update only of the new value
673
        is lower than the current one. If the reference is not present, the
674
        value will always be pushed to the heap.
675

676
        Parameters
677
        ----------
678
        value : float
679
        reference : int
680
            If the reference is already present in the array, the value for
681
            that reference will be updated, otherwise the (value, reference)
682
            pair will be added to the heap.
683

684
        Returns
685
        -------
686
        pushed : bool
687
            True if an append/update occurred, False if otherwise.
688

689
        Raises
690
        ------
691
        ValueError
692
            On pushing a reference outside the range [0, max_reference].
693

694
        """
695
        if self.push_if_lower_fast(value, reference) == -1:
696
            raise ValueError('reference outside of range [0, max_reference]')
697
        return self._pushed == 1
698

699
    def value_of(self, int reference):
700
        """value_of(reference)
701

702
        Get the value corresponding to a given reference.
703

704
        Parameters
705
        ----------
706
        reference : int
707
            A reference already pushed to the heap.
708

709
        Returns
710
        -------
711
        value : float
712

713
        Raises
714
        ------
715
        ValueError
716
            On querying a reference outside the range [0, max_reference], or
717
            not already pushed to the heap.
718

719
        """
720
        value = self.value_of_fast(reference)
721
        if self._invalid_ref:
722
            raise ValueError('invalid reference')
723
        return value
724

725
    def cross_references(self):
726
        """Get the cross references in the heap as a list."""
727
        return [self._crossref[i] for i in range(self.max_reference + 1)]
728

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.