/*
 * Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
27
#include "runtime/atomic.hpp"
28
#include "runtime/orderAccess.hpp"
29
#include "runtime/os.hpp"
30
#include "utilities/waitBarrier_generic.hpp"
31
#include "utilities/spinYield.hpp"
33
// Implements the striped semaphore wait barrier.
//
// To guarantee progress and safety, we need to make sure that a new barrier tag
// starts with a completely empty set of waiters and a free semaphore. This
// requires either waiting for all threads to leave wait() for the current barrier
// tag on disarm(), or waiting for all threads to leave the previous tag before
// reusing the semaphore in arm().
//
// When there are multiple threads, it is normal for some threads to take
// significant time to leave the barrier. Waiting for these threads introduces
// stalls on barrier reuse.
//
// If we wait on disarm(), this stall is nearly guaranteed to happen if some threads
// are de-scheduled by a prior wait(). It would be especially bad if there are more
// waiting threads than CPUs: every thread would need to wake up and register itself
// as leaving before we can unblock from disarm().
//
// If we wait on arm(), we can get lucky that most threads would be able to catch up,
// exit wait(), and so we arrive at arm() with the semaphore ready for reuse. However,
// that is still insufficient in practice.
//
// Therefore, this implementation goes a step further and implements _striped_
// semaphores. We maintain several semaphores in cells. The barrier tags are assigned
// to cells in some simple manner. Most of the current uses have sequential barrier
// tags, so a simple modulo works well. We then operate on a cell as we would operate
// on a single semaphore: we wait at arm() for all threads to catch up before reusing
// the cell. For the cost of maintaining just a few cells, we have a large enough
// window for threads to catch up.
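//
// For illustration, the tag-to-cell mapping could be as simple as a mask over a
// power-of-two cell count. This is a minimal sketch, assuming a hypothetical
// CELLS_COUNT constant; the real tag_to_cell() is declared in
// waitBarrier_generic.hpp and may differ in detail:
//
//   GenericWaitBarrier::Cell& GenericWaitBarrier::tag_to_cell(int tag) {
//     // Sequential tags spread round-robin over the cells.
//     return _cells[tag & (CELLS_COUNT - 1)];
//   }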
//
// Correctness is guaranteed by using a single atomic state variable per cell,
// with updates always done with CASes:
//
//   [.......... barrier tag ..........][.......... waiters ..........]
//
// A cell starts with a zero tag and zero waiters. Arming the cell swings the barrier
// tag from zero to some tag, while checking that no waiters have appeared. Disarming
// swings the barrier tag back from that tag to zero. Every waiter registers itself by
// incrementing the "waiters", while checking that the barrier tag is still the same.
// Every completing waiter decrements the "waiters". When all waiters complete, the
// cell ends up in its initial state, ready to be armed again. This allows accurate
// tracking of how many signals to issue and does not race with disarm.
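//
// As a sketch of how such a state word could be packed and unpacked (hypothetical
// helpers, assuming a 32/32 bit split; the real encode()/decode_tag()/decode_waiters()
// are declared in waitBarrier_generic.hpp):
//
//   static int64_t encode(int32_t tag, int32_t waiters) {
//     return (static_cast<int64_t>(tag) << 32) | (static_cast<int64_t>(waiters) & 0xFFFFFFFF);
//   }
//   static int32_t decode_tag(int64_t state)     { return static_cast<int32_t>(state >> 32); }
//   static int32_t decode_waiters(int64_t state) { return static_cast<int32_t>(state & 0xFFFFFFFF); }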
//
// The implementation uses the strongest (default) barriers for extra safety, even
// when not strictly required to do so for correctness. The extra barrier overhead
// is dominated by the actual wait/notify latency anyway.
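//
// For illustration, a typical round trip through the barrier looks like this
// (a hypothetical usage sketch with one control thread and many waiters; actual
// callers typically go through the WaitBarrier wrapper):
//
//   barrier->arm(tag);     // control thread: arm for the new tag
//   ...                    // waiters: barrier->wait(tag) blocks until disarm
//   barrier->disarm();     // control thread: release all registered waiters
//   ...                    // waiters unblock; the cell drains back to its initial state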

void GenericWaitBarrier::arm(int barrier_tag) {
  assert(barrier_tag != 0, "Pre arm: Should be arming with armed value");
  assert(Atomic::load(&_barrier_tag) == 0,
         "Pre arm: Should not be already armed. Tag: %d",
         Atomic::load(&_barrier_tag));
  Atomic::release_store(&_barrier_tag, barrier_tag);

  Cell &cell = tag_to_cell(barrier_tag);
  cell.arm(barrier_tag);

  // API specifies arm() must provide a trailing fence.
  OrderAccess::fence();
}

void GenericWaitBarrier::disarm() {
  int barrier_tag = Atomic::load_acquire(&_barrier_tag);
  assert(barrier_tag != 0, "Pre disarm: Should be armed. Tag: %d", barrier_tag);
  Atomic::release_store(&_barrier_tag, 0);

  Cell &cell = tag_to_cell(barrier_tag);
  cell.disarm(barrier_tag);

  // API specifies disarm() must provide a trailing fence.
  OrderAccess::fence();
}

void GenericWaitBarrier::wait(int barrier_tag) {
  assert(barrier_tag != 0, "Pre wait: Should be waiting on armed value");

  Cell &cell = tag_to_cell(barrier_tag);
  cell.wait(barrier_tag);

  // API specifies wait() must provide a trailing fence.
  OrderAccess::fence();
}

void GenericWaitBarrier::Cell::arm(int32_t requested_tag) {
  // Before we continue to arm, we need to make sure that all threads
  // have left the previous cell.

  int64_t state;

  SpinYield sp;
  while (true) {
    state = Atomic::load_acquire(&_state);
    assert(decode_tag(state) == 0,
           "Pre arm: Should not be armed. "
           "Tag: " INT32_FORMAT "; Waiters: " INT32_FORMAT,
           decode_tag(state), decode_waiters(state));
    if (decode_waiters(state) == 0) {
      break;
    }
    sp.wait();
  }

  // Try to swing the cell to armed. This should always succeed after the check above.
  int64_t new_state = encode(requested_tag, 0);
  int64_t prev_state = Atomic::cmpxchg(&_state, state, new_state);
  if (prev_state != state) {
    fatal("Cannot arm the wait barrier. "
          "Tag: " INT32_FORMAT "; Waiters: " INT32_FORMAT,
          decode_tag(prev_state), decode_waiters(prev_state));
  }
}

int GenericWaitBarrier::Cell::signal_if_needed(int max) {
  int signals = 0;
  while (true) {
    int cur = Atomic::load_acquire(&_outstanding_wakeups);
    if (cur == 0) {
      // All done, no more waiters.
      return 0;
    }
    assert(cur > 0, "Sanity");

    int prev = Atomic::cmpxchg(&_outstanding_wakeups, cur, cur - 1);
    if (prev != cur) {
      // Contention, return to caller for early return or backoff.
      return prev;
    }

    // Signal!
    _sem.signal();

    if (++signals >= max) {
      // Signalled the requested number of times, break out.
      return prev;
    }
  }
}

void GenericWaitBarrier::Cell::disarm(int32_t expected_tag) {
  int32_t waiters;

  while (true) {
    int64_t state = Atomic::load_acquire(&_state);
    int32_t tag = decode_tag(state);
    waiters = decode_waiters(state);

    assert((tag == expected_tag) && (waiters >= 0),
           "Mid disarm: Should be armed with expected tag and have sane waiters. "
           "Tag: " INT32_FORMAT "; Waiters: " INT32_FORMAT,
           tag, waiters);

    int64_t new_state = encode(0, waiters);
    if (Atomic::cmpxchg(&_state, state, new_state) == state) {
      // Successfully disarmed.
      break;
    }
  }

  // Wake up waiters, if we have at least one.
  // Allow other threads to assist with wakeups, if possible.
  if (waiters > 0) {
    Atomic::release_store(&_outstanding_wakeups, waiters);
    SpinYield sp;
    while (signal_if_needed(INT_MAX) > 0) {
      sp.wait();
    }
  }
  assert(Atomic::load(&_outstanding_wakeups) == 0, "Post disarm: Should not have outstanding wakeups");
}

void GenericWaitBarrier::Cell::wait(int32_t expected_tag) {
  // Try to register ourselves as a pending waiter.
  while (true) {
    int64_t state = Atomic::load_acquire(&_state);
    int32_t tag = decode_tag(state);
    if (tag != expected_tag) {
      // Cell tag had changed while waiting here. This means either the cell had
      // been disarmed, or we are late and the cell was armed with a new tag.
      // Exit without touching anything else.
      return;
    }
    int32_t waiters = decode_waiters(state);

    assert((tag == expected_tag) && (waiters >= 0 && waiters < INT32_MAX),
           "Before wait: Should be armed with expected tag and waiters are in range. "
           "Tag: " INT32_FORMAT "; Waiters: " INT32_FORMAT,
           tag, waiters);

    int64_t new_state = encode(tag, waiters + 1);
    if (Atomic::cmpxchg(&_state, state, new_state) == state) {
      // Success! Proceed to wait.
      break;
    }
  }

  // Wait for notification.
  _sem.wait();

  // Unblocked! We help out with waking up two siblings. This allows the wakeups
  // to avalanche for many threads, even if some threads are lagging behind.
  // Note that we can only do this *before* reporting back as a completed waiter,
  // otherwise we might prematurely wake up threads for another barrier tag.
  // The current arm() sequence protects us from this trouble by waiting until all
  // waiters leave.
  signal_if_needed(2);

  // Register ourselves as a completed waiter before leaving.
  while (true) {
    int64_t state = Atomic::load_acquire(&_state);
    int32_t tag = decode_tag(state);
    int32_t waiters = decode_waiters(state);

    assert((tag == 0) && (waiters > 0),
           "After wait: Should not be armed and should have waiters still completing. "
           "Tag: " INT32_FORMAT "; Waiters: " INT32_FORMAT,
           tag, waiters);

    int64_t new_state = encode(tag, waiters - 1);
    if (Atomic::cmpxchg(&_state, state, new_state) == state) {
      break;
    }
  }
}