prometheus-net
497 строк · 17.9 Кб
1using System.Buffers;2
3namespace Prometheus;4
/// <summary>
/// Represents a metric whose lifetime is managed by the caller, either via explicit leases or via extend-on-use behavior (implicit leases).
/// </summary>
/// <remarks>
/// Each metric handle maintains a reaper task that occasionally removes metrics that have expired. The reaper is started
/// when the first lifetime-managed metric is created and terminates when the last lifetime-managed metric expires.
/// This does mean that the metric handle may keep objects alive until expiration, even if the handle itself is no longer used.
/// </remarks>
internal abstract class ManagedLifetimeMetricHandle<TChild, TMetricInterface>
    : IManagedLifetimeMetricHandle<TMetricInterface>, INotifyLeaseEnded
    where TChild : ChildBase, TMetricInterface
    where TMetricInterface : ICollectorChild
{
    /// <summary>
    /// Creates a handle over <paramref name="metric"/> whose children expire once they have had no leases
    /// for <paramref name="expiresAfter"/>.
    /// </summary>
    internal ManagedLifetimeMetricHandle(Collector<TChild> metric, TimeSpan expiresAfter)
    {
        // Cache the delegate once so that starting the reaper does not allocate a new delegate each time.
        _reaperFunc = Reaper;

        _metric = metric;
        _expiresAfter = expiresAfter;
    }

    protected readonly Collector<TChild> _metric;
    protected readonly TimeSpan _expiresAfter;

    #region Lease(string[])
    /// <summary>
    /// Resolves the child for <paramref name="labelValues"/> and takes a lease on it.
    /// The lease is ended by disposing the returned object.
    /// </summary>
    public IDisposable AcquireLease(out TMetricInterface metric, params string[] labelValues)
    {
        var child = _metric.WithLabels(labelValues);
        metric = child;

        return TakeLease(child);
    }

    /// <summary>
    /// Same as <c>AcquireLease</c> but hands the lease out as a <see cref="RefLease"/> instead of an <see cref="IDisposable"/>.
    /// </summary>
    public RefLease AcquireRefLease(out TMetricInterface metric, params string[] labelValues)
    {
        var child = _metric.WithLabels(labelValues);
        metric = child;

        return TakeRefLease(child);
    }

    /// <summary>
    /// Runs <paramref name="action"/> against the child for <paramref name="labelValues"/> while holding a lease on it.
    /// </summary>
    public void WithLease(Action<TMetricInterface> action, params string[] labelValues)
    {
        var child = _metric.WithLabels(labelValues);
        using var lease = TakeRefLease(child);

        action(child);
    }

    /// <summary>
    /// Runs <paramref name="action"/> with <paramref name="arg"/> against the child for
    /// <paramref name="labelValues"/> while holding a lease on it.
    /// </summary>
    public void WithLease<TArg>(Action<TArg, TMetricInterface> action, TArg arg, params string[] labelValues)
    {
        var child = _metric.WithLabels(labelValues);
        using var lease = TakeRefLease(child);

        action(arg, child);
    }

    /// <summary>
    /// Awaits <paramref name="action"/> against the child for <paramref name="labelValues"/> while holding a lease on it.
    /// </summary>
    public async Task WithLeaseAsync(Func<TMetricInterface, Task> action, params string[] labelValues)
    {
        using var lease = AcquireLease(out var metric, labelValues);

        // Only the lease release runs after this await, so there is no need to resume on the caller's context.
        await action(metric).ConfigureAwait(false);
    }

    /// <summary>
    /// Evaluates <paramref name="func"/> against the child for <paramref name="labelValues"/> while holding a lease on it
    /// and returns its result.
    /// </summary>
    public TResult WithLease<TResult>(Func<TMetricInterface, TResult> func, params string[] labelValues)
    {
        // Uses a RefLease, like the other synchronous WithLease overloads, instead of an IDisposable lease object.
        var child = _metric.WithLabels(labelValues);
        using var lease = TakeRefLease(child);

        return func(child);
    }

    /// <summary>
    /// Awaits <paramref name="func"/> against the child for <paramref name="labelValues"/> while holding a lease on it
    /// and returns its result.
    /// </summary>
    public async Task<TResult> WithLeaseAsync<TResult>(Func<TMetricInterface, Task<TResult>> func, params string[] labelValues)
    {
        using var lease = AcquireLease(out var metric, labelValues);

        // Only the lease release runs after this await, so there is no need to resume on the caller's context.
        return await func(metric).ConfigureAwait(false);
    }
    #endregion

    #region Lease(ReadOnlyMemory<string>)
    /// <summary>
    /// Resolves the child for <paramref name="labelValues"/> and takes a lease on it.
    /// The lease is ended by disposing the returned object.
    /// </summary>
    public IDisposable AcquireLease(out TMetricInterface metric, ReadOnlyMemory<string> labelValues)
    {
        var child = _metric.WithLabels(labelValues);
        metric = child;

        return TakeLease(child);
    }

    /// <summary>
    /// Same as <c>AcquireLease</c> but hands the lease out as a <see cref="RefLease"/> instead of an <see cref="IDisposable"/>.
    /// </summary>
    public RefLease AcquireRefLease(out TMetricInterface metric, ReadOnlyMemory<string> labelValues)
    {
        var child = _metric.WithLabels(labelValues);
        metric = child;

        return TakeRefLease(child);
    }

    /// <summary>
    /// Runs <paramref name="action"/> against the child for <paramref name="labelValues"/> while holding a lease on it.
    /// </summary>
    public void WithLease(Action<TMetricInterface> action, ReadOnlyMemory<string> labelValues)
    {
        var child = _metric.WithLabels(labelValues);
        using var lease = TakeRefLease(child);

        action(child);
    }

    /// <summary>
    /// Runs <paramref name="action"/> with <paramref name="arg"/> against the child for
    /// <paramref name="labelValues"/> while holding a lease on it.
    /// </summary>
    public void WithLease<TArg>(Action<TArg, TMetricInterface> action, TArg arg, ReadOnlyMemory<string> labelValues)
    {
        var child = _metric.WithLabels(labelValues);
        using var lease = TakeRefLease(child);

        action(arg, child);
    }

    /// <summary>
    /// Awaits <paramref name="action"/> against the child for <paramref name="labelValues"/> while holding a lease on it.
    /// </summary>
    public async Task WithLeaseAsync(Func<TMetricInterface, Task> action, ReadOnlyMemory<string> labelValues)
    {
        using var lease = AcquireLease(out var metric, labelValues);

        // Only the lease release runs after this await, so there is no need to resume on the caller's context.
        await action(metric).ConfigureAwait(false);
    }

    /// <summary>
    /// Evaluates <paramref name="func"/> against the child for <paramref name="labelValues"/> while holding a lease on it
    /// and returns its result.
    /// </summary>
    public TResult WithLease<TResult>(Func<TMetricInterface, TResult> func, ReadOnlyMemory<string> labelValues)
    {
        // Uses a RefLease, like the other synchronous WithLease overloads, instead of an IDisposable lease object.
        var child = _metric.WithLabels(labelValues);
        using var lease = TakeRefLease(child);

        return func(child);
    }

    /// <summary>
    /// Awaits <paramref name="func"/> against the child for <paramref name="labelValues"/> while holding a lease on it
    /// and returns its result.
    /// </summary>
    public async Task<TResult> WithLeaseAsync<TResult>(Func<TMetricInterface, Task<TResult>> func, ReadOnlyMemory<string> labelValues)
    {
        using var lease = AcquireLease(out var metric, labelValues);

        // Only the lease release runs after this await, so there is no need to resume on the caller's context.
        return await func(metric).ConfigureAwait(false);
    }
    #endregion

    #region Lease(ReadOnlySpan<string>)
    /// <summary>
    /// Resolves the child for <paramref name="labelValues"/> and takes a lease on it.
    /// The lease is ended by disposing the returned object.
    /// </summary>
    public IDisposable AcquireLease(out TMetricInterface metric, ReadOnlySpan<string> labelValues)
    {
        var child = _metric.WithLabels(labelValues);
        metric = child;

        return TakeLease(child);
    }

    /// <summary>
    /// Same as <c>AcquireLease</c> but hands the lease out as a <see cref="RefLease"/> instead of an <see cref="IDisposable"/>.
    /// </summary>
    public RefLease AcquireRefLease(out TMetricInterface metric, ReadOnlySpan<string> labelValues)
    {
        var child = _metric.WithLabels(labelValues);
        metric = child;

        return TakeRefLease(child);
    }

    /// <summary>
    /// Runs <paramref name="action"/> against the child for <paramref name="labelValues"/> while holding a lease on it.
    /// </summary>
    public void WithLease(Action<TMetricInterface> action, ReadOnlySpan<string> labelValues)
    {
        var child = _metric.WithLabels(labelValues);
        using var lease = TakeRefLease(child);

        action(child);
    }

    /// <summary>
    /// Runs <paramref name="action"/> with <paramref name="arg"/> against the child for
    /// <paramref name="labelValues"/> while holding a lease on it.
    /// </summary>
    public void WithLease<TArg>(Action<TArg, TMetricInterface> action, TArg arg, ReadOnlySpan<string> labelValues)
    {
        var child = _metric.WithLabels(labelValues);
        using var lease = TakeRefLease(child);

        action(arg, child);
    }

    /// <summary>
    /// Evaluates <paramref name="func"/> against the child for <paramref name="labelValues"/> while holding a lease on it
    /// and returns its result.
    /// </summary>
    public TResult WithLease<TResult>(Func<TMetricInterface, TResult> func, ReadOnlySpan<string> labelValues)
    {
        // Uses a RefLease, like the other synchronous WithLease overloads, instead of an IDisposable lease object.
        var child = _metric.WithLabels(labelValues);
        using var lease = TakeRefLease(child);

        return func(child);
    }
    #endregion

    public abstract ICollector<TMetricInterface> WithExtendLifetimeOnUse();

    /// <summary>
    /// Internal to allow the delay logic to be replaced in test code, enabling (non-)expiration on demand.
    /// </summary>
    internal IDelayer Delayer = RealDelayer.Instance;

    #region Lease tracking
    private readonly Dictionary<TChild, ChildLifetimeInfo> _lifetimes = new();

    // Guards the collection but not the contents.
    private readonly ReaderWriterLockSlim _lifetimesLock = new();

    /// <summary>
    /// Returns whether at least one lifetime is currently registered, checked under a read lock.
    /// </summary>
    private bool HasAnyTrackedLifetimes()
    {
        _lifetimesLock.EnterReadLock();

        try
        {
            return _lifetimes.Count != 0;
        }
        finally
        {
            _lifetimesLock.ExitReadLock();
        }
    }

    /// <summary>
    /// For testing only. Sets all keepalive timestamps to a time in the distant past,
    /// which will cause all lifetimes to expire (if they have no leases).
    /// </summary>
    internal void SetAllKeepaliveTimestampsToDistantPast()
    {
        // We cannot just zero this because zero is the machine start timestamp, so zero is not necessarily
        // far in the past (especially if the machine is a build agent that just started up). 1 year negative should work, though.
        var distantPast = -PlatformCompatibilityHelpers.ElapsedToTimeStopwatchTicks(TimeSpan.FromDays(365));

        // A read lock is enough: we only touch the contents of the entries, not the collection itself.
        _lifetimesLock.EnterReadLock();

        try
        {
            foreach (var lifetime in _lifetimes.Values)
            {
                Volatile.Write(ref lifetime.KeepaliveTimestamp, distantPast);
            }
        }
        finally
        {
            _lifetimesLock.ExitReadLock();
        }
    }

    /// <summary>
    /// For anomaly analysis during testing only.
    /// </summary>
    internal void DebugDumpLifetimes()
    {
        _lifetimesLock.EnterReadLock();

        try
        {
            Console.WriteLine($"Dumping {_lifetimes.Count} lifetimes of {_metric}. Reaper status: {Volatile.Read(ref _reaperActiveBool)}.");

            foreach (var pair in _lifetimes)
            {
                Console.WriteLine($"{pair.Key} -> {pair.Value}");
            }
        }
        finally
        {
            _lifetimesLock.ExitReadLock();
        }
    }

    /// <summary>
    /// Registers a lease on <paramref name="child"/>, makes sure the reaper is running and wraps the lease in an <see cref="IDisposable"/>.
    /// </summary>
    private IDisposable TakeLease(TChild child)
    {
        var lifetime = GetOrCreateLifetimeAndIncrementLeaseCount(child);
        EnsureReaperActive();

        return new Lease(this, child, lifetime);
    }

    /// <summary>
    /// Registers a lease on <paramref name="child"/>, makes sure the reaper is running and wraps the lease in a <see cref="RefLease"/>.
    /// </summary>
    private RefLease TakeRefLease(TChild child)
    {
        var lifetime = GetOrCreateLifetimeAndIncrementLeaseCount(child);
        EnsureReaperActive();

        return new RefLease(this, child, lifetime);
    }

    /// <summary>
    /// Returns the lifetime registered for <paramref name="child"/> with its lease count already incremented,
    /// registering a new lifetime (with a lease count of 1) if none exists yet.
    /// </summary>
    private ChildLifetimeInfo GetOrCreateLifetimeAndIncrementLeaseCount(TChild child)
    {
        _lifetimesLock.EnterReadLock();

        try
        {
            // Ideally, there already exists a registered lifetime for this metric instance.
            if (_lifetimes.TryGetValue(child, out var existing))
            {
                // Immediately increment it, to reduce the risk of any concurrent activities ending the lifetime.
                Interlocked.Increment(ref existing.LeaseCount);
                return existing;
            }
        }
        finally
        {
            _lifetimesLock.ExitReadLock();
        }

        // No lifetime registered yet - we need to take a write lock and register it.
        var newLifetime = new ChildLifetimeInfo
        {
            LeaseCount = 1
        };

        _lifetimesLock.EnterWriteLock();

        try
        {
#if NET
            // It could be that someone beats us to it! Probably not, though.
            if (_lifetimes.TryAdd(child, newLifetime))
                return newLifetime;

            var existing = _lifetimes[child];

            // Immediately increment it, to reduce the risk of any concurrent activities ending the lifetime.
            // Even if something does, it is not the end of the world - the reaper will create a new lifetime when it realizes this happened.
            Interlocked.Increment(ref existing.LeaseCount);
            return existing;
#else
            // On .NET Fx we need to do the pessimistic case first because there is no TryAdd().
            if (_lifetimes.TryGetValue(child, out var existing))
            {
                // Immediately increment it, to reduce the risk of any concurrent activities ending the lifetime.
                // Even if something does, it is not the end of the world - the reaper will create a new lifetime when it realizes this happened.
                Interlocked.Increment(ref existing.LeaseCount);
                return existing;
            }

            _lifetimes.Add(child, newLifetime);
            return newLifetime;
#endif
        }
        finally
        {
            _lifetimesLock.ExitWriteLock();
        }
    }

    /// <summary>
    /// Called when a lease on <paramref name="child"/> is released. Refreshes the keepalive timestamp,
    /// re-registers the child if its lifetime was ended while the lease was held, and drops the lease count.
    /// </summary>
    internal void OnLeaseEnded(TChild child, ChildLifetimeInfo lifetime)
    {
        // Update keepalive timestamp before anything else, to avoid racing.
        Volatile.Write(ref lifetime.KeepaliveTimestamp, LowGranularityTimeSource.GetStopwatchTimestamp());

        // If the lifetime has been ended while we still held a lease, it means there was a race that we lost.
        // The metric instance may or may not be still alive. To ensure proper cleanup, we re-register a lifetime
        // for the metric instance, which will ensure it gets cleaned up when it expires.
        if (Volatile.Read(ref lifetime.Ended))
        {
            // We just take a new lease and immediately dispose it. We are guaranteed not to loop here because the
            // reaper removes lifetimes from the dictionary once ended, so we can never run into the same lifetime again.
            TakeRefLease(child).Dispose();
        }

        // Finally, decrement the lease count to relinquish any claim on extending the lifetime.
        Interlocked.Decrement(ref lifetime.LeaseCount);
    }

    void INotifyLeaseEnded.OnLeaseEnded(object child, ChildLifetimeInfo lifetime)
    {
        OnLeaseEnded((TChild)child, lifetime);
    }

    /// <summary>
    /// Heap-allocated lease that reports back to the owning handle when disposed.
    /// </summary>
    private sealed class Lease(ManagedLifetimeMetricHandle<TChild, TMetricInterface> parent, TChild child, ChildLifetimeInfo lifetime) : IDisposable
    {
        public void Dispose() => parent.OnLeaseEnded(child, lifetime);
    }
    #endregion

    #region Reaper
    // Whether the reaper is currently active. This is set to true when a metric instance is created and
    // reset when the last metric instance expires (after which it may be set again).
    // We use atomic operations without locking.
    private int _reaperActiveBool = ReaperInactive;

    private const int ReaperActive = 1;
    private const int ReaperInactive = 0;

    /// <summary>
    /// Call this immediately after creating a metric instance that will eventually expire.
    /// </summary>
    private void EnsureReaperActive()
    {
        if (Interlocked.CompareExchange(ref _reaperActiveBool, ReaperActive, ReaperInactive) == ReaperActive)
        {
            // It was already active - nothing for us to do.
            return;
        }

        _ = Task.Run(_reaperFunc);
    }

    /// <summary>
    /// Reaper loop: removes expired metric instances, then either shuts down (when nothing is tracked anymore)
    /// or waits for <see cref="_expiresAfter"/> before the next pass.
    /// </summary>
    private async Task Reaper()
    {
        while (true)
        {
            var now = LowGranularityTimeSource.GetStopwatchTimestamp();

            // Will contain the results of pass 1.
            TChild[] expiredInstancesBuffer;
            int expiredInstanceCount = 0;

            // Pass 1: holding only a read lock, make a list of metric instances that have expired.
            _lifetimesLock.EnterReadLock();

            try
            {
                expiredInstancesBuffer = ArrayPool<TChild>.Shared.Rent(_lifetimes.Count);

                foreach (var pair in _lifetimes)
                {
                    if (Volatile.Read(ref pair.Value.LeaseCount) != 0)
                        continue; // Not expired.

                    if (PlatformCompatibilityHelpers.StopwatchGetElapsedTime(Volatile.Read(ref pair.Value.KeepaliveTimestamp), now) < _expiresAfter)
                        continue; // Not expired.

                    // No leases and keepalive has expired - it is an expired instance!
                    expiredInstancesBuffer[expiredInstanceCount++] = pair.Key;
                }
            }
            finally
            {
                _lifetimesLock.ExitReadLock();
            }

            // From here on the buffer is guaranteed to have been rented, so the finally below never sees a missing buffer.
            try
            {
                // Pass 2: if we have any work to do, take a write lock and remove the expired metric instances,
                // assuming our judgement about their expiration remains valid. We process and lock one by one,
                // to avoid holding locks for a long duration if many items expire at once - we are not in any rush.
                for (var i = 0; i < expiredInstanceCount; i++)
                {
                    var expiredInstance = expiredInstancesBuffer[i];

                    _lifetimesLock.EnterWriteLock();

                    try
                    {
                        if (!_lifetimes.TryGetValue(expiredInstance, out var lifetime))
                            continue; // Already gone, nothing for us to do.

                        // We need to check again whether the metric instance is still expired, because it may have been
                        // renewed by a new lease in the meantime. If it is still expired, we can remove it.
                        if (Volatile.Read(ref lifetime.LeaseCount) != 0)
                            continue; // Not expired.

                        if (PlatformCompatibilityHelpers.StopwatchGetElapsedTime(Volatile.Read(ref lifetime.KeepaliveTimestamp), now) < _expiresAfter)
                            continue; // Not expired.

                        // No leases and keepalive has expired - it is an expired instance!

                        // We mark the old lifetime as ended - if it happened that it got associated with a new lease
                        // (which is possible because we do not prevent lease-taking while in this loop), the new lease
                        // upon being ended will re-register the lifetime instead of just extending the existing one.
                        // We can be certain that any concurrent lifetime-affecting logic is using the same LifetimeInfo
                        // instance because the lifetime dictionary remains locked until we are done (by which time this flag is set).
                        Volatile.Write(ref lifetime.Ended, true);

                        _lifetimes.Remove(expiredInstance);

                        // If we did encounter a race, removing the metric instance here means that some metric value updates
                        // may go missing (until the next lease creates a new instance). This is acceptable behavior, to keep the code simple.
                        expiredInstance.Remove();
                    }
                    finally
                    {
                        _lifetimesLock.ExitWriteLock();
                    }
                }
            }
            finally
            {
                // Clear on return so the pooled array does not keep references to removed metric instances alive.
                ArrayPool<TChild>.Shared.Return(expiredInstancesBuffer, clearArray: true);
            }

            // Check if we need to shut down the reaper or keep going.
            if (!HasAnyTrackedLifetimes())
            {
                CleanupReaper();
                return;
            }

            // Work done! Go sleep a bit and come back when something may have expired.
            // We do not need to be too aggressive here, as expiration is not a hard schedule guarantee.
            await Delayer.Delay(_expiresAfter);
        }
    }

    /// <summary>
    /// Called when the reaper has noticed that all metric instances have expired and it has no more work to do.
    /// </summary>
    private void CleanupReaper()
    {
        Volatile.Write(ref _reaperActiveBool, ReaperInactive);

        // The reaper is now gone. However, as we do not use locking here it is possible that someone already
        // added metric instances (which saw "oh reaper is still running") before we got here. Let's check - if
        // there appear to be metric instances registered, we may need to start the reaper again.
        if (HasAnyTrackedLifetimes())
            EnsureReaperActive();
    }

    // Cached delegate for Reaper(), assigned once in the constructor and passed to Task.Run() when starting the reaper.
    private readonly Func<Task> _reaperFunc;
    #endregion
}