pangolin_exporter
271 строка · 8.7 Кб
1// Copyright 2023 The Prometheus Authors
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13package collector
14
15import (
16"context"
17"database/sql"
18"fmt"
19
20"github.com/go-kit/log"
21"github.com/go-kit/log/level"
22"github.com/prometheus/client_golang/prometheus"
23)
24
25func init() {
26registerCollector(statWalReceiverSubsystem, defaultDisabled, NewPGStatWalReceiverCollector)
27}
28
29type PGStatWalReceiverCollector struct {
30log log.Logger
31}
32
// statWalReceiverSubsystem is the subsystem part of every metric name built
// in this file and the name this collector is registered under.
const statWalReceiverSubsystem = "stat_wal_receiver"
34
35func NewPGStatWalReceiverCollector(config collectorConfig) (Collector, error) {
36return &PGStatWalReceiverCollector{log: config.logger}, nil
37}
38
39var (
40labelCats = []string{"upstream_host", "slot_name", "status"}
41statWalReceiverReceiveStartLsn = prometheus.NewDesc(
42prometheus.BuildFQName(namespace, statWalReceiverSubsystem, "receive_start_lsn"),
43"First write-ahead log location used when WAL receiver is started represented as a decimal",
44labelCats,
45prometheus.Labels{},
46)
47statWalReceiverReceiveStartTli = prometheus.NewDesc(
48prometheus.BuildFQName(namespace, statWalReceiverSubsystem, "receive_start_tli"),
49"First timeline number used when WAL receiver is started",
50labelCats,
51prometheus.Labels{},
52)
53statWalReceiverFlushedLSN = prometheus.NewDesc(
54prometheus.BuildFQName(namespace, statWalReceiverSubsystem, "flushed_lsn"),
55"Last write-ahead log location already received and flushed to disk, the initial value of this field being the first log location used when WAL receiver is started represented as a decimal",
56labelCats,
57prometheus.Labels{},
58)
59statWalReceiverReceivedTli = prometheus.NewDesc(
60prometheus.BuildFQName(namespace, statWalReceiverSubsystem, "received_tli"),
61"Timeline number of last write-ahead log location received and flushed to disk",
62labelCats,
63prometheus.Labels{},
64)
65statWalReceiverLastMsgSendTime = prometheus.NewDesc(
66prometheus.BuildFQName(namespace, statWalReceiverSubsystem, "last_msg_send_time"),
67"Send time of last message received from origin WAL sender",
68labelCats,
69prometheus.Labels{},
70)
71statWalReceiverLastMsgReceiptTime = prometheus.NewDesc(
72prometheus.BuildFQName(namespace, statWalReceiverSubsystem, "last_msg_receipt_time"),
73"Send time of last message received from origin WAL sender",
74labelCats,
75prometheus.Labels{},
76)
77statWalReceiverLatestEndLsn = prometheus.NewDesc(
78prometheus.BuildFQName(namespace, statWalReceiverSubsystem, "latest_end_lsn"),
79"Last write-ahead log location reported to origin WAL sender as integer",
80labelCats,
81prometheus.Labels{},
82)
83statWalReceiverLatestEndTime = prometheus.NewDesc(
84prometheus.BuildFQName(namespace, statWalReceiverSubsystem, "latest_end_time"),
85"Time of last write-ahead log location reported to origin WAL sender",
86labelCats,
87prometheus.Labels{},
88)
89statWalReceiverUpstreamNode = prometheus.NewDesc(
90prometheus.BuildFQName(namespace, statWalReceiverSubsystem, "upstream_node"),
91"Node ID of the upstream node",
92labelCats,
93prometheus.Labels{},
94)
95
96pgStatWalColumnQuery = `
97SELECT
98column_name
99FROM information_schema.columns
100WHERE
101table_name = 'pg_stat_wal_receiver' and
102column_name = 'flushed_lsn'
103`
104
105pgStatWalReceiverQueryTemplate = `
106SELECT
107trim(both '''' from substring(conninfo from 'host=([^ ]*)')) as upstream_host,
108slot_name,
109status,
110(receive_start_lsn- '0/0') %% (2^52)::bigint as receive_start_lsn,
111%s
112receive_start_tli,
113received_tli,
114extract(epoch from last_msg_send_time) as last_msg_send_time,
115extract(epoch from last_msg_receipt_time) as last_msg_receipt_time,
116(latest_end_lsn - '0/0') %% (2^52)::bigint as latest_end_lsn,
117extract(epoch from latest_end_time) as latest_end_time,
118substring(slot_name from 'repmgr_slot_([0-9]*)') as upstream_node
119FROM pg_catalog.pg_stat_wal_receiver
120`
121)
122
123func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error {
124db := instance.getDB()
125hasFlushedLSNRows, err := db.QueryContext(ctx, pgStatWalColumnQuery)
126if err != nil {
127return err
128}
129
130hasFlushedLSN := hasFlushedLSNRows.Next()
131var query string
132if hasFlushedLSN {
133query = fmt.Sprintf(pgStatWalReceiverQueryTemplate, "(flushed_lsn - '0/0') % (2^52)::bigint as flushed_lsn,\n")
134} else {
135query = fmt.Sprintf(pgStatWalReceiverQueryTemplate, "")
136}
137
138hasFlushedLSNRows.Close()
139
140rows, err := db.QueryContext(ctx, query)
141if err != nil {
142return err
143}
144defer rows.Close()
145for rows.Next() {
146var upstreamHost, slotName, status sql.NullString
147var receiveStartLsn, receiveStartTli, flushedLsn, receivedTli, latestEndLsn, upstreamNode sql.NullInt64
148var lastMsgSendTime, lastMsgReceiptTime, latestEndTime sql.NullFloat64
149
150if hasFlushedLSN {
151if err := rows.Scan(&upstreamHost, &slotName, &status, &receiveStartLsn, &receiveStartTli, &flushedLsn, &receivedTli, &lastMsgSendTime, &lastMsgReceiptTime, &latestEndLsn, &latestEndTime, &upstreamNode); err != nil {
152return err
153}
154} else {
155if err := rows.Scan(&upstreamHost, &slotName, &status, &receiveStartLsn, &receiveStartTli, &receivedTli, &lastMsgSendTime, &lastMsgReceiptTime, &latestEndLsn, &latestEndTime, &upstreamNode); err != nil {
156return err
157}
158}
159if !upstreamHost.Valid {
160level.Debug(c.log).Log("msg", "Skipping wal receiver stats because upstream host is null")
161continue
162}
163
164if !slotName.Valid {
165level.Debug(c.log).Log("msg", "Skipping wal receiver stats because slotname host is null")
166continue
167}
168
169if !status.Valid {
170level.Debug(c.log).Log("msg", "Skipping wal receiver stats because status is null")
171continue
172}
173labels := []string{upstreamHost.String, slotName.String, status.String}
174
175if !receiveStartLsn.Valid {
176level.Debug(c.log).Log("msg", "Skipping wal receiver stats because receive_start_lsn is null")
177continue
178}
179if !receiveStartTli.Valid {
180level.Debug(c.log).Log("msg", "Skipping wal receiver stats because receive_start_tli is null")
181continue
182}
183if hasFlushedLSN && !flushedLsn.Valid {
184level.Debug(c.log).Log("msg", "Skipping wal receiver stats because flushed_lsn is null")
185continue
186}
187if !receivedTli.Valid {
188level.Debug(c.log).Log("msg", "Skipping wal receiver stats because received_tli is null")
189continue
190}
191if !lastMsgSendTime.Valid {
192level.Debug(c.log).Log("msg", "Skipping wal receiver stats because last_msg_send_time is null")
193continue
194}
195if !lastMsgReceiptTime.Valid {
196level.Debug(c.log).Log("msg", "Skipping wal receiver stats because last_msg_receipt_time is null")
197continue
198}
199if !latestEndLsn.Valid {
200level.Debug(c.log).Log("msg", "Skipping wal receiver stats because latest_end_lsn is null")
201continue
202}
203if !latestEndTime.Valid {
204level.Debug(c.log).Log("msg", "Skipping wal receiver stats because latest_end_time is null")
205continue
206}
207if !upstreamNode.Valid {
208level.Debug(c.log).Log("msg", "Skipping wal receiver stats because upstream_node is null")
209continue
210}
211ch <- prometheus.MustNewConstMetric(
212statWalReceiverReceiveStartLsn,
213prometheus.CounterValue,
214float64(receiveStartLsn.Int64),
215labels...)
216
217ch <- prometheus.MustNewConstMetric(
218statWalReceiverReceiveStartTli,
219prometheus.GaugeValue,
220float64(receiveStartTli.Int64),
221labels...)
222
223if hasFlushedLSN {
224ch <- prometheus.MustNewConstMetric(
225statWalReceiverFlushedLSN,
226prometheus.CounterValue,
227float64(flushedLsn.Int64),
228labels...)
229}
230
231ch <- prometheus.MustNewConstMetric(
232statWalReceiverReceivedTli,
233prometheus.GaugeValue,
234float64(receivedTli.Int64),
235labels...)
236
237ch <- prometheus.MustNewConstMetric(
238statWalReceiverLastMsgSendTime,
239prometheus.CounterValue,
240float64(lastMsgSendTime.Float64),
241labels...)
242
243ch <- prometheus.MustNewConstMetric(
244statWalReceiverLastMsgReceiptTime,
245prometheus.CounterValue,
246float64(lastMsgReceiptTime.Float64),
247labels...)
248
249ch <- prometheus.MustNewConstMetric(
250statWalReceiverLatestEndLsn,
251prometheus.CounterValue,
252float64(latestEndLsn.Int64),
253labels...)
254
255ch <- prometheus.MustNewConstMetric(
256statWalReceiverLatestEndTime,
257prometheus.CounterValue,
258latestEndTime.Float64,
259labels...)
260
261ch <- prometheus.MustNewConstMetric(
262statWalReceiverUpstreamNode,
263prometheus.GaugeValue,
264float64(upstreamNode.Int64),
265labels...)
266}
267if err := rows.Err(); err != nil {
268return err
269}
270return nil
271}
272