Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
## Unreleased

* [FEATURE] HTTP handlers created by `promhttp` package now support metrics filtering by providing one or more `name[]` query parameters. The default behavior when none are provided remains the same, returning all metrics. #1925
* [FEATURE] prometheus: `Registry` exposes a new `DisableHelpTextValidation` method to opt out of the strict per-fqName help-text consistency check during `Gather`. Useful when multiple libraries (or different versions of the same library) register compatible metrics into the same registry. Default behavior is unchanged, and `NewPedanticRegistry` always overrides the toggle. #2006

## Unreleased `exp` module

Expand Down
58 changes: 51 additions & 7 deletions prometheus/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,12 +271,38 @@ func (errs MultiError) MaybeUnwrap() error {
// Registry implements Collector to allow it to be used for creating groups of
// metrics. See the Grouping example for how this can be done.
type Registry struct {
mtx sync.RWMutex
collectorsByID map[uint64]Collector // ID is a hash of the descIDs.
descIDs map[uint64]struct{}
dimHashesByName map[string]uint64
uncheckedCollectors []Collector
pedanticChecksEnabled bool
mtx sync.RWMutex
collectorsByID map[uint64]Collector // ID is a hash of the descIDs.
descIDs map[uint64]struct{}
dimHashesByName map[string]uint64
uncheckedCollectors []Collector
pedanticChecksEnabled bool
helpTextValidationDisabled bool
}

// DisableHelpTextValidation opts this Registry out of the strict check that
// every metric sharing a fully-qualified name also share the same help text.
// When disabled, Gather keeps the help text from the first metric it sees for
// a given fqName and aggregates later metrics into the same family even if
// their help differs.
//
// This is useful when multiple libraries — or different versions of the same
// library — register compatible metrics into a shared registry. A trivial
// difference such as a trailing period gained across a client_golang upgrade
// can otherwise turn a metadata mismatch into a hard /metrics outage. The
// Prometheus server itself applies the same lenient semantics on scrape (see
// https://github.com/prometheus/client_golang/issues/1820).
//
// The strict default is recommended for development and testing, where it
// surfaces accidental help-text changes early. Calling this method on a
// Registry returned by NewPedanticRegistry has no effect: pedantic mode
// always enforces help-text consistency. The change applies to subsequent
// Gather calls and is safe to invoke after registration; concurrent calls
// are serialized by the Registry's lock.
func (r *Registry) DisableHelpTextValidation() {
r.mtx.Lock()
defer r.mtx.Unlock()
r.helpTextValidationDisabled = true
}

// Register implements Registerer.
Expand Down Expand Up @@ -468,6 +494,13 @@ func (r *Registry) Gather() ([]*dto.MetricFamily, error) {
registeredDescIDs[id] = struct{}{}
}
}
// Snapshot the effective help-text validation toggle while still under
// the lock so that all processMetric calls in this Gather see a
// consistent value. Pedantic mode unconditionally overrides the toggle
// because pedantic callers have explicitly opted into the strictest set
// of checks; mixing pedantic with lenient help validation would be
// internally contradictory.
helpTextValidationDisabled := r.helpTextValidationDisabled && !r.pedanticChecksEnabled
r.mtx.RUnlock()

wg.Add(goroutineBudget)
Expand Down Expand Up @@ -526,6 +559,7 @@ func (r *Registry) Gather() ([]*dto.MetricFamily, error) {
metric, metricFamiliesByName,
metricHashes,
registeredDescIDs,
helpTextValidationDisabled,
))
case metric, ok := <-umc:
if !ok {
Expand All @@ -536,6 +570,7 @@ func (r *Registry) Gather() ([]*dto.MetricFamily, error) {
metric, metricFamiliesByName,
metricHashes,
nil,
helpTextValidationDisabled,
))
default:
if goroutineBudget <= 0 || len(checkedCollectors)+len(uncheckedCollectors) == 0 {
Expand All @@ -553,6 +588,7 @@ func (r *Registry) Gather() ([]*dto.MetricFamily, error) {
metric, metricFamiliesByName,
metricHashes,
registeredDescIDs,
helpTextValidationDisabled,
))
case metric, ok := <-umc:
if !ok {
Expand All @@ -563,6 +599,7 @@ func (r *Registry) Gather() ([]*dto.MetricFamily, error) {
metric, metricFamiliesByName,
metricHashes,
nil,
helpTextValidationDisabled,
))
}
break
Expand Down Expand Up @@ -659,11 +696,18 @@ func WriteToTextfile(filename string, g Gatherer) error {
}

// processMetric is an internal helper method only used by the Gather method.
//
// helpTextValidationDisabled gates the strict per-fqName help-text consistency
// check. When true, a metric whose help string differs from the family's
// existing help is still aggregated into the family, and the family keeps the
// first help text seen. See Registry.DisableHelpTextValidation for the
// motivating use case (#1820).
func processMetric(
metric Metric,
metricFamiliesByName map[string]*dto.MetricFamily,
metricHashes map[uint64]struct{},
registeredDescIDs map[uint64]struct{},
helpTextValidationDisabled bool,
) error {
desc := metric.Desc()
// Wrapped metrics collected by an unchecked Collector can have an
Expand All @@ -677,7 +721,7 @@ func processMetric(
}
metricFamily, ok := metricFamiliesByName[desc.fqName]
if ok { // Existing name.
if metricFamily.GetHelp() != desc.help {
if metricFamily.GetHelp() != desc.help && !helpTextValidationDisabled {
return fmt.Errorf(
"collected metric %s %s has help %q but should have %q",
desc.fqName, dtoMetric, desc.help, metricFamily.GetHelp(),
Expand Down
156 changes: 156 additions & 0 deletions prometheus/registry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1396,6 +1396,162 @@ func TestCheckMetricConsistency(t *testing.T) {
reg.Unregister(invalidCollector)
}

// helpTextCollector emits a single counter with a configurable fully-qualified
// name, help text, and label set. Its Describe method yields no Desc so the
// collector is treated as "unchecked" by the registry, which means it bypasses
// the help/dimHash consistency checks performed at Register time. This is the
// shape that triggers the cross-version scenario described in
// https://github.com/prometheus/client_golang/issues/1820: two libraries (often
// different versions of the same library) register compatible metrics into the
// same registry but use slightly different help strings — for example, one
// version adds a trailing period.
type helpTextCollector struct {
fqName string
help string
label string
}

func (c helpTextCollector) Describe(_ chan<- *prometheus.Desc) {}

func (c helpTextCollector) Collect(ch chan<- prometheus.Metric) {
desc := prometheus.NewDesc(c.fqName, c.help, []string{"src"}, nil)
ch <- prometheus.MustNewConstMetric(desc, prometheus.CounterValue, 1, c.label)
}

// orderedHelpTextCollector emits multiple ConstMetrics under the same fqName
// with different help texts in deterministic source order. A single collector
// is used so that the order of emissions on the metric channel — and therefore
// which help text populates the MetricFamily first — is well-defined. Two
// separately-registered collectors are insufficient here because the registry
// fans out collection across goroutines.
type orderedHelpTextCollector struct {
fqName string
emits []struct{ help, label string }
}

func (c orderedHelpTextCollector) Describe(_ chan<- *prometheus.Desc) {}

func (c orderedHelpTextCollector) Collect(ch chan<- prometheus.Metric) {
for _, e := range c.emits {
desc := prometheus.NewDesc(c.fqName, e.help, []string{"src"}, nil)
ch <- prometheus.MustNewConstMetric(desc, prometheus.CounterValue, 1, e.label)
}
}

// TestGatherRejectsHelpTextMismatchByDefault locks in the long-standing strict
// behavior: a non-pedantic Registry with no opt-in still rejects metrics that
// share an fqName but disagree on help text. This is the backwards-compatibility
// guarantee that callers depending on the existing error contract continue to
// observe.
func TestGatherRejectsHelpTextMismatchByDefault(t *testing.T) {
const fqName = "interop_total"
first := helpTextCollector{fqName: fqName, help: "Total count of interop events", label: "first"}
second := helpTextCollector{fqName: fqName, help: "Total count of interop events.", label: "second"}

reg := prometheus.NewRegistry()
reg.MustRegister(first, second)

if _, err := reg.Gather(); err == nil {
t.Fatal("expected Gather to return an error for help-text mismatch on a strict (default) Registry, got nil")
}
}

// TestGatherAcceptsHelpTextMismatchWhenDisabled is a regression test for
// https://github.com/prometheus/client_golang/issues/1820. When the caller has
// opted out of help-text validation via Registry.DisableHelpTextValidation,
// Gather must aggregate metrics that share an fqName but use slightly
// different help strings — most commonly two libraries (or two versions of the
// same library) registering compatible metrics into the same registry.
//
// The test asserts that Gather succeeds, returns exactly one MetricFamily
// containing the metrics from both collectors, and keeps one of the observed
// help strings on the family (the registry fans out collection across
// goroutines, so we do not constrain which one — see
// TestGatherWithDisabledHelpTextValidationKeepsFirstHelp below for the
// deterministic guarantee).
func TestGatherAcceptsHelpTextMismatchWhenDisabled(t *testing.T) {
const fqName = "interop_total"
first := helpTextCollector{fqName: fqName, help: "Total count of interop events", label: "first"}
second := helpTextCollector{fqName: fqName, help: "Total count of interop events.", label: "second"}

reg := prometheus.NewRegistry()
reg.DisableHelpTextValidation()
reg.MustRegister(first, second)

got, err := reg.Gather()
if err != nil {
t.Fatalf("Gather returned an error after DisableHelpTextValidation: %v", err)
}
if len(got) != 1 {
t.Fatalf("expected exactly one MetricFamily, got %d: %v", len(got), got)
}
mf := got[0]
if mf.GetName() != fqName {
t.Errorf("MetricFamily name = %q, want %q", mf.GetName(), fqName)
}
if h := mf.GetHelp(); h != first.help && h != second.help {
t.Errorf("MetricFamily help = %q, want one of %q or %q", h, first.help, second.help)
}
if len(mf.GetMetric()) != 2 {
t.Errorf("expected MetricFamily to hold metrics from both collectors, got %d", len(mf.GetMetric()))
}
}

// TestGatherWithDisabledHelpTextValidationKeepsFirstHelp pins the documented
// "first help text wins" semantics. A single collector emits both metrics in
// source order, so the metric-channel order — and therefore which help text
// is recorded on the MetricFamily — is deterministic.
func TestGatherWithDisabledHelpTextValidationKeepsFirstHelp(t *testing.T) {
const (
fqName = "interop_total"
firstHelp = "Total count of interop events"
laterHelp = "Total count of interop events."
)
c := orderedHelpTextCollector{
fqName: fqName,
emits: []struct{ help, label string }{
{help: firstHelp, label: "first"},
{help: laterHelp, label: "later"},
},
}

reg := prometheus.NewRegistry()
reg.DisableHelpTextValidation()
reg.MustRegister(c)

got, err := reg.Gather()
if err != nil {
t.Fatalf("Gather returned an error: %v", err)
}
if len(got) != 1 {
t.Fatalf("expected exactly one MetricFamily, got %d: %v", len(got), got)
}
if h := got[0].GetHelp(); h != firstHelp {
t.Errorf("MetricFamily help = %q, want %q (first-emitted help text should win)", h, firstHelp)
}
if n := len(got[0].GetMetric()); n != 2 {
t.Errorf("expected MetricFamily to hold 2 metrics, got %d", n)
}
}

// TestPedanticRegistryOverridesHelpTextValidationToggle documents that calling
// DisableHelpTextValidation on a pedantic Registry has no effect: pedantic
// callers have opted into the strictest set of checks, so a help-text
// mismatch must still fail Gather even if the toggle was flipped.
func TestPedanticRegistryOverridesHelpTextValidationToggle(t *testing.T) {
const fqName = "interop_total"
first := helpTextCollector{fqName: fqName, help: "Total count of interop events", label: "first"}
second := helpTextCollector{fqName: fqName, help: "Total count of interop events.", label: "second"}

reg := prometheus.NewPedanticRegistry()
reg.DisableHelpTextValidation() // Must be ignored under pedantic mode.
reg.MustRegister(first, second)

if _, err := reg.Gather(); err == nil {
t.Fatal("expected pedantic Registry to still reject help-text mismatch even after DisableHelpTextValidation, got nil error")
}
}

func TestGatherDoesNotLeakGoroutines(t *testing.T) {
// Use goleak to verify that no unexpected goroutines are leaked during the test.
defer goleak.VerifyNone(t)
Expand Down
Loading