internal/trace: correctly handle GoUndetermined for GoroutineSummary

Currently the trace summarization incorrectly handles GoUndetermined by
treating it too much like GoNotExist. In particular, it should be
accumulating all the time since the start of the trace in a particular
bucket, but it doesn't, so that instead gets counted as "unknown time"
because the "creation time" is at the start of the trace.

This change fixes the problem by simply doing the accumulation. It's
very straightforward. It also side-steps some other inaccuracies, like
associating a goroutine that is being named with the current task. I
don't think this can ever actually happen in practice, but splitting up
the two cases, GoUndetermined and GoNotExist, fixes it.

Fixes #76716.

Change-Id: I3ac1557044f99c92bada2cb0e124b2192b1d6ebb
Reviewed-on: https://go-review.googlesource.com/c/go/+/728822
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Nick Ripley <nick.ripley@datadoghq.com>
This commit is contained in:
Michael Anthony Knyszek 2025-12-10 02:50:56 +00:00 committed by Michael Knyszek
parent c0ba519764
commit e38c38f0e5
2 changed files with 42 additions and 6 deletions

View File

@ -321,16 +321,47 @@ func (s *Summarizer) Event(ev *Event) {
// Handle transition out.
g := s.gs[id]
switch old {
case GoUndetermined, GoNotExist:
case GoUndetermined:
g = &GoroutineSummary{ID: id, goroutineSummary: &goroutineSummary{}}
// The goroutine is being named for the first time.
// If we're coming out of GoUndetermined, then the creation time is the
// time of the last sync.
if old == GoUndetermined {
g.CreationTime = s.syncTs
} else {
g.CreationTime = ev.Time()
g.CreationTime = s.syncTs
g.lastRangeTime = make(map[string]Time)
g.BlockTimeByReason = make(map[string]time.Duration)
g.RangeTime = make(map[string]time.Duration)
// Accumulate all the time we spent in new as if it was old.
//
// Note that we still handle "new" again below because the time until
// the *next* event still needs to be accumulated (even though it's
// probably going to come immediately after this event).
stateTime := ev.Time().Sub(s.syncTs)
switch new {
case GoRunning:
g.ExecTime += stateTime
case GoWaiting:
g.BlockTimeByReason[st.Reason] += stateTime
case GoRunnable:
g.SchedWaitTime += stateTime
case GoSyscall:
// For a syscall, we're not going to be 'naming' this P from afar.
// It's "executing" somewhere. If we've got a P, then that means
// we've had that P since the start, so this is regular syscall time.
// Otherwise, this is syscall block time (no P).
if ev.Proc() == NoProc {
g.SyscallBlockTime += stateTime
} else {
g.SyscallTime += stateTime
}
}
// The goroutine is being created, or it's being named for the first time.
s.gs[g.ID] = g
case GoNotExist:
g = &GoroutineSummary{ID: id, goroutineSummary: &goroutineSummary{}}
// The goroutine is being created.
g.CreationTime = ev.Time()
g.lastRangeTime = make(map[string]Time)
g.BlockTimeByReason = make(map[string]time.Duration)
g.RangeTime = make(map[string]time.Duration)

View File

@ -17,6 +17,7 @@ func TestSummarizeGoroutinesTrace(t *testing.T) {
hasSchedWaitTime bool
hasSyncBlockTime bool
hasGCMarkAssistTime bool
hasUnknownTime bool
)
assertContainsGoroutine(t, summaries, "runtime.gcBgMarkWorker")
@ -31,6 +32,7 @@ func TestSummarizeGoroutinesTrace(t *testing.T) {
if dt, ok := summary.RangeTime["GC mark assist"]; ok && dt > 0 {
hasGCMarkAssistTime = true
}
hasUnknownTime = hasUnknownTime || summary.UnknownTime() > 0
}
if !hasSchedWaitTime {
t.Error("missing sched wait time")
@ -41,6 +43,9 @@ func TestSummarizeGoroutinesTrace(t *testing.T) {
if !hasGCMarkAssistTime {
t.Error("missing GC mark assist time")
}
if hasUnknownTime {
t.Error("has time that is unaccounted for")
}
}
func TestSummarizeGoroutinesRegionsTrace(t *testing.T) {