Skip to content

Commit 7453497

Browse files
committed
runtime: don't count nGsyscallNoP for extra Ms in C
In #76435, it turns out that the new metric /sched/goroutines/not-in-go:goroutines counts C threads that have called into Go before (on Linux) as not-in-go goroutines. The reason for this is that the M is still attached to the C thread on Linux as an optimization, so we don't go through all the trouble of detaching the M and, of course, decrementing nGsyscallNoP. There's an easy fix to this accounting issue. The flag on the M, isExtraInC, says whether a thread with an extra M attached no longer has any Go on its (logical) stack. When we take the P from an M in this state, we simply just don't increment nGsyscallNoP. When it calls back into Go, we similarly skip the decrement to nGsyscallNoP. This is more efficient than alternatives, like always updating nGsyscallNoP in cgocallbackg, since that would add a new read-modify-write atomic onto that fast path. It does mean we count threads in C with a P still attached as not-in-go, but this transient in most real programs, assuming the thread indeed does not call back into Go any time soon. Fixes #76435. Change-Id: Id05563bacbe35d3fae17d67fb5ed45fa43fa0548 Reviewed-on: https://go-review.googlesource.com/c/go/+/726964 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Michael Pratt <mpratt@google.com>
1 parent f3d572d commit 7453497

File tree

4 files changed

+169
-10
lines changed

4 files changed

+169
-10
lines changed

src/runtime/metrics_test.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1584,3 +1584,18 @@ func TestReadMetricsSched(t *testing.T) {
15841584
t.Fatalf("output:\n%s\n\nwanted:\n%s", output, want)
15851585
}
15861586
}
1587+
1588+
func TestNotInGoMetricCallback(t *testing.T) {
1589+
switch runtime.GOOS {
1590+
case "windows", "plan9":
1591+
t.Skip("unsupported on Windows and Plan9")
1592+
}
1593+
1594+
// This test is run in a subprocess to prevent other tests from polluting the metrics
1595+
// and because we need to make some cgo callbacks.
1596+
output := runTestProg(t, "testprogcgo", "NotInGoMetricCallback")
1597+
want := "OK\n"
1598+
if output != want {
1599+
t.Fatalf("output:\n%s\n\nwanted:\n%s", output, want)
1600+
}
1601+
}

src/runtime/proc.go

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2433,7 +2433,7 @@ func needm(signal bool) {
24332433
sp := sys.GetCallerSP()
24342434
callbackUpdateSystemStack(mp, sp, signal)
24352435

2436-
// Should mark we are already in Go now.
2436+
// We must mark that we are already in Go now.
24372437
// Otherwise, we may call needm again when we get a signal, before cgocallbackg1,
24382438
// which means the extram list may be empty, that will cause a deadlock.
24392439
mp.isExtraInC = false
@@ -2455,7 +2455,8 @@ func needm(signal bool) {
24552455
// mp.curg is now a real goroutine.
24562456
casgstatus(mp.curg, _Gdeadextra, _Gsyscall)
24572457
sched.ngsys.Add(-1)
2458-
sched.nGsyscallNoP.Add(1)
2458+
// N.B. We do not update nGsyscallNoP, because isExtraInC threads are not
2459+
// counted as real goroutines while they're in C.
24592460

24602461
if !signal {
24612462
if trace.ok() {
@@ -2590,7 +2591,7 @@ func dropm() {
25902591
casgstatus(mp.curg, _Gsyscall, _Gdeadextra)
25912592
mp.curg.preemptStop = false
25922593
sched.ngsys.Add(1)
2593-
sched.nGsyscallNoP.Add(-1)
2594+
decGSyscallNoP(mp)
25942595

25952596
if !mp.isExtraInSig {
25962597
if trace.ok() {
@@ -4732,7 +4733,7 @@ func entersyscallHandleGCWait(trace traceLocker) {
47324733
if trace.ok() {
47334734
trace.ProcStop(pp)
47344735
}
4735-
sched.nGsyscallNoP.Add(1)
4736+
addGSyscallNoP(gp.m) // We gave up our P voluntarily.
47364737
pp.gcStopTime = nanotime()
47374738
pp.syscalltick++
47384739
if sched.stopwait--; sched.stopwait == 0 {
@@ -4763,7 +4764,7 @@ func entersyscallblock() {
47634764
gp.m.syscalltick = gp.m.p.ptr().syscalltick
47644765
gp.m.p.ptr().syscalltick++
47654766

4766-
sched.nGsyscallNoP.Add(1)
4767+
addGSyscallNoP(gp.m) // We're going to give up our P.
47674768

47684769
// Leave SP around for GC and traceback.
47694770
pc := sys.GetCallerPC()
@@ -5001,8 +5002,8 @@ func exitsyscallTryGetP(oldp *p) *p {
50015002
if oldp != nil {
50025003
if thread, ok := setBlockOnExitSyscall(oldp); ok {
50035004
thread.takeP()
5005+
addGSyscallNoP(thread.mp) // takeP does the opposite, but this is a net zero change.
50045006
thread.resume()
5005-
sched.nGsyscallNoP.Add(-1) // takeP adds 1.
50065007
return oldp
50075008
}
50085009
}
@@ -5017,7 +5018,7 @@ func exitsyscallTryGetP(oldp *p) *p {
50175018
}
50185019
unlock(&sched.lock)
50195020
if pp != nil {
5020-
sched.nGsyscallNoP.Add(-1)
5021+
decGSyscallNoP(getg().m) // We got a P for ourselves.
50215022
return pp
50225023
}
50235024
}
@@ -5043,7 +5044,7 @@ func exitsyscallNoP(gp *g) {
50435044
trace.GoSysExit(true)
50445045
traceRelease(trace)
50455046
}
5046-
sched.nGsyscallNoP.Add(-1)
5047+
decGSyscallNoP(getg().m)
50475048
dropg()
50485049
lock(&sched.lock)
50495050
var pp *p
@@ -5081,6 +5082,41 @@ func exitsyscallNoP(gp *g) {
50815082
schedule() // Never returns.
50825083
}
50835084

5085+
// addGSyscallNoP must be called when a goroutine in a syscall loses its P.
5086+
// This function updates all relevant accounting.
5087+
//
5088+
// nosplit because it's called on the syscall paths.
5089+
//
5090+
//go:nosplit
5091+
func addGSyscallNoP(mp *m) {
5092+
// It's safe to read isExtraInC here because it's only mutated
5093+
// outside of _Gsyscall, and we know this thread is attached
5094+
// to a goroutine in _Gsyscall and blocked from exiting.
5095+
if !mp.isExtraInC {
5096+
// Increment nGsyscallNoP since we're taking away a P
5097+
// from a _Gsyscall goroutine, but only if isExtraInC
5098+
// is not set on the M. If it is, then this thread is
5099+
// back to being a full C thread, and will just inflate
5100+
// the count of not-in-go goroutines. See go.dev/issue/76435.
5101+
sched.nGsyscallNoP.Add(1)
5102+
}
5103+
}
5104+
5105+
// decGSsyscallNoP must be called whenever a goroutine in a syscall without
5106+
// a P exits the system call. This function updates all relevant accounting.
5107+
//
5108+
// nosplit because it's called from dropm.
5109+
//
5110+
//go:nosplit
5111+
func decGSyscallNoP(mp *m) {
5112+
// Update nGsyscallNoP, but only if this is not a thread coming
5113+
// out of C. See the comment in addGSyscallNoP. This logic must match,
5114+
// to avoid unmatched increments and decrements.
5115+
if !mp.isExtraInC {
5116+
sched.nGsyscallNoP.Add(-1)
5117+
}
5118+
}
5119+
50845120
// Called from syscall package before fork.
50855121
//
50865122
// syscall_runtime_BeforeFork is for package syscall,
@@ -6758,7 +6794,7 @@ func (s syscallingThread) releaseP(state uint32) {
67586794
trace.ProcSteal(s.pp)
67596795
traceRelease(trace)
67606796
}
6761-
sched.nGsyscallNoP.Add(1)
6797+
addGSyscallNoP(s.mp)
67626798
s.pp.syscalltick++
67636799
}
67646800

src/runtime/runtime2.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -945,7 +945,7 @@ type schedt struct {
945945
nmfreed int64 // cumulative number of freed m's
946946

947947
ngsys atomic.Int32 // number of system goroutines
948-
nGsyscallNoP atomic.Int32 // number of goroutines in syscalls without a P
948+
nGsyscallNoP atomic.Int32 // number of goroutines in syscalls without a P but whose M is not isExtraInC
949949

950950
pidle puintptr // idle p's
951951
npidle atomic.Int32
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
// Copyright 2025 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
//go:build !plan9 && !windows
6+
// +build !plan9,!windows
7+
8+
package main
9+
10+
/*
11+
#include <stdatomic.h>
12+
#include <stddef.h>
13+
#include <pthread.h>
14+
15+
extern void Ready();
16+
17+
static int spinning;
18+
static int released;
19+
20+
static void* enterGoThenSpinTwice(void* arg __attribute__ ((unused))) {
21+
Ready();
22+
atomic_fetch_add(&spinning, 1);
23+
while(atomic_load(&released) == 0) {};
24+
25+
Ready();
26+
atomic_fetch_add(&spinning, 1);
27+
while(1) {};
28+
return NULL;
29+
}
30+
31+
static void SpinTwiceInNewCThread() {
32+
pthread_t tid;
33+
pthread_create(&tid, NULL, enterGoThenSpinTwice, NULL);
34+
}
35+
36+
static int Spinning() {
37+
return atomic_load(&spinning);
38+
}
39+
40+
static void Release() {
41+
atomic_store(&spinning, 0);
42+
atomic_store(&released, 1);
43+
}
44+
*/
45+
import "C"
46+
47+
import (
48+
"os"
49+
"runtime"
50+
"runtime/metrics"
51+
)
52+
53+
func init() {
54+
register("NotInGoMetricCallback", NotInGoMetricCallback)
55+
}
56+
57+
func NotInGoMetricCallback() {
58+
const N = 10
59+
s := []metrics.Sample{{Name: "/sched/goroutines/not-in-go:goroutines"}}
60+
61+
// Create N new C threads that have called into Go at least once.
62+
for range N {
63+
C.SpinTwiceInNewCThread()
64+
}
65+
66+
// Synchronize with spinning threads twice.
67+
//
68+
// This helps catch bad accounting by taking at least a couple other
69+
// codepaths which would cause the accounting to change.
70+
for i := range 2 {
71+
// Make sure they pass through Go.
72+
// N.B. Ready is called twice by the new threads.
73+
for j := range N {
74+
<-readyCh
75+
if j == 2 {
76+
// Try to trigger an update in the immediate STW handoff case.
77+
runtime.ReadMemStats(&m)
78+
}
79+
}
80+
81+
// Make sure they're back in C.
82+
for C.Spinning() < N {
83+
}
84+
85+
// Do something that stops the world to take all the Ps back.
86+
runtime.ReadMemStats(&m)
87+
88+
if i == 0 {
89+
C.Release()
90+
}
91+
}
92+
93+
// Read not-in-go.
94+
metrics.Read(s)
95+
if n := s[0].Value.Uint64(); n != 0 {
96+
println("expected 0 not-in-go goroutines, found", n)
97+
os.Exit(2)
98+
}
99+
println("OK")
100+
}
101+
102+
var m runtime.MemStats
103+
var readyCh = make(chan bool)
104+
105+
//export Ready
106+
func Ready() {
107+
readyCh <- true
108+
}

0 commit comments

Comments
 (0)