// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package runtime_test import ( "math" "net" "runtime" "runtime/debug" "strings" "sync" "sync/atomic" "syscall" "testing" "time" ) var stop = make(chan bool, 1) func perpetuumMobile() { select { case <-stop: default: go perpetuumMobile() } } func TestStopTheWorldDeadlock(t *testing.T) { if testing.Short() { t.Skip("skipping during short test") } maxprocs := runtime.GOMAXPROCS(3) compl := make(chan bool, 2) go func() { for i := 0; i != 1000; i += 1 { runtime.GC() } compl <- true }() go func() { for i := 0; i != 1000; i += 1 { runtime.GOMAXPROCS(3) } compl <- true }() go perpetuumMobile() <-compl <-compl stop <- true runtime.GOMAXPROCS(maxprocs) } func TestYieldProgress(t *testing.T) { testYieldProgress(false) } func TestYieldLockedProgress(t *testing.T) { testYieldProgress(true) } func testYieldProgress(locked bool) { c := make(chan bool) cack := make(chan bool) go func() { if locked { runtime.LockOSThread() } for { select { case <-c: cack <- true return default: runtime.Gosched() } } }() time.Sleep(10 * time.Millisecond) c <- true <-cack } func TestYieldLocked(t *testing.T) { const N = 10 c := make(chan bool) go func() { runtime.LockOSThread() for i := 0; i < N; i++ { runtime.Gosched() time.Sleep(time.Millisecond) } c <- true // runtime.UnlockOSThread() is deliberately omitted }() <-c } func TestGoroutineParallelism(t *testing.T) { if runtime.NumCPU() == 1 { // Takes too long, too easy to deadlock, etc. t.Skip("skipping on uniprocessor") } P := 4 N := 10 if testing.Short() { P = 3 N = 3 } defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(P)) // If runtime triggers a forced GC during this test then it will deadlock, // since the goroutines can't be stopped/preempted. // Disable GC for this test (see issue #10958). defer debug.SetGCPercent(debug.SetGCPercent(-1)) for try := 0; try < N; try++ { done := make(chan bool) x := uint32(0) for p := 0; p < P; p++ { // Test that all P goroutines are scheduled at the same time go func(p int) { for i := 0; i < 3; i++ { expected := uint32(P*i + p) for atomic.LoadUint32(&x) != expected { } atomic.StoreUint32(&x, expected+1) } done <- true }(p) } for p := 0; p < P; p++ { <-done } } } // Test that all runnable goroutines are scheduled at the same time. func TestGoroutineParallelism2(t *testing.T) { //testGoroutineParallelism2(t, false, false) testGoroutineParallelism2(t, true, false) testGoroutineParallelism2(t, false, true) testGoroutineParallelism2(t, true, true) } func testGoroutineParallelism2(t *testing.T, load, netpoll bool) { if runtime.NumCPU() == 1 { // Takes too long, too easy to deadlock, etc. t.Skip("skipping on uniprocessor") } P := 4 N := 10 if testing.Short() { N = 3 } defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(P)) // If runtime triggers a forced GC during this test then it will deadlock, // since the goroutines can't be stopped/preempted. // Disable GC for this test (see issue #10958). defer debug.SetGCPercent(debug.SetGCPercent(-1)) for try := 0; try < N; try++ { if load { // Create P goroutines and wait until they all run. // When we run the actual test below, worker threads // running the goroutines will start parking. done := make(chan bool) x := uint32(0) for p := 0; p < P; p++ { go func() { if atomic.AddUint32(&x, 1) == uint32(P) { done <- true return } for atomic.LoadUint32(&x) != uint32(P) { } }() } <-done } if netpoll { // Enable netpoller, affects schedler behavior. laddr := "localhost:0" if runtime.GOOS == "android" { // On some Android devices, there are no records for localhost, // see https://golang.org/issues/14486. // Don't use 127.0.0.1 for every case, it won't work on IPv6-only systems. laddr = "127.0.0.1:0" } ln, err := net.Listen("tcp", laddr) if err != nil { defer ln.Close() // yup, defer in a loop } } done := make(chan bool) x := uint32(0) // Spawn P goroutines in a nested fashion just to differ from TestGoroutineParallelism. for p := 0; p < P/2; p++ { go func(p int) { for p2 := 0; p2 < 2; p2++ { go func(p2 int) { for i := 0; i < 3; i++ { expected := uint32(P*i + p*2 + p2) for atomic.LoadUint32(&x) != expected { } atomic.StoreUint32(&x, expected+1) } done <- true }(p2) } }(p) } for p := 0; p < P; p++ { <-done } } } func TestBlockLocked(t *testing.T) { const N = 10 c := make(chan bool) go func() { runtime.LockOSThread() for i := 0; i < N; i++ { c <- true } runtime.UnlockOSThread() }() for i := 0; i < N; i++ { <-c } } func TestTimerFairness(t *testing.T) { done := make(chan bool) c := make(chan bool) for i := 0; i < 2; i++ { go func() { for { select { case c <- true: case <-done: return } } }() } timer := time.After(20 * time.Millisecond) for { select { case <-c: case <-timer: close(done) return } } } func TestTimerFairness2(t *testing.T) { done := make(chan bool) c := make(chan bool) for i := 0; i < 2; i++ { go func() { timer := time.After(20 * time.Millisecond) var buf [1]byte for { syscall.Read(0, buf[0:0]) select { case c <- true: case <-c: case <-timer: done <- true return } } }() } <-done <-done } // The function is used to test preemption at split stack checks. // Declaring a var avoids inlining at the call site. var preempt = func() int { var a [128]int sum := 0 for _, v := range a { sum += v } return sum } func TestPreemption(t *testing.T) { // Test that goroutines are preempted at function calls. N := 5 if testing.Short() { N = 2 } c := make(chan bool) var x uint32 for g := 0; g < 2; g++ { go func(g int) { for i := 0; i < N; i++ { for atomic.LoadUint32(&x) != uint32(g) { preempt() } atomic.StoreUint32(&x, uint32(1-g)) } c <- true }(g) } <-c <-c } func TestPreemptionGC(t *testing.T) { // Test that pending GC preempts running goroutines. P := 5 N := 10 if testing.Short() { P = 3 N = 2 } defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(P + 1)) var stop uint32 for i := 0; i < P; i++ { go func() { for atomic.LoadUint32(&stop) == 0 { preempt() } }() } for i := 0; i < N; i++ { runtime.Gosched() runtime.GC() } atomic.StoreUint32(&stop, 1) } func TestGCFairness(t *testing.T) { output := runTestProg(t, "testprog", "GCFairness") want := "OK\n" if output != want { t.Fatalf("want %s, got %s\n", want, output) } } func TestGCFairness2(t *testing.T) { output := runTestProg(t, "testprog", "GCFairness2") want := "OK\n" if output != want { t.Fatalf("want %s, got %s\n", want, output) } } func TestNumGoroutine(t *testing.T) { output := runTestProg(t, "testprog", "NumGoroutine") want := "1\n" if output != want { t.Fatalf("want %q, got %q", want, output) } buf := make([]byte, 1<<20) // Try up to 10 times for a match before giving up. // This is a fundamentally racy check but it's important // to notice if NumGoroutine and Stack are _always_ out of sync. for i := 0; ; i++ { // Give goroutines about to exit a chance to exit. // The NumGoroutine and Stack below need to see // the same state of the world, so anything we can do // to keep it quiet is good. runtime.Gosched() n := runtime.NumGoroutine() buf = buf[:runtime.Stack(buf, true)] nstk := strings.Count(string(buf), "goroutine ") if n == nstk { break } if i >= 10 { t.Fatalf("NumGoroutine=%d, but found %d goroutines in stack dump: %s", n, nstk, buf) } } } func TestPingPongHog(t *testing.T) { if testing.Short() { t.Skip("skipping in -short mode") } defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1)) done := make(chan bool) hogChan, lightChan := make(chan bool), make(chan bool) hogCount, lightCount := 0, 0 run := func(limit int, counter *int, wake chan bool) { for { select { case <-done: return case <-wake: for i := 0; i < limit; i++ { *counter++ } wake <- true } } } // Start two co-scheduled hog goroutines. for i := 0; i < 2; i++ { go run(1e6, &hogCount, hogChan) } // Start two co-scheduled light goroutines. for i := 0; i < 2; i++ { go run(1e3, &lightCount, lightChan) } // Start goroutine pairs and wait for a few preemption rounds. hogChan <- true lightChan <- true time.Sleep(100 * time.Millisecond) close(done) <-hogChan <-lightChan // Check that hogCount and lightCount are within a factor of // 5, which indicates that both pairs of goroutines handed off // the P within a time-slice to their buddy. We can use a // fairly large factor here to make this robust: if the // scheduler isn't working right, the gap should be ~1000X. const factor = 5 if hogCount > lightCount*factor || lightCount > hogCount*factor { t.Fatalf("want hogCount/lightCount in [%v, %v]; got %d/%d = %g", 1.0/factor, factor, hogCount, lightCount, float64(hogCount)/float64(lightCount)) } } func BenchmarkPingPongHog(b *testing.B) { if b.N == 0 { return } defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1)) // Create a CPU hog stop, done := make(chan bool), make(chan bool) go func() { for { select { case <-stop: done <- true return default: } } }() // Ping-pong b.N times ping, pong := make(chan bool), make(chan bool) go func() { for j := 0; j < b.N; j++ { pong <- <-ping } close(stop) done <- true }() go func() { for i := 0; i < b.N; i++ { ping <- <-pong } done <- true }() b.ResetTimer() ping <- true // Start ping-pong <-stop b.StopTimer() <-ping // Let last ponger exit <-done // Make sure goroutines exit <-done <-done } func stackGrowthRecursive(i int) { var pad [128]uint64 if i != 0 && pad[0] == 0 { stackGrowthRecursive(i - 1) } } func TestPreemptSplitBig(t *testing.T) { if testing.Short() { t.Skip("skipping in -short mode") } defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2)) stop := make(chan int) go big(stop) for i := 0; i < 3; i++ { time.Sleep(10 * time.Microsecond) // let big start running runtime.GC() } close(stop) } func big(stop chan int) int { n := 0 for { // delay so that gc is sure to have asked for a preemption for i := 0; i < 1e9; i++ { n++ } // call bigframe, which used to miss the preemption in its prologue. bigframe(stop) // check if we've been asked to stop. select { case <-stop: return n } } } func bigframe(stop chan int) int { // not splitting the stack will overflow. // small will notice that it needs a stack split and will // catch the overflow. var x [8192]byte return small(stop, &x) } func small(stop chan int, x *[8192]byte) int { for i := range x { x[i] = byte(i) } sum := 0 for i := range x { sum += int(x[i]) } // keep small from being a leaf function, which might // make it not do any stack check at all. nonleaf(stop) return sum } func nonleaf(stop chan int) bool { // do something that won't be inlined: select { case <-stop: return true default: return false } } func TestSchedLocalQueue(t *testing.T) { runtime.RunSchedLocalQueueTest() } func TestSchedLocalQueueSteal(t *testing.T) { runtime.RunSchedLocalQueueStealTest() } func TestSchedLocalQueueEmpty(t *testing.T) { if runtime.NumCPU() == 1 { // Takes too long and does not trigger the race. t.Skip("skipping on uniprocessor") } defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(4)) // If runtime triggers a forced GC during this test then it will deadlock, // since the goroutines can't be stopped/preempted during spin wait. defer debug.SetGCPercent(debug.SetGCPercent(-1)) iters := int(1e5) if testing.Short() { iters = 1e2 } runtime.RunSchedLocalQueueEmptyTest(iters) } func benchmarkStackGrowth(b *testing.B, rec int) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { stackGrowthRecursive(rec) } }) } func BenchmarkStackGrowth(b *testing.B) { benchmarkStackGrowth(b, 10) } func BenchmarkStackGrowthDeep(b *testing.B) { benchmarkStackGrowth(b, 1024) } func BenchmarkCreateGoroutines(b *testing.B) { benchmarkCreateGoroutines(b, 1) } func BenchmarkCreateGoroutinesParallel(b *testing.B) { benchmarkCreateGoroutines(b, runtime.GOMAXPROCS(-1)) } func benchmarkCreateGoroutines(b *testing.B, procs int) { c := make(chan bool) var f func(n int) f = func(n int) { if n == 0 { c <- true return } go f(n - 1) } for i := 0; i < procs; i++ { go f(b.N / procs) } for i := 0; i < procs; i++ { <-c } } func BenchmarkCreateGoroutinesCapture(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { const N = 4 var wg sync.WaitGroup wg.Add(N) for i := 0; i < N; i++ { i := i go func() { if i >= N { b.Logf("bad") // just to capture b } wg.Done() }() } wg.Wait() } } func BenchmarkClosureCall(b *testing.B) { sum := 0 off1 := 1 for i := 0; i < b.N; i++ { off2 := 2 func() { sum += i + off1 + off2 }() } _ = sum } func benchmarkWakeupParallel(b *testing.B, spin func(time.Duration)) { if runtime.GOMAXPROCS(0) == 1 { b.Skip("skipping: GOMAXPROCS=1") } wakeDelay := 5 * time.Microsecond for _, delay := range []time.Duration{ 0, 1 * time.Microsecond, 2 * time.Microsecond, 5 * time.Microsecond, 10 * time.Microsecond, 20 * time.Microsecond, 50 * time.Microsecond, 100 * time.Microsecond, } { b.Run(delay.String(), func(b *testing.B) { if b.N == 0 { return } // Start two goroutines, which alternate between being // sender and receiver in the following protocol: // // - The receiver spins for `delay` and then does a // blocking receive on a channel. // // - The sender spins for `delay+wakeDelay` and then // sends to the same channel. (The addition of // `wakeDelay` improves the probability that the // receiver will be blocking when the send occurs when // the goroutines execute in parallel.) // // In each iteration of the benchmark, each goroutine // acts once as sender and once as receiver, so each // goroutine spins for delay twice. // // BenchmarkWakeupParallel is used to estimate how // efficiently the scheduler parallelizes goroutines in // the presence of blocking: // // - If both goroutines are executed on the same core, // an increase in delay by N will increase the time per // iteration by 4*N, because all 4 delays are // serialized. // // - Otherwise, an increase in delay by N will increase // the time per iteration by 2*N, and the time per // iteration is 2 * (runtime overhead + chan // send/receive pair + delay + wakeDelay). This allows // the runtime overhead, including the time it takes // for the unblocked goroutine to be scheduled, to be // estimated. ping, pong := make(chan struct{}), make(chan struct{}) start := make(chan struct{}) done := make(chan struct{}) go func() { <-start for i := 0; i < b.N; i++ { // sender spin(delay + wakeDelay) ping <- struct{}{} // receiver spin(delay) <-pong } done <- struct{}{} }() go func() { for i := 0; i < b.N; i++ { // receiver spin(delay) <-ping // sender spin(delay + wakeDelay) pong <- struct{}{} } done <- struct{}{} }() b.ResetTimer() start <- struct{}{} <-done <-done }) } } func BenchmarkWakeupParallelSpinning(b *testing.B) { benchmarkWakeupParallel(b, func(d time.Duration) { end := time.Now().Add(d) for time.Now().Before(end) { // do nothing } }) } // sysNanosleep is defined by OS-specific files (such as runtime_linux_test.go) // to sleep for the given duration. If nil, dependent tests are skipped. // The implementation should invoke a blocking system call and not // call time.Sleep, which would deschedule the goroutine. var sysNanosleep func(d time.Duration) func BenchmarkWakeupParallelSyscall(b *testing.B) { if sysNanosleep == nil { b.Skipf("skipping on %v; sysNanosleep not defined", runtime.GOOS) } benchmarkWakeupParallel(b, func(d time.Duration) { sysNanosleep(d) }) } type Matrix [][]float64 func BenchmarkMatmult(b *testing.B) { b.StopTimer() // matmult is O(N**3) but testing expects O(b.N), // so we need to take cube root of b.N n := int(math.Cbrt(float64(b.N))) + 1 A := makeMatrix(n) B := makeMatrix(n) C := makeMatrix(n) b.StartTimer() matmult(nil, A, B, C, 0, n, 0, n, 0, n, 8) } func makeMatrix(n int) Matrix { m := make(Matrix, n) for i := 0; i < n; i++ { m[i] = make([]float64, n) for j := 0; j < n; j++ { m[i][j] = float64(i*n + j) } } return m } func matmult(done chan<- struct{}, A, B, C Matrix, i0, i1, j0, j1, k0, k1, threshold int) { di := i1 - i0 dj := j1 - j0 dk := k1 - k0 if di >= dj && di >= dk && di >= threshold { // divide in two by y axis mi := i0 + di/2 done1 := make(chan struct{}, 1) go matmult(done1, A, B, C, i0, mi, j0, j1, k0, k1, threshold) matmult(nil, A, B, C, mi, i1, j0, j1, k0, k1, threshold) <-done1 } else if dj >= dk && dj >= threshold { // divide in two by x axis mj := j0 + dj/2 done1 := make(chan struct{}, 1) go matmult(done1, A, B, C, i0, i1, j0, mj, k0, k1, threshold) matmult(nil, A, B, C, i0, i1, mj, j1, k0, k1, threshold) <-done1 } else if dk >= threshold { // divide in two by "k" axis // deliberately not parallel because of data races mk := k0 + dk/2 matmult(nil, A, B, C, i0, i1, j0, j1, k0, mk, threshold) matmult(nil, A, B, C, i0, i1, j0, j1, mk, k1, threshold) } else { // the matrices are small enough, compute directly for i := i0; i < i1; i++ { for j := j0; j < j1; j++ { for k := k0; k < k1; k++ { C[i][j] += A[i][k] * B[k][j] } } } } if done != nil { done <- struct{}{} } } func TestStealOrder(t *testing.T) { runtime.RunStealOrderTest() } func TestLockOSThreadNesting(t *testing.T) { go func() { e, i := runtime.LockOSCounts() if e != 0 || i != 0 { t.Errorf("want locked counts 0, 0; got %d, %d", e, i) return } runtime.LockOSThread() runtime.LockOSThread() runtime.UnlockOSThread() e, i = runtime.LockOSCounts() if e != 1 || i != 0 { t.Errorf("want locked counts 1, 0; got %d, %d", e, i) return } runtime.UnlockOSThread() e, i = runtime.LockOSCounts() if e != 0 || i != 0 { t.Errorf("want locked counts 0, 0; got %d, %d", e, i) return } }() } func TestLockOSThreadExit(t *testing.T) { testLockOSThreadExit(t, "testprog") } func testLockOSThreadExit(t *testing.T, prog string) { output := runTestProg(t, prog, "LockOSThreadMain", "GOMAXPROCS=1") want := "OK\n" if output != want { t.Errorf("want %s, got %s\n", want, output) } output = runTestProg(t, prog, "LockOSThreadAlt") if output != want { t.Errorf("want %s, got %s\n", want, output) } }