package perftest import ( "regexp" "strings" "testing" ) var haystack = `Lorem ipsum dolor sit amet, consectetur adipiscing elit. Cras accumsan nisl et iaculis fringilla. Integer sapien orci, facilisis ut venenatis nec, suscipit at massa. Cras suscipit lectus non neque molestie, et imperdiet sem ultricies. Donec sit amet mattis nisi, efficitur posuere enim. Aliquam erat volutpat. Curabitur mattis nunc nisi, eu maximus dui facilisis in. Quisque vel tortor mauris. Praesent tellus sapien, vestibulum nec purus ut, luctus egestas odio. Ut ac ipsum non ipsum elementum pretium in id enim. Aenean eu augue fringilla, molestie orci et, tincidunt ipsum. Nullam maximus odio vitae augue fermentum laoreet eget scelerisque ligula. Praesent pretium eu lacus in ornare. Maecenas fermentum id sapien non faucibus. Donec est tellus, auctor eu iaculis quis, accumsan vitae ligula. Fusce dolor nisl, pharetra eu facilisis non, hendrerit ac turpis. Pellentesque imperdiet aliquam quam in luctus. Curabitur ut orci sodales, faucibus nunc ac, maximus odio. Vivamus vitae nulla posuere, pellentesque quam posuere` func BenchmarkSubstring(b *testing.B) { for i := 0; i < bN; i++ { strings.Contains(haystack, "auctor") } } func BenchmarkRegex(b *testing.B) { for i := 0; i < bN; i++ { regexp.MatchString("auctor", haystack) } }
$ go test -bench=. testing: warning: no tests to run BenchmarkSubstring-8 10000000 194 ns/op BenchmarkRegex-8 200000 7516 ns/op PASS ok github.com/mkevac/perftest00 3.789s
$ GOGC=off go test -bench=BenchmarkRegex -cpuprofile cpu.out testing: warning: no tests to run BenchmarkRegex-8 200000 6773 ns/op PASS ok github.com/mkevac/perftest00 1.491s
$ go tool pprof perftest00.test cpu.out
package perftest import ( "regexp" "strings" "testing" ) var haystack = `Lorem ipsum dolor sit amet, consectetur adipiscing [...] Vivamus vitae nulla posuere, pellentesque quam posuere` var pattern = regexp.MustCompile("auctor") func BenchmarkSubstring(b *testing.B) { for i := 0; i < bN; i++ { strings.Contains(haystack, "auctor") } } func BenchmarkRegex(b *testing.B) { for i := 0; i < bN; i++ { pattern.MatchString(haystack) } }
$ go test -bench=. testing: warning: no tests to run BenchmarkSubstring-8 10000000 170 ns/op BenchmarkRegex-8 5000000 297 ns/op PASS ok github.com/mkevac/perftest01 3.685s
package main import ( "net/http" _ "net/http/pprof" ) func cpuhogger() { var acc uint64 for { acc += 1 if acc&1 == 0 { acc <<= 1 } } } func main() { go http.ListenAndServe("0.0.0.0:8080", nil) cpuhogger() }
$ go tool pprof http://localhost:8080/debug/pprof/profile?seconds=5
(pprof) web (pprof) top 4.99s of 4.99s total ( 100%) flat flat% sum% cum cum% 4.99s 100% 100% 4.99s 100% main.cpuhogger 0 0% 100% 4.99s 100% runtime.goexit 0 0% 100% 4.99s 100% runtime.main (pprof) list cpuhogger Total: 4.99s No source information for main.cpuhogger
$ go tool pprof pproftest http://localhost:8080/debug/pprof/profile?seconds=5
(pprof) list cpuhogger Total: 4.97s ROUTINE ======================== main.cpuhogger in /home/marko/goprojects/src/github.com/mkevac/pproftest/main.go 4.97s 4.97s (flat, cum) 100% of Total . . 6:) . . 7: . . 8:func cpuhogger() { . . 9: var acc uint64 . . 10: for { 2.29s 2.29s 11: acc += 1 1.14s 1.14s 12: if acc&1 == 0 { 1.54s 1.54s 13: acc <<= 1 . . 14: } . . 15: } . . 16:} . . 17: . . 18:func main() { (pprof) disasm cpuhogger Total: 4.97s ROUTINE ======================== main.cpuhogger 4.97s 4.97s (flat, cum) 100% of Total . . 401000: XORL AX, AX 1.75s 1.75s 401002: INCQ AX 1.14s 1.14s 401005: TESTQ $0x1, AX . . 40100b: JNE 0x401002 1.54s 1.54s 40100d: SHLQ $0x1, AX 540ms 540ms 401010: JMP 0x401002 . . 401012: INT $0x3
$ curl http://localhost:8080/debug/pprof/profile?seconds=5 -o /tmp/cpu.log
$ strings /tmp/cpu.log | grep cpuhogger
package main import ( "regexp" "strings" ) var haystack = `Lorem ipsum dolor sit amet, consectetur adipiscing [...] Vivamus vitae nulla posuere, pellentesque quam posuere` func UsingSubstring() bool { found := strings.Contains(haystack, "auctor") return found } func UsingRegex() bool { found, _ := regexp.MatchString("auctor", haystack) return found } func main() { go func() { for { UsingSubstring() } }() for { UsingRegex() } }
$ stap -l 'process("systemtap").function("main.*")' process("systemtap").function("main.UsingRegex@main.go:16") process("systemtap").function("main.UsingSubstring@main.go:11") process("systemtap").function("main.init@main.go:32") process("systemtap").function("main.main.func1@main.go:22") process("systemtap").function("main.main@main.go:21")
global etime global intervals probe $1.call { etime = gettimeofday_ns() } probe $1.return { intervals <<< (gettimeofday_ns() - etime)/1000 } probe end { printf("Duration min:%dus avg:%dus max:%dus count:%d\n", @min(intervals), @avg(intervals), @max(intervals), @count(intervals)) printf("Duration (us):\n") print(@hist_log(intervals)); printf("\n") }
$ sudo stap main.stap 'process("systemtap").function("main.UsingSubstring")' ^CDuration min:0us avg:1us max:586us count:1628362 Duration (us): value |-------------------------------------------------- count 0 | 10 1 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1443040 2 |@@@@@ 173089 4 | 6982 8 | 4321 16 | 631 32 | 197 64 | 74 128 | 13 256 | 4 512 | 1 1024 | 0 2048 | 0
$ ./systemtap runtime: unexpected return pc for main.UsingSubstring called from 0x7fffffffe000 fatal error: unknown caller pc runtime stack: runtime.throw(0x494e40, 0x11) /home/marko/go/src/runtime/panic.go:566 +0x8b runtime.gentraceback(0xffffffffffffffff, 0xc8200337a8, 0x0, 0xc820001d40, 0x0, 0x0, 0x7fffffff, 0x7fff2fa88030, 0x0, 0x0, ...) /home/marko/go/src/runtime/traceback.go:311 +0x138c runtime.scanstack(0xc820001d40) /home/marko/go/src/runtime/mgcmark.go:755 +0x249 runtime.scang(0xc820001d40) /home/marko/go/src/runtime/proc.go:836 +0x132 runtime.markroot.func1() /home/marko/go/src/runtime/mgcmark.go:234 +0x55 runtime.systemstack(0x4e4f00) /home/marko/go/src/runtime/asm_amd64.s:298 +0x79 runtime.mstart() /home/marko/go/src/runtime/proc.go:1087
package main import ( "bytes" "fmt" "math/rand" "time" ) func ToString(number int) string { return fmt.Sprintf("%d", number) } func main() { r := rand.New(rand.NewSource(time.Now().UnixNano())) var buf bytes.Buffer for i := 0; i < 1000; i++ { value := r.Int() % 1000 value = value - 500 buf.WriteString(ToString(value)) } }
global intervals probe process("systemtap02").function("main.ToString").call { intervals <<< $number } probe end { printf("Variables min:%dus avg:%dus max:%dus count:%d\n", @min(intervals), @avg(intervals), @max(intervals), @count(intervals)) printf("Variables:\n") print(@hist_log(intervals)); printf("\n") }
$ sudo stap main.stap -c ./systemtap02 Variables min:-499us avg:8us max:497us count:1000 Variables: value |-------------------------------------------------- count -1024 | 0 -512 | 0 -256 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 249 -128 |@@@@@@@@@@@@@@@@@@@@ 121 -64 |@@@@@@@@@@ 60 -32 |@@@@@@ 36 -16 |@@ 12 -8 |@ 8 -4 | 5 -2 | 3 -1 | 2 0 | 2 1 | 2 2 | 3 4 |@ 7 8 | 4 16 |@@@ 20 32 |@@@@@ 33 64 |@@@@@@@ 44 128 |@@@@@@@@@@@@@@@@@@ 110 256 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 279 512 | 0 1024 | 0
$ sudo perf top -p $(pidof systemtap)
$ sudo perf record -F 99 -g -p $(pidof systemtap) -- sleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.149 MB perf.data (1719 samples) ]
$ sudo perf script | ~/tmp/FlameGraph/stackcollapse-perf.pl > out.perf-folded $ ~/tmp/FlameGraph/flamegraph.pl out.perf-folded > perf-kernel.svg
#include <stdlib.h> #include <unistd.h> #include <string.h> int main() { const size_t MB = 1024*1024; const unsigned count = 20; char **buf = calloc(count, sizeof(*buf)); for (unsigned i = 0; i < count; i++) { buf[i] = calloc(1, MB); memset(buf[i], 0xFF, MB); sleep(1); } for (unsigned i = 0; i < count; i++) { free(buf[i]); sleep(1); } free(buf); }
-------------------------------------------------------------------------------- Command: ./main Massif arguments: --pages-as-heap=yes --time-unit=ms ms_print arguments: massif.out.15091 -------------------------------------------------------------------------------- MB 26.20^ :: | ::: # | @@: : #:: | ::@ : : #: :: | ::::: @ : : #: : :::: | :: : : @ : : #: : : : :: | :::: : : @ : : #: : : : : : | ::::: :: : : @ : : #: : : : : ::::: | ::: : : :: : : @ : : #: : : : : :: : @@ | ::: : : : :: : : @ : : #: : : : : :: : @ :: | ::@: : : : : :: : : @ : : #: : : : : :: : @ : ::: | ::: @: : : : : :: : : @ : : #: : : : : :: : @ : : ::: | ::: : @: : : : : :: : : @ : : #: : : : : :: : @ : : :: :: | ::: : : @: : : : : :: : : @ : : #: : : : : :: : @ : : :: : :: | ::::: : : : @: : : : : :: : : @ : : #: : : : : :: : @ : : :: : : :::: |:: : : : : : @: : : : : :: : : @ : : #: : : : : :: : @ : : :: : : : : : |@: : : : : : @: : : : : :: : : @ : : #: : : : : :: : @ : : :: : : : : :@ |@: : : : : : @: : : : : :: : : @ : : #: : : : : :: : @ : : :: : : : : :@ |@: : : : : : @: : : : : :: : : @ : : #: : : : : :: : @ : : :: : : : : :@ |@: : : : : : @: : : : : :: : : @ : : #: : : : : :: : @ : : :: : : : : :@ 0 +----------------------------------------------------------------------->s 0 39.13 Number of snapshots: 66 Detailed snapshots: [9, 10, 20, 22, 32, 42, 45 (peak), 55, 65]
package main import ( "net/http" _ "net/http/pprof" "time" ) func allocAndKeep() { var b [][]byte for { b = append(b, make([]byte, 1024)) time.Sleep(time.Millisecond) } } func allocAndLeave() { var b [][]byte for { b = append(b, make([]byte, 1024)) if len(b) == 20 { b = nil } time.Sleep(time.Millisecond) } } func main() { go allocAndKeep() go allocAndLeave() http.ListenAndServe("0.0.0.0:8080", nil) }
$ go tool pprof -inuse_space memtest http://localhost:8080/debug/pprof/heap Fetching profile from http://localhost:8080/debug/pprof/heap Saved profile in /home/marko/pprof/pprof.memtest.localhost:8080.inuse_objects.inuse_space.005.pb.gz Entering interactive mode (type "help" for commands) (pprof) top 15.36MB of 15.36MB total ( 100%) Dropped 2 nodes (cum <= 0.08MB) flat flat% sum% cum cum% 15.36MB 100% 100% 15.36MB 100% main.allocAndKeep 0 0% 100% 15.36MB 100% runtime.goexit $ go tool pprof -alloc_space memtest http://localhost:8080/debug/pprof/heap Fetching profile from http://localhost:8080/debug/pprof/heap Saved profile in /home/marko/pprof/pprof.memtest.localhost:8080.alloc_objects.alloc_space.008.pb.gz Entering interactive mode (type "help" for commands) (pprof) top 54.49MB of 54.49MB total ( 100%) Dropped 8 nodes (cum <= 0.27MB) flat flat% sum% cum cum% 27.97MB 51.33% 51.33% 29.47MB 54.08% main.allocAndKeep 23.52MB 43.17% 94.49% 25.02MB 45.92% main.allocAndLeave 3MB 5.51% 100% 3MB 5.51% time.Sleep 0 0% 100% 54.49MB 100% runtime.goexit
(pprof) list time.Sleep Total: 54.49MB ROUTINE ======================== time.Sleep in /home/marko/go/src/runtime/time.go 3MB 3MB (flat, cum) 5.51% of Total . . 48:func timeSleep(ns int64) { . . 49: if ns <= 0 { . . 50: return . . 51: } . . 52: 3MB 3MB 53: t := new(timer) . . 54: t.when = nanotime() + ns . . 55: tf = goroutineReady . . 56: t.arg = getg() . . 57: lock(&timers.lock) . . 58: addtimerLocked(t)
package printtest import ( "bytes" "fmt" "testing" ) func BenchmarkPrint(b *testing.B) { var buf bytes.Buffer var s string = "test string" for i := 0; i < bN; i++ { buf.Reset() fmt.Fprintf(&buf, "string is: %s", s) } }
$ go test -bench=. -benchmem testing: warning: no tests to run BenchmarkPrint-8 10000000 128 ns/op 16 B/op 1 allocs/op PASS ok github.com/mkevac/converttest 1.420s
$ go test -bench=. -memprofile=mem.out -memprofilerate=1
$ go tool pprof -alloc_space converttest.test mem.out (pprof) top 15.41MB of 15.48MB total (99.59%) Dropped 73 nodes (cum <= 0.08MB) flat flat% sum% cum cum% 15.41MB 99.59% 99.59% 15.43MB 99.67% github.com/mkevac/converttest.BenchmarkPrint 0 0% 99.59% 15.47MB 99.93% runtime.goexit 0 0% 99.59% 15.42MB 99.66% testing.(*B).launch 0 0% 99.59% 15.43MB 99.67% testing.(*B).runN
(pprof) list BenchmarkPrint Total: 15.48MB ROUTINE ======================== github.com/mkevac/converttest.BenchmarkPrint in /home/marko/goprojects/src/github.com/mkevac/converttest/convert_test.go 15.41MB 15.43MB (flat, cum) 99.67% of Total . . 9:func BenchmarkPrint(b *testing.B) { . . 10: var buf bytes.Buffer . . 11: var s string = "test string" . . 12: for i := 0; i < bN; i++ { . . 13: buf.Reset() 15.41MB 15.43MB 14: fmt.Fprintf(&buf, "string is: %s", s) . . 15: } . . 16:}
(pprof) list fmt.Fprintf Total: 15.48MB ROUTINE ======================== fmt.Fprintf in /home/marko/go/src/fmt/print.go 0 12.02kB (flat, cum) 0.076% of Total . . 175:// These routines end in 'f' and take a format string. . . 176: . . 177:// Fprintf formats according to a format specifier and writes to w. . . 178:// It returns the number of bytes written and any write error encountered. . . 179:func Fprintf(w io.Writer, format string, a ...interface{}) (n int, err error) { . 11.55kB 180: p := newPrinter() . 480B 181: p.doPrintf(format, a) . . 182: n, err = w.Write(p.buf) . . 183: p.free() . . 184: return . . 185:} . . 186:
. . 466edb: CALL bytes.(*Buffer).Reset(SB) . . 466ee0: LEAQ 0x98b6b(IP), AX . . 466ee7: MOVQ AX, 0x70(SP) . . 466eec: MOVQ $0xb, 0x78(SP) . . 466ef5: MOVQ $0x0, 0x60(SP) . . 466efe: MOVQ $0x0, 0x68(SP) . . 466f07: LEAQ 0x70d92(IP), AX . . 466f0e: MOVQ AX, 0(SP) . . 466f12: LEAQ 0x70(SP), AX . . 466f17: MOVQ AX, 0x8(SP) . . 466f1c: MOVQ $0x0, 0x10(SP) 15.41MB 15.41MB 466f25: CALL runtime.convT2E(SB) . . 466f2a: MOVQ 0x18(SP), AX . . 466f2f: MOVQ 0x20(SP), CX . . 466f34: MOVQ AX, 0x60(SP) . . 466f39: MOVQ CX, 0x68(SP) . . 466f3e: LEAQ 0x10b35b(IP), AX . . 466f45: MOVQ AX, 0(SP) . . 466f49: MOVQ 0x58(SP), AX . . 466f4e: MOVQ AX, 0x8(SP) . . 466f53: LEAQ 0x99046(IP), CX . . 466f5a: MOVQ CX, 0x10(SP) . . 466f5f: MOVQ $0xd, 0x18(SP) . . 466f68: LEAQ 0x60(SP), CX . . 466f6d: MOVQ CX, 0x20(SP) . . 466f72: MOVQ $0x1, 0x28(SP) . . 466f7b: MOVQ $0x1, 0x30(SP) . 12.02kB 466f84: CALL fmt.Fprintf(SB)
func Fprintf(w io.Writer, format string, a ...interface{}) (n int, err error)
var s string = "marko" var a interface{} = &s
var s string = "marko" var a interface{} = s
package main import ( "bytes" "testing" ) func BenchmarkPrint(b *testing.B) { var buf bytes.Buffer var s string = "test string" for i := 0; i < bN; i++ { buf.Reset() buf.WriteString("string is: ") buf.WriteString(s) } }
$ go test -bench=BenchmarkPrint -benchmem testing: warning: no tests to run BenchmarkPrint-8 50000000 27.5 ns/op 0 B/op 0 allocs/op PASS ok github.com/mkevac/converttest01 1.413s
package main import ( "fmt" ) func main() { var array = []byte{'m', 'a', 'r', 'k', 'o'} if string(array) == "marko" { fmt.Println("equal") } }
package main import ( "bytes" "testing" "unsafe" ) var s string func BenchmarkConvert(b *testing.B) { var buf bytes.Buffer var array = []byte{'m', 'a', 'r', 'k', 'o', 0} for i := 0; i < bN; i++ { buf.Reset() s = string(array) buf.WriteString(s) } }
$ go test -bench=. -benchmem testing: warning: no tests to run BenchmarkConvert-8 30000000 42.1 ns/op 8 B/op 1 allocs/op
func BytesToString(b []byte) string { bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) sh := reflect.StringHeader{bh.Data, bh.Len} return *(*string)(unsafe.Pointer(&sh)) } func BenchmarkNoConvert(b *testing.B) { var buf bytes.Buffer var array = []byte{'m', 'a', 'r', 'k', 'o', 0} for i := 0; i < bN; i++ { buf.Reset() s = BytesToString(array) buf.WriteString(s) } }
$ go test -bench=. -benchmem testing: warning: no tests to run BenchmarkConvert-8 30000000 44.5 ns/op 8 B/op 1 allocs/op BenchmarkNoConvert-8 100000000 19.2 ns/op 0 B/op 0 allocs/op PASS ok github.com/mkevac/bytetostring 3.332s
package main import ( "net/http" _ "net/http/pprof" "time" _ "github.com/mkevac/debugcharts" ) func CPUHogger() { var acc uint64 t := time.Tick(2 * time.Second) for { select { case <-t: time.Sleep(50 * time.Millisecond) default: acc++ } } } func main() { go CPUHogger() go CPUHogger() http.ListenAndServe("0.0.0.0:8181", nil) }
curl http://localhost:8181/debug/pprof/trace?seconds=10 -o trace.out
go tool trace -http "0.0.0.0:8080" ./tracetest trace.out
180 // ReadMemStats populates m with memory allocator statistics. 181 func ReadMemStats(m *MemStats) { 182 stopTheWorld("read mem stats") 183 184 systemstack(func() { 185 readmemstats_m(m) 186 }) 187 188 startTheWorld() 189 }
Source: https://habr.com/ru/post/301990/
All Articles