package main import (...) func readdir(dir string) { dh, err := os.Open(dir) defer dh.Close() for { fis, err := dh.Readdir(10) if err == io.EOF { break } for _, fi := range fis { fmt.Printf("%s/%s\n", dir, fi.Name()) if fi.IsDir() { readdir(dir + "/" + fi.Name()) } } } } func main() { readdir(os.Args[1]) }
$ time find /path/to/dir | wc -l 169561 real 0m0.333s user 0m0.104s sys 0m0.272s $ time GOGC=300 ./find-simple /path/to/dir | wc -l 169560 real 0m1.160s user 0m0.540s sys 0m0.944s
@@ -7,5 +7,7 @@ import ( +var bufout = bufio.NewWriter(os.Stdout) func readdir(dir string) { @@ -28,3 +30,7 @@ func readdir(dir string) { for _, fi := range fis { - fmt.Printf("%s/%s\n", dir, fi.Name()) + bufout.WriteString(dir) + bufout.WriteByte('/') + bufout.WriteString(fi.Name()) + bufout.WriteByte('\n') if fi.IsDir() { @@ -44,2 +50,3 @@ func main() { readdir(dir) + bufout.Flush() }
$ time GOGC=300 ./find-simple-bufio /path/to/dir | wc -l 169560 real 0m0.796s user 0m0.352s sys 0m0.616s
$ php ~/find-ob.php /path/to/dir | wc -l 169560 real 0m0.777s user 0m0.276s sys 0m0.500s
SYNOPSIS int getdents(unsigned int fd, struct linux_dirent *dirp, unsigned int count); DESCRIPTION This is not the function you are interested in. Look at readdir(3) for the POSIX conforming C library interface. This page documents the bare kernel system call interface. The system call getdents() reads several linux_dirent structures from the directory referred to by the open file descriptor fd into the buffer pointed to by dirp. The argument count is specifies the size of that buffer. The linux_dirent structure is declared as follows: struct linux_dirent { ... char d_name[]; char pad; // Zero padding byte */ char d_type; // File type (only since Linux 2.6.4 }
package main import (...) var bufout = bufio.NewWriter(os.Stdout) func readdir(dir string, file string, dirfd int, pathbuf []byte) { origbuf := make([]byte, 4096) // , getdents dh, err := syscall.Openat(dirfd, file, syscall.O_RDONLY, 0777) // origpathbuf := pathbuf[0:0] // , for { n, errno := syscall.ReadDirent(dh, origbuf) // getdents if n <= 0 { break // EOF } buf := origbuf[0:n] // , :) for len(buf) > 0 { // ParseDirent: dirent := (*syscall.Dirent)(unsafe.Pointer(&buf[0])) buf = buf[dirent.Reclen:] if dirent.Ino == 0 { // File absent in directory. continue } bytes := (*[10000]byte)(unsafe.Pointer(&dirent.Name[0])) name := bytes[0:clen(bytes[:])] // clen() if len(name) == 1 && name[0] == '.' || len(name) == 2 && name[0] == '.' && name[1] == '.' { continue } // , // pathbuf = append(pathbuf, dir...) pathbuf = append(pathbuf, '/') pathbuf = append(pathbuf, file...) dirlen := len(pathbuf) pathbuf = append(pathbuf, '/') pathbuf = append(pathbuf, name...) pathbuf = append(pathbuf, '\n') bufout.Write(pathbuf) // , , string if dirent.Type == syscall.DT_DIR { readdir(string(pathbuf[0:dirlen]), string(name), dh, origpathbuf) } pathbuf = origpathbuf[0:0] } buf = origbuf[0:0] } syscall.Close(dh) } func main() { dir := os.Args[1] parentDir := filepath.Dir(dir) dh, err := os.Open(parentDir) pathbuf := make([]byte, 16 * 1024) readdir(parentDir, filepath.Base(dir), int(dh.Fd()), pathbuf) bufout.Flush() }
$ time GOGC=300 ./find-simple /path/to/dir | wc -l 169560 real 0m1.160s user 0m0.540s sys 0m0.944s $ time GOGC=300 ./find-optimized /path/to/dir | wc -l 169560 real 0m0.200s user 0m0.060s sys 0m0.160s $ time find /path/to/dir | wc -l 169561 real 0m0.333s user 0m0.104s sys 0m0.272s
Source: https://habr.com/ru/post/281382/
All Articles