cmd1 | cmd2 | .... | cmdN
$ grep -i "error" ./log | wc -l 43
$ strace -f bash -c '/bin/echo foo | grep bar' .... getpid() = 13726 <– PID ... pipe([3, 4]) <– .... clone(....) = 13727 <– (echo) ... [pid 13727] execve("/bin/echo", ["/bin/echo", "foo"], [/* 61 vars */] ..... [pid 13726] clone(....) = 13728 <– (grep) ... [pid 13728] stat("/home/aikikode/bin/grep", ...
The trace shows that the pipe() system call is used to create the pipeline, and that the two commands run in parallel in separate processes (PIDs 13727 and 13728 above), not in threads of one process. 1242 pipeline: pipeline '|' newline_list pipeline 1243 { $$ = command_connect ($1, $4, '|'); } 1244 | pipeline BAR_AND newline_list pipeline 1245 { 1246 /* Make cmd1 |& cmd2 equivalent to cmd1 2>&1 | cmd2 */ 1247 COMMAND *tc; 1248 REDIRECTEE rd, sd; 1249 REDIRECT *r; 1250 1251 tc = $1->type == cm_simple ? (COMMAND *)$1->value.Simple : $1; 1252 sd.dest = 2; 1253 rd.dest = 1; 1254 r = make_redirection (sd, r_duplicating_output, rd, 0); 1255 if (tc->redirects) 1256 { 1257 register REDIRECT *t; 1258 for (t = tc->redirects; t->next; t = t->next) 1259 ; 1260 t->next = r; 1261 } 1262 else 1263 tc->redirects = r; 1264 1265 $$ = command_connect ($1, $4, '|'); 1266 } 1267 | command 1268 { $$ = $1; } 1269 ;
Here we also see the handling of the character pair '|&', which is equivalent to redirecting both stdout and stderr into the pipeline. Next, we turn to command_connect() in make_cmd.c: 194 COMMAND * 195 command_connect (com1, com2, connector) 196 COMMAND *com1, *com2; 197 int connector; 198 { 199 CONNECTION *temp; 200 201 temp = (CONNECTION *)xmalloc (sizeof (CONNECTION)); 202 temp->connector = connector; 203 temp->first = com1; 204 temp->second = com2; 205 return (make_command (cm_connection, (SIMPLE_COM *)temp)); 206 }
where connector is the character '|' as int. When executing a sequence of commands (linked by '&', '|', ';', etc.), execute_connection () is called: execute_cmd.c : 2325 case '|': ... 2331 exec_result = execute_pipeline (command, asynchronous, pipe_in, pipe_out, fds_to_close);
2112 prev = pipe_in; 2113 cmd = command; 2114 2115 while (cmd && cmd->type == cm_connection && 2116 cmd->value.Connection && cmd->value.Connection->connector == '|') 2117 { 2118 /* */ 2119 if (pipe (fildes) < 0) 2120 { /* */ } ....... /* , prev — , fildes[1] — , pipe() */ 2178 execute_command_internal (cmd->value.Connection->first, asynchronous, 2179 prev, fildes[1], fd_bitmap); 2180 2181 if (prev >= 0) 2182 close (prev); 2183 2184 prev = fildes[0]; /* */ 2185 close (fildes[1]); ....... 2190 cmd = cmd->value.Connection->second; /* “” */ 2191 }
Thus, bash handles the pipe symbol by making a pipe() system call for each '|' it encounters, and executes each command in a separate process, using the corresponding file descriptors as that command's input and output streams. /* . /proc/sys/fs/pipe-max-size */ 35 unsigned int pipe_max_size = 1048576; /* , POSIX , .. 4 */ 40 unsigned int pipe_min_size = PAGE_SIZE; 869 int create_pipe_files(struct file **res, int flags) 870 { 871 int err; 872 struct inode *inode = get_pipe_inode(); 873 struct file *f; 874 struct path path; 875 static struct qstr name = { .name = "" }; /* dentry dcache */ 881 path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name); /* file. FMODE_WRITE, O_WRONLY, .. . O_NONBLOCK . */ 889 f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops); 893 f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); /* file (. FMODE_READ O_RDONLY) */ 896 res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops); 902 res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK); 903 res[1] = f; 904 return 0; 917 } 918 919 static int __do_pipe_flags(int *fd, struct file **files, int flags) 920 { 921 int error; 922 int fdw, fdr; /* file (. ) */ 927 error = create_pipe_files(files, flags); /* */ 931 fdr = get_unused_fd_flags(flags); 936 fdw = get_unused_fd_flags(flags); 941 audit_fd_pair(fdr, fdw); 942 fd[0] = fdr; 943 fd[1] = fdw; 944 return 0; 952 } /* int pipe2(int pipefd[2], int flags)... */ 969 SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) 970 { 971 struct file *files[2]; 972 int fd[2]; /* / */ 975 __do_pipe_flags(fd, files, flags); /* kernel space user space */ 977 copy_to_user(fildes, fd, sizeof(fd)); /* */ 984 fd_install(fd[0], files[0]); 985 fd_install(fd[1], files[1]); 989 } /* ... int pipe(int pipefd[2]), pipe2 ; */ 991 SYSCALL_DEFINE1(pipe, int __user *, fildes) 992 { 993 return sys_pipe2(fildes, 0); 994 }
As you may have noticed, the code checks for the O_NONBLOCK flag. It can be set with the F_SETFL operation of fcntl. The flag switches the pipe to non-blocking I/O mode: instead of blocking, a read from or write to the pipe fails with errno set to EAGAIN. 8 #define PIPE_BUF PAGE_SIZE
For kernels >= 2.6.35, you can change the size of the pipe buffer: fcntl(fd, F_SETPIPE_SZ, <size>)
The maximum allowed buffer size, as we saw above, is specified in the /proc/sys/fs/pipe-max-size file. ls -d ./Documents ./non-existent_file ./other_non-existent_file 2>&1 | egrep "Doc|other" ls: cannot access ./other_non-existent_file: No such file or directory ./Documents
or you can use the character pair '|&' (you can learn about it from the shell documentation (man bash) or from the sources above, where we examined the Yacc grammar of the bash parser): ls -d ./Documents ./non-existent_file ./other_non-existent_file |& egrep "Doc|other" ls: cannot access ./other_non-existent_file: No such file or directory ./Documents
$ ls -d ./Documents ./non-existent_file ./other_non-existent_file 2>&1 >/dev/null | egrep "Doc|other" ls: cannot access ./other_non-existent_file: No such file or directory
Shoot yourself in the foot $ ls -d ./non-existent_file 2>/dev/null; echo $? 2
And put it in the pipe: $ ls -d ./non-existent_file 2>/dev/null | wc; echo $? 0 0 0 0
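Now the exit status of the pipeline is the exit status of the wc command, i.e. 0. To change this, set the pipefail option: the pipeline then returns the exit status of the last (rightmost) command that exited with a non-zero status, or 0 if all commands succeeded: $ set -o pipefail $ ls -d ./non-existent_file 2>/dev/null | wc; echo $? 0 0 0 2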
Shoot yourself in the foot $ egrep "^foo=[0-9]+" ./config | awk '{print "new_"$0;}'
Here we print all the matching lines, prepending 'new_' to each of them, or print nothing if no line has the required format. The problem is that grep exits with code 1 if no matches are found, so if the pipefail option is set in our script, this example will exit with code 1: $ set -o pipefail $ egrep "^foo=[0-9]+" ./config | awk '{print "new_"$0;}' >/dev/null; echo $? 1
In large scripts with complex constructs and long pipelines, this point can easily be overlooked, which can lead to incorrect results. $ a=aaa $ b=bbb $ echo "one two" | read a b
We might now expect the values of the variables a and b to be "one" and "two", respectively. In fact, they will remain "aaa" and "bbb". In general, any change made to a variable inside a pipeline is not visible outside it, because each command of the pipeline runs in its own subshell: $ filefound=0 $ find . -type f -size +100k | while true do read f echo "$f is over 100KB" filefound=1 break # exit after the first file found done $ echo $filefound;
Even if find finds a file larger than 100Kb, the filefound flag will still have the value 0. set -- $var
$ var="one two" $ set -- $var $ a=$1 # "one" $ b=$2 # "two"
Keep in mind that the script will lose the original positional parameters with which it was called. $ echo "one" | (read a; echo $a;) one
$ filefound=0 $ for f in $(find . -type f -size +100k) # the pipe is removed, the while loop is replaced with for do read f echo "$f is over 100KB" filefound=1 break done $ echo $filefound;
$ (shopt -s lastpipe; a="aaa"; echo "one" | read a; echo $a) one
It is important that, on the command line, the lastpipe option is set in the same process in which the corresponding pipeline will be run, which is why the parentheses in the example above are required. In scripts the parentheses are optional. Source: https://habr.com/ru/post/195152/
All Articles