#
# Program: pstree.awk
# Version: 1.10
# Author:  Paul Serice
# E-Mail:  paul@serice.net
# URL:     http://www.serice.net/pstree/
# Date:    2018-01-07
#
#
# You need to start this program from the "pstree" shell-script wrapper.
#
#
# This AWK script is designed to work with the output of "ps -ef".  It
# was inspired by the Linux version of "ps", and many different
# programs that also go by the name of "pstree".
#
# If your version of "ps" doesn't give the right output for "ps -ef",
# your "ps" probably allows for command-line options to customize its
# output.  If so, make the output look like the following (without the
# beginning "# " of course):
#
#      UID     PID    PPID  C    STIME TTY      TIME CMD
#     root       1       0  0   Nov 28 ?        0:23 /etc/init
#  ss31672  985762  992622  0 08:50:59 pts/10   0:01 -tcsh
#  ss31672  992032 1061705  0 14:31:04 pts/7    0:00 /bin/tcsh
#     root      99       1  0   Nov 28 ?        0:03 /usr/etc/syslogd
#  ss31672 1065056  137767  0 13:34:05 ?        0:28 cvpcs
#     root     180       1  0   Nov 28 ?        1:31 /usr/etc/rpcbind
#     root     185       1  0   Nov 28 ?        0:00 bio3d
#     root     201       1  0   Nov 28 ?        2:18 /usr/etc/nfsd 4
#     root     203     201  0   Nov 28 ?        0:00 /usr/etc/nfsd 4
#     root     204     201  0   Nov 28 ?        0:30 /usr/etc/nfsd 4
#     root     205     201  0   Nov 28 ?        1:40 /usr/etc/nfsd 4
#     root     207     201  0   Nov 28 ?        0:54 /usr/etc/nfsd 4
#     root     208       1  0   Nov 28 ?        0:00 /usr/etc/biod 4
#     root     209       1  0   Nov 28 ?        0:00 /usr/etc/biod 4
#
# (It is important that you have a header on the very first line.  It
# is not important if it looks exactly like the one shown above.  The
# only thing that really matters is that you have a PID and a PPID
# column that you can specify in set_portability_variables() below,
# that you can specify the start column for STIME, and that you know
# where the CMD column starts when the STIME column is short (i.e., the
# STIME column has no embedded spaces) and where it starts when the
# STIME column is long (i.e., it has at least one embedded space.)
#
# Then, simply grep this awk script for "uname" and do something
# similar to what you see for the currently supported versions of "ps".
#

#
# Write "text" verbatim to standard error.
#
# The text is piped through "cat 1>&2" so that no special file name
# has to be understood by the awk implementation.  The pipe is closed
# right away so the text is flushed before anything else is written.
#
function print_to_stderr(text) {
    printf("%s", text) | "cat 1>&2";
    close("cat 1>&2");
}

#
# Return a string made of "width" space characters (the empty string
# if "width" is zero or negative).
#
# (local) pad -- The string being built.
#
function make_padding(width,    pad) {
    pad = "";
    while( length(pad) < width ) {
        pad = pad " ";
    }
    return pad;
}

#
# Set the progname variable for reporting errors.  A value supplied
# by the caller (e.g., with "-v progname=...") is left untouched.
#
function set_progname() {
    if( progname == "" ) {
        progname = "pstree";
    }
}

#
# This script needs to do slightly different things depending on the
# version of "ps" available on the system, but allow the user to
# override at the command prompt with something like the following:
#
#     ./pstree -v uname=IRIX64
#
# Sets the global "uname" (unless already set) and the global
# "uname_*_match" patterns used by set_portability_variables().  If
# the uname is not recognized, a help message is written to standard
# error, "final_rv" is set to 1 and the script exits.
#
# (local) uname_cmd -- Command used to query the system name.
# (local) msg       -- Help message being assembled.
#
function set_uname(    uname_cmd, msg) {

    uname_bsd_match    = "[Bb][Ss][Dd]";
    uname_cygwin_match = "[Cc][Yy][Gg][Ww][Ii][Nn]";
    uname_darwin_match = "[Dd][Aa][Rr][Ww][Ii][Nn]";
    uname_linux_match  = "[Ll][Ii][Nn][Uu][Xx]";
    uname_irix_match   = "[Ii][Rr][Ii][Xx]";
    uname_hpux_match   = "[Hh][Pp]-[Uu][Xx]";
    uname_sunos_match  = "[Ss][Uu][Nn][Oo][Ss]";

    if( uname == "" ) {
        uname_cmd = "uname";
        uname_cmd | getline uname;
        close(uname_cmd);
    }

    # Nothing more to do if this is a uname we know how to handle.
    if( (uname ~ uname_linux_match)     \
        || (uname ~ uname_cygwin_match) \
        || (uname ~ uname_irix_match)   \
        || (uname ~ uname_hpux_match)   \
        || (uname ~ uname_darwin_match) \
        || (uname ~ uname_bsd_match)    \
        || (uname ~ uname_sunos_match) )
    {
        return;
    }

    msg = "\n";
    msg = msg sprintf("%s: Warning: Script doesn't know how to handle " \
                      "a uname of \"%s\".\n", progname, uname);
    msg = msg "\n";
    msg = msg "You can override the uname value at the " \
              "command line using the\n";
    msg = msg "following:\n";
    msg = msg "\n";
    msg = msg sprintf(" %s -v uname=<uname>\n", progname);
    msg = msg "\n";
    msg = msg "Where <uname> is one of the following:\n";
    msg = msg "\n";
    msg = msg " bsd\n";
    msg = msg " cygwin\n";
    msg = msg " darwin\n";
    msg = msg " hp-ux\n";
    msg = msg " linux\n";
    msg = msg " irix\n";
    msg = msg " sunos\n";

    # The trailing newline keeps the blank line that follows the list.
    print_to_stderr(msg "\n");

    # The END rule still runs after "exit"; final_rv tells it to bail.
    final_rv = 1;
    exit;
}

#
# Return 1 if "sort_cmd" is accepted by the local "sort", 0 otherwise.
#
# The candidate is fed a single empty line with standard error folded
# into standard output.  If the first line read back is empty, the
# command is taken to be valid; any other text (e.g., a usage message)
# marks it as invalid.
#
# (local) probe_cmd  -- Shell pipeline used for the probe.
# (local) first_line -- First line of output from the probe.
#
function is_valid_sort_cmd(sort_cmd,    probe_cmd, first_line) {
    probe_cmd = "echo | " sort_cmd " 2>&1";

    # Start from a known value so an earlier probe can't leak in.
    first_line = "";
    probe_cmd | getline first_line;
    close(probe_cmd);

    return (length(first_line) == 0) ? 1 : 0;
}

#
# The sort command has evolved from "sort -n +1" to "sort -n -k 2".
# Determine which, if any, is supported and store it in the global
# "sort_cmd".  A "sort_cmd" supplied by the caller is left untouched.
# The candidates are tried in a fixed order, newest syntax first.
#
# (local) candidates -- Candidate commands indexed 0 .. count-1.
# (local) count      -- Number of candidates.
# (local) i          -- Loop index.
#
function set_sort_cmd(    candidates, count, i) {
    if( sort_cmd == "" ) {
        count = 0;
        candidates[count++] = "sort -n -k " ppid_column " -k " pid_column;
        candidates[count++] = "sort -n -k" ppid_column " -k" pid_column;
        candidates[count++] = "sort -n +" (ppid_column - 1) \
                              " +" (pid_column - 1);

        # Walk by numeric index; "for( i in ... )" has no defined order.
        for( i = 0 ; i < count ; ++i ) {
            if( is_valid_sort_cmd(candidates[i]) ) {
                sort_cmd = candidates[i];
                break;
            }
        }
    }

    if( sort_cmd == "" ) {
        # Warn on standard error so the tree on standard output stays
        # clean.
        print_to_stderr(sprintf("%s: Warning: Unable to determine " \
                                "correct options to \"sort\".\n",   \
                                progname)                           \
                        "Output will not be sorted by process id.\n");
    }
}

#
# Select the "ps" command line and the column layout of its output
# based on "uname", then append the sort command (if one was found).
#
# Sets the globals "ps_cmd", "pid_column", "ppid_column",
# "stime_start_column", "cmd_column_when_stime_short" and
# "cmd_column_when_stime_long".
#
function set_portability_variables() {

    # The pid, ppid and STIME columns are the same for every
    # supported "ps".
    pid_column = 2;
    ppid_column = 3;
    stime_start_column = 5;

    if( (uname ~ uname_bsd_match) || (uname ~ uname_darwin_match) ) {
        ps_cmd = "ps -ax -ww -C -o user,pid,ppid,cpu,start,tty,time,command";
        cmd_column_when_stime_short = 8;
        cmd_column_when_stime_long = 8;
    }
    else if( uname ~ uname_cygwin_match ) {
        ps_cmd = "ps -afW";
        cmd_column_when_stime_short = 6;
        cmd_column_when_stime_long = 7;
    }
    else if( uname ~ uname_linux_match ) {
        ps_cmd = "ps -ef -www";
        cmd_column_when_stime_short = 8;
        cmd_column_when_stime_long = 8;
    }
    else if( (uname ~ uname_irix_match)    \
             || (uname ~ uname_hpux_match) \
             || (uname ~ uname_sunos_match) )
    {
        ps_cmd = "ps -ef";
        cmd_column_when_stime_short = 8;
        cmd_column_when_stime_long = 9;
    }
    else {
        print_to_stderr(sprintf("%s: Internal Error: Unexpected uname.\n",
                                progname));
        # The END rule still runs after "exit"; final_rv tells it to
        # bail instead of running an empty "ps" command.
        final_rv = 1;
        exit(1);
    }

    # Lastly, make sure all output is sorted by process id (column 2).
    # For Linux and BSD, the output of "ps" is already sorted, but
    # we'll go ahead and sort for all OSes just in case things change.
    set_sort_cmd();
    if( sort_cmd != "" ) {
        ps_cmd = ps_cmd " | " sort_cmd;
    }
}

# You can override this from the command line to make the tree look
# different.  For example,
#
#     ./pstree -v branch_prefix='\\_ ' -v branch_prefix_secondary='|'
#
# is a common look that many people like.  Remember, '\' is an escape
# character for both the shell and awk.  Others that I think look
# decent are as follows:
#
#     ./pstree -v branch_prefix='+- ' -v branch_prefix_secondary='|'
#
# If "branch_prefix_secondary" is not given, it defaults to the first
# character of "branch_prefix".
#
function set_branch_prefix() {
    if( branch_prefix == "" ) {
        branch_prefix = "|__ ";
    }
    if( branch_prefix_secondary == "" ) {
        branch_prefix_secondary = substr(branch_prefix, 1, 1);
    }
}

#
# Routine to copy a single line from ps_lines to ps_lines_sorted
# adding the prefix to make it look like a tree.
#
# (input)  prefix          -- Text inserted just before the command.
# (output) ps_lines_sorted -- Receives the rewritten line.
# (input/output) ps_lines_sorted_index[0]
#                          -- Slot to write to; incremented on return.
# (input/output) ps_lines  -- Source lines; the copied entry is
#                             blanked so it is not visited again.
# (input)  ps_lines_index  -- Index of the line to copy.
#
# (local) tmp              -- The line being rebuilt.
# (local) i                -- Loop index.
# (local) values           -- The non-blank fields of the line.
# (local) spaces           -- spaces[i] is the run of spaces that
#                             precedes values[i].
# (local) values_count     -- Number of entries in "values".
# (local) cmd_start_column -- Field at which the command starts.
#
function copy_to_ps_lines_sorted(prefix,
                                 ps_lines_sorted,
                                 ps_lines_sorted_index,
                                 ps_lines,
                                 ps_lines_index,
                                 tmp,
                                 i,
                                 values,
                                 spaces,
                                 values_count,
                                 cmd_start_column)
{
    # Split the line twice: once into its fields and once into the
    # runs of spaces between them so the original spacing can be
    # reproduced exactly.
    values_count = split(ps_lines[ps_lines_index], values);
    split(ps_lines[ps_lines_index], spaces, "[^ ]+");

    # All rows don't have the same number of columns.  So, we have to
    # do a bit of detective work to determine what type of column we
    # have.  The primary culprit is the "STIME" column (see comment at
    # the very top of the script).
    if( values[stime_start_column] ~ /^[a-zA-Z]/ ) {
        cmd_start_column = cmd_column_when_stime_long;
    }
    else {
        cmd_start_column = cmd_column_when_stime_short;
    }

    # Everything before the command is copied unchanged.
    tmp = "";
    for( i = 1 ; i < cmd_start_column ; ++i ) {
        tmp = tmp spaces[i] values[i];
    }

    # Add the prefix to make it look like a tree.
    tmp = tmp spaces[cmd_start_column] prefix values[cmd_start_column];

    # Add the rest of the command.
    for( i = cmd_start_column + 1 ; i <= values_count ; ++i ) {
        tmp = tmp spaces[i] values[i];
    }

    # Copy to final destination and advance the index so that the
    # next entry will be copied into the right slot.
    ps_lines_sorted[ps_lines_sorted_index[0]] = tmp;
    ++ps_lines_sorted_index[0];

    # Blank the original for the caller.
    ps_lines[ps_lines_index] = "";
}

#
# This function does most of the work.  It copies values out of
# "ps_lines" and puts them (slightly modified) into "ps_lines_sorted".
#
# (input) parent_index         -- Index of the process whose children
#                                 are to be grouped.
#
# (input) mostly_space_prefix  -- This is the prefix that is added
#                                 just before the command name to
#                                 make it look like it is part of
#                                 a tree.
#
# (input/output) ps_lines_sorted_index[0]
#                              -- Where to insert the next line in
#                                 "ps_lines_sorted".  Arrays are
#                                 passed by reference.  Because this
#                                 variable needs to be passed by
#                                 reference, the first element of an
#                                 array is used.
#
# (input) ps_lines             -- Same as in the main routine.
# (input) pids                 -- Same as in the main routine.
# (input) ppids                -- Same as in the main routine.
# (input) ps_line_count        -- Same as in the main routine.
#
# (output) ps_lines_sorted     -- This is the return data that
#                                 interests the caller.  It has the
#                                 sorted output of "ps" such that it
#                                 looks like a tree.
#
# (local) i                    -- Loop index.
# (local) child_count          -- Children not yet emitted.
# (local) indent_width         -- Columns added per tree level.
# (local) child_prefix         -- Prefix for the current child.
# (local) deeper_prefix        -- Prefix handed to the recursion.
#
function group_process_with_its_children(parent_index,
                                         mostly_space_prefix,
                                         ps_lines_sorted_index,
                                         ps_lines,
                                         pids,
                                         ppids,
                                         ps_line_count,
                                         ps_lines_sorted,
                                         i,
                                         child_count,
                                         indent_width,
                                         child_prefix,
                                         deeper_prefix)
{
    # BSD has a bad way of injecting processes with a ppid == 0 that
    # _look_ like they are the children of the "(swapper)" process.
    # This means you encounter the "init" process one level of
    # recursion deep instead of having the "init" process start the
    # recursion.  Compensate so that the forest lines up in the
    # correct spot.
    if( ppids[parent_index] == 0 ) {
        mostly_space_prefix = " ";
    }

    # Every level is indented by the width of the branch itself so
    # that a child's branch starts under its parent's command, and so
    # that both forms of the deeper prefix below are equally wide.
    indent_width = length(branch_prefix);

    # Count the number of children this process has.  This is required
    # to get the "|" portion of the tree correct.  You want to draw
    # the "|" portion for every child except for the last one.  So,
    # you have to know in advance how many children there are.
    child_count = 0;
    for( i = 1 ; i < ps_line_count ; ++i ) {
        if( ps_lines[i] == "" ) {
            continue;
        }
        if( (ppids[i] == pids[parent_index]) && (ppids[i] != 0) ) {
            ++child_count;
        }
    }

    # Now, emit each child followed by its own descendants.
    for( i = 1 ; i < ps_line_count ; ++i ) {
        if( ps_lines[i] == "" ) {
            continue;
        }
        if( ppids[i] != pids[parent_index] ) {
            continue;
        }

        # Entries with a ppid == 0 only look like children (see the
        # comment at the top of this function), so they are not
        # indented.
        if( ppids[i] == 0 ) {
            child_prefix = "";
        }
        else {
            child_prefix = mostly_space_prefix branch_prefix;
        }

        # Copy the child to the sorted list.
        copy_to_ps_lines_sorted(child_prefix,
                                ps_lines_sorted,
                                ps_lines_sorted_index,
                                ps_lines,
                                i);

        # While siblings remain, keep the vertical bar going through
        # this child's descendants; the last child gets blanks.
        if( child_count > 1 ) {
            deeper_prefix = mostly_space_prefix branch_prefix_secondary;
            deeper_prefix = deeper_prefix \
                make_padding(indent_width - length(branch_prefix_secondary));
        }
        else {
            deeper_prefix = mostly_space_prefix make_padding(indent_width);
        }

        # And find all of its children (recursively).
        group_process_with_its_children(i,
                                        deeper_prefix,
                                        ps_lines_sorted_index,
                                        ps_lines,
                                        pids,
                                        ppids,
                                        ps_line_count,
                                        ps_lines_sorted);

        # That's one less child.
        --child_count;
    }
}

#
# Configure everything, then "exit" so that awk goes straight to the
# END rule (where the real work is done) without reading any input.
#
BEGIN {
    final_rv = 0;

    set_progname();
    set_uname();
    set_branch_prefix();
    set_portability_variables();

    exit(0);
}

END {
    # Set-up failed; the error has already been reported.
    if( final_rv != 0 ) {
        exit(final_rv);
    }

    # Take the output of the "ps" command and store each line in the
    # "ps_lines" array, and put the process id (pid) and the parent
    # process id (ppid) in the "pids" and "ppids" array respectively.
    # While you're at it, go ahead and sort the output first by the
    # parent's process id then by the process's id.  The -n option to
    # sort guarantees that the header (which is numberless) will be
    # sorted before any of the other entries.
    ps_line_count = 0;
    while( (ps_cmd | getline ps_lines[ps_line_count]) > 0 ) {
        split(ps_lines[ps_line_count], s);
        pids[ps_line_count] = s[pid_column];
        ppids[ps_line_count] = s[ppid_column];
        ++ps_line_count;
    }
    close(ps_cmd);

    # Sort ps_lines.  The header is at index 0.  So, start with index
    # 1.  Because this is recursive and because all processes other
    # than "init" have a parent, calling
    # "group_process_with_its_children()" on "init" will group all the
    # processes.

    # Use an array to pass by reference.
    ps_lines_sorted_index[0] = 0;

    # Copy the header to ps_lines_sorted.
    copy_to_ps_lines_sorted("",
                            ps_lines_sorted,
                            ps_lines_sorted_index,
                            ps_lines,
                            0);

    # "init" is traditionally the only process with a parent
    # process id of 0; however, linux has several processes that are,
    # apparently, started directly by the kernel.  Any line that has
    # not been consumed by an earlier tree starts a new tree.
    for( i = 1 ; i < ps_line_count ; ++i ) {
        if( ps_lines[i] == "" ) {
            continue;
        }

        # Copy the root process to ps_lines_sorted.
        copy_to_ps_lines_sorted("",
                                ps_lines_sorted,
                                ps_lines_sorted_index,
                                ps_lines,
                                i);

        # Recursively find all of this process's children.
        group_process_with_its_children(i,
                                        "",
                                        ps_lines_sorted_index,
                                        ps_lines,
                                        pids,
                                        ppids,
                                        ps_line_count,
                                        ps_lines_sorted);
    }

    # Print.
    for( i = 0 ; i < ps_line_count ; ++i ) {
        printf("%s\n", ps_lines_sorted[i]);
    }
}