git
1#include "git-compat-util.h"2
3#include "strbuf.h"4#include "strvec.h"5#include "trace2.h"6
/*
 * We need more complex parsing in stat_parent_pid() and
 * parse_proc_stat() below than a dumb fscanf(). That's because while
 * the stat "comm" field is surrounded by parentheses, the process
 * itself is free to insert any arbitrary byte sequence in its name.
 * That can include newlines, spaces, closing parentheses etc.
 *
 * See do_task_stat() in fs/proc/array.c in linux.git, this is in
 * contrast with the escaped version of the name found in
 * /proc/%d/status.
 *
 * So instead of using fscanf() we'll read N bytes from it, look for
 * the first "(", and then the last ")", anything in-between is our
 * process name.
 *
 * How much N do we need? On Linux /proc/sys/kernel/pid_max is 2^15 by
 * default, but it can be raised to values of up to 2^22. So that's 7
 * digits for a PID. We have 2 PIDs in the first four fields we're
 * interested in, so 2 * 7 = 14.
 *
 * We then have 3 spaces between those four values, and we'd like to
 * get to the space between the 4th and the 5th (the "pgrp" field) to
 * make sure we read the entire "ppid" field. So that brings us up to
 * 14 + 3 + 1 = 18. Add the two parentheses around the "comm" value
 * and it's 20. The "state" value itself is then one character (now at
 * 21).
 *
 * Finally the maximum length of the "comm" name itself is 15
 * characters, e.g. a setting of "123456789abcdefg" will be truncated
 * to "123456789abcdef". See PR_SET_NAME in prctl(2). So all in all
 * we'd need to read 21 + 15 = 36 bytes.
 *
 * Let's just read 2^6 (64) instead for good measure. If PID_MAX ever
 * grows past 2^22 we'll be future-proof. We'll then anchor at the
 * last ")" we find to locate the parent PID.
 */
/*
 * Number of bytes requested from /proc/<pid>/stat: 2^6, comfortably
 * more than the 36 bytes needed to reach the end of the "ppid" field.
 */
#define STAT_PARENT_PID_READ_N 64

45static int parse_proc_stat(struct strbuf *sb, struct strbuf *name,46int *statppid)47{
48const char *comm_lhs = strchr(sb->buf, '(');49const char *comm_rhs = strrchr(sb->buf, ')');50const char *ppid_lhs, *ppid_rhs;51char *p;52pid_t ppid;53
54if (!comm_lhs || !comm_rhs)55goto bad_kernel;56
57/*58* We're at the ")", that's followed by " X ", where X is a
59* single "state" character. So advance by 4 bytes.
60*/
61ppid_lhs = comm_rhs + 4;62
63/*64* Read until the space between the "ppid" and "pgrp" fields
65* to make sure we're anchored after the untruncated "ppid"
66* field..
67*/
68ppid_rhs = strchr(ppid_lhs, ' ');69if (!ppid_rhs)70goto bad_kernel;71
72ppid = strtol(ppid_lhs, &p, 10);73if (ppid_rhs == p) {74const char *comm = comm_lhs + 1;75size_t commlen = comm_rhs - comm;76
77strbuf_add(name, comm, commlen);78*statppid = ppid;79
80return 0;81}82
83bad_kernel:84/*85* We were able to read our STAT_PARENT_PID_READ_N bytes from
86* /proc/%d/stat, but the content is bad. Broken kernel?
87* Should not happen, but handle it gracefully.
88*/
89return -1;90}
91
92static int stat_parent_pid(pid_t pid, struct strbuf *name, int *statppid)93{
94struct strbuf procfs_path = STRBUF_INIT;95struct strbuf sb = STRBUF_INIT;96FILE *fp;97int ret = -1;98
99/* try to use procfs if it's present. */100strbuf_addf(&procfs_path, "/proc/%d/stat", pid);101fp = fopen(procfs_path.buf, "r");102if (!fp)103goto cleanup;104
105/*106* We could be more strict here and assert that we read at
107* least STAT_PARENT_PID_READ_N. My reading of procfs(5) is
108* that on any modern kernel (at least since 2.6.0 released in
109* 2003) even if all the mandatory numeric fields were zero'd
110* out we'd get at least 100 bytes, but let's just check that
111* we got anything at all and trust the parse_proc_stat()
112* function to handle its "Bad Kernel?" error checking.
113*/
114if (!strbuf_fread(&sb, STAT_PARENT_PID_READ_N, fp))115goto cleanup;116if (parse_proc_stat(&sb, name, statppid) < 0)117goto cleanup;118
119ret = 0;120cleanup:121if (fp)122fclose(fp);123strbuf_release(&procfs_path);124strbuf_release(&sb);125
126return ret;127}
128
129static void push_ancestry_name(struct strvec *names, pid_t pid)130{
131struct strbuf name = STRBUF_INIT;132int ppid;133
134if (stat_parent_pid(pid, &name, &ppid) < 0)135goto cleanup;136
137strvec_push(names, name.buf);138
139/*140* Both errors and reaching the end of the process chain are
141* reported as fields of 0 by proc(5)
142*/
143if (ppid)144push_ancestry_name(names, ppid);145cleanup:146strbuf_release(&name);147
148return;149}
150
151void trace2_collect_process_info(enum trace2_process_info_reason reason)152{
153struct strvec names = STRVEC_INIT;154
155if (!trace2_is_enabled())156return;157
158switch (reason) {159case TRACE2_PROCESS_INFO_EXIT:160/*161* The Windows version of this calls its
162* get_peak_memory_info() here. We may want to insert
163* similar process-end statistics here in the future.
164*/
165break;166case TRACE2_PROCESS_INFO_STARTUP:167push_ancestry_name(&names, getppid());168
169if (names.nr)170trace2_cmd_ancestry(names.v);171strvec_clear(&names);172break;173}174
175return;176}
177