1package command
2
3import (
4	"context"
5	"errors"
6	"fmt"
7	"io"
8	"io/ioutil"
9	"os"
10	"os/exec"
11	"strings"
12	"sync"
13	"syscall"
14	"time"
15
16	"github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus"
17	"github.com/opentracing/opentracing-go"
18	"github.com/sirupsen/logrus"
19	"gitlab.com/gitlab-org/labkit/tracing"
20)
21
22// GitEnv contains the ENV variables for git commands
23var GitEnv = []string{
24	// Force english locale for consistency on the output messages
25	"LANG=en_US.UTF-8",
26}
27
28// exportedEnvVars contains a list of environment variables
29// that are always exported to child processes on spawn
30var exportedEnvVars = []string{
31	"HOME",
32	"PATH",
33	"LD_LIBRARY_PATH",
34	"TZ",
35
36	// Export git tracing variables for easier debugging
37	"GIT_TRACE",
38	"GIT_TRACE_PACK_ACCESS",
39	"GIT_TRACE_PACKET",
40	"GIT_TRACE_PERFORMANCE",
41	"GIT_TRACE_SETUP",
42
43	// Git HTTP proxy settings: https://git-scm.com/docs/git-config#git-config-httpproxy
44	"all_proxy",
45	"http_proxy",
46	"HTTP_PROXY",
47	"https_proxy",
48	"HTTPS_PROXY",
49	// libcurl settings: https://curl.haxx.se/libcurl/c/CURLOPT_NOPROXY.html
50	"no_proxy",
51	"NO_PROXY",
52}
53
54var envInjector = tracing.NewEnvInjector()
55
56const (
57	// maxStderrBytes is at most how many bytes will be written to stderr
58	maxStderrBytes = 10000 // 10kb
59	// maxStderrLineLength is at most how many bytes a single line will be
60	// written to stderr. Lines exceeding this limit should be truncated
61	maxStderrLineLength = 4096
62)
63
64// Command encapsulates a running exec.Cmd. The embedded exec.Cmd is
65// terminated and reaped automatically when the context.Context that
66// created it is canceled.
67type Command struct {
68	reader       io.Reader
69	writer       io.WriteCloser
70	stderrBuffer *stderrBuffer
71	cmd          *exec.Cmd
72	context      context.Context
73	startTime    time.Time
74
75	waitError error
76	waitOnce  sync.Once
77
78	span opentracing.Span
79}
80
81type stdinSentinel struct{}
82
83func (stdinSentinel) Read([]byte) (int, error) {
84	return 0, errors.New("stdin sentinel should not be read from")
85}
86
87// SetupStdin instructs New() to configure the stdin pipe of the command it is
88// creating. This allows you call Write() on the command as if it is an ordinary
89// io.Writer, sending data directly to the stdin of the process.
90//
91// You should not call Read() on this value - it is strictly for configuration!
92var SetupStdin io.Reader = stdinSentinel{}
93
94// Read calls Read() on the stdout pipe of the command.
95func (c *Command) Read(p []byte) (int, error) {
96	if c.reader == nil {
97		panic("command has no reader")
98	}
99
100	return c.reader.Read(p)
101}
102
103// Write calls Write() on the stdin pipe of the command.
104func (c *Command) Write(p []byte) (int, error) {
105	if c.writer == nil {
106		panic("command has no writer")
107	}
108
109	return c.writer.Write(p)
110}
111
112// Wait calls Wait() on the exec.Cmd instance inside the command. This
113// blocks until the command has finished and reports the command exit
114// status via the error return value. Use ExitStatus to get the integer
115// exit status from the error returned by Wait().
116func (c *Command) Wait() error {
117	c.waitOnce.Do(c.wait)
118
119	return c.waitError
120}
121
122var wg = &sync.WaitGroup{}
123
124// WaitAllDone waits for all commands started by the command package to
125// finish. This can only be called once in the lifecycle of the current
126// Go process.
127func WaitAllDone() {
128	wg.Wait()
129}
130
131type contextWithoutDonePanic string
132
133// New creates a Command from an exec.Cmd. On success, the Command
134// contains a running subprocess. When ctx is canceled the embedded
135// process will be terminated and reaped automatically.
136//
137// If stdin is specified as SetupStdin, you will be able to write to the stdin
138// of the subprocess by calling Write() on the returned Command.
139func New(ctx context.Context, cmd *exec.Cmd, stdin io.Reader, stdout, stderr io.Writer, env ...string) (*Command, error) {
140	if ctx.Done() == nil {
141		panic(contextWithoutDonePanic("command spawned with context without Done() channel"))
142	}
143
144	if err := checkNullArgv(cmd); err != nil {
145		return nil, err
146	}
147
148	span, ctx := opentracing.StartSpanFromContext(
149		ctx,
150		cmd.Path,
151		opentracing.Tag{Key: "args", Value: strings.Join(cmd.Args, " ")},
152	)
153
154	putToken, err := getSpawnToken(ctx)
155	if err != nil {
156		return nil, err
157	}
158	defer putToken()
159
160	logPid := -1
161	defer func() {
162		ctxlogrus.Extract(ctx).WithFields(logrus.Fields{
163			"pid":  logPid,
164			"path": cmd.Path,
165			"args": cmd.Args,
166		}).Debug("spawn")
167	}()
168
169	command := &Command{
170		cmd:       cmd,
171		startTime: time.Now(),
172		context:   ctx,
173		span:      span,
174	}
175
176	// Explicitly set the environment for the command
177	env = append(env, "GIT_TERMINAL_PROMPT=0")
178
179	// Export env vars
180	cmd.Env = append(env, AllowedEnvironment(os.Environ())...)
181	cmd.Env = envInjector(ctx, cmd.Env)
182
183	// Start the command in its own process group (nice for signalling)
184	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
185
186	// Three possible values for stdin:
187	//   * nil - Go implicitly uses /dev/null
188	//   * SetupStdin - configure with cmd.StdinPipe(), allowing Write() to work
189	//   * Another io.Reader - becomes cmd.Stdin. Write() will not work
190	if stdin == SetupStdin {
191		pipe, err := cmd.StdinPipe()
192		if err != nil {
193			return nil, fmt.Errorf("GitCommand: stdin: %v", err)
194		}
195		command.writer = pipe
196	} else if stdin != nil {
197		cmd.Stdin = stdin
198	}
199
200	if stdout != nil {
201		// We don't assign a reader if an stdout override was passed. We assume
202		// output is going to be directly handled by the caller.
203		cmd.Stdout = stdout
204	} else {
205		pipe, err := cmd.StdoutPipe()
206		if err != nil {
207			return nil, fmt.Errorf("GitCommand: stdout: %v", err)
208		}
209		command.reader = pipe
210	}
211
212	if stderr != nil {
213		cmd.Stderr = stderr
214	} else {
215		command.stderrBuffer, err = newStderrBuffer(maxStderrBytes, maxStderrLineLength, []byte("\n"))
216		if err != nil {
217			return nil, fmt.Errorf("GitCommand: failed to create stderr buffer: %v", err)
218		}
219		cmd.Stderr = command.stderrBuffer
220	}
221
222	if err := cmd.Start(); err != nil {
223		return nil, fmt.Errorf("GitCommand: start %v: %v", cmd.Args, err)
224	}
225	inFlightCommandGauge.Inc()
226
227	// The goroutine below is responsible for terminating and reaping the
228	// process when ctx is canceled.
229	wg.Add(1)
230	go func() {
231		<-ctx.Done()
232
233		if process := cmd.Process; process != nil && process.Pid > 0 {
234			// Send SIGTERM to the process group of cmd
235			syscall.Kill(-process.Pid, syscall.SIGTERM)
236		}
237		command.Wait()
238		wg.Done()
239	}()
240
241	logPid = cmd.Process.Pid
242
243	return command, nil
244}
245
246// AllowedEnvironment filters the given slice of environment variables and
247// returns all variables which are allowed per the variables defined above.
248// This is useful for constructing a base environment in which a command can be
249// run.
250func AllowedEnvironment(envs []string) []string {
251	var filtered []string
252
253	for _, env := range envs {
254		for _, exportedEnv := range exportedEnvVars {
255			if strings.HasPrefix(env, exportedEnv+"=") {
256				filtered = append(filtered, env)
257			}
258		}
259	}
260
261	return filtered
262}
263
264// This function should never be called directly, use Wait().
265func (c *Command) wait() {
266	if c.writer != nil {
267		// Prevent the command from blocking on waiting for stdin to be closed
268		c.writer.Close()
269	}
270
271	if c.reader != nil {
272		// Prevent the command from blocking on writing to its stdout.
273		io.Copy(ioutil.Discard, c.reader)
274	}
275
276	c.waitError = c.cmd.Wait()
277
278	inFlightCommandGauge.Dec()
279
280	c.logProcessComplete()
281}
282
283// ExitStatus will return the exit-code from an error returned by Wait().
284func ExitStatus(err error) (int, bool) {
285	exitError, ok := err.(*exec.ExitError)
286	if !ok {
287		return 0, false
288	}
289
290	waitStatus, ok := exitError.Sys().(syscall.WaitStatus)
291	if !ok {
292		return 0, false
293	}
294
295	return waitStatus.ExitStatus(), true
296}
297
298func (c *Command) logProcessComplete() {
299	exitCode := 0
300	if c.waitError != nil {
301		if exitStatus, ok := ExitStatus(c.waitError); ok {
302			exitCode = exitStatus
303		}
304	}
305
306	ctx := c.context
307	cmd := c.cmd
308
309	systemTime := cmd.ProcessState.SystemTime()
310	userTime := cmd.ProcessState.UserTime()
311	realTime := time.Since(c.startTime)
312
313	entry := ctxlogrus.Extract(ctx).WithFields(logrus.Fields{
314		"pid":                    cmd.ProcessState.Pid(),
315		"path":                   cmd.Path,
316		"args":                   cmd.Args,
317		"command.exitCode":       exitCode,
318		"command.system_time_ms": systemTime.Seconds() * 1000,
319		"command.user_time_ms":   userTime.Seconds() * 1000,
320		"command.real_time_ms":   realTime.Seconds() * 1000,
321	})
322
323	rusage, ok := cmd.ProcessState.SysUsage().(*syscall.Rusage)
324	if ok {
325		entry = entry.WithFields(logrus.Fields{
326			"command.maxrss":  rusage.Maxrss,
327			"command.inblock": rusage.Inblock,
328			"command.oublock": rusage.Oublock,
329		})
330	}
331
332	entry.Debug("spawn complete")
333	if c.stderrBuffer != nil && c.stderrBuffer.Len() > 0 {
334		entry.Error(c.stderrBuffer.String())
335	}
336
337	if stats := StatsFromContext(ctx); stats != nil {
338		stats.RecordSum("command.count", 1)
339		stats.RecordSum("command.system_time_ms", int(systemTime.Seconds()*1000))
340		stats.RecordSum("command.user_time_ms", int(userTime.Seconds()*1000))
341		stats.RecordSum("command.real_time_ms", int(realTime.Seconds()*1000))
342
343		if ok {
344			stats.RecordMax("command.maxrss", int(rusage.Maxrss))
345			stats.RecordSum("command.inblock", int(rusage.Inblock))
346			stats.RecordSum("command.oublock", int(rusage.Oublock))
347			stats.RecordSum("command.minflt", int(rusage.Minflt))
348			stats.RecordSum("command.majflt", int(rusage.Majflt))
349		}
350	}
351
352	c.span.LogKV(
353		"pid", cmd.ProcessState.Pid(),
354		"exit_code", exitCode,
355		"system_time_ms", int(systemTime.Seconds()*1000),
356		"user_time_ms", int(userTime.Seconds()*1000),
357		"real_time_ms", int(realTime.Seconds()*1000),
358	)
359	if ok {
360		c.span.LogKV(
361			"maxrss", rusage.Maxrss,
362			"inblock", rusage.Inblock,
363			"oublock", rusage.Oublock,
364			"minflt", rusage.Minflt,
365			"majflt", rusage.Majflt,
366		)
367	}
368	c.span.Finish()
369}
370
371// Command arguments will be passed to the exec syscall as
372// null-terminated C strings. That means the arguments themselves may not
373// contain a null byte. The go stdlib checks for null bytes but it
374// returns a cryptic error. This function returns a more explicit error.
375func checkNullArgv(cmd *exec.Cmd) error {
376	for _, arg := range cmd.Args {
377		if strings.IndexByte(arg, 0) > -1 {
378			// Use %q so that the null byte gets printed as \x00
379			return fmt.Errorf("detected null byte in command argument %q", arg)
380		}
381	}
382
383	return nil
384}
385
386// Args is an accessor for the command arguments
387func (c *Command) Args() []string {
388	return c.cmd.Args
389}
390
391// Env is an accessor for the environment variables
392func (c *Command) Env() []string {
393	return c.cmd.Env
394}
395
396// Pid is an accessor for the pid
397func (c *Command) Pid() int {
398	return c.cmd.Process.Pid
399}
400