1package command 2 3import ( 4 "context" 5 "errors" 6 "fmt" 7 "io" 8 "os" 9 "os/exec" 10 "strings" 11 "sync" 12 "syscall" 13 "time" 14 15 "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" 16 "github.com/opentracing/opentracing-go" 17 "github.com/sirupsen/logrus" 18 "gitlab.com/gitlab-org/labkit/tracing" 19) 20 21// GitEnv contains the ENV variables for git commands 22var GitEnv = []string{ 23 // Force english locale for consistency on the output messages 24 "LANG=en_US.UTF-8", 25} 26 27// exportedEnvVars contains a list of environment variables 28// that are always exported to child processes on spawn 29var exportedEnvVars = []string{ 30 "HOME", 31 "PATH", 32 "LD_LIBRARY_PATH", 33 "TZ", 34 35 // Export git tracing variables for easier debugging 36 "GIT_TRACE", 37 "GIT_TRACE_PACK_ACCESS", 38 "GIT_TRACE_PACKET", 39 "GIT_TRACE_PERFORMANCE", 40 "GIT_TRACE_SETUP", 41 42 // Git HTTP proxy settings: https://git-scm.com/docs/git-config#git-config-httpproxy 43 "all_proxy", 44 "http_proxy", 45 "HTTP_PROXY", 46 "https_proxy", 47 "HTTPS_PROXY", 48 // libcurl settings: https://curl.haxx.se/libcurl/c/CURLOPT_NOPROXY.html 49 "no_proxy", 50 "NO_PROXY", 51} 52 53var envInjector = tracing.NewEnvInjector() 54 55const ( 56 // maxStderrBytes is at most how many bytes will be written to stderr 57 maxStderrBytes = 10000 // 10kb 58 // maxStderrLineLength is at most how many bytes a single line will be 59 // written to stderr. Lines exceeding this limit should be truncated 60 maxStderrLineLength = 4096 61) 62 63// Command encapsulates a running exec.Cmd. The embedded exec.Cmd is 64// terminated and reaped automatically when the context.Context that 65// created it is canceled. 66type Command struct { 67 reader io.Reader 68 writer io.WriteCloser 69 stderrBuffer *stderrBuffer 70 cmd *exec.Cmd 71 context context.Context 72 startTime time.Time 73 74 waitError error 75 waitOnce sync.Once 76 77 span opentracing.Span 78} 79 80type stdinSentinel struct{} 81 82func (stdinSentinel) Read([]byte) (int, error) { 83 return 0, errors.New("stdin sentinel should not be read from") 84} 85 86// SetupStdin instructs New() to configure the stdin pipe of the command it is 87// creating. This allows you call Write() on the command as if it is an ordinary 88// io.Writer, sending data directly to the stdin of the process. 89// 90// You should not call Read() on this value - it is strictly for configuration! 91var SetupStdin io.Reader = stdinSentinel{} 92 93// Read calls Read() on the stdout pipe of the command. 94func (c *Command) Read(p []byte) (int, error) { 95 if c.reader == nil { 96 panic("command has no reader") 97 } 98 99 return c.reader.Read(p) 100} 101 102// Write calls Write() on the stdin pipe of the command. 103func (c *Command) Write(p []byte) (int, error) { 104 if c.writer == nil { 105 panic("command has no writer") 106 } 107 108 return c.writer.Write(p) 109} 110 111// Wait calls Wait() on the exec.Cmd instance inside the command. This 112// blocks until the command has finished and reports the command exit 113// status via the error return value. Use ExitStatus to get the integer 114// exit status from the error returned by Wait(). 115func (c *Command) Wait() error { 116 c.waitOnce.Do(c.wait) 117 118 return c.waitError 119} 120 121var wg = &sync.WaitGroup{} 122 123// WaitAllDone waits for all commands started by the command package to 124// finish. This can only be called once in the lifecycle of the current 125// Go process. 126func WaitAllDone() { 127 wg.Wait() 128} 129 130type contextWithoutDonePanic string 131 132// New creates a Command from an exec.Cmd. On success, the Command 133// contains a running subprocess. When ctx is canceled the embedded 134// process will be terminated and reaped automatically. 135// 136// If stdin is specified as SetupStdin, you will be able to write to the stdin 137// of the subprocess by calling Write() on the returned Command. 138func New(ctx context.Context, cmd *exec.Cmd, stdin io.Reader, stdout, stderr io.Writer, env ...string) (*Command, error) { 139 if ctx.Done() == nil { 140 panic(contextWithoutDonePanic("command spawned with context without Done() channel")) 141 } 142 143 if err := checkNullArgv(cmd); err != nil { 144 return nil, err 145 } 146 147 span, ctx := opentracing.StartSpanFromContext( 148 ctx, 149 cmd.Path, 150 opentracing.Tag{Key: "args", Value: strings.Join(cmd.Args, " ")}, 151 ) 152 153 putToken, err := getSpawnToken(ctx) 154 if err != nil { 155 return nil, err 156 } 157 defer putToken() 158 159 logPid := -1 160 defer func() { 161 ctxlogrus.Extract(ctx).WithFields(logrus.Fields{ 162 "pid": logPid, 163 "path": cmd.Path, 164 "args": cmd.Args, 165 }).Debug("spawn") 166 }() 167 168 command := &Command{ 169 cmd: cmd, 170 startTime: time.Now(), 171 context: ctx, 172 span: span, 173 } 174 175 // Explicitly set the environment for the command 176 env = append(env, "GIT_TERMINAL_PROMPT=0") 177 178 // Export env vars 179 cmd.Env = append(env, AllowedEnvironment(os.Environ())...) 180 cmd.Env = envInjector(ctx, cmd.Env) 181 182 // Start the command in its own process group (nice for signalling) 183 cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} 184 185 // Three possible values for stdin: 186 // * nil - Go implicitly uses /dev/null 187 // * SetupStdin - configure with cmd.StdinPipe(), allowing Write() to work 188 // * Another io.Reader - becomes cmd.Stdin. Write() will not work 189 if stdin == SetupStdin { 190 pipe, err := cmd.StdinPipe() 191 if err != nil { 192 return nil, fmt.Errorf("GitCommand: stdin: %v", err) 193 } 194 command.writer = pipe 195 } else if stdin != nil { 196 cmd.Stdin = stdin 197 } 198 199 if stdout != nil { 200 // We don't assign a reader if an stdout override was passed. We assume 201 // output is going to be directly handled by the caller. 202 cmd.Stdout = stdout 203 } else { 204 pipe, err := cmd.StdoutPipe() 205 if err != nil { 206 return nil, fmt.Errorf("GitCommand: stdout: %v", err) 207 } 208 command.reader = pipe 209 } 210 211 if stderr != nil { 212 cmd.Stderr = stderr 213 } else { 214 command.stderrBuffer, err = newStderrBuffer(maxStderrBytes, maxStderrLineLength, []byte("\n")) 215 if err != nil { 216 return nil, fmt.Errorf("GitCommand: failed to create stderr buffer: %v", err) 217 } 218 cmd.Stderr = command.stderrBuffer 219 } 220 221 if err := cmd.Start(); err != nil { 222 return nil, fmt.Errorf("GitCommand: start %v: %v", cmd.Args, err) 223 } 224 inFlightCommandGauge.Inc() 225 226 // The goroutine below is responsible for terminating and reaping the 227 // process when ctx is canceled. 228 wg.Add(1) 229 go func() { 230 <-ctx.Done() 231 232 if process := cmd.Process; process != nil && process.Pid > 0 { 233 // Send SIGTERM to the process group of cmd 234 syscall.Kill(-process.Pid, syscall.SIGTERM) 235 } 236 command.Wait() 237 }() 238 239 logPid = cmd.Process.Pid 240 241 return command, nil 242} 243 244// AllowedEnvironment filters the given slice of environment variables and 245// returns all variables which are allowed per the variables defined above. 246// This is useful for constructing a base environment in which a command can be 247// run. 248func AllowedEnvironment(envs []string) []string { 249 var filtered []string 250 251 for _, env := range envs { 252 for _, exportedEnv := range exportedEnvVars { 253 if strings.HasPrefix(env, exportedEnv+"=") { 254 filtered = append(filtered, env) 255 } 256 } 257 } 258 259 return filtered 260} 261 262// This function should never be called directly, use Wait(). 263func (c *Command) wait() { 264 if c.writer != nil { 265 // Prevent the command from blocking on waiting for stdin to be closed 266 c.writer.Close() 267 } 268 269 if c.reader != nil { 270 // Prevent the command from blocking on writing to its stdout. 271 _, _ = io.Copy(io.Discard, c.reader) 272 } 273 274 c.waitError = c.cmd.Wait() 275 276 inFlightCommandGauge.Dec() 277 278 c.logProcessComplete() 279 280 // This is a bit out-of-place here given that the `wg.Add()` call is in `New()`. 281 // But in `New()`, we have to resort waiting on the context being finished until we 282 // would be able to decrement the number of in-flight commands. Given that in some 283 // cases we detach processes from their initial context such that they run in the 284 // background, this would cause us to take longer than necessary to decrease the 285 // wait group counter again. So we instead do it here to accelerate the process, 286 // even though it's less idiomatic. 287 wg.Done() 288} 289 290// ExitStatus will return the exit-code from an error returned by Wait(). 291func ExitStatus(err error) (int, bool) { 292 exitError, ok := err.(*exec.ExitError) 293 if !ok { 294 return 0, false 295 } 296 297 waitStatus, ok := exitError.Sys().(syscall.WaitStatus) 298 if !ok { 299 return 0, false 300 } 301 302 return waitStatus.ExitStatus(), true 303} 304 305func (c *Command) logProcessComplete() { 306 exitCode := 0 307 if c.waitError != nil { 308 if exitStatus, ok := ExitStatus(c.waitError); ok { 309 exitCode = exitStatus 310 } 311 } 312 313 ctx := c.context 314 cmd := c.cmd 315 316 systemTime := cmd.ProcessState.SystemTime() 317 userTime := cmd.ProcessState.UserTime() 318 realTime := time.Since(c.startTime) 319 320 entry := ctxlogrus.Extract(ctx).WithFields(logrus.Fields{ 321 "pid": cmd.ProcessState.Pid(), 322 "path": cmd.Path, 323 "args": cmd.Args, 324 "command.exitCode": exitCode, 325 "command.system_time_ms": systemTime.Seconds() * 1000, 326 "command.user_time_ms": userTime.Seconds() * 1000, 327 "command.cpu_time_ms": (systemTime.Seconds() + userTime.Seconds()) * 1000, 328 "command.real_time_ms": realTime.Seconds() * 1000, 329 }) 330 331 rusage, ok := cmd.ProcessState.SysUsage().(*syscall.Rusage) 332 if ok { 333 entry = entry.WithFields(logrus.Fields{ 334 "command.maxrss": rusage.Maxrss, 335 "command.inblock": rusage.Inblock, 336 "command.oublock": rusage.Oublock, 337 }) 338 } 339 340 entry.Debug("spawn complete") 341 if c.stderrBuffer != nil && c.stderrBuffer.Len() > 0 { 342 entry.Error(c.stderrBuffer.String()) 343 } 344 345 if stats := StatsFromContext(ctx); stats != nil { 346 stats.RecordSum("command.count", 1) 347 stats.RecordSum("command.system_time_ms", int(systemTime.Seconds()*1000)) 348 stats.RecordSum("command.user_time_ms", int(userTime.Seconds()*1000)) 349 stats.RecordSum("command.cpu_time_ms", int((systemTime.Seconds()+userTime.Seconds())*1000)) 350 stats.RecordSum("command.real_time_ms", int(realTime.Seconds()*1000)) 351 352 if ok { 353 stats.RecordMax("command.maxrss", int(rusage.Maxrss)) 354 stats.RecordSum("command.inblock", int(rusage.Inblock)) 355 stats.RecordSum("command.oublock", int(rusage.Oublock)) 356 stats.RecordSum("command.minflt", int(rusage.Minflt)) 357 stats.RecordSum("command.majflt", int(rusage.Majflt)) 358 } 359 } 360 361 c.span.LogKV( 362 "pid", cmd.ProcessState.Pid(), 363 "exit_code", exitCode, 364 "system_time_ms", int(systemTime.Seconds()*1000), 365 "user_time_ms", int(userTime.Seconds()*1000), 366 "real_time_ms", int(realTime.Seconds()*1000), 367 ) 368 if ok { 369 c.span.LogKV( 370 "maxrss", rusage.Maxrss, 371 "inblock", rusage.Inblock, 372 "oublock", rusage.Oublock, 373 "minflt", rusage.Minflt, 374 "majflt", rusage.Majflt, 375 ) 376 } 377 c.span.Finish() 378} 379 380// Command arguments will be passed to the exec syscall as 381// null-terminated C strings. That means the arguments themselves may not 382// contain a null byte. The go stdlib checks for null bytes but it 383// returns a cryptic error. This function returns a more explicit error. 384func checkNullArgv(cmd *exec.Cmd) error { 385 for _, arg := range cmd.Args { 386 if strings.IndexByte(arg, 0) > -1 { 387 // Use %q so that the null byte gets printed as \x00 388 return fmt.Errorf("detected null byte in command argument %q", arg) 389 } 390 } 391 392 return nil 393} 394 395// Args is an accessor for the command arguments 396func (c *Command) Args() []string { 397 return c.cmd.Args 398} 399 400// Env is an accessor for the environment variables 401func (c *Command) Env() []string { 402 return c.cmd.Env 403} 404 405// Pid is an accessor for the pid 406func (c *Command) Pid() int { 407 return c.cmd.Process.Pid 408} 409