1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// This file implements the host side of CGI (being the webserver
6// parent process).
7
8// Package cgi implements CGI (Common Gateway Interface) as specified
9// in RFC 3875.
10//
11// Note that using CGI means starting a new process to handle each
12// request, which is typically less efficient than using a
13// long-running server.  This package is intended primarily for
14// compatibility with existing systems.
15package cgi
16
17import (
18	"bufio"
19	"fmt"
20	"io"
21	"log"
22	"net/http"
23	"os"
24	"os/exec"
25	"path/filepath"
26	"regexp"
27	"runtime"
28	"strconv"
29	"strings"
30)
31
// trailingPort matches a ":port" suffix consisting of decimal digits and
// captures the digits. ServeHTTP applies it to the request's Host value
// to derive the SERVER_PORT variable.
var trailingPort = regexp.MustCompile(`:([0-9]+)$`)
33
// osDefaultInheritEnv lists, keyed by runtime.GOOS value, the host
// environment variables that ServeHTTP copies into the CGI child's
// environment whenever they are set to a non-empty value in the host
// process, independent of Handler.InheritEnv.
var osDefaultInheritEnv = map[string][]string{
	"darwin":  {"DYLD_LIBRARY_PATH"},
	"freebsd": {"LD_LIBRARY_PATH"},
	"hpux":    {"LD_LIBRARY_PATH", "SHLIB_PATH"},
	"irix":    {"LD_LIBRARY_PATH", "LD_LIBRARYN32_PATH", "LD_LIBRARY64_PATH"},
	"linux":   {"LD_LIBRARY_PATH"},
	"openbsd": {"LD_LIBRARY_PATH"},
	"solaris": {"LD_LIBRARY_PATH", "LD_LIBRARY_PATH_32", "LD_LIBRARY_PATH_64"},
	"windows": {"SystemRoot", "COMSPEC", "PATHEXT", "WINDIR"},
}
44
// Handler runs an executable in a subprocess with a CGI environment.
// Its ServeHTTP method starts a new child process for every request
// and relays the child's output back to the client.
type Handler struct {
	Path string // path to the CGI executable
	Root string // root URI prefix of handler or empty for "/"

	// Dir specifies the CGI executable's working directory.
	// If Dir is empty, the base directory of Path is used.
	// If Path has no base directory, the current working
	// directory is used.
	Dir string

	Env        []string    // extra environment variables to set, if any, as "key=value"
	InheritEnv []string    // environment variables to inherit from host, as "key"; unset or empty ones are skipped
	Logger     *log.Logger // optional log for errors or nil to use log.Printf
	Args       []string    // optional arguments to pass to child process, following Path as argv[0]

	// PathLocationHandler specifies the root http Handler that
	// should handle internal redirects when the CGI process
	// returns a Location header value starting with a "/", as
	// specified in RFC 3875 § 6.3.2. This will likely be
	// http.DefaultServeMux.
	//
	// If nil, a CGI response with a local URI path is instead sent
	// back to the client and not redirected internally.
	PathLocationHandler http.Handler
}
71
// removeLeadingDuplicates returns env with every entry removed whose
// variable name (the text before the first "=", or the whole entry if
// it has none) appears again later in the slice. Only the last setting
// of each variable survives, and the survivors keep their relative
// order. The result is nil when env is empty.
//
// This makes it possible to override a default environment value by
// appending a new one, for example:
//
//	cgi.Handler{
//		...
//		Env: []string{"SCRIPT_FILENAME=foo.php"},
//	}
func removeLeadingDuplicates(env []string) (ret []string) {
	// name extracts the variable name of a "key=value" entry.
	name := func(entry string) string {
		if i := strings.Index(entry, "="); i >= 0 {
			return entry[:i]
		}
		return entry
	}

	// First pass: remember where each variable is set for the last time.
	// This replaces the quadratic "scan everything after me" search.
	last := make(map[string]int, len(env))
	for i, e := range env {
		last[name(e)] = i
	}

	// Second pass: keep an entry only if it is that final occurrence.
	for i, e := range env {
		if last[name(e)] == i {
			ret = append(ret, e)
		}
	}
	return ret
}
96
97func (h *Handler) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
98	root := h.Root
99	if root == "" {
100		root = "/"
101	}
102
103	if len(req.TransferEncoding) > 0 && req.TransferEncoding[0] == "chunked" {
104		rw.WriteHeader(http.StatusBadRequest)
105		rw.Write([]byte("Chunked request bodies are not supported by CGI."))
106		return
107	}
108
109	pathInfo := req.URL.Path
110	if root != "/" && strings.HasPrefix(pathInfo, root) {
111		pathInfo = pathInfo[len(root):]
112	}
113
114	port := "80"
115	if matches := trailingPort.FindStringSubmatch(req.Host); len(matches) != 0 {
116		port = matches[1]
117	}
118
119	env := []string{
120		"SERVER_SOFTWARE=go",
121		"SERVER_NAME=" + req.Host,
122		"SERVER_PROTOCOL=HTTP/1.1",
123		"HTTP_HOST=" + req.Host,
124		"GATEWAY_INTERFACE=CGI/1.1",
125		"REQUEST_METHOD=" + req.Method,
126		"QUERY_STRING=" + req.URL.RawQuery,
127		"REQUEST_URI=" + req.URL.RequestURI(),
128		"PATH_INFO=" + pathInfo,
129		"SCRIPT_NAME=" + root,
130		"SCRIPT_FILENAME=" + h.Path,
131		"REMOTE_ADDR=" + req.RemoteAddr,
132		"REMOTE_HOST=" + req.RemoteAddr,
133		"SERVER_PORT=" + port,
134	}
135
136	if req.TLS != nil {
137		env = append(env, "HTTPS=on")
138	}
139
140	for k, v := range req.Header {
141		k = strings.Map(upperCaseAndUnderscore, k)
142		joinStr := ", "
143		if k == "COOKIE" {
144			joinStr = "; "
145		}
146		env = append(env, "HTTP_"+k+"="+strings.Join(v, joinStr))
147	}
148
149	if req.ContentLength > 0 {
150		env = append(env, fmt.Sprintf("CONTENT_LENGTH=%d", req.ContentLength))
151	}
152	if ctype := req.Header.Get("Content-Type"); ctype != "" {
153		env = append(env, "CONTENT_TYPE="+ctype)
154	}
155
156	if h.Env != nil {
157		env = append(env, h.Env...)
158	}
159
160	envPath := os.Getenv("PATH")
161	if envPath == "" {
162		envPath = "/bin:/usr/bin:/usr/ucb:/usr/bsd:/usr/local/bin"
163	}
164	env = append(env, "PATH="+envPath)
165
166	for _, e := range h.InheritEnv {
167		if v := os.Getenv(e); v != "" {
168			env = append(env, e+"="+v)
169		}
170	}
171
172	for _, e := range osDefaultInheritEnv[runtime.GOOS] {
173		if v := os.Getenv(e); v != "" {
174			env = append(env, e+"="+v)
175		}
176	}
177
178	env = removeLeadingDuplicates(env)
179
180	var cwd, path string
181	if h.Dir != "" {
182		path = h.Path
183		cwd = h.Dir
184	} else {
185		cwd, path = filepath.Split(h.Path)
186	}
187	if cwd == "" {
188		cwd = "."
189	}
190
191	internalError := func(err error) {
192		rw.WriteHeader(http.StatusInternalServerError)
193		h.printf("CGI error: %v", err)
194	}
195
196	cmd := &exec.Cmd{
197		Path:   path,
198		Args:   append([]string{h.Path}, h.Args...),
199		Dir:    cwd,
200		Env:    env,
201		Stderr: os.Stderr, // for now
202	}
203	if req.ContentLength != 0 {
204		cmd.Stdin = req.Body
205	}
206	stdoutRead, err := cmd.StdoutPipe()
207	if err != nil {
208		internalError(err)
209		return
210	}
211
212	err = cmd.Start()
213	if err != nil {
214		internalError(err)
215		return
216	}
217	defer cmd.Wait()
218	defer stdoutRead.Close()
219
220	linebody := bufio.NewReaderSize(stdoutRead, 1024)
221	headers := make(http.Header)
222	statusCode := 0
223	for {
224		line, isPrefix, err := linebody.ReadLine()
225		if isPrefix {
226			rw.WriteHeader(http.StatusInternalServerError)
227			h.printf("cgi: long header line from subprocess.")
228			return
229		}
230		if err == io.EOF {
231			break
232		}
233		if err != nil {
234			rw.WriteHeader(http.StatusInternalServerError)
235			h.printf("cgi: error reading headers: %v", err)
236			return
237		}
238		if len(line) == 0 {
239			break
240		}
241		parts := strings.SplitN(string(line), ":", 2)
242		if len(parts) < 2 {
243			h.printf("cgi: bogus header line: %s", string(line))
244			continue
245		}
246		header, val := parts[0], parts[1]
247		header = strings.TrimSpace(header)
248		val = strings.TrimSpace(val)
249		switch {
250		case header == "Status":
251			if len(val) < 3 {
252				h.printf("cgi: bogus status (short): %q", val)
253				return
254			}
255			code, err := strconv.Atoi(val[0:3])
256			if err != nil {
257				h.printf("cgi: bogus status: %q", val)
258				h.printf("cgi: line was %q", line)
259				return
260			}
261			statusCode = code
262		default:
263			headers.Add(header, val)
264		}
265	}
266
267	if loc := headers.Get("Location"); loc != "" {
268		if strings.HasPrefix(loc, "/") && h.PathLocationHandler != nil {
269			h.handleInternalRedirect(rw, req, loc)
270			return
271		}
272		if statusCode == 0 {
273			statusCode = http.StatusFound
274		}
275	}
276
277	if statusCode == 0 {
278		statusCode = http.StatusOK
279	}
280
281	// Copy headers to rw's headers, after we've decided not to
282	// go into handleInternalRedirect, which won't want its rw
283	// headers to have been touched.
284	for k, vv := range headers {
285		for _, v := range vv {
286			rw.Header().Add(k, v)
287		}
288	}
289
290	rw.WriteHeader(statusCode)
291
292	_, err = io.Copy(rw, linebody)
293	if err != nil {
294		h.printf("cgi: copy error: %v", err)
295	}
296}
297
298func (h *Handler) printf(format string, v ...interface{}) {
299	if h.Logger != nil {
300		h.Logger.Printf(format, v...)
301	} else {
302		log.Printf(format, v...)
303	}
304}
305
306func (h *Handler) handleInternalRedirect(rw http.ResponseWriter, req *http.Request, path string) {
307	url, err := req.URL.Parse(path)
308	if err != nil {
309		rw.WriteHeader(http.StatusInternalServerError)
310		h.printf("cgi: error resolving local URI path %q: %v", path, err)
311		return
312	}
313	// TODO: RFC 3875 isn't clear if only GET is supported, but it
314	// suggests so: "Note that any message-body attached to the
315	// request (such as for a POST request) may not be available
316	// to the resource that is the target of the redirect."  We
317	// should do some tests against Apache to see how it handles
318	// POST, HEAD, etc. Does the internal redirect get the same
319	// method or just GET? What about incoming headers?
320	// (e.g. Cookies) Which headers, if any, are copied into the
321	// second request?
322	newReq := &http.Request{
323		Method:     "GET",
324		URL:        url,
325		Proto:      "HTTP/1.1",
326		ProtoMajor: 1,
327		ProtoMinor: 1,
328		Header:     make(http.Header),
329		Host:       url.Host,
330		RemoteAddr: req.RemoteAddr,
331		TLS:        req.TLS,
332	}
333	h.PathLocationHandler.ServeHTTP(rw, newReq)
334}
335
// upperCaseAndUnderscore maps one rune of an HTTP header name to its
// form in a CGI environment variable name: ASCII lower-case letters are
// upper-cased, and '-' and '=' become '_'. Every other rune is returned
// unchanged. It is intended for use with strings.Map.
func upperCaseAndUnderscore(r rune) rune {
	if 'a' <= r && r <= 'z' {
		return r - 'a' + 'A'
	}
	// '=' is maybe not covered by the CGI 'spec', but it would corrupt
	// the environment anyway, since Go represents the environment as a
	// slice of "key=value" strings.
	if r == '-' || r == '=' {
		return '_'
	}
	// TODO: other transformations in spec or practice?
	return r
}
351