1package api
2
3import (
4	"bytes"
5	"encoding/json"
6	"fmt"
7	"io"
8	"net/http"
9	"net/textproto"
10	"net/url"
11	"strconv"
12	"strings"
13
14	"github.com/prometheus/client_golang/prometheus"
15	"github.com/prometheus/client_golang/prometheus/promauto"
16
17	"gitlab.com/gitlab-org/gitaly/v14/proto/go/gitalypb"
18
19	"gitlab.com/gitlab-org/gitlab/workhorse/internal/config"
20	"gitlab.com/gitlab-org/gitlab/workhorse/internal/gitaly"
21	"gitlab.com/gitlab-org/gitlab/workhorse/internal/helper"
22	"gitlab.com/gitlab-org/gitlab/workhorse/internal/log"
23
24	"gitlab.com/gitlab-org/gitlab/workhorse/internal/secret"
25)
26
const (
	// ResponseContentType is the custom content type expected on internal API
	// responses. Requiring a dedicated type helps catch routing / programming
	// mistakes.
	ResponseContentType = "application/vnd.gitlab-workhorse+json"

	// failureResponseLimit is the byte limit applied when an upstream
	// response body is buffered in memory by bufferResponse (32 KiB).
	failureResponseLimit = 32 * 1024

	// geoProxyEndpointPath is the path GetGeoProxyURL appends to the API
	// base URL.
	geoProxyEndpointPath = "/api/v4/geo/proxy"
)
35
// API is a client for the internal GitLab API.
type API struct {
	// Client is the HTTP client; requests are sent through its Transport.
	Client *http.Client
	// URL is the base URL of the API; outgoing requests are rebased onto it.
	URL *url.URL
	// Version is handed to secret.NewRoundTripper together with the
	// transport when a request is issued.
	Version string
}
41
42var (
43	requestsCounter = promauto.NewCounterVec(
44		prometheus.CounterOpts{
45			Name: "gitlab_workhorse_internal_api_requests",
46			Help: "How many internal API requests have been completed by gitlab-workhorse, partitioned by status code and HTTP method.",
47		},
48		[]string{"code", "method"},
49	)
50	bytesTotal = promauto.NewCounter(
51		prometheus.CounterOpts{
52			Name: "gitlab_workhorse_internal_api_failure_response_bytes",
53			Help: "How many bytes have been returned by upstream GitLab in API failure/rejection response bodies.",
54		},
55	)
56)
57
58func NewAPI(myURL *url.URL, version string, roundTripper http.RoundTripper) *API {
59	return &API{
60		Client:  &http.Client{Transport: roundTripper},
61		URL:     myURL,
62		Version: version,
63	}
64}
65
// GeoProxyEndpointResponse is the JSON document decoded by GetGeoProxyURL.
type GeoProxyEndpointResponse struct {
	// GeoProxyURL is read from the "geo_proxy_url" key.
	GeoProxyURL string `json:"geo_proxy_url"`
}
69
70type HandleFunc func(http.ResponseWriter, *http.Request, *Response)
71
// MultipartUploadParams holds the presigned URLs and part size used for a
// multipart upload.
type MultipartUploadParams struct {
	// PartSize is the exact size of every uploaded part; only the final
	// part is allowed to be smaller.
	PartSize int64
	// PartURLs lists the presigned URLs, one per part.
	PartURLs []string
	// CompleteURL is the presigned URL for CompleteMultipartUpload.
	CompleteURL string
	// AbortURL is the presigned URL for AbortMultipartUpload.
	AbortURL string
}
82
83type ObjectStorageParams struct {
84	Provider      string
85	S3Config      config.S3Config
86	GoCloudConfig config.GoCloudConfig
87}
88
89type RemoteObject struct {
90	// GetURL is an S3 GetObject URL
91	GetURL string
92	// DeleteURL is a presigned S3 RemoveObject URL
93	DeleteURL string
94	// StoreURL is the temporary presigned S3 PutObject URL to which upload the first found file
95	StoreURL string
96	// Boolean to indicate whether to use headers included in PutHeaders
97	CustomPutHeaders bool
98	// PutHeaders are HTTP headers (e.g. Content-Type) to be sent with StoreURL
99	PutHeaders map[string]string
100	// Whether to ignore Rails pre-signed URLs and have Workhorse directly access object storage provider
101	UseWorkhorseClient bool
102	// Remote, temporary object name where Rails will move to the final destination
103	RemoteTempObjectID string
104	// ID is a unique identifier of object storage upload
105	ID string
106	// Timeout is a number that represents timeout in seconds for sending data to StoreURL
107	Timeout int
108	// MultipartUpload contains presigned URLs for S3 MultipartUpload
109	MultipartUpload *MultipartUploadParams
110	// Object storage config for Workhorse client
111	ObjectStorage *ObjectStorageParams
112}
113
114type Response struct {
115	// GL_ID is an environment variable used by gitlab-shell hooks during 'git
116	// push' and 'git pull'
117	GL_ID string
118
119	// GL_USERNAME holds gitlab username of the user who is taking the action causing hooks to be invoked
120	GL_USERNAME string
121
122	// GL_REPOSITORY is an environment variable used by gitlab-shell hooks during
123	// 'git push' and 'git pull'
124	GL_REPOSITORY string
125	// GitConfigOptions holds the custom options that we want to pass to the git command
126	GitConfigOptions []string
127	// StoreLFSPath is provided by the GitLab Rails application to mark where the tmp file should be placed.
128	// This field is deprecated. GitLab will use TempPath instead
129	StoreLFSPath string
130	// LFS object id
131	LfsOid string
132	// LFS object size
133	LfsSize int64
134	// TmpPath is the path where we should store temporary files
135	// This is set by authorization middleware
136	TempPath string
137	// RemoteObject is provided by the GitLab Rails application
138	// and defines a way to store object on remote storage
139	RemoteObject RemoteObject
140	// Archive is the path where the artifacts archive is stored
141	Archive string `json:"archive"`
142	// Entry is a filename inside the archive point to file that needs to be extracted
143	Entry string `json:"entry"`
144	// Used to communicate channel session details
145	Channel *ChannelSettings
146	// GitalyServer specifies an address and authentication token for a gitaly server we should connect to.
147	GitalyServer gitaly.Server
148	// Repository object for making gRPC requests to Gitaly.
149	Repository gitalypb.Repository
150	// For git-http, does the requestor have the right to view all refs?
151	ShowAllRefs bool
152	// Detects whether an artifact is used for code intelligence
153	ProcessLsif bool
154	// Detects whether LSIF artifact will be parsed with references
155	ProcessLsifReferences bool
156	// The maximum accepted size in bytes of the upload
157	MaximumSize int64
158}
159
// singleJoiningSlash concatenates a and b so that exactly one "/" separates
// them, whether or not a ends with a slash or b starts with one.
// It is taken from reverseproxy.go:singleJoiningSlash.
func singleJoiningSlash(a, b string) string {
	trailing := strings.HasSuffix(a, "/")
	leading := strings.HasPrefix(b, "/")

	if trailing && leading {
		// Both sides contribute a slash: drop the one from b.
		return a + b[1:]
	}
	if trailing || leading {
		// Exactly one side contributes the slash already.
		return a + b
	}
	return a + "/" + b
}
172
173// joinURLPath is taken from reverseproxy.go:joinURLPath
174func joinURLPath(a *url.URL, b string) (path string, rawpath string) {
175	// Avoid adding a trailing slash if the suffix is empty
176	if b == "" {
177		return a.Path, a.RawPath
178	} else if a.RawPath == "" {
179		return singleJoiningSlash(a.Path, b), ""
180	}
181
182	// Same as singleJoiningSlash, but uses EscapedPath to determine
183	// whether a slash should be added
184	apath := a.EscapedPath()
185	bpath := b
186
187	aslash := strings.HasSuffix(apath, "/")
188	bslash := strings.HasPrefix(bpath, "/")
189
190	switch {
191	case aslash && bslash:
192		return a.Path + bpath[1:], apath + bpath[1:]
193	case !aslash && !bslash:
194		return a.Path + "/" + bpath, apath + "/" + bpath
195	}
196	return a.Path + bpath, apath + bpath
197}
198
199// rebaseUrl is taken from reverseproxy.go:NewSingleHostReverseProxy
200func rebaseUrl(url *url.URL, onto *url.URL, suffix string) *url.URL {
201	newUrl := *url
202	newUrl.Scheme = onto.Scheme
203	newUrl.Host = onto.Host
204	newUrl.Path, newUrl.RawPath = joinURLPath(url, suffix)
205
206	if onto.RawQuery == "" || newUrl.RawQuery == "" {
207		newUrl.RawQuery = onto.RawQuery + newUrl.RawQuery
208	} else {
209		newUrl.RawQuery = onto.RawQuery + "&" + newUrl.RawQuery
210	}
211	return &newUrl
212}
213
214func (api *API) newRequest(r *http.Request, suffix string) (*http.Request, error) {
215	authReq := &http.Request{
216		Method: r.Method,
217		URL:    rebaseUrl(r.URL, api.URL, suffix),
218		Header: helper.HeaderClone(r.Header),
219	}
220
221	authReq = authReq.WithContext(r.Context())
222
223	removeConnectionHeaders(authReq.Header)
224
225	// Clean some headers when issuing a new request without body
226	authReq.Header.Del("Content-Type")
227	authReq.Header.Del("Content-Encoding")
228	authReq.Header.Del("Content-Length")
229	authReq.Header.Del("Content-Disposition")
230	authReq.Header.Del("Accept-Encoding")
231
232	// Hop-by-hop headers. These are removed when sent to the backend.
233	// http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html
234	authReq.Header.Del("Transfer-Encoding")
235	authReq.Header.Del("Connection")
236	authReq.Header.Del("Keep-Alive")
237	authReq.Header.Del("Proxy-Authenticate")
238	authReq.Header.Del("Proxy-Authorization")
239	authReq.Header.Del("Te")
240	// "Trailer", not "Trailers" as per rfc2616; See errata https://www.rfc-editor.org/errata_search.php?eid=4522
241	// See https://httpwg.org/http-core/draft-ietf-httpbis-semantics-latest.html#field.connection
242	authReq.Header.Del("Trailer")
243	authReq.Header.Del("Upgrade")
244
245	// Also forward the Host header, which is excluded from the Header map by the http library.
246	// This allows the Host header received by the backend to be consistent with other
247	// requests not going through gitlab-workhorse.
248	authReq.Host = r.Host
249
250	return authReq, nil
251}
252
253// PreAuthorize performs a pre-authorization check against the API for the given HTTP request
254//
255// If `outErr` is set, the other fields will be nil and it should be treated as
256// a 500 error.
257//
258// If httpResponse is present, the caller is responsible for closing its body
259//
260// authResponse will only be present if the authorization check was successful
261func (api *API) PreAuthorize(suffix string, r *http.Request) (httpResponse *http.Response, authResponse *Response, outErr error) {
262	authReq, err := api.newRequest(r, suffix)
263	if err != nil {
264		return nil, nil, fmt.Errorf("preAuthorizeHandler newUpstreamRequest: %v", err)
265	}
266
267	httpResponse, err = api.doRequestWithoutRedirects(authReq)
268	if err != nil {
269		return nil, nil, fmt.Errorf("preAuthorizeHandler: do request: %v", err)
270	}
271	defer func() {
272		if outErr != nil {
273			httpResponse.Body.Close()
274			httpResponse = nil
275		}
276	}()
277	requestsCounter.WithLabelValues(strconv.Itoa(httpResponse.StatusCode), authReq.Method).Inc()
278
279	// This may be a false positive, e.g. for .../info/refs, rather than a
280	// failure, so pass the response back
281	if httpResponse.StatusCode != http.StatusOK || !validResponseContentType(httpResponse) {
282		return httpResponse, nil, nil
283	}
284
285	authResponse = &Response{}
286	// The auth backend validated the client request and told us additional
287	// request metadata. We must extract this information from the auth
288	// response body.
289	if err := json.NewDecoder(httpResponse.Body).Decode(authResponse); err != nil {
290		return httpResponse, nil, fmt.Errorf("preAuthorizeHandler: decode authorization response: %v", err)
291	}
292
293	return httpResponse, authResponse, nil
294}
295
296func (api *API) PreAuthorizeHandler(next HandleFunc, suffix string) http.Handler {
297	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
298		httpResponse, authResponse, err := api.PreAuthorize(suffix, r)
299		if httpResponse != nil {
300			defer httpResponse.Body.Close()
301		}
302
303		if err != nil {
304			helper.Fail500(w, r, err)
305			return
306		}
307
308		// The response couldn't be interpreted as a valid auth response, so
309		// pass it back (mostly) unmodified
310		if httpResponse != nil && authResponse == nil {
311			passResponseBack(httpResponse, w, r)
312			return
313		}
314
315		httpResponse.Body.Close() // Free up the Puma thread
316
317		copyAuthHeader(httpResponse, w)
318
319		next(w, r, authResponse)
320	})
321}
322
323func (api *API) doRequestWithoutRedirects(authReq *http.Request) (*http.Response, error) {
324	signingTripper := secret.NewRoundTripper(api.Client.Transport, api.Version)
325
326	return signingTripper.RoundTrip(authReq)
327}
328
// removeConnectionHeaders deletes from h every header that is named in one of
// its "Connection" header values (comma-separated, surrounding whitespace
// ignored). The "Connection" header itself is left in place unless it is
// listed.
// See https://tools.ietf.org/html/rfc7230#section-6.1
func removeConnectionHeaders(h http.Header) {
	for _, value := range h["Connection"] {
		for _, token := range strings.Split(value, ",") {
			name := textproto.TrimString(token)
			if name == "" {
				continue
			}
			h.Del(name)
		}
	}
}
340
// copyAuthHeader copies the WWW-Authenticate header(s) of httpResponse to w,
// keeping the key exactly as it appears in the response. Negotiate
// authentication (Kerberos) may need to return a WWW-Authenticate header to
// the client even in case of success as per RFC4559.
func copyAuthHeader(httpResponse *http.Response, w http.ResponseWriter) {
	for name, values := range httpResponse.Header {
		// Case-insensitive comparison as per RFC7230
		if !strings.EqualFold(name, "WWW-Authenticate") {
			continue
		}
		w.Header()[name] = values
	}
}
351
352func passResponseBack(httpResponse *http.Response, w http.ResponseWriter, r *http.Request) {
353	// NGINX response buffering is disabled on this path (with
354	// X-Accel-Buffering: no) but we still want to free up the Puma thread
355	// that generated httpResponse as fast as possible. To do this we buffer
356	// the entire response body in memory before sending it on.
357	responseBody, err := bufferResponse(httpResponse.Body)
358	if err != nil {
359		helper.Fail500(w, r, err)
360		return
361	}
362	httpResponse.Body.Close() // Free up the Puma thread
363	bytesTotal.Add(float64(responseBody.Len()))
364
365	for k, v := range httpResponse.Header {
366		// Accommodate broken clients that do case-sensitive header lookup
367		if k == "Www-Authenticate" {
368			w.Header()["WWW-Authenticate"] = v
369		} else {
370			w.Header()[k] = v
371		}
372	}
373	w.WriteHeader(httpResponse.StatusCode)
374	if _, err := io.Copy(w, responseBody); err != nil {
375		log.WithRequest(r).WithError(err).Error()
376	}
377}
378
379func bufferResponse(r io.Reader) (*bytes.Buffer, error) {
380	responseBody := &bytes.Buffer{}
381	n, err := io.Copy(responseBody, io.LimitReader(r, failureResponseLimit))
382	if err != nil {
383		return nil, err
384	}
385
386	if n == failureResponseLimit {
387		return nil, fmt.Errorf("response body exceeded maximum buffer size (%d bytes)", failureResponseLimit)
388	}
389
390	return responseBody, nil
391}
392
393func validResponseContentType(resp *http.Response) bool {
394	return helper.IsContentType(ResponseContentType, resp.Header.Get("Content-Type"))
395}
396
397func (api *API) GetGeoProxyURL() (*url.URL, error) {
398	geoProxyApiUrl := *api.URL
399	geoProxyApiUrl.Path, geoProxyApiUrl.RawPath = joinURLPath(api.URL, geoProxyEndpointPath)
400	geoProxyApiReq := &http.Request{
401		Method: "GET",
402		URL:    &geoProxyApiUrl,
403		Header: make(http.Header),
404	}
405
406	httpResponse, err := api.doRequestWithoutRedirects(geoProxyApiReq)
407	if err != nil {
408		return nil, fmt.Errorf("GetGeoProxyURL: do request: %v", err)
409	}
410	defer httpResponse.Body.Close()
411
412	if httpResponse.StatusCode != http.StatusOK {
413		return nil, fmt.Errorf("GetGeoProxyURL: Received HTTP status code: %v", httpResponse.StatusCode)
414	}
415
416	response := &GeoProxyEndpointResponse{}
417	if err := json.NewDecoder(httpResponse.Body).Decode(response); err != nil {
418		return nil, fmt.Errorf("GetGeoProxyURL: decode response: %v", err)
419	}
420
421	geoProxyURL, err := url.Parse(response.GeoProxyURL)
422	if err != nil {
423		return nil, fmt.Errorf("GetGeoProxyURL: Could not parse Geo proxy URL: %v, err: %v", response.GeoProxyURL, err)
424	}
425
426	return geoProxyURL, nil
427}
428