1package vcs
2
3import (
4	"encoding/json"
5	"encoding/xml"
6	"fmt"
7	"io"
8	"io/ioutil"
9	"net/http"
10	"net/url"
11	"regexp"
12	"strings"
13)
14
15type vcsInfo struct {
16	host     string
17	pattern  string
18	vcs      Type
19	addCheck func(m map[string]string, u *url.URL) (Type, error)
20	regex    *regexp.Regexp
21}
22
// scpSyntaxRe recognises the scp-style "user@host:path" shorthand that Git
// accepts for SSH remotes. Its capture groups are, in order, the user name,
// the host and the path.
var scpSyntaxRe = regexp.MustCompile(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`)
26
27var vcsList = []*vcsInfo{
28	{
29		host:    "github.com",
30		vcs:     Git,
31		pattern: `^(github\.com[/|:][A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`,
32	},
33	{
34		host:     "bitbucket.org",
35		pattern:  `^(bitbucket\.org/(?P<name>[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`,
36		addCheck: checkBitbucket,
37	},
38	{
39		host:    "launchpad.net",
40		pattern: `^(launchpad\.net/(([A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)?|~[A-Za-z0-9_.\-]+/(\+junk|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`,
41		vcs:     Bzr,
42	},
43	{
44		host:    "git.launchpad.net",
45		vcs:     Git,
46		pattern: `^(git\.launchpad\.net/(([A-Za-z0-9_.\-]+)|~[A-Za-z0-9_.\-]+/(\+git|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))$`,
47	},
48	{
49		host:    "hub.jazz.net",
50		vcs:     Git,
51		pattern: `^(hub\.jazz\.net/git/[a-z0-9]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`,
52	},
53	{
54		host:    "go.googlesource.com",
55		vcs:     Git,
56		pattern: `^(go\.googlesource\.com/[A-Za-z0-9_.\-]+/?)$`,
57	},
58	{
59		host:    "git.openstack.org",
60		vcs:     Git,
61		pattern: `^(git\.openstack\.org/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)$`,
62	},
63	// If none of the previous detect the type they will fall to this looking for the type in a generic sense
64	// by the extension to the path.
65	{
66		addCheck: checkURL,
67		pattern:  `\.(?P<type>git|hg|svn|bzr)$`,
68	},
69}
70
71func init() {
72	// Precompile the regular expressions used to check VCS locations.
73	for _, v := range vcsList {
74		v.regex = regexp.MustCompile(v.pattern)
75	}
76}
77
78// This function is really a hack around Go redirects rather than around
79// something VCS related. Should this be moved to the glide project or a
80// helper function?
81func detectVcsFromRemote(vcsURL string) (Type, string, error) {
82	t, e := detectVcsFromURL(vcsURL)
83	if e == nil {
84		return t, vcsURL, nil
85	} else if e != ErrCannotDetectVCS {
86		return NoVCS, "", e
87	}
88
89	// Pages like https://golang.org/x/net provide an html document with
90	// meta tags containing a location to work with. The go tool uses
91	// a meta tag with the name go-import which is what we use here.
92	// godoc.org also has one call go-source that we do not need to use.
93	// The value of go-import is in the form "prefix vcs repo". The prefix
94	// should match the vcsURL and the repo is a location that can be
95	// checked out. Note, to get the html document you you need to add
96	// ?go-get=1 to the url.
97	u, err := url.Parse(vcsURL)
98	if err != nil {
99		return NoVCS, "", err
100	}
101	if u.RawQuery == "" {
102		u.RawQuery = "go-get=1"
103	} else {
104		u.RawQuery = u.RawQuery + "+go-get=1"
105	}
106	checkURL := u.String()
107	resp, err := http.Get(checkURL)
108	if err != nil {
109		return NoVCS, "", ErrCannotDetectVCS
110	}
111	defer resp.Body.Close()
112	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
113		if resp.StatusCode == 404 {
114			return NoVCS, "", NewRemoteError(fmt.Sprintf("%s Not Found", vcsURL), nil, "")
115		} else if resp.StatusCode == 401 || resp.StatusCode == 403 {
116			return NoVCS, "", NewRemoteError(fmt.Sprintf("%s Access Denied", vcsURL), nil, "")
117		}
118		return NoVCS, "", ErrCannotDetectVCS
119	}
120
121	t, nu, err := parseImportFromBody(u, resp.Body)
122	if err != nil {
123		// TODO(mattfarina): Log the parsing error
124		return NoVCS, "", ErrCannotDetectVCS
125	} else if t == "" || nu == "" {
126		return NoVCS, "", ErrCannotDetectVCS
127	}
128
129	return t, nu, nil
130}
131
132// From a remote vcs url attempt to detect the VCS.
133func detectVcsFromURL(vcsURL string) (Type, error) {
134
135	var u *url.URL
136	var err error
137
138	if m := scpSyntaxRe.FindStringSubmatch(vcsURL); m != nil {
139		// Match SCP-like syntax and convert it to a URL.
140		// Eg, "git@github.com:user/repo" becomes
141		// "ssh://git@github.com/user/repo".
142		u = &url.URL{
143			Scheme: "ssh",
144			User:   url.User(m[1]),
145			Host:   m[2],
146			Path:   "/" + m[3],
147		}
148	} else {
149		u, err = url.Parse(vcsURL)
150		if err != nil {
151			return "", err
152		}
153	}
154
155	// Detect file schemes
156	if u.Scheme == "file" {
157		return DetectVcsFromFS(u.Path)
158	}
159
160	if u.Host == "" {
161		return "", ErrCannotDetectVCS
162	}
163
164	// Try to detect from the scheme
165	switch u.Scheme {
166	case "git+ssh":
167		return Git, nil
168	case "git":
169		return Git, nil
170	case "bzr+ssh":
171		return Bzr, nil
172	case "svn+ssh":
173		return Svn, nil
174	}
175
176	// Try to detect from known hosts, such as Github
177	for _, v := range vcsList {
178		if v.host != "" && v.host != u.Host {
179			continue
180		}
181
182		// Make sure the pattern matches for an actual repo location. For example,
183		// we should fail if the VCS listed is github.com/masterminds as that's
184		// not actually a repo.
185		uCheck := u.Host + u.Path
186		m := v.regex.FindStringSubmatch(uCheck)
187		if m == nil {
188			if v.host != "" {
189				return "", ErrCannotDetectVCS
190			}
191
192			continue
193		}
194
195		// If we are here the host matches. If the host has a singular
196		// VCS type, such as Github, we can return the type right away.
197		if v.vcs != "" {
198			return v.vcs, nil
199		}
200
201		// Run additional checks to determine try and determine the repo
202		// for the matched service.
203		info := make(map[string]string)
204		for i, name := range v.regex.SubexpNames() {
205			if name != "" {
206				info[name] = m[i]
207			}
208		}
209		t, err := v.addCheck(info, u)
210		if err != nil {
211			switch err.(type) {
212			case *RemoteError:
213				return "", err
214			}
215			return "", ErrCannotDetectVCS
216		}
217
218		return t, nil
219	}
220
221	// Attempt to ascertain from the username passed in.
222	if u.User != nil {
223		un := u.User.Username()
224		if un == "git" {
225			return Git, nil
226		} else if un == "hg" {
227			return Hg, nil
228		}
229	}
230
231	// Unable to determine the vcs from the url.
232	return "", ErrCannotDetectVCS
233}
234
235// Figure out the type for Bitbucket by the passed in information
236// or via the public API.
237func checkBitbucket(i map[string]string, ul *url.URL) (Type, error) {
238
239	// Fast path for ssh urls where we may not even be able to
240	// anonymously get details from the API.
241	if ul.User != nil {
242		un := ul.User.Username()
243		if un == "git" {
244			return Git, nil
245		} else if un == "hg" {
246			return Hg, nil
247		}
248	}
249
250	// The part of the response we care about.
251	var response struct {
252		SCM Type `json:"scm"`
253	}
254
255	u := expand(i, "https://api.bitbucket.org/1.0/repositories/{name}")
256	data, err := get(u)
257	if err != nil {
258		return "", err
259	}
260
261	if err := json.Unmarshal(data, &response); err != nil {
262		return "", fmt.Errorf("Decoding error %s: %v", u, err)
263	}
264
265	return response.SCM, nil
266
267}
268
269// Expect a type key on i with the exact type detected from the regex.
270func checkURL(i map[string]string, u *url.URL) (Type, error) {
271	return Type(i["type"]), nil
272}
273
274func get(url string) ([]byte, error) {
275	resp, err := http.Get(url)
276	if err != nil {
277		return nil, err
278	}
279	defer resp.Body.Close()
280	if resp.StatusCode != 200 {
281		if resp.StatusCode == 404 {
282			return nil, NewRemoteError("Not Found", err, resp.Status)
283		} else if resp.StatusCode == 401 || resp.StatusCode == 403 {
284			return nil, NewRemoteError("Access Denied", err, resp.Status)
285		}
286		return nil, fmt.Errorf("%s: %s", url, resp.Status)
287	}
288	b, err := ioutil.ReadAll(resp.Body)
289	if err != nil {
290		return nil, fmt.Errorf("%s: %v", url, err)
291	}
292	return b, nil
293}
294
// expand returns s with every "{key}" placeholder replaced by the value that
// match holds for key. Placeholders without an entry in match are left as
// they are.
//
// All replacements happen in a single pass over s, so text inserted for one
// placeholder is never expanded again. Replacing key by key would make the
// result depend on Go's random map iteration order whenever a value itself
// looks like a placeholder. Keys are assumed not to contain "}".
func expand(match map[string]string, s string) string {
	if len(match) == 0 {
		return s
	}

	// strings.NewReplacer takes alternating old/new arguments.
	pairs := make([]string, 0, 2*len(match))
	for k, v := range match {
		pairs = append(pairs, "{"+k+"}", v)
	}
	return strings.NewReplacer(pairs...).Replace(s)
}
301
302func parseImportFromBody(ur *url.URL, r io.ReadCloser) (tp Type, u string, err error) {
303	d := xml.NewDecoder(r)
304	d.CharsetReader = charsetReader
305	d.Strict = false
306	var t xml.Token
307	for {
308		t, err = d.Token()
309		if err != nil {
310			if err == io.EOF {
311				// When the end is reached it could not detect a VCS if it
312				// got here.
313				err = ErrCannotDetectVCS
314			}
315			return
316		}
317		if e, ok := t.(xml.StartElement); ok && strings.EqualFold(e.Name.Local, "body") {
318			return
319		}
320		if e, ok := t.(xml.EndElement); ok && strings.EqualFold(e.Name.Local, "head") {
321			return
322		}
323		e, ok := t.(xml.StartElement)
324		if !ok || !strings.EqualFold(e.Name.Local, "meta") {
325			continue
326		}
327		if attrValue(e.Attr, "name") != "go-import" {
328			continue
329		}
330		if f := strings.Fields(attrValue(e.Attr, "content")); len(f) == 3 {
331			// If the prefix supplied by the remote system isn't a prefix to the
332			// url we're fetching continue to look for other imports.
333			// This will work for exact matches and prefixes. For example,
334			// golang.org/x/net as a prefix will match for golang.org/x/net and
335			// golang.org/x/net/context.
336			vcsURL := ur.Host + ur.Path
337			if !strings.HasPrefix(vcsURL, f[0]) {
338				continue
339			} else {
340				switch Type(f[1]) {
341				case Git:
342					tp = Git
343				case Svn:
344					tp = Svn
345				case Bzr:
346					tp = Bzr
347				case Hg:
348					tp = Hg
349				}
350
351				u = f[2]
352				return
353			}
354		}
355	}
356}
357
// charsetReader has the signature of xml.Decoder's CharsetReader hook and
// returns a reader yielding input as UTF-8.
//
// ASCII (also announced as "us-ascii") is a subset of UTF-8 and UTF-8 needs no
// conversion, so for those charsets input is returned untouched. The charset
// name is compared case-insensitively. Every other charset is rejected with an
// error because no transcoding is implemented here.
func charsetReader(charset string, input io.Reader) (io.Reader, error) {
	switch strings.ToLower(charset) {
	case "ascii", "us-ascii", "utf-8":
		return input, nil
	default:
		return nil, fmt.Errorf("can't decode XML document using charset %q", charset)
	}
}
366
// attrValue returns the value of the first attribute in attrs whose local name
// equals name, ignoring case. It returns the empty string when no attribute
// matches.
func attrValue(attrs []xml.Attr, name string) string {
	for idx := range attrs {
		if !strings.EqualFold(attrs[idx].Name.Local, name) {
			continue
		}
		return attrs[idx].Value
	}
	return ""
}
375