1package vcs 2 3import ( 4 "encoding/json" 5 "encoding/xml" 6 "fmt" 7 "io" 8 "io/ioutil" 9 "net/http" 10 "net/url" 11 "regexp" 12 "strings" 13) 14 15type vcsInfo struct { 16 host string 17 pattern string 18 vcs Type 19 addCheck func(m map[string]string, u *url.URL) (Type, error) 20 regex *regexp.Regexp 21} 22 23// scpSyntaxRe matches the SCP-like addresses used by Git to access 24// repositories by SSH. 25var scpSyntaxRe = regexp.MustCompile(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`) 26 27var vcsList = []*vcsInfo{ 28 { 29 host: "github.com", 30 vcs: Git, 31 pattern: `^(github\.com[/|:][A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`, 32 }, 33 { 34 host: "bitbucket.org", 35 pattern: `^(bitbucket\.org/(?P<name>[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`, 36 addCheck: checkBitbucket, 37 }, 38 { 39 host: "launchpad.net", 40 pattern: `^(launchpad\.net/(([A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)?|~[A-Za-z0-9_.\-]+/(\+junk|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`, 41 vcs: Bzr, 42 }, 43 { 44 host: "git.launchpad.net", 45 vcs: Git, 46 pattern: `^(git\.launchpad\.net/(([A-Za-z0-9_.\-]+)|~[A-Za-z0-9_.\-]+/(\+git|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))$`, 47 }, 48 { 49 host: "hub.jazz.net", 50 vcs: Git, 51 pattern: `^(hub\.jazz\.net/git/[a-z0-9]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`, 52 }, 53 { 54 host: "go.googlesource.com", 55 vcs: Git, 56 pattern: `^(go\.googlesource\.com/[A-Za-z0-9_.\-]+/?)$`, 57 }, 58 { 59 host: "git.openstack.org", 60 vcs: Git, 61 pattern: `^(git\.openstack\.org/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)$`, 62 }, 63 // If none of the previous detect the type they will fall to this looking for the type in a generic sense 64 // by the extension to the path. 65 { 66 addCheck: checkURL, 67 pattern: `\.(?P<type>git|hg|svn|bzr)$`, 68 }, 69} 70 71func init() { 72 // Precompile the regular expressions used to check VCS locations. 73 for _, v := range vcsList { 74 v.regex = regexp.MustCompile(v.pattern) 75 } 76} 77 78// This function is really a hack around Go redirects rather than around 79// something VCS related. Should this be moved to the glide project or a 80// helper function? 81func detectVcsFromRemote(vcsURL string) (Type, string, error) { 82 t, e := detectVcsFromURL(vcsURL) 83 if e == nil { 84 return t, vcsURL, nil 85 } else if e != ErrCannotDetectVCS { 86 return NoVCS, "", e 87 } 88 89 // Pages like https://golang.org/x/net provide an html document with 90 // meta tags containing a location to work with. The go tool uses 91 // a meta tag with the name go-import which is what we use here. 92 // godoc.org also has one call go-source that we do not need to use. 93 // The value of go-import is in the form "prefix vcs repo". The prefix 94 // should match the vcsURL and the repo is a location that can be 95 // checked out. Note, to get the html document you you need to add 96 // ?go-get=1 to the url. 97 u, err := url.Parse(vcsURL) 98 if err != nil { 99 return NoVCS, "", err 100 } 101 if u.RawQuery == "" { 102 u.RawQuery = "go-get=1" 103 } else { 104 u.RawQuery = u.RawQuery + "+go-get=1" 105 } 106 checkURL := u.String() 107 resp, err := http.Get(checkURL) 108 if err != nil { 109 return NoVCS, "", ErrCannotDetectVCS 110 } 111 defer resp.Body.Close() 112 if resp.StatusCode < 200 || resp.StatusCode >= 300 { 113 if resp.StatusCode == 404 { 114 return NoVCS, "", NewRemoteError(fmt.Sprintf("%s Not Found", vcsURL), nil, "") 115 } else if resp.StatusCode == 401 || resp.StatusCode == 403 { 116 return NoVCS, "", NewRemoteError(fmt.Sprintf("%s Access Denied", vcsURL), nil, "") 117 } 118 return NoVCS, "", ErrCannotDetectVCS 119 } 120 121 t, nu, err := parseImportFromBody(u, resp.Body) 122 if err != nil { 123 // TODO(mattfarina): Log the parsing error 124 return NoVCS, "", ErrCannotDetectVCS 125 } else if t == "" || nu == "" { 126 return NoVCS, "", ErrCannotDetectVCS 127 } 128 129 return t, nu, nil 130} 131 132// From a remote vcs url attempt to detect the VCS. 133func detectVcsFromURL(vcsURL string) (Type, error) { 134 135 var u *url.URL 136 var err error 137 138 if m := scpSyntaxRe.FindStringSubmatch(vcsURL); m != nil { 139 // Match SCP-like syntax and convert it to a URL. 140 // Eg, "git@github.com:user/repo" becomes 141 // "ssh://git@github.com/user/repo". 142 u = &url.URL{ 143 Scheme: "ssh", 144 User: url.User(m[1]), 145 Host: m[2], 146 Path: "/" + m[3], 147 } 148 } else { 149 u, err = url.Parse(vcsURL) 150 if err != nil { 151 return "", err 152 } 153 } 154 155 // Detect file schemes 156 if u.Scheme == "file" { 157 return DetectVcsFromFS(u.Path) 158 } 159 160 if u.Host == "" { 161 return "", ErrCannotDetectVCS 162 } 163 164 // Try to detect from the scheme 165 switch u.Scheme { 166 case "git+ssh": 167 return Git, nil 168 case "git": 169 return Git, nil 170 case "bzr+ssh": 171 return Bzr, nil 172 case "svn+ssh": 173 return Svn, nil 174 } 175 176 // Try to detect from known hosts, such as Github 177 for _, v := range vcsList { 178 if v.host != "" && v.host != u.Host { 179 continue 180 } 181 182 // Make sure the pattern matches for an actual repo location. For example, 183 // we should fail if the VCS listed is github.com/masterminds as that's 184 // not actually a repo. 185 uCheck := u.Host + u.Path 186 m := v.regex.FindStringSubmatch(uCheck) 187 if m == nil { 188 if v.host != "" { 189 return "", ErrCannotDetectVCS 190 } 191 192 continue 193 } 194 195 // If we are here the host matches. If the host has a singular 196 // VCS type, such as Github, we can return the type right away. 197 if v.vcs != "" { 198 return v.vcs, nil 199 } 200 201 // Run additional checks to determine try and determine the repo 202 // for the matched service. 203 info := make(map[string]string) 204 for i, name := range v.regex.SubexpNames() { 205 if name != "" { 206 info[name] = m[i] 207 } 208 } 209 t, err := v.addCheck(info, u) 210 if err != nil { 211 switch err.(type) { 212 case *RemoteError: 213 return "", err 214 } 215 return "", ErrCannotDetectVCS 216 } 217 218 return t, nil 219 } 220 221 // Attempt to ascertain from the username passed in. 222 if u.User != nil { 223 un := u.User.Username() 224 if un == "git" { 225 return Git, nil 226 } else if un == "hg" { 227 return Hg, nil 228 } 229 } 230 231 // Unable to determine the vcs from the url. 232 return "", ErrCannotDetectVCS 233} 234 235// Figure out the type for Bitbucket by the passed in information 236// or via the public API. 237func checkBitbucket(i map[string]string, ul *url.URL) (Type, error) { 238 239 // Fast path for ssh urls where we may not even be able to 240 // anonymously get details from the API. 241 if ul.User != nil { 242 un := ul.User.Username() 243 if un == "git" { 244 return Git, nil 245 } else if un == "hg" { 246 return Hg, nil 247 } 248 } 249 250 // The part of the response we care about. 251 var response struct { 252 SCM Type `json:"scm"` 253 } 254 255 u := expand(i, "https://api.bitbucket.org/1.0/repositories/{name}") 256 data, err := get(u) 257 if err != nil { 258 return "", err 259 } 260 261 if err := json.Unmarshal(data, &response); err != nil { 262 return "", fmt.Errorf("Decoding error %s: %v", u, err) 263 } 264 265 return response.SCM, nil 266 267} 268 269// Expect a type key on i with the exact type detected from the regex. 270func checkURL(i map[string]string, u *url.URL) (Type, error) { 271 return Type(i["type"]), nil 272} 273 274func get(url string) ([]byte, error) { 275 resp, err := http.Get(url) 276 if err != nil { 277 return nil, err 278 } 279 defer resp.Body.Close() 280 if resp.StatusCode != 200 { 281 if resp.StatusCode == 404 { 282 return nil, NewRemoteError("Not Found", err, resp.Status) 283 } else if resp.StatusCode == 401 || resp.StatusCode == 403 { 284 return nil, NewRemoteError("Access Denied", err, resp.Status) 285 } 286 return nil, fmt.Errorf("%s: %s", url, resp.Status) 287 } 288 b, err := ioutil.ReadAll(resp.Body) 289 if err != nil { 290 return nil, fmt.Errorf("%s: %v", url, err) 291 } 292 return b, nil 293} 294 295func expand(match map[string]string, s string) string { 296 for k, v := range match { 297 s = strings.Replace(s, "{"+k+"}", v, -1) 298 } 299 return s 300} 301 302func parseImportFromBody(ur *url.URL, r io.ReadCloser) (tp Type, u string, err error) { 303 d := xml.NewDecoder(r) 304 d.CharsetReader = charsetReader 305 d.Strict = false 306 var t xml.Token 307 for { 308 t, err = d.Token() 309 if err != nil { 310 if err == io.EOF { 311 // When the end is reached it could not detect a VCS if it 312 // got here. 313 err = ErrCannotDetectVCS 314 } 315 return 316 } 317 if e, ok := t.(xml.StartElement); ok && strings.EqualFold(e.Name.Local, "body") { 318 return 319 } 320 if e, ok := t.(xml.EndElement); ok && strings.EqualFold(e.Name.Local, "head") { 321 return 322 } 323 e, ok := t.(xml.StartElement) 324 if !ok || !strings.EqualFold(e.Name.Local, "meta") { 325 continue 326 } 327 if attrValue(e.Attr, "name") != "go-import" { 328 continue 329 } 330 if f := strings.Fields(attrValue(e.Attr, "content")); len(f) == 3 { 331 // If the prefix supplied by the remote system isn't a prefix to the 332 // url we're fetching continue to look for other imports. 333 // This will work for exact matches and prefixes. For example, 334 // golang.org/x/net as a prefix will match for golang.org/x/net and 335 // golang.org/x/net/context. 336 vcsURL := ur.Host + ur.Path 337 if !strings.HasPrefix(vcsURL, f[0]) { 338 continue 339 } else { 340 switch Type(f[1]) { 341 case Git: 342 tp = Git 343 case Svn: 344 tp = Svn 345 case Bzr: 346 tp = Bzr 347 case Hg: 348 tp = Hg 349 } 350 351 u = f[2] 352 return 353 } 354 } 355 } 356} 357 358func charsetReader(charset string, input io.Reader) (io.Reader, error) { 359 switch strings.ToLower(charset) { 360 case "ascii": 361 return input, nil 362 default: 363 return nil, fmt.Errorf("can't decode XML document using charset %q", charset) 364 } 365} 366 367func attrValue(attrs []xml.Attr, name string) string { 368 for _, a := range attrs { 369 if strings.EqualFold(a.Name.Local, name) { 370 return a.Value 371 } 372 } 373 return "" 374} 375