1# Copyright 2017 Google Inc. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#    http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""This package facilitates HTTP/REST requests to the registry."""
15
16from __future__ import absolute_import
17from __future__ import division
18
19from __future__ import print_function
20
21import json
22import re
23import threading
24
25from containerregistry.client import docker_creds
26from containerregistry.client import docker_name
27from containerregistry.client.v2_2 import docker_creds as v2_2_creds
28import httplib2
29import six.moves.http_client
30import six.moves.urllib.parse
31
# Options for docker_http.Transport actions.  Each value is passed to the
# resource name's scope() when requesting a Bearer token.
PULL = 'pull'
PUSH = 'push,pull'
# For now DELETE is PUSH, which is the read/write ACL.
DELETE = PUSH
CATALOG = 'catalog'
# DELETE aliases PUSH, so this list intentionally contains a duplicate value.
ACTIONS = [PULL, PUSH, DELETE, CATALOG]

# Docker media types.
MANIFEST_SCHEMA1_MIME = 'application/vnd.docker.distribution.manifest.v1+json'
MANIFEST_SCHEMA1_SIGNED_MIME = 'application/vnd.docker.distribution.manifest.v1+prettyjws'  # pylint: disable=line-too-long
MANIFEST_SCHEMA2_MIME = 'application/vnd.docker.distribution.manifest.v2+json'
MANIFEST_LIST_MIME = 'application/vnd.docker.distribution.manifest.list.v2+json'
LAYER_MIME = 'application/vnd.docker.image.rootfs.diff.tar.gzip'
FOREIGN_LAYER_MIME = 'application/vnd.docker.image.rootfs.foreign.diff.tar.gzip'
CONFIG_JSON_MIME = 'application/vnd.docker.container.image.v1+json'

# OCI media types.
OCI_MANIFEST_MIME = 'application/vnd.oci.image.manifest.v1+json'
OCI_IMAGE_INDEX_MIME = 'application/vnd.oci.image.index.v1+json'
OCI_LAYER_MIME = 'application/vnd.oci.image.layer.v1.tar'
OCI_GZIP_LAYER_MIME = 'application/vnd.oci.image.layer.v1.tar+gzip'
OCI_NONDISTRIBUTABLE_LAYER_MIME = 'application/vnd.oci.image.layer.nondistributable.v1.tar'  # pylint: disable=line-too-long
OCI_NONDISTRIBUTABLE_GZIP_LAYER_MIME = 'application/vnd.oci.image.layer.nondistributable.v1.tar+gzip'  # pylint: disable=line-too-long
OCI_CONFIG_JSON_MIME = 'application/vnd.oci.image.config.v1+json'

# Groupings of manifest media types by schema family.
MANIFEST_SCHEMA1_MIMES = [MANIFEST_SCHEMA1_MIME, MANIFEST_SCHEMA1_SIGNED_MIME]
MANIFEST_SCHEMA2_MIMES = [MANIFEST_SCHEMA2_MIME]
OCI_MANIFEST_MIMES = [OCI_MANIFEST_MIME]

# OCI and Schema2 are compatible formats.
SUPPORTED_MANIFEST_MIMES = [OCI_MANIFEST_MIME, MANIFEST_SCHEMA2_MIME]

# OCI Image Index and Manifest List are compatible formats.
MANIFEST_LIST_MIMES = [OCI_IMAGE_INDEX_MIME, MANIFEST_LIST_MIME]

# Docker & OCI layer mime types indicating foreign/non-distributable layers.
NON_DISTRIBUTABLE_LAYER_MIMES = [
    FOREIGN_LAYER_MIME, OCI_NONDISTRIBUTABLE_LAYER_MIME,
    OCI_NONDISTRIBUTABLE_GZIP_LAYER_MIME
]
71
72
class Diagnostic(object):
  """Diagnostic encapsulates a Registry v2 diagnostic message.

  This captures one of the "errors" from a v2 Registry error response
  message, as outlined here:
    https://github.com/docker/distribution/blob/master/docs/spec/api.md#errors

  Two diagnostics compare equal when their code, message and detail all
  match.  Comparison against any other type is never equal.

  Args:
    error: the decoded JSON of the "errors" array element.
  """

  def __init__(self, error):
    self._error = error

  def __eq__(self, other):
    # Defer to the other operand (and ultimately to identity comparison)
    # instead of raising AttributeError on foreign types.
    if not isinstance(other, Diagnostic):
      return NotImplemented
    return (self.code == other.code and
            self.message == other.message and
            self.detail == other.detail)

  def __ne__(self, other):
    # Python 2 does not derive "!=" from __eq__, so spell it out.
    result = self.__eq__(other)
    if result is NotImplemented:
      return result
    return not result

  def __repr__(self):
    return 'Diagnostic(code=%r, message=%r, detail=%r)' % (
        self.code, self.message, self.detail)

  @property
  def code(self):
    """The "code" field of the error, or None when absent."""
    return self._error.get('code')

  @property
  def message(self):
    """The "message" field of the error, or None when absent."""
    return self._error.get('message')

  @property
  def detail(self):
    """The "detail" field of the error, or None when absent."""
    return self._error.get('detail')
103
104
105def _DiagnosticsFromContent(content):
106  """Extract and return the diagnostics from content."""
107  try:
108    content = content.decode('utf8')
109  except:  # pylint: disable=bare-except
110    # Assume it's already decoded. Defensive coding for old py2 habits that
111    # are hard to break. Passing does not make the problem worse.
112    pass
113  try:
114    o = json.loads(content)
115    return [Diagnostic(d) for d in o.get('errors', [])]
116  except:  # pylint: disable=bare-except
117    return [Diagnostic({
118        'code': 'UNKNOWN',
119        'message': content,
120    })]
121
122
class V2DiagnosticException(Exception):
  """Raised when the registry answers with an unexpected HTTP status.

  The exception message holds the response on its first line, followed by
  one "message: detail" line per diagnostic parsed from the response body.

  Args:
    resp: the HTTP response object; its "status" attribute is exposed.
    content: the response body from which diagnostics are extracted.
  """

  def __init__(self, resp, content):
    self._resp = resp
    self._diagnostics = _DiagnosticsFromContent(content)

    lines = ['response: %s' % resp]
    for diagnostic in self._diagnostics:
      lines.append('%s: %s' % (diagnostic.message, diagnostic.detail))

    super(V2DiagnosticException, self).__init__('\n'.join(lines))

  @property
  def diagnostics(self):
    """The diagnostics parsed from the response body."""
    return self._diagnostics

  @property
  def response(self):
    """The HTTP response that triggered this exception."""
    return self._resp

  @property
  def status(self):
    """The HTTP status of the response."""
    return self._resp.status
145
146
class BadStateException(Exception):
  """Raised when the transport finds itself in an unexpected state."""
149
150
class TokenRefreshException(BadStateException):
  """Raised when refreshing the token fails."""
153
154
155def _CheckState(predicate, message = None):
156  if not predicate:
157    raise BadStateException(message if message else 'Unknown')
158
159
# Authentication modes recorded by Transport._Ping: _ANONYMOUS when the ping
# succeeds without a challenge, otherwise the capitalized scheme taken from
# the "www-authenticate" challenge (which must be _BASIC or _BEARER).
_ANONYMOUS = ''
_BASIC = 'Basic'
_BEARER = 'Bearer'

# Prefixes of the "www-authenticate" challenge parameters that
# Transport._Ping extracts.
_REALM_PFX = 'realm='
_SERVICE_PFX = 'service='
166
167
class Transport(object):
  """HTTP Transport abstraction to handle automatic v2 reauthentication.

  In the v2 Registry protocol, all of the API endpoints expect to receive
  'Bearer' authentication.  These Bearer tokens are generated by exchanging
  'Basic' or 'Anonymous' authentication with an authentication endpoint
  designated by the opening ping request.

  The Bearer tokens are scoped to a resource (typically repository), and
  are generated with a set of capabilities embedded (e.g. push, pull).

  The Docker client has a baked in 60-second expiration for Bearer tokens,
  and upon expiration, registries can reject any request with a 401.  The
  transport should automatically refresh the Bearer token and reissue the
  request.

  Constructing a Transport performs network I/O: it pings the registry and,
  for Bearer authentication, performs the initial token exchange.

  Args:
     name: the structured name of the docker resource being referenced.
     creds: the basic authentication credentials to use for authentication
            challenge exchanges.
     transport: the HTTP transport to use under the hood.
     action: One of docker_http.ACTIONS, for which we plan to use this transport

  Raises:
    BadStateException: the action is invalid or the ping response is not
      understood.
    TokenRefreshException: the initial Bearer token exchange failed.
  """

  def __init__(self, name, creds, transport, action):
    self._name = name
    self._basic_creds = creds
    self._transport = transport
    self._action = action
    self._lock = threading.Lock()

    _CheckState(action in ACTIONS,
                'Invalid action supplied to docker_http.Transport: %s' % action)

    # Ping once to establish realm, and then get a good credential
    # for use with this transport.
    self._Ping()
    if self._authentication == _BEARER:
      self._Refresh()
    elif self._authentication == _BASIC:
      self._creds = self._basic_creds
    else:
      self._creds = docker_creds.Anonymous()

  def _Ping(self):
    """Ping the v2 Registry.

    Only called during transport construction, this pings the listed
    v2 registry.  The point of this ping is to establish the "realm"
    and "service" to use for Basic for Bearer-Token exchanges.

    Raises:
      BadStateException: the registry answered with an unexpected status, or
        with a "www-authenticate" challenge that is missing, malformed, of an
        unsupported scheme, or lacking a realm.
    """
    # This initiates the pull by issuing a v2 ping:
    #   GET H:P/v2/
    headers = {
        'content-type': 'application/json',
        'user-agent': docker_name.USER_AGENT,
    }
    resp, content = self._transport.request(
        '{scheme}://{registry}/v2/'.format(
            scheme=Scheme(self._name.registry), registry=self._name.registry),
        'GET',
        body=None,
        headers=headers)

    # We expect a www-authenticate challenge.
    _CheckState(
        resp.status in [
            six.moves.http_client.OK, six.moves.http_client.UNAUTHORIZED
        ], 'Unexpected response pinging the registry: {}\nBody: {}'.format(
            resp.status, content or '<empty>'))

    # The registry is authenticated iff we have an authentication challenge.
    if resp.status == six.moves.http_client.OK:
      self._authentication = _ANONYMOUS
      self._service = 'none'
      self._realm = 'none'
      return

    # A 401 without the header is reported as a bad state rather than
    # surfacing as a KeyError.
    challenge = resp.get('www-authenticate')
    _CheckState(challenge and ' ' in challenge,
                'Unexpected "www-authenticate" header form: %s' % challenge)

    (scheme, remainder) = challenge.split(' ', 1)

    # Normalize the authentication scheme to have exactly the first letter
    # capitalized. Scheme matching is required to be case insensitive:
    # https://tools.ietf.org/html/rfc7235#section-2.1
    self._authentication = scheme.capitalize()

    _CheckState(self._authentication in [_BASIC, _BEARER],
                'Unexpected "www-authenticate" challenge type: %s' %
                self._authentication)

    # Default "_service" to the registry
    self._service = self._name.registry
    # Start from a known value so that a challenge without a realm fails the
    # check below instead of raising AttributeError.
    self._realm = None

    for token in remainder.split(','):
      # Parameters may be separated by ", "; drop the surrounding whitespace
      # so that the prefix checks still match.
      token = token.strip()
      if token.startswith(_REALM_PFX):
        self._realm = token[len(_REALM_PFX):].strip('"')
      elif token.startswith(_SERVICE_PFX):
        self._service = token[len(_SERVICE_PFX):].strip('"')

    # Make sure these got set.
    _CheckState(self._realm, 'Expected a "%s" in "www-authenticate" '
                'header: %s' % (_REALM_PFX, challenge))

  def _Scope(self):
    """Construct the resource scope to pass to a v2 auth endpoint."""
    return self._name.scope(self._action)

  def _Refresh(self):
    """Refreshes the Bearer token credentials underlying this transport.

    This utilizes the "realm" and "service" established during _Ping to
    set up _creds with up-to-date credentials, by passing the
    client-provided _basic_creds to the authorization realm.

    This is generally called under two circumstances:
      1) When the transport is created (eagerly)
      2) When a request fails on a 401 Unauthorized

    Raises:
      TokenRefreshException: Error during token exchange.
      BadStateException: the response carries neither a "token" nor an
        "access_token".
    """
    headers = {
        'content-type': 'application/json',
        'user-agent': docker_name.USER_AGENT,
        'Authorization': self._basic_creds.Get()
    }
    parameters = {
        'scope': self._Scope(),
        'service': self._service,
    }
    resp, content = self._transport.request(
        # 'realm' includes scheme and path
        '{realm}?{query}'.format(
            realm=self._realm,
            query=six.moves.urllib.parse.urlencode(parameters)),
        'GET',
        body=None,
        headers=headers)

    if resp.status != six.moves.http_client.OK:
      raise TokenRefreshException('Bad status during token exchange: %d\n%s' %
                                  (resp.status, content))

    try:
      content = content.decode('utf8')
    except (AttributeError, UnicodeError):
      # Assume it's already decoded. Defensive coding for old py2 habits that
      # are hard to break. Passing does not make the problem worse.
      pass
    wrapper_object = json.loads(content)
    token = wrapper_object.get('token') or wrapper_object.get('access_token')
    _CheckState(token is not None, 'Malformed JSON response: %s' % content)

    with self._lock:
      # We have successfully reauthenticated.
      self._creds = v2_2_creds.Bearer(token)

  # pylint: disable=invalid-name
  def Request(self,
              url,
              accepted_codes=None,
              method=None,
              body=None,
              content_type=None,
              accepted_mimes=None):
    """Wrapper containing much of the boilerplate REST logic for Registry calls.

    Args:
      url: the URL to which to talk
      accepted_codes: the list of acceptable http status codes (defaults to
              just 200 OK when omitted)
      method: the HTTP method to use (defaults to GET/PUT depending on
              whether body is provided)
      body: the body to pass into the PUT request (or None for GET)
      content_type: the mime-type of the request (or None for JSON).
              content_type is ignored when body is None.
      accepted_mimes: the list of acceptable mime-types

    Raises:
      BadStateException: an unexpected internal state has been encountered.
      V2DiagnosticException: an error has occurred interacting with v2.

    Returns:
      The response of the HTTP request, and its contents.
    """
    if accepted_codes is None:
      # Without this, the membership test below raises TypeError on None.
      accepted_codes = [six.moves.http_client.OK]

    if not method:
      method = 'GET' if not body else 'PUT'

    # If the first request fails on a 401 Unauthorized, then refresh the
    # Bearer token and retry, if the authentication mode is bearer.
    for retry_unauthorized in [self._authentication == _BEARER, False]:
      # self._creds may be changed by self._Refresh(), so do
      # not hoist this.
      headers = {
          'user-agent': docker_name.USER_AGENT,
      }
      auth = self._creds.Get()
      if auth:
        headers['Authorization'] = auth

      if body:  # Requests w/ bodies should have content-type.
        headers['content-type'] = (
            content_type if content_type else 'application/json')

      if accepted_mimes is not None:
        headers['Accept'] = ','.join(accepted_mimes)

      # POST/PUT require a content-length, when no body is supplied.
      if method in ('POST', 'PUT') and not body:
        headers['content-length'] = '0'

      resp, content = self._transport.request(
          url, method, body=body, headers=headers)

      if (retry_unauthorized and
          resp.status == six.moves.http_client.UNAUTHORIZED):
        # On Unauthorized, refresh the credential and retry.
        self._Refresh()
        continue
      break

    if resp.status not in accepted_codes:
      # Use the content returned by GCR as the error message.
      raise V2DiagnosticException(resp, content)

    return resp, content

  def PaginatedRequest(self,
                       url,
                       accepted_codes=None,
                       method=None,
                       body=None,
                       content_type=None):
    """Wrapper around Request that follows Link headers if they exist.

    Args:
      url: the URL to which to talk
      accepted_codes: the list of acceptable http status codes
      method: the HTTP method to use (defaults to GET/PUT depending on
              whether body is provided)
      body: the body to pass into the PUT request (or None for GET)
      content_type: the mime-type of the request (or None for JSON)

    Yields:
      The return value of calling Request for each page of results.
    """
    next_page = url

    while next_page:
      resp, content = self.Request(next_page, accepted_codes, method, body,
                                   content_type)
      yield resp, content

      next_link = ParseNextLinkHeader(resp)
      if not next_link:
        break
      # The link target may be relative (e.g. "/v2/_catalog?last=x");
      # resolve it against the page just fetched.  Absolute targets are
      # returned unchanged by urljoin.
      next_page = six.moves.urllib.parse.urljoin(next_page, next_link)
430
def ParseNextLinkHeader(resp):
  """Returns "next" link from RFC 5988 Link header or None if not present.

  Args:
    resp: a mapping of response headers, read via resp.get('link').

  Returns:
    The target found between "<" and ">" of the rel="next" entry, or None
    when there is no Link header or it holds no such entry.
  """
  header = resp.get('link')
  found = re.match(r'.*<(.+)>;\s*rel="next".*', header) if header else None
  return found.group(1) if found else None
442
443
def Scheme(endpoint):
  """Returns the URL scheme to use when talking to the given endpoint.

  Args:
    endpoint: the registry host, optionally followed by ":port".

  Returns:
    'http' when the endpoint starts with "localhost:" or its host ends in
    ".local" or ".localhost" (with an optional port); 'https' otherwise.
  """
  is_local = (
      endpoint.startswith('localhost:') or
      re.match(r'.*\.local(?:host)?(?::\d{1,5})?$', endpoint) is not None)
  return 'http' if is_local else 'https'
452