"""
Cookie helpers: a ``CookieManager`` wrapper around ``CookieJar`` plus the
mock request/response adapters that ``cookielib`` needs.

RTFM:

* http://docs.python.org/2/library/cookielib.html#cookie-objects

Some code is taken from
    https://github.com/kennethreitz/requests/blob/master/requests/cookies.py
"""
import json

from six.moves.http_cookiejar import CookieJar, Cookie
from six.moves.urllib.parse import urlparse, urlunparse

from grab.error import GrabMisuseError

# Cookie attributes that are dumped by `CookieManager.get_dict` (and hence
# written by `save_to_file` / read back by `load_from_file`).
COOKIE_ATTRS = ('name', 'value', 'version', 'port', 'domain',
                'path', 'secure', 'expires', 'discard', 'comment',
                'comment_url', 'rfc2109')


# Source:
# https://github.com/kennethreitz/requests/blob/master/requests/cookies.py
class MockRequest(object):
    """Wraps a `requests.Request` to mimic a `urllib2.Request`.

    The code in `cookielib.CookieJar` expects this interface in order to
    correctly manage cookie policies, i.e., determine whether a cookie can be
    set, given the domains of the request and the cookie.

    The original request object is read-only. The client is responsible for
    collecting the new headers via `get_new_headers()` and interpreting them
    appropriately. You probably want `get_cookie_header`, defined below.
    """

    def __init__(self, request):
        """
        :param request: object exposing `url` and `headers` attributes
        """
        self._req = request
        # Headers added by cookielib; kept apart from the wrapped request.
        self._new_headers = {}
        self.type = urlparse(self._req.url).scheme

    def get_type(self):
        """Return the URL scheme of the wrapped request."""
        return self.type

    def get_host(self):
        """Return the network location (host[:port]) of the request URL."""
        return urlparse(self._req.url).netloc

    def get_origin_req_host(self):
        """Return the originating host; same as `get_host()` here."""
        return self.get_host()

    def get_full_url(self):
        """Return the request URL, honouring an explicit `Host` header."""
        # Only return the request's URL if the user hadn't set the Host
        # header
        host = self._req.headers.get('Host')
        if not host:
            return self._req.url
        # If they did set it, reconstruct the URL with that host as netloc
        parsed = urlparse(self._req.url)
        return urlunparse([
            parsed.scheme, host, parsed.path, parsed.params, parsed.query,
            parsed.fragment
        ])

    def is_unverifiable(self):
        """Always report the request as unverifiable."""
        return True

    def has_header(self, name):
        """Tell whether `name` is set on the request or among new headers."""
        return name in self._req.headers or name in self._new_headers

    def get_header(self, name, default=None):
        """Return header `name`, preferring the wrapped request's value."""
        return self._req.headers.get(name,
                                     self._new_headers.get(name, default))

    def add_header(self, key, val):
        """
        cookielib has no legitimate use for this method;
        add it back if you find one.
        """
        raise NotImplementedError('Cookie headers should be added'
                                  ' with add_unredirected_header()')

    def add_unredirected_header(self, name, value):
        """Record a header produced by cookielib (e.g. `Cookie`)."""
        self._new_headers[name] = value

    def get_new_headers(self):
        """Return the dict of headers added via `add_unredirected_header`."""
        return self._new_headers

    @property
    def unverifiable(self):
        return self.is_unverifiable()

    @property
    def origin_req_host(self):
        return self.get_origin_req_host()

    @property
    def host(self):
        return self.get_host()


# https://github.com/kennethreitz/requests/blob/master/requests/cookies.py
class MockResponse(object):
    """Wraps a `httplib.HTTPMessage` to mimic a `urllib.addinfourl`.

    ...what? Basically, expose the parsed HTTP headers from the server
    response the way `cookielib` expects to see them.
    """

    def __init__(self, headers):
        """Make a MockResponse for `cookielib` to read.

        :param headers: a httplib.HTTPMessage or analogous carrying the
            headers
        """
        self._headers = headers

    def info(self):
        """Return the wrapped headers object."""
        return self._headers

    def getheaders(self, name):
        """Return the values of header `name` from the wrapped headers.

        Delegates to the `getheaders` method of the wrapped object, so that
        object must provide one.
        """
        # The result must be returned; dropping it made this method
        # always yield None.
        return self._headers.getheaders(name)


def create_cookie(name, value, domain, httponly=None, **kwargs):
    """Create a `cookielib.Cookie` instance.

    :param name: name of cookie
    :param value: value of cookie
    :param domain: cookie domain; `'localhost'` is stored as an empty domain
    :param httponly: value stored under the `HttpOnly` key of the cookie's
        `rest` mapping
    :param kwargs: overrides for the remaining `Cookie` constructor
        arguments (`version`, `port`, `path`, `secure`, `expires`,
        `discard`, `comment`, `comment_url`, `rfc2109`, `rest`)
    :raises GrabMisuseError: if an unknown keyword argument is given
    """

    if domain == 'localhost':
        domain = ''
    config = dict(
        name=name,
        value=value,
        version=0,
        port=None,
        domain=domain,
        path='/',
        secure=False,
        expires=None,
        discard=True,
        comment=None,
        comment_url=None,
        rfc2109=False,
        rest={'HttpOnly': httponly},
    )

    for key in kwargs:
        if key not in config:
            raise GrabMisuseError('Function `create_cookie` does not accept '
                                  '`%s` argument' % key)

    config.update(**kwargs)

    # Work on a copy so that a `rest` dict supplied by the caller is not
    # mutated; the explicit `httponly` argument always wins.
    rest = dict(config['rest'] or {})
    rest['HttpOnly'] = httponly
    config['rest'] = rest

    # Derived flags required by the Cookie constructor.
    config['port_specified'] = bool(config['port'])
    config['domain_specified'] = bool(config['domain'])
    config['domain_initial_dot'] = (config['domain'] or '').startswith('.')
    config['path_specified'] = bool(config['path'])

    return Cookie(**config)


class CookieManager(object):
    """
    Each Grab instance has `cookies` attribute that is instance of
    `CookieManager` class.

    That class contains helpful methods to create, load, save cookies from/to
    different places.
    """

    __slots__ = ('cookiejar',)

    def __init__(self, cookiejar=None):
        """
        :param cookiejar: existing `CookieJar` to wrap; a new empty jar is
            created when omitted
        """
        # Compare with None explicitly: an empty CookieJar is falsy.
        if cookiejar is not None:
            self.cookiejar = cookiejar
        else:
            self.cookiejar = CookieJar()

    def set(self, name, value, domain, **kwargs):
        """Add new cookie or replace existing cookie with same parameters.

        :param name: name of cookie
        :param value: value of cookie
        :param domain: cookie domain (`'localhost'` is normalised to an
            empty domain by `create_cookie`)
        :param kwargs: extra attributes of cookie
        """

        self.cookiejar.set_cookie(create_cookie(name, value, domain, **kwargs))

    def update(self, cookies):
        """Copy every cookie from `cookies` into this manager.

        :param cookies: a `CookieJar` or another `CookieManager`
        :raises GrabMisuseError: if `cookies` is of any other type
        """
        if isinstance(cookies, CookieJar):
            source = cookies
        elif isinstance(cookies, CookieManager):
            source = cookies.cookiejar
        else:
            raise GrabMisuseError('Unknown type of cookies argument: %s'
                                  % type(cookies))
        for cookie in source:
            self.cookiejar.set_cookie(cookie)

    @classmethod
    def from_cookie_list(cls, clist):
        """Build a manager from an iterable of `Cookie` objects."""
        jar = CookieJar()
        for cookie in clist:
            jar.set_cookie(cookie)
        return cls(jar)

    def clear(self):
        """Drop all cookies by replacing the jar with a new empty one."""
        self.cookiejar = CookieJar()

    def __getstate__(self):
        """Return picklable state.

        The jar itself is not stored; its cookies are saved as a plain list
        under the `_cookiejar_cookies` key instead.
        """
        state = {}
        for cls in type(self).mro():
            for slot in getattr(cls, '__slots__', ()):
                if slot != '__weakref__' and hasattr(self, slot):
                    state[slot] = getattr(self, slot)

        state['_cookiejar_cookies'] = list(self.cookiejar)
        state.pop('cookiejar', None)

        return state

    def __setstate__(self, state):
        """Restore state produced by `__getstate__`.

        A fresh `CookieJar` is built from the saved cookie list. The
        passed-in `state` mapping is left unmodified.
        """
        jar = CookieJar()
        for cookie in state['_cookiejar_cookies']:
            jar.set_cookie(cookie)

        for slot, value in state.items():
            # The jar is rebuilt above; the raw cookie list is not a slot.
            if slot not in ('_cookiejar_cookies', 'cookiejar'):
                setattr(self, slot, value)
        self.cookiejar = jar

    def __getitem__(self, key):
        """Return the value of the first cookie named `key`.

        :raises KeyError: if no cookie has that name
        """
        for cookie in self.cookiejar:
            if cookie.name == key:
                return cookie.value
        raise KeyError(key)

    def items(self):
        """Return a list of `(name, value)` pairs for all cookies."""
        return [(cookie.name, cookie.value) for cookie in self.cookiejar]

    def load_from_file(self, path):
        """
        Load cookies from the file.

        Content of file should be a JSON-serialized list of dicts. Each dict
        must have `name`, `value` and `domain` keys; remaining keys are
        passed to `set` as extra cookie attributes. An empty file loads
        nothing.
        """

        with open(path) as inf:
            data = inf.read()
        items = json.loads(data) if data else []
        for item in items:
            extra = dict((x, y) for x, y in item.items()
                         if x not in ('name', 'value', 'domain'))
            self.set(item['name'], item['value'], item['domain'], **extra)

    def get_dict(self):
        """Return a list of dicts, one per cookie.

        Despite the name, the result is a list; each dict maps every name in
        `COOKIE_ATTRS` to the corresponding cookie attribute.
        """
        return [dict((x, getattr(cookie, x)) for x in COOKIE_ATTRS)
                for cookie in self.cookiejar]

    def save_to_file(self, path):
        """
        Dump all cookies to file.

        Cookies are dumped as a JSON-serialized list of dicts (the output of
        `get_dict`), which is the format `load_from_file` reads.
        """

        with open(path, 'w') as out:
            out.write(json.dumps(self.get_dict()))

    def get_cookie_header(self, req):
        """Return the `Cookie` header value the jar produces for `req`.

        :param req: object with httplib.Request interface
            Actually, it has to have `url` and `headers` attributes
        :returns: the header value, or None if no `Cookie` header was added
        """
        mocked_req = MockRequest(req)
        self.cookiejar.add_cookie_header(mocked_req)
        return mocked_req.get_new_headers().get('Cookie')