# -*- coding: utf-8 -*-

"""
requests.adapters
~~~~~~~~~~~~~~~~~

This module contains the transport adapters that Requests uses to define
and maintain connections.
"""

import os.path
import socket

from urllib3.poolmanager import PoolManager, proxy_from_url
from urllib3.response import HTTPResponse
from urllib3.util import parse_url
from urllib3.util import Timeout as TimeoutSauce
from urllib3.util.retry import Retry
from urllib3.exceptions import ClosedPoolError
from urllib3.exceptions import ConnectTimeoutError
from urllib3.exceptions import HTTPError as _HTTPError
from urllib3.exceptions import MaxRetryError
from urllib3.exceptions import NewConnectionError
from urllib3.exceptions import ProxyError as _ProxyError
from urllib3.exceptions import ProtocolError
from urllib3.exceptions import ReadTimeoutError
from urllib3.exceptions import SSLError as _SSLError
from urllib3.exceptions import ResponseError
from urllib3.exceptions import LocationValueError

from .models import Response
from .compat import urlparse, basestring
from .utils import (DEFAULT_CA_BUNDLE_PATH, extract_zipped_paths,
                    get_encoding_from_headers, prepend_scheme_if_needed,
                    get_auth_from_url, urldefragauth, select_proxy)
from .structures import CaseInsensitiveDict
from .cookies import extract_cookies_to_jar
from .exceptions import (ConnectionError, ConnectTimeout, ReadTimeout, SSLError,
                         ProxyError, RetryError, InvalidSchema, InvalidProxyURL,
                         InvalidURL)
from .auth import _basic_auth_str

try:
    from urllib3.contrib.socks import SOCKSProxyManager
except ImportError:
    def SOCKSProxyManager(*args, **kwargs):
        raise InvalidSchema("Missing dependencies for SOCKS support.")

DEFAULT_POOLBLOCK = False
DEFAULT_POOLSIZE = 10
DEFAULT_RETRIES = 0
DEFAULT_POOL_TIMEOUT = None


class BaseAdapter(object):
    """The Base Transport Adapter"""

    def __init__(self):
        super(BaseAdapter, self).__init__()

    def send(self, request, stream=False, timeout=None, verify=True,
             cert=None, proxies=None):
        """Sends PreparedRequest object. Returns Response object.

        :param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
        :param stream: (optional) Whether to stream the request content.
        :param timeout: (optional) How long to wait for the server to send
            data before giving up, as a float, or a :ref:`(connect timeout,
            read timeout) <timeouts>` tuple.
        :type timeout: float or tuple
        :param verify: (optional) Either a boolean, in which case it controls whether we verify
            the server's TLS certificate, or a string, in which case it must be a path
            to a CA bundle to use
        :param cert: (optional) Any user-provided SSL certificate to be trusted.
        :param proxies: (optional) The proxies dictionary to apply to the request.
        """
        raise NotImplementedError

    def close(self):
        """Cleans up adapter specific items."""
        raise NotImplementedError
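

# Illustrative sketch (not part of the original module): a custom transport
# adapter only needs to provide ``send`` and ``close``. The subclass and URL
# below are hypothetical, assuming the standard ``Session.mount`` API.
#
#     class LoggingAdapter(HTTPAdapter):
#         def send(self, request, **kwargs):
#             print('sending', request.method, request.url)
#             return super(LoggingAdapter, self).send(request, **kwargs)
#
#     s = requests.Session()
#     s.mount('https://', LoggingAdapter())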


class HTTPAdapter(BaseAdapter):
    """The built-in HTTP Adapter for urllib3.

    Provides a general-case interface for Requests sessions to contact HTTP and
    HTTPS urls by implementing the Transport Adapter interface. This class will
    usually be created by the :class:`Session <Session>` class under the
    covers.

    :param pool_connections: The number of urllib3 connection pools to cache.
    :param pool_maxsize: The maximum number of connections to save in the pool.
    :param max_retries: The maximum number of retries each connection
        should attempt. Note, this applies only to failed DNS lookups, socket
        connections and connection timeouts, never to requests where data has
        made it to the server. By default, Requests does not retry failed
        connections. If you need granular control over the conditions under
        which we retry a request, import urllib3's ``Retry`` class and pass
        that instead.
    :param pool_block: Whether the connection pool should block for connections.

    Usage::

      >>> import requests
      >>> s = requests.Session()
      >>> a = requests.adapters.HTTPAdapter(max_retries=3)
      >>> s.mount('http://', a)
    """
    __attrs__ = ['max_retries', 'config', '_pool_connections', '_pool_maxsize',
                 '_pool_block']

    def __init__(self, pool_connections=DEFAULT_POOLSIZE,
                 pool_maxsize=DEFAULT_POOLSIZE, max_retries=DEFAULT_RETRIES,
                 pool_block=DEFAULT_POOLBLOCK):
        if max_retries == DEFAULT_RETRIES:
            self.max_retries = Retry(0, read=False)
        else:
            self.max_retries = Retry.from_int(max_retries)
        self.config = {}
        self.proxy_manager = {}

        super(HTTPAdapter, self).__init__()

        self._pool_connections = pool_connections
        self._pool_maxsize = pool_maxsize
        self._pool_block = pool_block

        self.init_poolmanager(pool_connections, pool_maxsize, block=pool_block)
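
    # Illustrative sketch (values are examples, not defaults): for retry
    # behaviour beyond a simple integer, a urllib3 ``Retry`` instance can be
    # passed as ``max_retries``, using standard ``Retry`` keyword arguments.
    #
    #     from urllib3.util.retry import Retry
    #     retries = Retry(total=3, backoff_factor=0.5,
    #                     status_forcelist=[502, 503, 504])
    #     adapter = HTTPAdapter(max_retries=retries)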

    def __getstate__(self):
        return {attr: getattr(self, attr, None) for attr in self.__attrs__}

    def __setstate__(self, state):
        # Can't handle by adding 'proxy_manager' to self.__attrs__ because
        # self.poolmanager uses a lambda function, which isn't pickleable.
        self.proxy_manager = {}
        self.config = {}

        for attr, value in state.items():
            setattr(self, attr, value)

        self.init_poolmanager(self._pool_connections, self._pool_maxsize,
                              block=self._pool_block)

    def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK, **pool_kwargs):
        """Initializes a urllib3 PoolManager.

        This method should not be called from user code, and is only
        exposed for use when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        :param connections: The number of urllib3 connection pools to cache.
        :param maxsize: The maximum number of connections to save in the pool.
        :param block: Block when no free connections are available.
        :param pool_kwargs: Extra keyword arguments used to initialize the Pool Manager.
        """
        # save these values for pickling
        self._pool_connections = connections
        self._pool_maxsize = maxsize
        self._pool_block = block

        self.poolmanager = PoolManager(num_pools=connections, maxsize=maxsize,
                                       block=block, strict=True, **pool_kwargs)
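
    # Illustrative sketch (subclass name is hypothetical): ``pool_kwargs`` is
    # forwarded to urllib3, so a subclass can inject pool options such as an
    # ``ssl_context`` (a standard urllib3 keyword) when overriding this hook.
    #
    #     import ssl
    #
    #     class SSLContextAdapter(HTTPAdapter):
    #         def init_poolmanager(self, *args, **kwargs):
    #             kwargs['ssl_context'] = ssl.create_default_context()
    #             return super(SSLContextAdapter, self).init_poolmanager(*args, **kwargs)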

    def proxy_manager_for(self, proxy, **proxy_kwargs):
        """Return urllib3 ProxyManager for the given proxy.

        This method should not be called from user code, and is only
        exposed for use when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        :param proxy: The proxy to return a urllib3 ProxyManager for.
        :param proxy_kwargs: Extra keyword arguments used to configure the Proxy Manager.
        :returns: ProxyManager
        :rtype: urllib3.ProxyManager
        """
        if proxy in self.proxy_manager:
            manager = self.proxy_manager[proxy]
        elif proxy.lower().startswith('socks'):
            username, password = get_auth_from_url(proxy)
            manager = self.proxy_manager[proxy] = SOCKSProxyManager(
                proxy,
                username=username,
                password=password,
                num_pools=self._pool_connections,
                maxsize=self._pool_maxsize,
                block=self._pool_block,
                **proxy_kwargs
            )
        else:
            proxy_headers = self.proxy_headers(proxy)
            manager = self.proxy_manager[proxy] = proxy_from_url(
                proxy,
                proxy_headers=proxy_headers,
                num_pools=self._pool_connections,
                maxsize=self._pool_maxsize,
                block=self._pool_block,
                **proxy_kwargs)

        return manager
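
    # Usage note (hedged, host/port are illustrative): SOCKS proxy URLs are
    # routed through ``SOCKSProxyManager`` above, which needs the optional
    # SOCKS dependencies (``pip install requests[socks]``); otherwise the
    # fallback stub defined at import time raises ``InvalidSchema``.
    #
    #     proxies = {'http': 'socks5://127.0.0.1:1080',
    #                'https': 'socks5://127.0.0.1:1080'}
    #     requests.get('https://example.org', proxies=proxies)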

    def cert_verify(self, conn, url, verify, cert):
        """Verify an SSL certificate. This method should not be called from user
        code, and is only exposed for use when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        :param conn: The urllib3 connection object associated with the cert.
        :param url: The requested URL.
        :param verify: Either a boolean, in which case it controls whether we verify
            the server's TLS certificate, or a string, in which case it must be a path
            to a CA bundle to use
        :param cert: The SSL certificate to verify.
        """
        if url.lower().startswith('https') and verify:

            cert_loc = None

            # Allow self-specified cert location.
            if verify is not True:
                cert_loc = verify

            if not cert_loc:
                cert_loc = extract_zipped_paths(DEFAULT_CA_BUNDLE_PATH)

            if not cert_loc or not os.path.exists(cert_loc):
                raise IOError("Could not find a suitable TLS CA certificate bundle, "
                              "invalid path: {}".format(cert_loc))

            conn.cert_reqs = 'CERT_REQUIRED'

            if not os.path.isdir(cert_loc):
                conn.ca_certs = cert_loc
            else:
                conn.ca_cert_dir = cert_loc
        else:
            conn.cert_reqs = 'CERT_NONE'
            conn.ca_certs = None
            conn.ca_cert_dir = None

        if cert:
            if not isinstance(cert, basestring):
                conn.cert_file = cert[0]
                conn.key_file = cert[1]
            else:
                conn.cert_file = cert
                conn.key_file = None
            if conn.cert_file and not os.path.exists(conn.cert_file):
                raise IOError("Could not find the TLS certificate file, "
                              "invalid path: {}".format(conn.cert_file))
            if conn.key_file and not os.path.exists(conn.key_file):
                raise IOError("Could not find the TLS key file, "
                              "invalid path: {}".format(conn.key_file))

    def build_response(self, req, resp):
        """Builds a :class:`Response <requests.Response>` object from a urllib3
        response. This should not be called from user code, and is only exposed
        for use when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`

        :param req: The :class:`PreparedRequest <PreparedRequest>` used to generate the response.
        :param resp: The urllib3 response object.
        :rtype: requests.Response
        """
        response = Response()

        # Fallback to None if there's no status_code, for whatever reason.
        response.status_code = getattr(resp, 'status', None)

        # Make headers case-insensitive.
        response.headers = CaseInsensitiveDict(getattr(resp, 'headers', {}))

        # Set encoding.
        response.encoding = get_encoding_from_headers(response.headers)
        response.raw = resp
        response.reason = response.raw.reason

        if isinstance(req.url, bytes):
            response.url = req.url.decode('utf-8')
        else:
            response.url = req.url

        # Add new cookies from the server.
        extract_cookies_to_jar(response.cookies, req, resp)

        # Give the Response some context.
        response.request = req
        response.connection = self

        return response
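
    # Usage note (hedged, URL is illustrative): per the assignments above,
    # ``response.raw`` is the urllib3 response object and ``response.connection``
    # is this adapter, so streaming reads go through the pooled connection.
    #
    #     r = s.get('https://example.org', stream=True)
    #     r.raw          # the urllib3 HTTPResponse
    #     r.connection   # the HTTPAdapter that produced it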

    def get_connection(self, url, proxies=None):
        """Returns a urllib3 connection for the given URL. This should not be
        called from user code, and is only exposed for use when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        :param url: The URL to connect to.
        :param proxies: (optional) A Requests-style dictionary of proxies used on this request.
        :rtype: urllib3.ConnectionPool
        """
        proxy = select_proxy(url, proxies)

        if proxy:
            proxy = prepend_scheme_if_needed(proxy, 'http')
            proxy_url = parse_url(proxy)
            if not proxy_url.host:
                raise InvalidProxyURL("Please check proxy URL. It is malformed"
                                      " and could be missing the host.")
            proxy_manager = self.proxy_manager_for(proxy)
            conn = proxy_manager.connection_from_url(url)
        else:
            # Only scheme should be lower case
            parsed = urlparse(url)
            url = parsed.geturl()
            conn = self.poolmanager.connection_from_url(url)

        return conn

    def close(self):
        """Disposes of any internal state.

        Currently, this closes the PoolManager and any active ProxyManager,
        which closes any pooled connections.
        """
        self.poolmanager.clear()
        for proxy in self.proxy_manager.values():
            proxy.clear()

    def request_url(self, request, proxies):
        """Obtain the url to use when making the final request.

        If the message is being sent through an HTTP proxy, the full URL has to
        be used. Otherwise, we should only use the path portion of the URL.

        This should not be called from user code, and is only exposed for use
        when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        :param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
        :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs.
        :rtype: str
        """
        proxy = select_proxy(request.url, proxies)
        scheme = urlparse(request.url).scheme

        is_proxied_http_request = (proxy and scheme != 'https')
        using_socks_proxy = False
        if proxy:
            proxy_scheme = urlparse(proxy).scheme.lower()
            using_socks_proxy = proxy_scheme.startswith('socks')

        url = request.path_url
        if is_proxied_http_request and not using_socks_proxy:
            url = urldefragauth(request.url)

        return url
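
    # Worked example (hedged, URL is illustrative): for
    # ``http://example.com/a/b?c=1`` sent through an HTTP (non-SOCKS) proxy, the
    # request line uses the absolute form returned by ``urldefragauth`` (fragment
    # and auth stripped); sent directly, only ``request.path_url`` (``/a/b?c=1``)
    # is used.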

    def add_headers(self, request, **kwargs):
        """Add any headers needed by the connection. As of v2.0 this does
        nothing by default, but is left for overriding by users that subclass
        the :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        This should not be called from user code, and is only exposed for use
        when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        :param request: The :class:`PreparedRequest <PreparedRequest>` to add headers to.
        :param kwargs: The keyword arguments from the call to send().
        """
        pass
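
    # Illustrative sketch (the subclass, header name, and ``sign`` helper are
    # hypothetical): subclasses may inject per-request headers here, since
    # ``send`` forwards its keyword arguments to this hook.
    #
    #     class SignedAdapter(HTTPAdapter):
    #         def add_headers(self, request, **kwargs):
    #             request.headers['X-Example-Signature'] = sign(request.body)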

    def proxy_headers(self, proxy):
        """Returns a dictionary of the headers to add to any request sent
        through a proxy. This works with urllib3 magic to ensure that they are
        correctly sent to the proxy, rather than in a tunnelled request if
        CONNECT is being used.

        This should not be called from user code, and is only exposed for use
        when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        :param proxy: The url of the proxy being used for this request.
        :rtype: dict
        """
        headers = {}
        username, password = get_auth_from_url(proxy)

        if username:
            headers['Proxy-Authorization'] = _basic_auth_str(username,
                                                             password)

        return headers
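
    # Worked example (hedged, credentials are illustrative): for a proxy URL of
    # the form ``http://user:secret@proxy.local:3128``, ``get_auth_from_url``
    # yields ``('user', 'secret')`` and the returned dict contains a single
    # ``Proxy-Authorization`` header carrying the corresponding Basic credentials.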

    def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None):
        """Sends PreparedRequest object. Returns Response object.

        :param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
        :param stream: (optional) Whether to stream the request content.
        :param timeout: (optional) How long to wait for the server to send
            data before giving up, as a float, or a :ref:`(connect timeout,
            read timeout) <timeouts>` tuple.
        :type timeout: float or tuple or urllib3 Timeout object
        :param verify: (optional) Either a boolean, in which case it controls whether
            we verify the server's TLS certificate, or a string, in which case it
            must be a path to a CA bundle to use
        :param cert: (optional) Any user-provided SSL certificate to be trusted.
        :param proxies: (optional) The proxies dictionary to apply to the request.
        :rtype: requests.Response
        """
        try:
            conn = self.get_connection(request.url, proxies)
        except LocationValueError as e:
            raise InvalidURL(e, request=request)

        self.cert_verify(conn, request.url, verify, cert)
        url = self.request_url(request, proxies)
        self.add_headers(request, stream=stream, timeout=timeout, verify=verify, cert=cert, proxies=proxies)

        chunked = not (request.body is None or 'Content-Length' in request.headers)

        if isinstance(timeout, tuple):
            try:
                connect, read = timeout
                timeout = TimeoutSauce(connect=connect, read=read)
            except ValueError as e:
                # this may raise a string formatting error.
                err = ("Invalid timeout {}. Pass a (connect, read) "
                       "timeout tuple, or a single float to set "
                       "both timeouts to the same value".format(timeout))
                raise ValueError(err)
        elif isinstance(timeout, TimeoutSauce):
            pass
        else:
            timeout = TimeoutSauce(connect=timeout, read=timeout)
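
        # Accepted timeout forms, per the normalization above (values are
        # illustrative): a single float applies to both connect and read, a
        # ``(connect, read)`` tuple sets them separately, and a pre-built
        # urllib3 Timeout is passed through unchanged, e.g.
        # ``TimeoutSauce(connect=3.05, read=27)``.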

        try:
            if not chunked:
                resp = conn.urlopen(
                    method=request.method,
                    url=url,
                    body=request.body,
                    headers=request.headers,
                    redirect=False,
                    assert_same_host=False,
                    preload_content=False,
                    decode_content=False,
                    retries=self.max_retries,
                    timeout=timeout
                )

            else:
                # Send the request with chunked transfer encoding over a
                # low-level connection taken from the pool.
                if hasattr(conn, 'proxy_pool'):
                    conn = conn.proxy_pool

                low_conn = conn._get_conn(timeout=DEFAULT_POOL_TIMEOUT)

                try:
                    low_conn.putrequest(request.method,
                                        url,
                                        skip_accept_encoding=True)

                    for header, value in request.headers.items():
                        low_conn.putheader(header, value)

                    low_conn.endheaders()

                    for i in request.body:
                        low_conn.send(hex(len(i))[2:].encode('utf-8'))
                        low_conn.send(b'\r\n')
                        low_conn.send(i)
                        low_conn.send(b'\r\n')
                    low_conn.send(b'0\r\n\r\n')

                    # Receive the response from the server
                    try:
                        # For Python 2.7, use buffering of HTTP responses
                        r = low_conn.getresponse(buffering=True)
                    except TypeError:
                        # For compatibility with Python 3.3+
                        r = low_conn.getresponse()

                    resp = HTTPResponse.from_httplib(
                        r,
                        pool=conn,
                        connection=low_conn,
                        preload_content=False,
                        decode_content=False
                    )
                except:
                    # If we hit any problems here, clean up the connection.
                    # Then, reraise so that we can handle the actual exception.
                    low_conn.close()
                    raise

        except (ProtocolError, socket.error) as err:
            raise ConnectionError(err, request=request)

        except MaxRetryError as e:
            if isinstance(e.reason, ConnectTimeoutError):
                # TODO: Remove this in 3.0.0: see #2811
                if not isinstance(e.reason, NewConnectionError):
                    raise ConnectTimeout(e, request=request)

            if isinstance(e.reason, ResponseError):
                raise RetryError(e, request=request)

            if isinstance(e.reason, _ProxyError):
                raise ProxyError(e, request=request)

            if isinstance(e.reason, _SSLError):
                # This branch is for urllib3 v1.22 and later.
                raise SSLError(e, request=request)

            raise ConnectionError(e, request=request)

        except ClosedPoolError as e:
            raise ConnectionError(e, request=request)

        except _ProxyError as e:
            raise ProxyError(e)

        except (_SSLError, _HTTPError) as e:
            if isinstance(e, _SSLError):
                # This branch is for urllib3 versions earlier than v1.22
                raise SSLError(e, request=request)
            elif isinstance(e, ReadTimeoutError):
                raise ReadTimeout(e, request=request)
            else:
                raise

        return self.build_response(request, resp)