s3.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041
  1. """ s3 support for remote file interactivity """
  2. from pandas import compat
  3. try:
  4. import s3fs
  5. from botocore.exceptions import NoCredentialsError
  6. except ImportError:
  7. raise ImportError("The s3fs library is required to handle s3 files")
  8. if compat.PY3:
  9. from urllib.parse import urlparse as parse_url
  10. else:
  11. from urlparse import urlparse as parse_url
  12. def _strip_schema(url):
  13. """Returns the url without the s3:// part"""
  14. result = parse_url(url)
  15. return result.netloc + result.path
  16. def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
  17. compression=None, mode=None):
  18. if mode is None:
  19. mode = 'rb'
  20. fs = s3fs.S3FileSystem(anon=False)
  21. try:
  22. filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode)
  23. except (compat.FileNotFoundError, NoCredentialsError):
  24. # boto3 has troubles when trying to access a public file
  25. # when credentialed...
  26. # An OSError is raised if you have credentials, but they
  27. # aren't valid for that bucket.
  28. # A NoCredentialsError is raised if you don't have creds
  29. # for that bucket.
  30. fs = s3fs.S3FileSystem(anon=True)
  31. filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode)
  32. return filepath_or_buffer, None, compression, True