gbq.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. """ Google BigQuery support """
  2. import warnings
  3. def _try_import():
  4. # since pandas is a dependency of pandas-gbq
  5. # we need to import on first use
  6. try:
  7. import pandas_gbq
  8. except ImportError:
  9. # give a nice error message
  10. raise ImportError("Load data from Google BigQuery\n"
  11. "\n"
  12. "the pandas-gbq package is not installed\n"
  13. "see the docs: https://pandas-gbq.readthedocs.io\n"
  14. "\n"
  15. "you can install via pip or conda:\n"
  16. "pip install pandas-gbq\n"
  17. "conda install pandas-gbq -c conda-forge\n")
  18. return pandas_gbq
  19. def read_gbq(query, project_id=None, index_col=None, col_order=None,
  20. reauth=False, auth_local_webserver=False, dialect=None,
  21. location=None, configuration=None, credentials=None,
  22. private_key=None, verbose=None):
  23. """
  24. Load data from Google BigQuery.
  25. This function requires the `pandas-gbq package
  26. <https://pandas-gbq.readthedocs.io>`__.
  27. See the `How to authenticate with Google BigQuery
  28. <https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__
  29. guide for authentication instructions.
  30. Parameters
  31. ----------
  32. query : str
  33. SQL-Like Query to return data values.
  34. project_id : str, optional
  35. Google BigQuery Account project ID. Optional when available from
  36. the environment.
  37. index_col : str, optional
  38. Name of result column to use for index in results DataFrame.
  39. col_order : list(str), optional
  40. List of BigQuery column names in the desired order for results
  41. DataFrame.
  42. reauth : boolean, default False
  43. Force Google BigQuery to re-authenticate the user. This is useful
  44. if multiple accounts are used.
  45. auth_local_webserver : boolean, default False
  46. Use the `local webserver flow`_ instead of the `console flow`_
  47. when getting user credentials.
  48. .. _local webserver flow:
  49. http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server
  50. .. _console flow:
  51. http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console
  52. *New in version 0.2.0 of pandas-gbq*.
  53. dialect : str, default 'legacy'
  54. Note: The default value is changing to 'standard' in a future verion.
  55. SQL syntax dialect to use. Value can be one of:
  56. ``'legacy'``
  57. Use BigQuery's legacy SQL dialect. For more information see
  58. `BigQuery Legacy SQL Reference
  59. <https://cloud.google.com/bigquery/docs/reference/legacy-sql>`__.
  60. ``'standard'``
  61. Use BigQuery's standard SQL, which is
  62. compliant with the SQL 2011 standard. For more information
  63. see `BigQuery Standard SQL Reference
  64. <https://cloud.google.com/bigquery/docs/reference/standard-sql/>`__.
  65. .. versionchanged:: 0.24.0
  66. location : str, optional
  67. Location where the query job should run. See the `BigQuery locations
  68. documentation
  69. <https://cloud.google.com/bigquery/docs/dataset-locations>`__ for a
  70. list of available locations. The location must match that of any
  71. datasets used in the query.
  72. *New in version 0.5.0 of pandas-gbq*.
  73. configuration : dict, optional
  74. Query config parameters for job processing.
  75. For example:
  76. configuration = {'query': {'useQueryCache': False}}
  77. For more information see `BigQuery REST API Reference
  78. <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__.
  79. credentials : google.auth.credentials.Credentials, optional
  80. Credentials for accessing Google APIs. Use this parameter to override
  81. default credentials, such as to use Compute Engine
  82. :class:`google.auth.compute_engine.Credentials` or Service Account
  83. :class:`google.oauth2.service_account.Credentials` directly.
  84. *New in version 0.8.0 of pandas-gbq*.
  85. .. versionadded:: 0.24.0
  86. private_key : str, deprecated
  87. Deprecated in pandas-gbq version 0.8.0. Use the ``credentials``
  88. parameter and
  89. :func:`google.oauth2.service_account.Credentials.from_service_account_info`
  90. or
  91. :func:`google.oauth2.service_account.Credentials.from_service_account_file`
  92. instead.
  93. Service account private key in JSON format. Can be file path
  94. or string contents. This is useful for remote server
  95. authentication (eg. Jupyter/IPython notebook on remote host).
  96. verbose : None, deprecated
  97. Deprecated in pandas-gbq version 0.4.0. Use the `logging module to
  98. adjust verbosity instead
  99. <https://pandas-gbq.readthedocs.io/en/latest/intro.html#logging>`__.
  100. Returns
  101. -------
  102. df: DataFrame
  103. DataFrame representing results of query.
  104. See Also
  105. --------
  106. pandas_gbq.read_gbq : This function in the pandas-gbq library.
  107. pandas.DataFrame.to_gbq : Write a DataFrame to Google BigQuery.
  108. """
  109. pandas_gbq = _try_import()
  110. if dialect is None:
  111. dialect = "legacy"
  112. warnings.warn(
  113. 'The default value for dialect is changing to "standard" in a '
  114. 'future version of pandas-gbq. Pass in dialect="legacy" to '
  115. "disable this warning.",
  116. FutureWarning,
  117. stacklevel=2,
  118. )
  119. return pandas_gbq.read_gbq(
  120. query, project_id=project_id, index_col=index_col,
  121. col_order=col_order, reauth=reauth,
  122. auth_local_webserver=auth_local_webserver, dialect=dialect,
  123. location=location, configuration=configuration,
  124. credentials=credentials, verbose=verbose, private_key=private_key)
  125. def to_gbq(dataframe, destination_table, project_id=None, chunksize=None,
  126. reauth=False, if_exists='fail', auth_local_webserver=False,
  127. table_schema=None, location=None, progress_bar=True,
  128. credentials=None, verbose=None, private_key=None):
  129. pandas_gbq = _try_import()
  130. return pandas_gbq.to_gbq(
  131. dataframe, destination_table, project_id=project_id,
  132. chunksize=chunksize, reauth=reauth, if_exists=if_exists,
  133. auth_local_webserver=auth_local_webserver, table_schema=table_schema,
  134. location=location, progress_bar=progress_bar,
  135. credentials=credentials, verbose=verbose, private_key=private_key)