conftest.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678
  1. from datetime import date, time, timedelta
  2. from decimal import Decimal
  3. import os
  4. from dateutil.tz import tzlocal, tzutc
  5. import hypothesis
  6. from hypothesis import strategies as st
  7. import numpy as np
  8. import pytest
  9. from pytz import FixedOffset, utc
  10. from pandas.compat import PY3, u
  11. import pandas.util._test_decorators as td
  12. import pandas as pd
  13. hypothesis.settings.register_profile(
  14. "ci",
  15. # Hypothesis timing checks are tuned for scalars by default, so we bump
  16. # them from 200ms to 500ms per test case as the global default. If this
  17. # is too short for a specific test, (a) try to make it faster, and (b)
  18. # if it really is slow add `@settings(deadline=...)` with a working value,
  19. # or `deadline=None` to entirely disable timeouts for that test.
  20. deadline=500,
  21. timeout=hypothesis.unlimited,
  22. suppress_health_check=(hypothesis.HealthCheck.too_slow,)
  23. )
  24. hypothesis.settings.load_profile("ci")
  25. def pytest_addoption(parser):
  26. parser.addoption("--skip-slow", action="store_true",
  27. help="skip slow tests")
  28. parser.addoption("--skip-network", action="store_true",
  29. help="skip network tests")
  30. parser.addoption("--skip-db", action="store_true",
  31. help="skip db tests")
  32. parser.addoption("--run-high-memory", action="store_true",
  33. help="run high memory tests")
  34. parser.addoption("--only-slow", action="store_true",
  35. help="run only slow tests")
  36. parser.addoption("--strict-data-files", action="store_true",
  37. help="Fail if a test is skipped for missing data file.")
  38. def pytest_runtest_setup(item):
  39. if 'slow' in item.keywords and item.config.getoption("--skip-slow"):
  40. pytest.skip("skipping due to --skip-slow")
  41. if 'slow' not in item.keywords and item.config.getoption("--only-slow"):
  42. pytest.skip("skipping due to --only-slow")
  43. if 'network' in item.keywords and item.config.getoption("--skip-network"):
  44. pytest.skip("skipping due to --skip-network")
  45. if 'db' in item.keywords and item.config.getoption("--skip-db"):
  46. pytest.skip("skipping due to --skip-db")
  47. if 'high_memory' in item.keywords and not item.config.getoption(
  48. "--run-high-memory"):
  49. pytest.skip(
  50. "skipping high memory test since --run-high-memory was not set")
  51. # Configurations for all tests and all test modules
  52. @pytest.fixture(autouse=True)
  53. def configure_tests():
  54. pd.set_option('chained_assignment', 'raise')
  55. # For running doctests: make np and pd names available
  56. @pytest.fixture(autouse=True)
  57. def add_imports(doctest_namespace):
  58. doctest_namespace['np'] = np
  59. doctest_namespace['pd'] = pd
  60. @pytest.fixture(params=['bsr', 'coo', 'csc', 'csr', 'dia', 'dok', 'lil'])
  61. def spmatrix(request):
  62. from scipy import sparse
  63. return getattr(sparse, request.param + '_matrix')
  64. @pytest.fixture(params=[0, 1, 'index', 'columns'],
  65. ids=lambda x: "axis {!r}".format(x))
  66. def axis(request):
  67. """
  68. Fixture for returning the axis numbers of a DataFrame.
  69. """
  70. return request.param
  71. axis_frame = axis
  72. @pytest.fixture(params=[0, 'index'], ids=lambda x: "axis {!r}".format(x))
  73. def axis_series(request):
  74. """
  75. Fixture for returning the axis numbers of a Series.
  76. """
  77. return request.param
  78. @pytest.fixture
  79. def ip():
  80. """
  81. Get an instance of IPython.InteractiveShell.
  82. Will raise a skip if IPython is not installed.
  83. """
  84. pytest.importorskip('IPython', minversion="6.0.0")
  85. from IPython.core.interactiveshell import InteractiveShell
  86. return InteractiveShell()
  87. @pytest.fixture(params=[True, False, None])
  88. def observed(request):
  89. """ pass in the observed keyword to groupby for [True, False]
  90. This indicates whether categoricals should return values for
  91. values which are not in the grouper [False / None], or only values which
  92. appear in the grouper [True]. [None] is supported for future compatiblity
  93. if we decide to change the default (and would need to warn if this
  94. parameter is not passed)"""
  95. return request.param
  96. _all_arithmetic_operators = ['__add__', '__radd__',
  97. '__sub__', '__rsub__',
  98. '__mul__', '__rmul__',
  99. '__floordiv__', '__rfloordiv__',
  100. '__truediv__', '__rtruediv__',
  101. '__pow__', '__rpow__',
  102. '__mod__', '__rmod__']
  103. if not PY3:
  104. _all_arithmetic_operators.extend(['__div__', '__rdiv__'])
  105. @pytest.fixture(params=_all_arithmetic_operators)
  106. def all_arithmetic_operators(request):
  107. """
  108. Fixture for dunder names for common arithmetic operations
  109. """
  110. return request.param
  111. _all_numeric_reductions = ['sum', 'max', 'min',
  112. 'mean', 'prod', 'std', 'var', 'median',
  113. 'kurt', 'skew']
  114. @pytest.fixture(params=_all_numeric_reductions)
  115. def all_numeric_reductions(request):
  116. """
  117. Fixture for numeric reduction names
  118. """
  119. return request.param
  120. _all_boolean_reductions = ['all', 'any']
  121. @pytest.fixture(params=_all_boolean_reductions)
  122. def all_boolean_reductions(request):
  123. """
  124. Fixture for boolean reduction names
  125. """
  126. return request.param
  127. _cython_table = pd.core.base.SelectionMixin._cython_table.items()
  128. @pytest.fixture(params=list(_cython_table))
  129. def cython_table_items(request):
  130. return request.param
  131. def _get_cython_table_params(ndframe, func_names_and_expected):
  132. """combine frame, functions from SelectionMixin._cython_table
  133. keys and expected result.
  134. Parameters
  135. ----------
  136. ndframe : DataFrame or Series
  137. func_names_and_expected : Sequence of two items
  138. The first item is a name of a NDFrame method ('sum', 'prod') etc.
  139. The second item is the expected return value
  140. Returns
  141. -------
  142. results : list
  143. List of three items (DataFrame, function, expected result)
  144. """
  145. results = []
  146. for func_name, expected in func_names_and_expected:
  147. results.append((ndframe, func_name, expected))
  148. results += [(ndframe, func, expected) for func, name in _cython_table
  149. if name == func_name]
  150. return results
  151. @pytest.fixture(params=['__eq__', '__ne__', '__le__',
  152. '__lt__', '__ge__', '__gt__'])
  153. def all_compare_operators(request):
  154. """
  155. Fixture for dunder names for common compare operations
  156. * >=
  157. * >
  158. * ==
  159. * !=
  160. * <
  161. * <=
  162. """
  163. return request.param
  164. @pytest.fixture(params=[None, 'gzip', 'bz2', 'zip',
  165. pytest.param('xz', marks=td.skip_if_no_lzma)])
  166. def compression(request):
  167. """
  168. Fixture for trying common compression types in compression tests
  169. """
  170. return request.param
  171. @pytest.fixture(params=['gzip', 'bz2', 'zip',
  172. pytest.param('xz', marks=td.skip_if_no_lzma)])
  173. def compression_only(request):
  174. """
  175. Fixture for trying common compression types in compression tests excluding
  176. uncompressed case
  177. """
  178. return request.param
  179. @pytest.fixture(params=[True, False])
  180. def writable(request):
  181. """
  182. Fixture that an array is writable
  183. """
  184. return request.param
  185. @pytest.fixture(scope='module')
  186. def datetime_tz_utc():
  187. from datetime import timezone
  188. return timezone.utc
  189. utc_objs = ['utc', 'dateutil/UTC', utc, tzutc()]
  190. if PY3:
  191. from datetime import timezone
  192. utc_objs.append(timezone.utc)
  193. @pytest.fixture(params=utc_objs)
  194. def utc_fixture(request):
  195. """
  196. Fixture to provide variants of UTC timezone strings and tzinfo objects
  197. """
  198. return request.param
  199. @pytest.fixture(params=['inner', 'outer', 'left', 'right'])
  200. def join_type(request):
  201. """
  202. Fixture for trying all types of join operations
  203. """
  204. return request.param
  205. @pytest.fixture
  206. def strict_data_files(pytestconfig):
  207. return pytestconfig.getoption("--strict-data-files")
  208. @pytest.fixture
  209. def datapath(strict_data_files):
  210. """Get the path to a data file.
  211. Parameters
  212. ----------
  213. path : str
  214. Path to the file, relative to ``pandas/tests/``
  215. Returns
  216. -------
  217. path : path including ``pandas/tests``.
  218. Raises
  219. ------
  220. ValueError
  221. If the path doesn't exist and the --strict-data-files option is set.
  222. """
  223. BASE_PATH = os.path.join(os.path.dirname(__file__), 'tests')
  224. def deco(*args):
  225. path = os.path.join(BASE_PATH, *args)
  226. if not os.path.exists(path):
  227. if strict_data_files:
  228. msg = "Could not find file {} and --strict-data-files is set."
  229. raise ValueError(msg.format(path))
  230. else:
  231. msg = "Could not find {}."
  232. pytest.skip(msg.format(path))
  233. return path
  234. return deco
  235. @pytest.fixture
  236. def iris(datapath):
  237. """The iris dataset as a DataFrame."""
  238. return pd.read_csv(datapath('data', 'iris.csv'))
  239. @pytest.fixture(params=['nlargest', 'nsmallest'])
  240. def nselect_method(request):
  241. """
  242. Fixture for trying all nselect methods
  243. """
  244. return request.param
  245. @pytest.fixture(params=['left', 'right', 'both', 'neither'])
  246. def closed(request):
  247. """
  248. Fixture for trying all interval closed parameters
  249. """
  250. return request.param
  251. @pytest.fixture(params=['left', 'right', 'both', 'neither'])
  252. def other_closed(request):
  253. """
  254. Secondary closed fixture to allow parametrizing over all pairs of closed
  255. """
  256. return request.param
  257. @pytest.fixture(params=[None, np.nan, pd.NaT, float('nan'), np.float('NaN')])
  258. def nulls_fixture(request):
  259. """
  260. Fixture for each null type in pandas
  261. """
  262. return request.param
  263. nulls_fixture2 = nulls_fixture # Generate cartesian product of nulls_fixture
  264. @pytest.fixture(params=[None, np.nan, pd.NaT])
  265. def unique_nulls_fixture(request):
  266. """
  267. Fixture for each null type in pandas, each null type exactly once
  268. """
  269. return request.param
  270. # Generate cartesian product of unique_nulls_fixture:
  271. unique_nulls_fixture2 = unique_nulls_fixture
  272. TIMEZONES = [None, 'UTC', 'US/Eastern', 'Asia/Tokyo', 'dateutil/US/Pacific',
  273. 'dateutil/Asia/Singapore', tzutc(), tzlocal(), FixedOffset(300),
  274. FixedOffset(0), FixedOffset(-300)]
  275. @td.parametrize_fixture_doc(str(TIMEZONES))
  276. @pytest.fixture(params=TIMEZONES)
  277. def tz_naive_fixture(request):
  278. """
  279. Fixture for trying timezones including default (None): {0}
  280. """
  281. return request.param
  282. @td.parametrize_fixture_doc(str(TIMEZONES[1:]))
  283. @pytest.fixture(params=TIMEZONES[1:])
  284. def tz_aware_fixture(request):
  285. """
  286. Fixture for trying explicit timezones: {0}
  287. """
  288. return request.param
  289. # ----------------------------------------------------------------
  290. # Dtypes
  291. UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"]
  292. UNSIGNED_EA_INT_DTYPES = ["UInt8", "UInt16", "UInt32", "UInt64"]
  293. SIGNED_INT_DTYPES = [int, "int8", "int16", "int32", "int64"]
  294. SIGNED_EA_INT_DTYPES = ["Int8", "Int16", "Int32", "Int64"]
  295. ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES
  296. ALL_EA_INT_DTYPES = UNSIGNED_EA_INT_DTYPES + SIGNED_EA_INT_DTYPES
  297. FLOAT_DTYPES = [float, "float32", "float64"]
  298. COMPLEX_DTYPES = [complex, "complex64", "complex128"]
  299. STRING_DTYPES = [str, 'str', 'U']
  300. DATETIME_DTYPES = ['datetime64[ns]', 'M8[ns]']
  301. TIMEDELTA_DTYPES = ['timedelta64[ns]', 'm8[ns]']
  302. BOOL_DTYPES = [bool, 'bool']
  303. BYTES_DTYPES = [bytes, 'bytes']
  304. OBJECT_DTYPES = [object, 'object']
  305. ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES
  306. ALL_NUMPY_DTYPES = (ALL_REAL_DTYPES + COMPLEX_DTYPES + STRING_DTYPES
  307. + DATETIME_DTYPES + TIMEDELTA_DTYPES + BOOL_DTYPES
  308. + OBJECT_DTYPES + BYTES_DTYPES * PY3) # bytes only for PY3
  309. @pytest.fixture(params=STRING_DTYPES)
  310. def string_dtype(request):
  311. """Parametrized fixture for string dtypes.
  312. * str
  313. * 'str'
  314. * 'U'
  315. """
  316. return request.param
  317. @pytest.fixture(params=FLOAT_DTYPES)
  318. def float_dtype(request):
  319. """
  320. Parameterized fixture for float dtypes.
  321. * float
  322. * 'float32'
  323. * 'float64'
  324. """
  325. return request.param
  326. @pytest.fixture(params=COMPLEX_DTYPES)
  327. def complex_dtype(request):
  328. """
  329. Parameterized fixture for complex dtypes.
  330. * complex
  331. * 'complex64'
  332. * 'complex128'
  333. """
  334. return request.param
  335. @pytest.fixture(params=SIGNED_INT_DTYPES)
  336. def sint_dtype(request):
  337. """
  338. Parameterized fixture for signed integer dtypes.
  339. * int
  340. * 'int8'
  341. * 'int16'
  342. * 'int32'
  343. * 'int64'
  344. """
  345. return request.param
  346. @pytest.fixture(params=UNSIGNED_INT_DTYPES)
  347. def uint_dtype(request):
  348. """
  349. Parameterized fixture for unsigned integer dtypes.
  350. * 'uint8'
  351. * 'uint16'
  352. * 'uint32'
  353. * 'uint64'
  354. """
  355. return request.param
  356. @pytest.fixture(params=ALL_INT_DTYPES)
  357. def any_int_dtype(request):
  358. """
  359. Parameterized fixture for any integer dtype.
  360. * int
  361. * 'int8'
  362. * 'uint8'
  363. * 'int16'
  364. * 'uint16'
  365. * 'int32'
  366. * 'uint32'
  367. * 'int64'
  368. * 'uint64'
  369. """
  370. return request.param
  371. @pytest.fixture(params=ALL_REAL_DTYPES)
  372. def any_real_dtype(request):
  373. """
  374. Parameterized fixture for any (purely) real numeric dtype.
  375. * int
  376. * 'int8'
  377. * 'uint8'
  378. * 'int16'
  379. * 'uint16'
  380. * 'int32'
  381. * 'uint32'
  382. * 'int64'
  383. * 'uint64'
  384. * float
  385. * 'float32'
  386. * 'float64'
  387. """
  388. return request.param
  389. @pytest.fixture(params=ALL_NUMPY_DTYPES)
  390. def any_numpy_dtype(request):
  391. """
  392. Parameterized fixture for all numpy dtypes.
  393. * bool
  394. * 'bool'
  395. * int
  396. * 'int8'
  397. * 'uint8'
  398. * 'int16'
  399. * 'uint16'
  400. * 'int32'
  401. * 'uint32'
  402. * 'int64'
  403. * 'uint64'
  404. * float
  405. * 'float32'
  406. * 'float64'
  407. * complex
  408. * 'complex64'
  409. * 'complex128'
  410. * str
  411. * 'str'
  412. * 'U'
  413. * bytes
  414. * 'bytes'
  415. * 'datetime64[ns]'
  416. * 'M8[ns]'
  417. * 'timedelta64[ns]'
  418. * 'm8[ns]'
  419. * object
  420. * 'object'
  421. """
  422. return request.param
  423. # categoricals are handled separately
  424. _any_skipna_inferred_dtype = [
  425. ('string', ['a', np.nan, 'c']),
  426. ('unicode' if not PY3 else 'string', [u('a'), np.nan, u('c')]),
  427. ('bytes' if PY3 else 'string', [b'a', np.nan, b'c']),
  428. ('empty', [np.nan, np.nan, np.nan]),
  429. ('empty', []),
  430. ('mixed-integer', ['a', np.nan, 2]),
  431. ('mixed', ['a', np.nan, 2.0]),
  432. ('floating', [1.0, np.nan, 2.0]),
  433. ('integer', [1, np.nan, 2]),
  434. ('mixed-integer-float', [1, np.nan, 2.0]),
  435. ('decimal', [Decimal(1), np.nan, Decimal(2)]),
  436. ('boolean', [True, np.nan, False]),
  437. ('datetime64', [np.datetime64('2013-01-01'), np.nan,
  438. np.datetime64('2018-01-01')]),
  439. ('datetime', [pd.Timestamp('20130101'), np.nan, pd.Timestamp('20180101')]),
  440. ('date', [date(2013, 1, 1), np.nan, date(2018, 1, 1)]),
  441. # The following two dtypes are commented out due to GH 23554
  442. # ('complex', [1 + 1j, np.nan, 2 + 2j]),
  443. # ('timedelta64', [np.timedelta64(1, 'D'),
  444. # np.nan, np.timedelta64(2, 'D')]),
  445. ('timedelta', [timedelta(1), np.nan, timedelta(2)]),
  446. ('time', [time(1), np.nan, time(2)]),
  447. ('period', [pd.Period(2013), pd.NaT, pd.Period(2018)]),
  448. ('interval', [pd.Interval(0, 1), np.nan, pd.Interval(0, 2)])]
  449. ids, _ = zip(*_any_skipna_inferred_dtype) # use inferred type as fixture-id
  450. @pytest.fixture(params=_any_skipna_inferred_dtype, ids=ids)
  451. def any_skipna_inferred_dtype(request):
  452. """
  453. Fixture for all inferred dtypes from _libs.lib.infer_dtype
  454. The covered (inferred) types are:
  455. * 'string'
  456. * 'unicode' (if PY2)
  457. * 'empty'
  458. * 'bytes' (if PY3)
  459. * 'mixed'
  460. * 'mixed-integer'
  461. * 'mixed-integer-float'
  462. * 'floating'
  463. * 'integer'
  464. * 'decimal'
  465. * 'boolean'
  466. * 'datetime64'
  467. * 'datetime'
  468. * 'date'
  469. * 'timedelta'
  470. * 'time'
  471. * 'period'
  472. * 'interval'
  473. Returns
  474. -------
  475. inferred_dtype : str
  476. The string for the inferred dtype from _libs.lib.infer_dtype
  477. values : np.ndarray
  478. An array of object dtype that will be inferred to have
  479. `inferred_dtype`
  480. Examples
  481. --------
  482. >>> import pandas._libs.lib as lib
  483. >>>
  484. >>> def test_something(any_skipna_inferred_dtype):
  485. ... inferred_dtype, values = any_skipna_inferred_dtype
  486. ... # will pass
  487. ... assert lib.infer_dtype(values, skipna=True) == inferred_dtype
  488. """
  489. inferred_dtype, values = request.param
  490. values = np.array(values, dtype=object) # object dtype to avoid casting
  491. # correctness of inference tested in tests/dtypes/test_inference.py
  492. return inferred_dtype, values
  493. @pytest.fixture(params=[getattr(pd.offsets, o) for o in pd.offsets.__all__ if
  494. issubclass(getattr(pd.offsets, o), pd.offsets.Tick)])
  495. def tick_classes(request):
  496. """
  497. Fixture for Tick based datetime offsets available for a time series.
  498. """
  499. return request.param
  500. # ----------------------------------------------------------------
  501. # Global setup for tests using Hypothesis
  502. # Registering these strategies makes them globally available via st.from_type,
  503. # which is use for offsets in tests/tseries/offsets/test_offsets_properties.py
  504. for name in 'MonthBegin MonthEnd BMonthBegin BMonthEnd'.split():
  505. cls = getattr(pd.tseries.offsets, name)
  506. st.register_type_strategy(cls, st.builds(
  507. cls,
  508. n=st.integers(-99, 99),
  509. normalize=st.booleans(),
  510. ))
  511. for name in 'YearBegin YearEnd BYearBegin BYearEnd'.split():
  512. cls = getattr(pd.tseries.offsets, name)
  513. st.register_type_strategy(cls, st.builds(
  514. cls,
  515. n=st.integers(-5, 5),
  516. normalize=st.booleans(),
  517. month=st.integers(min_value=1, max_value=12),
  518. ))
  519. for name in 'QuarterBegin QuarterEnd BQuarterBegin BQuarterEnd'.split():
  520. cls = getattr(pd.tseries.offsets, name)
  521. st.register_type_strategy(cls, st.builds(
  522. cls,
  523. n=st.integers(-24, 24),
  524. normalize=st.booleans(),
  525. startingMonth=st.integers(min_value=1, max_value=12)
  526. ))