# bscheibel/technical_drawings_extraction
							from datetime import date, time, timedelta
from decimal import Decimal
import os

from dateutil.tz import tzlocal, tzutc
import hypothesis
from hypothesis import strategies as st
import numpy as np
import pytest
from pytz import FixedOffset, utc

from pandas.compat import PY3, u
import pandas.util._test_decorators as td

import pandas as pd

# Register a Hypothesis settings profile named "ci" and activate it below.
hypothesis.settings.register_profile(
    "ci",
    # Hypothesis timing checks are tuned for scalars by default, so we bump
    # them from 200ms to 500ms per test case as the global default.  If this
    # is too short for a specific test, (a) try to make it faster, and (b)
    # if it really is slow add `@settings(deadline=...)` with a working value,
    # or `deadline=None` to entirely disable timeouts for that test.
    deadline=500,
    # NOTE(review): presumably lifts the overall time budget of a test run;
    # confirm against the Hypothesis version in use.
    timeout=hypothesis.unlimited,
    # Suppress the "too_slow" health check.
    suppress_health_check=(hypothesis.HealthCheck.too_slow,)
)
# Activate the profile registered above.
hypothesis.settings.load_profile("ci")


def pytest_addoption(parser):
    """
    Register the custom command line flags on ``parser``.

    Every flag is a boolean switch (``action="store_true"``).  The flags are
    added in the order listed below.

    Parameters
    ----------
    parser : object
        Object exposing ``addoption(name, action=..., help=...)``.
    """
    switches = (
        ("--skip-slow", "skip slow tests"),
        ("--skip-network", "skip network tests"),
        ("--skip-db", "skip db tests"),
        ("--run-high-memory", "run high memory tests"),
        ("--only-slow", "run only slow tests"),
        ("--strict-data-files",
         "Fail if a test is skipped for missing data file."),
    )
    for flag, description in switches:
        parser.addoption(flag, action="store_true", help=description)


def pytest_runtest_setup(item):
    """
    Skip ``item`` when its keywords conflict with the command line flags.

    The checks run in this order and the first match calls ``pytest.skip``:

    * ``slow`` keyword together with ``--skip-slow``
    * no ``slow`` keyword together with ``--only-slow``
    * ``network`` keyword together with ``--skip-network``
    * ``db`` keyword together with ``--skip-db``
    * ``high_memory`` keyword without ``--run-high-memory``

    An option is only queried when the keyword part of its check holds.
    """
    marks = item.keywords
    enabled = item.config.getoption
    is_slow = 'slow' in marks

    if is_slow and enabled("--skip-slow"):
        pytest.skip("skipping due to --skip-slow")

    if not is_slow and enabled("--only-slow"):
        pytest.skip("skipping due to --only-slow")

    if 'network' in marks and enabled("--skip-network"):
        pytest.skip("skipping due to --skip-network")

    if 'db' in marks and enabled("--skip-db"):
        pytest.skip("skipping due to --skip-db")

    if 'high_memory' in marks:
        if not enabled("--run-high-memory"):
            pytest.skip(
                "skipping high memory test since --run-high-memory "
                "was not set")


# Configurations for all tests and all test modules

@pytest.fixture(autouse=True)
def configure_tests():
    """
    Autouse fixture that sets the pandas option ``chained_assignment`` to
    ``'raise'``.
    """
    pd.set_option('chained_assignment', 'raise')


# For running doctests: make np and pd names available

@pytest.fixture(autouse=True)
def add_imports(doctest_namespace):
    """
    Autouse fixture that stores numpy and pandas in the doctest namespace
    under the keys ``'np'`` and ``'pd'``.
    """
    for alias, module in (('np', np), ('pd', pd)):
        doctest_namespace[alias] = module


@pytest.fixture(params=['bsr', 'coo', 'csc', 'csr', 'dia', 'dok', 'lil'])
def spmatrix(request):
    """
    Fixture returning an attribute of ``scipy.sparse`` per format prefix.

    The attribute looked up is named ``<prefix>_matrix``; scipy is imported
    lazily inside the fixture.
    """
    from scipy import sparse

    class_name = request.param + '_matrix'
    return getattr(sparse, class_name)


@pytest.fixture(params=[0, 1, 'index', 'columns'],
                ids=lambda x: "axis {!r}".format(x))
def axis(request):
    """
    Fixture for returning the axis numbers of a DataFrame.

    Parametrized over ``0``, ``1``, ``'index'`` and ``'columns'``; the test
    id of each case is ``axis`` followed by the repr of the value.
    """
    return request.param


# Second name bound to the very same fixture object.
axis_frame = axis


@pytest.fixture(params=[0, 'index'], ids=lambda x: "axis {!r}".format(x))
def axis_series(request):
    """
    Fixture for returning the axis numbers of a Series.

    Parametrized over ``0`` and ``'index'``; the test id of each case is
    ``axis`` followed by the repr of the value.
    """
    return request.param


@pytest.fixture
def ip():
    """
    Return a new ``IPython.core.interactiveshell.InteractiveShell``.

    ``pytest.importorskip`` is asked for IPython with a minimum version of
    6.0.0 before the shell class is imported.
    """
    pytest.importorskip('IPython', minversion="6.0.0")

    from IPython.core import interactiveshell
    shell_class = interactiveshell.InteractiveShell
    return shell_class()


@pytest.fixture(params=[True, False, None])
def observed(request):
    """
    Pass in the ``observed`` keyword to groupby for [True, False, None].

    This indicates whether categoricals should return values for
    values which are not in the grouper [False / None], or only values which
    appear in the grouper [True]. [None] is supported for future
    compatibility if we decide to change the default (and would need to warn
    if this parameter is not passed).
    """
    return request.param


# Dunder names of the arithmetic operators, each followed by its reflected
# counterpart.  ``__div__`` / ``__rdiv__`` are appended only when PY3 is
# false.
_all_arithmetic_operators = [
    '__add__', '__radd__',
    '__sub__', '__rsub__',
    '__mul__', '__rmul__',
    '__floordiv__', '__rfloordiv__',
    '__truediv__', '__rtruediv__',
    '__pow__', '__rpow__',
    '__mod__', '__rmod__',
] + ([] if PY3 else ['__div__', '__rdiv__'])


@pytest.fixture(params=_all_arithmetic_operators)
def all_arithmetic_operators(request):
    """
    Fixture for dunder names for common arithmetic operations.

    Parametrized over every entry of ``_all_arithmetic_operators``.
    """
    return request.param


# Names used to parametrize the ``all_numeric_reductions`` fixture.
_all_numeric_reductions = ['sum', 'max', 'min',
                           'mean', 'prod', 'std', 'var', 'median',
                           'kurt', 'skew']


@pytest.fixture(params=_all_numeric_reductions)
def all_numeric_reductions(request):
    """
    Fixture for numeric reduction names.

    Parametrized over every entry of ``_all_numeric_reductions``.
    """
    return request.param


# Names used to parametrize the ``all_boolean_reductions`` fixture.
_all_boolean_reductions = ['all', 'any']


@pytest.fixture(params=_all_boolean_reductions)
def all_boolean_reductions(request):
    """
    Fixture for boolean reduction names.

    Parametrized over ``'all'`` and ``'any'``.
    """
    return request.param


# Items of ``SelectionMixin._cython_table``; ``_get_cython_table_params``
# unpacks each item as a ``(func, name)`` pair.
_cython_table = pd.core.base.SelectionMixin._cython_table.items()


@pytest.fixture(params=list(_cython_table))
def cython_table_items(request):
    """
    Fixture parametrized over every item of ``_cython_table``.
    """
    return request.param


def _get_cython_table_params(ndframe, func_names_and_expected):
    """
    Combine a frame with method names, their ``_cython_table`` aliases and
    the expected results.

    Parameters
    ----------
    ndframe : DataFrame or Series
    func_names_and_expected : Sequence of two items
        The first item is a name of a NDFrame method ('sum', 'prod') etc.
        The second item is the expected return value

    Returns
    -------
    results : list
        List of ``(ndframe, function, expected)`` tuples.  For each input
        pair the entry keyed by the method name comes first, followed by one
        entry per ``_cython_table`` item whose name equals that method name.
    """
    combos = []
    for method_name, expected in func_names_and_expected:
        combos.append((ndframe, method_name, expected))
        for func, alias in _cython_table:
            if alias == method_name:
                combos.append((ndframe, func, expected))
    return combos


@pytest.fixture(params=['__eq__', '__ne__', '__le__',
                        '__lt__', '__ge__', '__gt__'])
def all_compare_operators(request):
    """
    Fixture for dunder names for common compare operations

    * ==
    * !=
    * <=
    * <
    * >=
    * >
    """
    return request.param


@pytest.fixture(params=[None, 'gzip', 'bz2', 'zip',
                        pytest.param('xz', marks=td.skip_if_no_lzma)])
def compression(request):
    """
    Fixture for trying common compression types in compression tests.

    Parametrized over ``None``, ``'gzip'``, ``'bz2'``, ``'zip'`` and
    ``'xz'``; the ``'xz'`` case carries the ``td.skip_if_no_lzma`` mark.
    """
    return request.param


@pytest.fixture(params=['gzip', 'bz2', 'zip',
                        pytest.param('xz', marks=td.skip_if_no_lzma)])
def compression_only(request):
    """
    Fixture for trying common compression types in compression tests excluding
    uncompressed case.

    Parametrized over ``'gzip'``, ``'bz2'``, ``'zip'`` and ``'xz'``; the
    ``'xz'`` case carries the ``td.skip_if_no_lzma`` mark.
    """
    return request.param


@pytest.fixture(params=[True, False])
def writable(request):
    """
    Boolean fixture (``True`` / ``False``) stating whether an array is
    writable.
    """
    return request.param


@pytest.fixture(scope='module')
def datetime_tz_utc():
    """
    Module-scoped fixture returning ``datetime.timezone.utc``.
    """
    # Imported here rather than at module level.
    from datetime import timezone
    return timezone.utc


# UTC given as two strings, the pytz ``utc`` object and a dateutil ``tzutc``
# instance; ``datetime.timezone.utc`` is appended when PY3 is true.
utc_objs = ['utc', 'dateutil/UTC', utc, tzutc()]
if PY3:
    from datetime import timezone
    utc_objs.append(timezone.utc)


@pytest.fixture(params=utc_objs)
def utc_fixture(request):
    """
    Fixture to provide variants of UTC timezone strings and tzinfo objects.

    Parametrized over every entry of ``utc_objs``.
    """
    return request.param


@pytest.fixture(params=['inner', 'outer', 'left', 'right'])
def join_type(request):
    """
    Fixture for trying all types of join operations:
    ``'inner'``, ``'outer'``, ``'left'`` and ``'right'``.
    """
    return request.param


@pytest.fixture
def strict_data_files(pytestconfig):
    """
    Return the value of the ``--strict-data-files`` command line option.
    """
    return pytestconfig.getoption("--strict-data-files")


@pytest.fixture
def datapath(strict_data_files):
    """
    Return a callable that resolves the path to a data file.

    The callable joins its positional arguments onto the ``tests`` directory
    that sits next to this file.

    Parameters
    ----------
    *args : str
        Path components of the file, relative to the ``tests`` directory.

    Returns
    -------
    path : str
        The joined path, including the ``tests`` directory.

    Raises
    ------
    ValueError
        If the path doesn't exist and the --strict-data-files option is set.
        Without that option ``pytest.skip`` is called instead.
    """
    tests_root = os.path.join(os.path.dirname(__file__), 'tests')

    def deco(*args):
        path = os.path.join(tests_root, *args)
        if os.path.exists(path):
            return path

        if strict_data_files:
            msg = "Could not find file {} and --strict-data-files is set."
            raise ValueError(msg.format(path))

        msg = "Could not find {}."
        pytest.skip(msg.format(path))
        return path

    return deco


@pytest.fixture
def iris(datapath):
    """
    The iris dataset as a DataFrame, read from ``data/iris.csv`` as resolved
    by ``datapath``.
    """
    csv_path = datapath('data', 'iris.csv')
    return pd.read_csv(csv_path)


@pytest.fixture(params=['nlargest', 'nsmallest'])
def nselect_method(request):
    """
    Fixture for trying all nselect methods:
    ``'nlargest'`` and ``'nsmallest'``.
    """
    return request.param


@pytest.fixture(params=['left', 'right', 'both', 'neither'])
def closed(request):
    """
    Fixture for trying all interval closed parameters:
    ``'left'``, ``'right'``, ``'both'`` and ``'neither'``.
    """
    return request.param


@pytest.fixture(params=['left', 'right', 'both', 'neither'])
def other_closed(request):
    """
    Secondary closed fixture to allow parametrizing over all pairs of closed.

    Takes the same four values as ``closed``.
    """
    return request.param


# ``np.float`` was only an alias of the builtin ``float``; the alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24, so the builtin is used
# directly.  The resulting parameter is the same kind of object as before
# (a Python float NaN distinct from the other NaN entries).
@pytest.fixture(params=[None, np.nan, pd.NaT, float('nan'), float('NaN')])
def nulls_fixture(request):
    """
    Fixture for each null type in pandas.

    Parametrized over ``None``, ``np.nan``, ``pd.NaT`` and two separately
    created Python float NaN objects.
    """
    return request.param


nulls_fixture2 = nulls_fixture  # Generate cartesian product of nulls_fixture


@pytest.fixture(params=[None, np.nan, pd.NaT])
def unique_nulls_fixture(request):
    """
    Fixture for each null type in pandas, each null type exactly once:
    ``None``, ``np.nan`` and ``pd.NaT``.
    """
    return request.param


# Generate cartesian product of unique_nulls_fixture:
unique_nulls_fixture2 = unique_nulls_fixture


# Timezones used to parametrize the tz fixtures below: ``None`` first, then
# timezone name strings, dateutil tz objects and pytz fixed offsets.
# ``tz_aware_fixture`` uses ``TIMEZONES[1:]``, so ``None`` must stay first.
TIMEZONES = [None, 'UTC', 'US/Eastern', 'Asia/Tokyo', 'dateutil/US/Pacific',
             'dateutil/Asia/Singapore', tzutc(), tzlocal(), FixedOffset(300),
             FixedOffset(0), FixedOffset(-300)]


# NOTE(review): the docstring below contains a ``{0}`` placeholder, which is
# presumably filled by ``td.parametrize_fixture_doc`` with ``str(TIMEZONES)``;
# keep the placeholder intact when editing the docstring.
@td.parametrize_fixture_doc(str(TIMEZONES))
@pytest.fixture(params=TIMEZONES)
def tz_naive_fixture(request):
    """
    Fixture for trying timezones including default (None): {0}
    """
    return request.param


# NOTE(review): the docstring below contains a ``{0}`` placeholder, which is
# presumably filled by ``td.parametrize_fixture_doc`` with
# ``str(TIMEZONES[1:])``; keep the placeholder intact when editing it.
# ``TIMEZONES[1:]`` drops the leading ``None`` entry.
@td.parametrize_fixture_doc(str(TIMEZONES[1:]))
@pytest.fixture(params=TIMEZONES[1:])
def tz_aware_fixture(request):
    """
    Fixture for trying explicit timezones: {0}
    """
    return request.param


# ----------------------------------------------------------------
# Dtypes
# Integer dtypes; the "EA" lists hold the capitalised dtype names.
UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"]
UNSIGNED_EA_INT_DTYPES = ["UInt8", "UInt16", "UInt32", "UInt64"]
SIGNED_INT_DTYPES = [int, "int8", "int16", "int32", "int64"]
SIGNED_EA_INT_DTYPES = ["Int8", "Int16", "Int32", "Int64"]
# Unsigned entries come first in both combined lists.
ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES
ALL_EA_INT_DTYPES = UNSIGNED_EA_INT_DTYPES + SIGNED_EA_INT_DTYPES

FLOAT_DTYPES = [float, "float32", "float64"]
COMPLEX_DTYPES = [complex, "complex64", "complex128"]
STRING_DTYPES = [str, 'str', 'U']

DATETIME_DTYPES = ['datetime64[ns]', 'M8[ns]']
TIMEDELTA_DTYPES = ['timedelta64[ns]', 'm8[ns]']

BOOL_DTYPES = [bool, 'bool']
BYTES_DTYPES = [bytes, 'bytes']
OBJECT_DTYPES = [object, 'object']

# Floats first, then all integers.
ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES
# ``BYTES_DTYPES * PY3`` repeats the list PY3 times: once when PY3 is true,
# zero times (an empty list) otherwise.
ALL_NUMPY_DTYPES = (ALL_REAL_DTYPES + COMPLEX_DTYPES + STRING_DTYPES
                    + DATETIME_DTYPES + TIMEDELTA_DTYPES + BOOL_DTYPES
                    + OBJECT_DTYPES + BYTES_DTYPES * PY3)  # bytes only for PY3


@pytest.fixture(params=STRING_DTYPES)
def string_dtype(request):
    """
    Parametrized fixture for string dtypes (``STRING_DTYPES``).

    * str
    * 'str'
    * 'U'
    """
    return request.param


@pytest.fixture(params=FLOAT_DTYPES)
def float_dtype(request):
    """
    Parameterized fixture for float dtypes (``FLOAT_DTYPES``).

    * float
    * 'float32'
    * 'float64'
    """

    return request.param


@pytest.fixture(params=COMPLEX_DTYPES)
def complex_dtype(request):
    """
    Parameterized fixture for complex dtypes (``COMPLEX_DTYPES``).

    * complex
    * 'complex64'
    * 'complex128'
    """

    return request.param


@pytest.fixture(params=SIGNED_INT_DTYPES)
def sint_dtype(request):
    """
    Parameterized fixture for signed integer dtypes (``SIGNED_INT_DTYPES``).

    * int
    * 'int8'
    * 'int16'
    * 'int32'
    * 'int64'
    """

    return request.param


@pytest.fixture(params=UNSIGNED_INT_DTYPES)
def uint_dtype(request):
    """
    Parameterized fixture for unsigned integer dtypes
    (``UNSIGNED_INT_DTYPES``).

    * 'uint8'
    * 'uint16'
    * 'uint32'
    * 'uint64'
    """

    return request.param


@pytest.fixture(params=ALL_INT_DTYPES)
def any_int_dtype(request):
    """
    Parameterized fixture for any integer dtype (``ALL_INT_DTYPES``).

    The parameters run in this order (unsigned first, then signed):

    * 'uint8'
    * 'uint16'
    * 'uint32'
    * 'uint64'
    * int
    * 'int8'
    * 'int16'
    * 'int32'
    * 'int64'
    """

    return request.param


@pytest.fixture(params=ALL_REAL_DTYPES)
def any_real_dtype(request):
    """
    Parameterized fixture for any (purely) real numeric dtype
    (``ALL_REAL_DTYPES``).

    The parameters run in this order (floats, unsigned, signed):

    * float
    * 'float32'
    * 'float64'
    * 'uint8'
    * 'uint16'
    * 'uint32'
    * 'uint64'
    * int
    * 'int8'
    * 'int16'
    * 'int32'
    * 'int64'
    """

    return request.param


@pytest.fixture(params=ALL_NUMPY_DTYPES)
def any_numpy_dtype(request):
    """
    Parameterized fixture for all numpy dtypes (``ALL_NUMPY_DTYPES``).

    The parameters run in this order:

    * float
    * 'float32'
    * 'float64'
    * 'uint8'
    * 'uint16'
    * 'uint32'
    * 'uint64'
    * int
    * 'int8'
    * 'int16'
    * 'int32'
    * 'int64'
    * complex
    * 'complex64'
    * 'complex128'
    * str
    * 'str'
    * 'U'
    * 'datetime64[ns]'
    * 'M8[ns]'
    * 'timedelta64[ns]'
    * 'm8[ns]'
    * bool
    * 'bool'
    * object
    * 'object'
    * bytes (only for PY3)
    * 'bytes' (only for PY3)
    """

    return request.param


# categoricals are handled separately
# Each entry is an ``(inferred type label, list of values)`` pair.  Most value
# lists hold a null in the middle position; the ``'empty'`` entries are all
# NaN or have no values at all.  Several labels depend on PY3.
_any_skipna_inferred_dtype = [
    ('string', ['a', np.nan, 'c']),
    ('unicode' if not PY3 else 'string', [u('a'), np.nan, u('c')]),
    ('bytes' if PY3 else 'string', [b'a', np.nan, b'c']),
    ('empty', [np.nan, np.nan, np.nan]),
    ('empty', []),
    ('mixed-integer', ['a', np.nan, 2]),
    ('mixed', ['a', np.nan, 2.0]),
    ('floating', [1.0, np.nan, 2.0]),
    ('integer', [1, np.nan, 2]),
    ('mixed-integer-float', [1, np.nan, 2.0]),
    ('decimal', [Decimal(1), np.nan, Decimal(2)]),
    ('boolean', [True, np.nan, False]),
    ('datetime64', [np.datetime64('2013-01-01'), np.nan,
                    np.datetime64('2018-01-01')]),
    ('datetime', [pd.Timestamp('20130101'), np.nan, pd.Timestamp('20180101')]),
    ('date', [date(2013, 1, 1), np.nan, date(2018, 1, 1)]),
    # The following two dtypes are commented out due to GH 23554
    # ('complex', [1 + 1j, np.nan, 2 + 2j]),
    # ('timedelta64', [np.timedelta64(1, 'D'),
    #                  np.nan, np.timedelta64(2, 'D')]),
    ('timedelta', [timedelta(1), np.nan, timedelta(2)]),
    ('time', [time(1), np.nan, time(2)]),
    ('period', [pd.Period(2013), pd.NaT, pd.Period(2018)]),
    ('interval', [pd.Interval(0, 1), np.nan, pd.Interval(0, 2)])]
# Split off the labels (first element of every pair) for use as fixture ids.
ids, _ = zip(*_any_skipna_inferred_dtype)  # use inferred type as fixture-id


@pytest.fixture(params=_any_skipna_inferred_dtype, ids=ids)
def any_skipna_inferred_dtype(request):
    """
    Fixture for all inferred dtypes from _libs.lib.infer_dtype

    The covered (inferred) types are:
    * 'string'
    * 'unicode' (if PY2)
    * 'empty'
    * 'bytes' (if PY3)
    * 'mixed'
    * 'mixed-integer'
    * 'mixed-integer-float'
    * 'floating'
    * 'integer'
    * 'decimal'
    * 'boolean'
    * 'datetime64'
    * 'datetime'
    * 'date'
    * 'timedelta'
    * 'time'
    * 'period'
    * 'interval'

    Returns
    -------
    inferred_dtype : str
        The string for the inferred dtype from _libs.lib.infer_dtype
    values : np.ndarray
        An array of object dtype that will be inferred to have
        `inferred_dtype`

    Examples
    --------
    >>> import pandas._libs.lib as lib
    >>>
    >>> def test_something(any_skipna_inferred_dtype):
    ...     inferred_dtype, values = any_skipna_inferred_dtype
    ...     # will pass
    ...     assert lib.infer_dtype(values, skipna=True) == inferred_dtype
    """
    label, raw_values = request.param

    # Building the array with object dtype keeps the values from being cast.
    # Correctness of inference is tested in tests/dtypes/test_inference.py
    return label, np.array(raw_values, dtype=object)


# Parametrized over every name listed in ``pd.offsets.__all__`` whose
# attribute on ``pd.offsets`` is a subclass of ``pd.offsets.Tick``.
@pytest.fixture(params=[getattr(pd.offsets, o) for o in pd.offsets.__all__ if
                        issubclass(getattr(pd.offsets, o), pd.offsets.Tick)])
def tick_classes(request):
    """
    Fixture for Tick based datetime offsets available for a time series.
    """
    return request.param

# ----------------------------------------------------------------
# Global setup for tests using Hypothesis


# Registering these strategies makes them globally available via
# st.from_type, which is used for offsets in
# tests/tseries/offsets/test_offsets_properties.py
# Month-based offsets: built from ``n`` in [-99, 99] and ``normalize``.
for name in ('MonthBegin', 'MonthEnd', 'BMonthBegin', 'BMonthEnd'):
    cls = getattr(pd.tseries.offsets, name)
    st.register_type_strategy(
        cls,
        st.builds(cls, n=st.integers(-99, 99), normalize=st.booleans()))

# Year-based offsets: ``n`` in [-5, 5] plus a ``month`` in [1, 12].
for name in ('YearBegin', 'YearEnd', 'BYearBegin', 'BYearEnd'):
    cls = getattr(pd.tseries.offsets, name)
    st.register_type_strategy(
        cls,
        st.builds(cls,
                  n=st.integers(-5, 5),
                  normalize=st.booleans(),
                  month=st.integers(min_value=1, max_value=12)))

# Quarter-based offsets: ``n`` in [-24, 24] plus ``startingMonth`` in [1, 12].
for name in ('QuarterBegin', 'QuarterEnd', 'BQuarterBegin', 'BQuarterEnd'):
    cls = getattr(pd.tseries.offsets, name)
    st.register_type_strategy(
        cls,
        st.builds(cls,
                  n=st.integers(-24, 24),
                  normalize=st.booleans(),
                  startingMonth=st.integers(min_value=1, max_value=12)))