1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535353635373538353935403541354235433544354535463547354835493550355135523553355435553556355735583559356035613562356335643565356635673568356935703571357235733574357535763577357835793580358135823583358435853586358735883589359035913592359335943595359635973598359936003601360236033604360536063607360836093610361136123613361436153616361736183619362036213622362336243625362636273628362936303631363236333634363536363637363836393640364136423643364436453646364736483649365036513652365336543655365636573658365936603661366236633664366536663667366836693670367136723673367436753676367736783679368036813682368336843685368636873688368936903691369236933694369536963697369836993700370137023703370437053706370737083709371037113712371337143715371637173718371937203721372237233724372537263727372837293730373137323733373437353736373737383739374037413742374337443745374637473748374937503751375237533754375537563757375837593760376137623763376437653766376737683769377037713772377337743775377637773778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657465846594660466146624663466446654666466746684669467046714672467346744675467646774678467946804681468246834684468546864687468846894690469146924693469446954696469746984699470047014702470347044705470647074708470947104711471247134714471547164717471847194720472147224723472447254726472747284729473047314732473347344735473647374738473947404741474247434744474547464747474847494750475147524753475447554756475747584759476047614762476347644765476647674768476947704771477247734774477547764777477847794780478147824783478447854786478747884789479047914792479347944795479647974798479948004801480248034804480548064807480848094810481148124813481448154816481748184819482048214822482348244825482648274828482948304831483248334834483548364837483848394840484148424843484448454846484748484849485048514852485348544855485648574858485948604861486248634864486548664867486848694870487148724873487448754876487748784879488048814882488348844885488648874888488948904891 |
- # pylint: disable-msg=E1101,W0613,W0603
- """
- High level interface to PyTables for reading and writing pandas data structures
- to disk
- """
- import copy
- from datetime import date, datetime
- from distutils.version import LooseVersion
- import itertools
- import os
- import re
- import time
- import warnings
- import numpy as np
- from pandas._libs import algos, lib, writers as libwriters
- from pandas._libs.tslibs import timezones
- from pandas.compat import PY3, filter, lrange, range, string_types
- from pandas.errors import PerformanceWarning
- from pandas.core.dtypes.common import (
- ensure_int64, ensure_object, ensure_platform_int, is_categorical_dtype,
- is_datetime64_dtype, is_datetime64tz_dtype, is_list_like,
- is_timedelta64_dtype)
- from pandas.core.dtypes.missing import array_equivalent
- from pandas import (
- DataFrame, DatetimeIndex, Index, Int64Index, MultiIndex, Panel,
- PeriodIndex, Series, SparseDataFrame, SparseSeries, TimedeltaIndex, compat,
- concat, isna, to_datetime)
- from pandas.core import config
- from pandas.core.algorithms import match, unique
- from pandas.core.arrays.categorical import (
- Categorical, _factorize_from_iterables)
- from pandas.core.arrays.sparse import BlockIndex, IntIndex
- from pandas.core.base import StringMixin
- import pandas.core.common as com
- from pandas.core.computation.pytables import Expr, maybe_expression
- from pandas.core.config import get_option
- from pandas.core.index import ensure_index
- from pandas.core.internals import (
- BlockManager, _block2d_to_blocknd, _block_shape, _factor_indexer,
- make_block)
- from pandas.io.common import _stringify_path
- from pandas.io.formats.printing import adjoin, pprint_thing
- # versioning attribute
- _version = '0.15.2'
- # encoding
- # PY3 encoding if we don't specify
- _default_encoding = 'UTF-8'
- def _ensure_decoded(s):
- """ if we have bytes, decode them to unicode """
- if isinstance(s, np.bytes_):
- s = s.decode('UTF-8')
- return s
- def _ensure_encoding(encoding):
- # set the encoding if we need
- if encoding is None:
- if PY3:
- encoding = _default_encoding
- return encoding
- def _ensure_str(name):
- """Ensure that an index / column name is a str (python 3) or
- unicode (python 2); otherwise they may be np.string dtype.
- Non-string dtypes are passed through unchanged.
- https://github.com/pandas-dev/pandas/issues/13492
- """
- if isinstance(name, compat.string_types):
- name = compat.text_type(name)
- return name
- Term = Expr
- def _ensure_term(where, scope_level):
- """
- ensure that the where is a Term or a list of Term
- this makes sure that we are capturing the scope of variables
- that are passed
- create the terms here with a frame_level=2 (we are 2 levels down)
- """
- # only consider list/tuple here as an ndarray is automatically a coordinate
- # list
- level = scope_level + 1
- if isinstance(where, (list, tuple)):
- wlist = []
- for w in filter(lambda x: x is not None, where):
- if not maybe_expression(w):
- wlist.append(w)
- else:
- wlist.append(Term(w, scope_level=level))
- where = wlist
- elif maybe_expression(where):
- where = Term(where, scope_level=level)
- return where
- class PossibleDataLossError(Exception):
- pass
- class ClosedFileError(Exception):
- pass
- class IncompatibilityWarning(Warning):
- pass
- incompatibility_doc = """
- where criteria is being ignored as this version [%s] is too old (or
- not-defined), read the file in and write it out to a new file to upgrade (with
- the copy_to method)
- """
- class AttributeConflictWarning(Warning):
- pass
- attribute_conflict_doc = """
- the [%s] attribute of the existing index is [%s] which conflicts with the new
- [%s], resetting the attribute to None
- """
- class DuplicateWarning(Warning):
- pass
- duplicate_doc = """
- duplicate entries in table, taking most recently appended
- """
- performance_doc = """
- your performance may suffer as PyTables will pickle object types that it cannot
- map directly to c-types [inferred_type->%s,key->%s] [items->%s]
- """
- # formats
- _FORMAT_MAP = {
- u'f': 'fixed',
- u'fixed': 'fixed',
- u't': 'table',
- u'table': 'table',
- }
- format_deprecate_doc = """
- the table keyword has been deprecated
- use the format='fixed(f)|table(t)' keyword instead
- fixed(f) : specifies the Fixed format
- and is the default for put operations
- table(t) : specifies the Table format
- and is the default for append operations
- """
- # map object types
- _TYPE_MAP = {
- Series: u'series',
- SparseSeries: u'sparse_series',
- DataFrame: u'frame',
- SparseDataFrame: u'sparse_frame',
- Panel: u'wide',
- }
- # storer class map
- _STORER_MAP = {
- u'Series': 'LegacySeriesFixed',
- u'DataFrame': 'LegacyFrameFixed',
- u'DataMatrix': 'LegacyFrameFixed',
- u'series': 'SeriesFixed',
- u'sparse_series': 'SparseSeriesFixed',
- u'frame': 'FrameFixed',
- u'sparse_frame': 'SparseFrameFixed',
- u'wide': 'PanelFixed',
- }
- # table class map
- _TABLE_MAP = {
- u'generic_table': 'GenericTable',
- u'appendable_series': 'AppendableSeriesTable',
- u'appendable_multiseries': 'AppendableMultiSeriesTable',
- u'appendable_frame': 'AppendableFrameTable',
- u'appendable_multiframe': 'AppendableMultiFrameTable',
- u'appendable_panel': 'AppendablePanelTable',
- u'worm': 'WORMTable',
- u'legacy_frame': 'LegacyFrameTable',
- u'legacy_panel': 'LegacyPanelTable',
- }
- # axes map
- _AXES_MAP = {
- DataFrame: [0],
- Panel: [1, 2]
- }
- # register our configuration options
- dropna_doc = """
- : boolean
- drop ALL nan rows when appending to a table
- """
- format_doc = """
- : format
- default format writing format, if None, then
- put will default to 'fixed' and append will default to 'table'
- """
- with config.config_prefix('io.hdf'):
- config.register_option('dropna_table', False, dropna_doc,
- validator=config.is_bool)
- config.register_option(
- 'default_format', None, format_doc,
- validator=config.is_one_of_factory(['fixed', 'table', None])
- )
- # oh the troubles to reduce import time
- _table_mod = None
- _table_file_open_policy_is_strict = False
- def _tables():
- global _table_mod
- global _table_file_open_policy_is_strict
- if _table_mod is None:
- import tables
- _table_mod = tables
- # version requirements
- if LooseVersion(tables.__version__) < LooseVersion('3.0.0'):
- raise ImportError("PyTables version >= 3.0.0 is required")
- # set the file open policy
- # return the file open policy; this changes as of pytables 3.1
- # depending on the HDF5 version
- try:
- _table_file_open_policy_is_strict = (
- tables.file._FILE_OPEN_POLICY == 'strict')
- except AttributeError:
- pass
- return _table_mod
- # interface to/from ###
- def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None,
- append=None, **kwargs):
- """ store this object, close it if we opened it """
- if append:
- f = lambda store: store.append(key, value, **kwargs)
- else:
- f = lambda store: store.put(key, value, **kwargs)
- path_or_buf = _stringify_path(path_or_buf)
- if isinstance(path_or_buf, string_types):
- with HDFStore(path_or_buf, mode=mode, complevel=complevel,
- complib=complib) as store:
- f(store)
- else:
- f(path_or_buf)
- def read_hdf(path_or_buf, key=None, mode='r', **kwargs):
- """
- Read from the store, close it if we opened it.
- Retrieve pandas object stored in file, optionally based on where
- criteria
- Parameters
- ----------
- path_or_buf : string, buffer or path object
- Path to the file to open, or an open :class:`pandas.HDFStore` object.
- Supports any object implementing the ``__fspath__`` protocol.
- This includes :class:`pathlib.Path` and py._path.local.LocalPath
- objects.
- .. versionadded:: 0.19.0 support for pathlib, py.path.
- .. versionadded:: 0.21.0 support for __fspath__ protocol.
- key : object, optional
- The group identifier in the store. Can be omitted if the HDF file
- contains a single pandas object.
- mode : {'r', 'r+', 'a'}, optional
- Mode to use when opening the file. Ignored if path_or_buf is a
- :class:`pandas.HDFStore`. Default is 'r'.
- where : list, optional
- A list of Term (or convertible) objects.
- start : int, optional
- Row number to start selection.
- stop : int, optional
- Row number to stop selection.
- columns : list, optional
- A list of columns names to return.
- iterator : bool, optional
- Return an iterator object.
- chunksize : int, optional
- Number of rows to include in an iteration when using an iterator.
- errors : str, default 'strict'
- Specifies how encoding and decoding errors are to be handled.
- See the errors argument for :func:`open` for a full list
- of options.
- **kwargs
- Additional keyword arguments passed to HDFStore.
- Returns
- -------
- item : object
- The selected object. Return type depends on the object stored.
- See Also
- --------
- pandas.DataFrame.to_hdf : Write a HDF file from a DataFrame.
- pandas.HDFStore : Low-level access to HDF files.
- Examples
- --------
- >>> df = pd.DataFrame([[1, 1.0, 'a']], columns=['x', 'y', 'z'])
- >>> df.to_hdf('./store.h5', 'data')
- >>> reread = pd.read_hdf('./store.h5')
- """
- if mode not in ['r', 'r+', 'a']:
- raise ValueError('mode {0} is not allowed while performing a read. '
- 'Allowed modes are r, r+ and a.'.format(mode))
- # grab the scope
- if 'where' in kwargs:
- kwargs['where'] = _ensure_term(kwargs['where'], scope_level=1)
- if isinstance(path_or_buf, HDFStore):
- if not path_or_buf.is_open:
- raise IOError('The HDFStore must be open for reading.')
- store = path_or_buf
- auto_close = False
- else:
- path_or_buf = _stringify_path(path_or_buf)
- if not isinstance(path_or_buf, string_types):
- raise NotImplementedError('Support for generic buffers has not '
- 'been implemented.')
- try:
- exists = os.path.exists(path_or_buf)
- # if filepath is too long
- except (TypeError, ValueError):
- exists = False
- if not exists:
- raise compat.FileNotFoundError(
- 'File {path} does not exist'.format(path=path_or_buf))
- store = HDFStore(path_or_buf, mode=mode, **kwargs)
- # can't auto open/close if we are using an iterator
- # so delegate to the iterator
- auto_close = True
- try:
- if key is None:
- groups = store.groups()
- if len(groups) == 0:
- raise ValueError('No dataset in HDF5 file.')
- candidate_only_group = groups[0]
- # For the HDF file to have only one dataset, all other groups
- # should then be metadata groups for that candidate group. (This
- # assumes that the groups() method enumerates parent groups
- # before their children.)
- for group_to_check in groups[1:]:
- if not _is_metadata_of(group_to_check, candidate_only_group):
- raise ValueError('key must be provided when HDF5 file '
- 'contains multiple datasets.')
- key = candidate_only_group._v_pathname
- return store.select(key, auto_close=auto_close, **kwargs)
- except (ValueError, TypeError):
- # if there is an error, close the store
- try:
- store.close()
- except AttributeError:
- pass
- raise
- def _is_metadata_of(group, parent_group):
- """Check if a given group is a metadata group for a given parent_group."""
- if group._v_depth <= parent_group._v_depth:
- return False
- current = group
- while current._v_depth > 1:
- parent = current._v_parent
- if parent == parent_group and current._v_name == 'meta':
- return True
- current = current._v_parent
- return False
- class HDFStore(StringMixin):
- """
- Dict-like IO interface for storing pandas objects in PyTables
- either Fixed or Table format.
- Parameters
- ----------
- path : string
- File path to HDF5 file
- mode : {'a', 'w', 'r', 'r+'}, default 'a'
- ``'r'``
- Read-only; no data can be modified.
- ``'w'``
- Write; a new file is created (an existing file with the same
- name would be deleted).
- ``'a'``
- Append; an existing file is opened for reading and writing,
- and if the file does not exist it is created.
- ``'r+'``
- It is similar to ``'a'``, but the file must already exist.
- complevel : int, 0-9, default None
- Specifies a compression level for data.
- A value of 0 disables compression.
- complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib'
- Specifies the compression library to be used.
- As of v0.20.2 these additional compressors for Blosc are supported
- (default if no compressor specified: 'blosc:blosclz'):
- {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy',
- 'blosc:zlib', 'blosc:zstd'}.
- Specifying a compression library which is not available issues
- a ValueError.
- fletcher32 : bool, default False
- If applying compression use the fletcher32 checksum
- Examples
- --------
- >>> bar = pd.DataFrame(np.random.randn(10, 4))
- >>> store = pd.HDFStore('test.h5')
- >>> store['foo'] = bar # write to HDF5
- >>> bar = store['foo'] # retrieve
- >>> store.close()
- """
- def __init__(self, path, mode=None, complevel=None, complib=None,
- fletcher32=False, **kwargs):
- if 'format' in kwargs:
- raise ValueError('format is not a defined argument for HDFStore')
- try:
- import tables # noqa
- except ImportError as ex: # pragma: no cover
- raise ImportError('HDFStore requires PyTables, "{ex!s}" problem '
- 'importing'.format(ex=ex))
- if complib is not None and complib not in tables.filters.all_complibs:
- raise ValueError(
- "complib only supports {libs} compression.".format(
- libs=tables.filters.all_complibs))
- if complib is None and complevel is not None:
- complib = tables.filters.default_complib
- self._path = _stringify_path(path)
- if mode is None:
- mode = 'a'
- self._mode = mode
- self._handle = None
- self._complevel = complevel if complevel else 0
- self._complib = complib
- self._fletcher32 = fletcher32
- self._filters = None
- self.open(mode=mode, **kwargs)
- def __fspath__(self):
- return self._path
- @property
- def root(self):
- """ return the root node """
- self._check_if_open()
- return self._handle.root
- @property
- def filename(self):
- return self._path
- def __getitem__(self, key):
- return self.get(key)
- def __setitem__(self, key, value):
- self.put(key, value)
- def __delitem__(self, key):
- return self.remove(key)
- def __getattr__(self, name):
- """ allow attribute access to get stores """
- try:
- return self.get(name)
- except (KeyError, ClosedFileError):
- pass
- raise AttributeError(
- "'{object}' object has no attribute '{name}'".format(
- object=type(self).__name__, name=name))
- def __contains__(self, key):
- """ check for existence of this key
- can match the exact pathname or the pathnm w/o the leading '/'
- """
- node = self.get_node(key)
- if node is not None:
- name = node._v_pathname
- if name == key or name[1:] == key:
- return True
- return False
- def __len__(self):
- return len(self.groups())
- def __unicode__(self):
- return '{type}\nFile path: {path}\n'.format(
- type=type(self), path=pprint_thing(self._path))
- def __enter__(self):
- return self
- def __exit__(self, exc_type, exc_value, traceback):
- self.close()
- def keys(self):
- """
- Return a (potentially unordered) list of the keys corresponding to the
- objects stored in the HDFStore. These are ABSOLUTE path-names (e.g.
- have the leading '/'
- """
- return [n._v_pathname for n in self.groups()]
- def __iter__(self):
- return iter(self.keys())
- def items(self):
- """
- iterate on key->group
- """
- for g in self.groups():
- yield g._v_pathname, g
- iteritems = items
- def open(self, mode='a', **kwargs):
- """
- Open the file in the specified mode
- Parameters
- ----------
- mode : {'a', 'w', 'r', 'r+'}, default 'a'
- See HDFStore docstring or tables.open_file for info about modes
- """
- tables = _tables()
- if self._mode != mode:
- # if we are changing a write mode to read, ok
- if self._mode in ['a', 'w'] and mode in ['r', 'r+']:
- pass
- elif mode in ['w']:
- # this would truncate, raise here
- if self.is_open:
- raise PossibleDataLossError(
- "Re-opening the file [{0}] with mode [{1}] "
- "will delete the current file!"
- .format(self._path, self._mode)
- )
- self._mode = mode
- # close and reopen the handle
- if self.is_open:
- self.close()
- if self._complevel and self._complevel > 0:
- self._filters = _tables().Filters(self._complevel, self._complib,
- fletcher32=self._fletcher32)
- try:
- self._handle = tables.open_file(self._path, self._mode, **kwargs)
- except (IOError) as e: # pragma: no cover
- if 'can not be written' in str(e):
- print(
- 'Opening {path} in read-only mode'.format(path=self._path))
- self._handle = tables.open_file(self._path, 'r', **kwargs)
- else:
- raise
- except (ValueError) as e:
- # trap PyTables >= 3.1 FILE_OPEN_POLICY exception
- # to provide an updated message
- if 'FILE_OPEN_POLICY' in str(e):
- e = ValueError(
- "PyTables [{version}] no longer supports opening multiple "
- "files\n"
- "even in read-only mode on this HDF5 version "
- "[{hdf_version}]. You can accept this\n"
- "and not open the same file multiple times at once,\n"
- "upgrade the HDF5 version, or downgrade to PyTables 3.0.0 "
- "which allows\n"
- "files to be opened multiple times at once\n"
- .format(version=tables.__version__,
- hdf_version=tables.get_hdf5_version()))
- raise e
- except (Exception) as e:
- # trying to read from a non-existent file causes an error which
- # is not part of IOError, make it one
- if self._mode == 'r' and 'Unable to open/create file' in str(e):
- raise IOError(str(e))
- raise
- def close(self):
- """
- Close the PyTables file handle
- """
- if self._handle is not None:
- self._handle.close()
- self._handle = None
- @property
- def is_open(self):
- """
- return a boolean indicating whether the file is open
- """
- if self._handle is None:
- return False
- return bool(self._handle.isopen)
- def flush(self, fsync=False):
- """
- Force all buffered modifications to be written to disk.
- Parameters
- ----------
- fsync : bool (default False)
- call ``os.fsync()`` on the file handle to force writing to disk.
- Notes
- -----
- Without ``fsync=True``, flushing may not guarantee that the OS writes
- to disk. With fsync, the operation will block until the OS claims the
- file has been written; however, other caching layers may still
- interfere.
- """
- if self._handle is not None:
- self._handle.flush()
- if fsync:
- try:
- os.fsync(self._handle.fileno())
- except OSError:
- pass
- def get(self, key):
- """
- Retrieve pandas object stored in file
- Parameters
- ----------
- key : object
- Returns
- -------
- obj : same type as object stored in file
- """
- group = self.get_node(key)
- if group is None:
- raise KeyError('No object named {key} in the file'.format(key=key))
- return self._read_group(group)
- def select(self, key, where=None, start=None, stop=None, columns=None,
- iterator=False, chunksize=None, auto_close=False, **kwargs):
- """
- Retrieve pandas object stored in file, optionally based on where
- criteria
- Parameters
- ----------
- key : object
- where : list of Term (or convertible) objects, optional
- start : integer (defaults to None), row number to start selection
- stop : integer (defaults to None), row number to stop selection
- columns : a list of columns that if not None, will limit the return
- columns
- iterator : boolean, return an iterator, default False
- chunksize : nrows to include in iteration, return an iterator
- auto_close : boolean, should automatically close the store when
- finished, default is False
- Returns
- -------
- The selected object
- """
- group = self.get_node(key)
- if group is None:
- raise KeyError('No object named {key} in the file'.format(key=key))
- # create the storer and axes
- where = _ensure_term(where, scope_level=1)
- s = self._create_storer(group)
- s.infer_axes()
- # function to call on iteration
- def func(_start, _stop, _where):
- return s.read(start=_start, stop=_stop,
- where=_where,
- columns=columns)
- # create the iterator
- it = TableIterator(self, s, func, where=where, nrows=s.nrows,
- start=start, stop=stop, iterator=iterator,
- chunksize=chunksize, auto_close=auto_close)
- return it.get_result()
- def select_as_coordinates(
- self, key, where=None, start=None, stop=None, **kwargs):
- """
- return the selection as an Index
- Parameters
- ----------
- key : object
- where : list of Term (or convertible) objects, optional
- start : integer (defaults to None), row number to start selection
- stop : integer (defaults to None), row number to stop selection
- """
- where = _ensure_term(where, scope_level=1)
- return self.get_storer(key).read_coordinates(where=where, start=start,
- stop=stop, **kwargs)
- def select_column(self, key, column, **kwargs):
- """
- return a single column from the table. This is generally only useful to
- select an indexable
- Parameters
- ----------
- key : object
- column: the column of interest
- Exceptions
- ----------
- raises KeyError if the column is not found (or key is not a valid
- store)
- raises ValueError if the column can not be extracted individually (it
- is part of a data block)
- """
- return self.get_storer(key).read_column(column=column, **kwargs)
- def select_as_multiple(self, keys, where=None, selector=None, columns=None,
- start=None, stop=None, iterator=False,
- chunksize=None, auto_close=False, **kwargs):
- """ Retrieve pandas objects from multiple tables
- Parameters
- ----------
- keys : a list of the tables
- selector : the table to apply the where criteria (defaults to keys[0]
- if not supplied)
- columns : the columns I want back
- start : integer (defaults to None), row number to start selection
- stop : integer (defaults to None), row number to stop selection
- iterator : boolean, return an iterator, default False
- chunksize : nrows to include in iteration, return an iterator
- Exceptions
- ----------
- raises KeyError if keys or selector is not found or keys is empty
- raises TypeError if keys is not a list or tuple
- raises ValueError if the tables are not ALL THE SAME DIMENSIONS
- """
- # default to single select
- where = _ensure_term(where, scope_level=1)
- if isinstance(keys, (list, tuple)) and len(keys) == 1:
- keys = keys[0]
- if isinstance(keys, string_types):
- return self.select(key=keys, where=where, columns=columns,
- start=start, stop=stop, iterator=iterator,
- chunksize=chunksize, **kwargs)
- if not isinstance(keys, (list, tuple)):
- raise TypeError("keys must be a list/tuple")
- if not len(keys):
- raise ValueError("keys must have a non-zero length")
- if selector is None:
- selector = keys[0]
- # collect the tables
- tbls = [self.get_storer(k) for k in keys]
- s = self.get_storer(selector)
- # validate rows
- nrows = None
- for t, k in itertools.chain([(s, selector)], zip(tbls, keys)):
- if t is None:
- raise KeyError("Invalid table [{key}]".format(key=k))
- if not t.is_table:
- raise TypeError(
- "object [{obj}] is not a table, and cannot be used in all "
- "select as multiple".format(obj=t.pathname)
- )
- if nrows is None:
- nrows = t.nrows
- elif t.nrows != nrows:
- raise ValueError(
- "all tables must have exactly the same nrows!")
- # axis is the concentation axes
- axis = list({t.non_index_axes[0][0] for t in tbls})[0]
- def func(_start, _stop, _where):
- # retrieve the objs, _where is always passed as a set of
- # coordinates here
- objs = [t.read(where=_where, columns=columns, start=_start,
- stop=_stop, **kwargs) for t in tbls]
- # concat and return
- return concat(objs, axis=axis,
- verify_integrity=False)._consolidate()
- # create the iterator
- it = TableIterator(self, s, func, where=where, nrows=nrows,
- start=start, stop=stop, iterator=iterator,
- chunksize=chunksize, auto_close=auto_close)
- return it.get_result(coordinates=True)
- def put(self, key, value, format=None, append=False, **kwargs):
- """
- Store object in HDFStore
- Parameters
- ----------
- key : object
- value : {Series, DataFrame, Panel}
- format : 'fixed(f)|table(t)', default is 'fixed'
- fixed(f) : Fixed format
- Fast writing/reading. Not-appendable, nor searchable
- table(t) : Table format
- Write as a PyTables Table structure which may perform
- worse but allow more flexible operations like searching
- / selecting subsets of the data
- append : boolean, default False
- This will force Table format, append the input data to the
- existing.
- data_columns : list of columns to create as data columns, or True to
- use all columns. See
- `here <http://pandas.pydata.org/pandas-docs/stable/io.html#query-via-data-columns>`__ # noqa
- encoding : default None, provide an encoding for strings
- dropna : boolean, default False, do not write an ALL nan row to
- the store settable by the option 'io.hdf.dropna_table'
- """
- if format is None:
- format = get_option("io.hdf.default_format") or 'fixed'
- kwargs = self._validate_format(format, kwargs)
- self._write_to_group(key, value, append=append, **kwargs)
- def remove(self, key, where=None, start=None, stop=None):
- """
- Remove pandas object partially by specifying the where condition
- Parameters
- ----------
- key : string
- Node to remove or delete rows from
- where : list of Term (or convertible) objects, optional
- start : integer (defaults to None), row number to start selection
- stop : integer (defaults to None), row number to stop selection
- Returns
- -------
- number of rows removed (or None if not a Table)
- Exceptions
- ----------
- raises KeyError if key is not a valid store
- """
- where = _ensure_term(where, scope_level=1)
- try:
- s = self.get_storer(key)
- except KeyError:
- # the key is not a valid store, re-raising KeyError
- raise
- except Exception:
- if where is not None:
- raise ValueError(
- "trying to remove a node with a non-None where clause!")
- # we are actually trying to remove a node (with children)
- s = self.get_node(key)
- if s is not None:
- s._f_remove(recursive=True)
- return None
- # remove the node
- if com._all_none(where, start, stop):
- s.group._f_remove(recursive=True)
- # delete from the table
- else:
- if not s.is_table:
- raise ValueError(
- 'can only remove with where on objects written as tables')
- return s.delete(where=where, start=start, stop=stop)
- def append(self, key, value, format=None, append=True, columns=None,
- dropna=None, **kwargs):
- """
- Append to Table in file. Node must already exist and be Table
- format.
- Parameters
- ----------
- key : object
- value : {Series, DataFrame, Panel}
- format : 'table' is the default
- table(t) : table format
- Write as a PyTables Table structure which may perform
- worse but allow more flexible operations like searching
- / selecting subsets of the data
- append : boolean, default True, append the input data to the
- existing
- data_columns : list of columns, or True, default None
- List of columns to create as indexed data columns for on-disk
- queries, or True to use all columns. By default only the axes
- of the object are indexed. See `here
- <http://pandas.pydata.org/pandas-docs/stable/io.html#query-via-data-columns>`__.
- min_itemsize : dict of columns that specify minimum string sizes
- nan_rep : string to use as string nan represenation
- chunksize : size to chunk the writing
- expectedrows : expected TOTAL row size of this table
- encoding : default None, provide an encoding for strings
- dropna : boolean, default False, do not write an ALL nan row to
- the store settable by the option 'io.hdf.dropna_table'
- Notes
- -----
- Does *not* check if data being appended overlaps with existing
- data in the table, so be careful
- """
- if columns is not None:
- raise TypeError("columns is not a supported keyword in append, "
- "try data_columns")
- if dropna is None:
- dropna = get_option("io.hdf.dropna_table")
- if format is None:
- format = get_option("io.hdf.default_format") or 'table'
- kwargs = self._validate_format(format, kwargs)
- self._write_to_group(key, value, append=append, dropna=dropna,
- **kwargs)
- def append_to_multiple(self, d, value, selector, data_columns=None,
- axes=None, dropna=False, **kwargs):
- """
- Append to multiple tables
- Parameters
- ----------
- d : a dict of table_name to table_columns, None is acceptable as the
- values of one node (this will get all the remaining columns)
- value : a pandas object
- selector : a string that designates the indexable table; all of its
- columns will be designed as data_columns, unless data_columns is
- passed, in which case these are used
- data_columns : list of columns to create as data columns, or True to
- use all columns
- dropna : if evaluates to True, drop rows from all tables if any single
- row in each table has all NaN. Default False.
- Notes
- -----
- axes parameter is currently not accepted
- """
- if axes is not None:
- raise TypeError("axes is currently not accepted as a parameter to"
- " append_to_multiple; you can create the "
- "tables independently instead")
- if not isinstance(d, dict):
- raise ValueError(
- "append_to_multiple must have a dictionary specified as the "
- "way to split the value"
- )
- if selector not in d:
- raise ValueError(
- "append_to_multiple requires a selector that is in passed dict"
- )
- # figure out the splitting axis (the non_index_axis)
- axis = list(set(range(value.ndim)) - set(_AXES_MAP[type(value)]))[0]
- # figure out how to split the value
- remain_key = None
- remain_values = []
- for k, v in d.items():
- if v is None:
- if remain_key is not None:
- raise ValueError(
- "append_to_multiple can only have one value in d that "
- "is None"
- )
- remain_key = k
- else:
- remain_values.extend(v)
- if remain_key is not None:
- ordered = value.axes[axis]
- ordd = ordered.difference(Index(remain_values))
- ordd = sorted(ordered.get_indexer(ordd))
- d[remain_key] = ordered.take(ordd)
- # data_columns
- if data_columns is None:
- data_columns = d[selector]
- # ensure rows are synchronized across the tables
- if dropna:
- idxs = (value[cols].dropna(how='all').index for cols in d.values())
- valid_index = next(idxs)
- for index in idxs:
- valid_index = valid_index.intersection(index)
- value = value.loc[valid_index]
- # append
- for k, v in d.items():
- dc = data_columns if k == selector else None
- # compute the val
- val = value.reindex(v, axis=axis)
- self.append(k, val, data_columns=dc, **kwargs)
- def create_table_index(self, key, **kwargs):
- """ Create a pytables index on the table
- Parameters
- ----------
- key : object (the node to index)
- Exceptions
- ----------
- raises if the node is not a table
- """
- # version requirements
- _tables()
- s = self.get_storer(key)
- if s is None:
- return
- if not s.is_table:
- raise TypeError(
- "cannot create table index on a Fixed format store")
- s.create_index(**kwargs)
- def groups(self):
- """return a list of all the top-level nodes (that are not themselves a
- pandas storage object)
- """
- _tables()
- self._check_if_open()
- return [
- g for g in self._handle.walk_groups()
- if (not isinstance(g, _table_mod.link.Link) and
- (getattr(g._v_attrs, 'pandas_type', None) or
- getattr(g, 'table', None) or
- (isinstance(g, _table_mod.table.Table) and
- g._v_name != u'table')))
- ]
- def walk(self, where="/"):
- """ Walk the pytables group hierarchy for pandas objects
- This generator will yield the group path, subgroups and pandas object
- names for each group.
- Any non-pandas PyTables objects that are not a group will be ignored.
- The `where` group itself is listed first (preorder), then each of its
- child groups (following an alphanumerical order) is also traversed,
- following the same procedure.
- .. versionadded:: 0.24.0
- Parameters
- ----------
- where : str, optional
- Group where to start walking.
- If not supplied, the root group is used.
- Yields
- ------
- path : str
- Full path to a group (without trailing '/')
- groups : list of str
- names of the groups contained in `path`
- leaves : list of str
- names of the pandas objects contained in `path`
- """
- _tables()
- self._check_if_open()
- for g in self._handle.walk_groups(where):
- if getattr(g._v_attrs, 'pandas_type', None) is not None:
- continue
- groups = []
- leaves = []
- for child in g._v_children.values():
- pandas_type = getattr(child._v_attrs, 'pandas_type', None)
- if pandas_type is None:
- if isinstance(child, _table_mod.group.Group):
- groups.append(child._v_name)
- else:
- leaves.append(child._v_name)
- yield (g._v_pathname.rstrip('/'), groups, leaves)
- def get_node(self, key):
- """ return the node with the key or None if it does not exist """
- self._check_if_open()
- try:
- if not key.startswith('/'):
- key = '/' + key
- return self._handle.get_node(self.root, key)
- except _table_mod.exceptions.NoSuchNodeError:
- return None
- def get_storer(self, key):
- """ return the storer object for a key, raise if not in the file """
- group = self.get_node(key)
- if group is None:
- raise KeyError('No object named {key} in the file'.format(key=key))
- s = self._create_storer(group)
- s.infer_axes()
- return s
- def copy(self, file, mode='w', propindexes=True, keys=None, complib=None,
- complevel=None, fletcher32=False, overwrite=True):
- """ copy the existing store to a new file, upgrading in place
- Parameters
- ----------
- propindexes: restore indexes in copied file (defaults to True)
- keys : list of keys to include in the copy (defaults to all)
- overwrite : overwrite (remove and replace) existing nodes in the
- new store (default is True)
- mode, complib, complevel, fletcher32 same as in HDFStore.__init__
- Returns
- -------
- open file handle of the new store
- """
- new_store = HDFStore(
- file,
- mode=mode,
- complib=complib,
- complevel=complevel,
- fletcher32=fletcher32)
- if keys is None:
- keys = list(self.keys())
- if not isinstance(keys, (tuple, list)):
- keys = [keys]
- for k in keys:
- s = self.get_storer(k)
- if s is not None:
- if k in new_store:
- if overwrite:
- new_store.remove(k)
- data = self.select(k)
- if s.is_table:
- index = False
- if propindexes:
- index = [a.name for a in s.axes if a.is_indexed]
- new_store.append(
- k, data, index=index,
- data_columns=getattr(s, 'data_columns', None),
- encoding=s.encoding
- )
- else:
- new_store.put(k, data, encoding=s.encoding)
- return new_store
- def info(self):
- """
- Print detailed information on the store.
- .. versionadded:: 0.21.0
- """
- output = '{type}\nFile path: {path}\n'.format(
- type=type(self), path=pprint_thing(self._path))
- if self.is_open:
- lkeys = sorted(list(self.keys()))
- if len(lkeys):
- keys = []
- values = []
- for k in lkeys:
- try:
- s = self.get_storer(k)
- if s is not None:
- keys.append(pprint_thing(s.pathname or k))
- values.append(
- pprint_thing(s or 'invalid_HDFStore node'))
- except Exception as detail:
- keys.append(k)
- values.append(
- "[invalid_HDFStore node: {detail}]".format(
- detail=pprint_thing(detail)))
- output += adjoin(12, keys, values)
- else:
- output += 'Empty'
- else:
- output += "File is CLOSED"
- return output
- # private methods ######
- def _check_if_open(self):
- if not self.is_open:
- raise ClosedFileError("{0} file is not open!".format(self._path))
- def _validate_format(self, format, kwargs):
- """ validate / deprecate formats; return the new kwargs """
- kwargs = kwargs.copy()
- # validate
- try:
- kwargs['format'] = _FORMAT_MAP[format.lower()]
- except KeyError:
- raise TypeError("invalid HDFStore format specified [{0}]"
- .format(format))
- return kwargs
- def _create_storer(self, group, format=None, value=None, append=False,
- **kwargs):
- """ return a suitable class to operate """
- def error(t):
- raise TypeError(
- "cannot properly create the storer for: [{t}] [group->"
- "{group},value->{value},format->{format},append->{append},"
- "kwargs->{kwargs}]".format(t=t, group=group,
- value=type(value), format=format,
- append=append, kwargs=kwargs))
- pt = _ensure_decoded(getattr(group._v_attrs, 'pandas_type', None))
- tt = _ensure_decoded(getattr(group._v_attrs, 'table_type', None))
- # infer the pt from the passed value
- if pt is None:
- if value is None:
- _tables()
- if (getattr(group, 'table', None) or
- isinstance(group, _table_mod.table.Table)):
- pt = u'frame_table'
- tt = u'generic_table'
- else:
- raise TypeError(
- "cannot create a storer if the object is not existing "
- "nor a value are passed")
- else:
- try:
- pt = _TYPE_MAP[type(value)]
- except KeyError:
- error('_TYPE_MAP')
- # we are actually a table
- if format == 'table':
- pt += u'_table'
- # a storer node
- if u'table' not in pt:
- try:
- return globals()[_STORER_MAP[pt]](self, group, **kwargs)
- except KeyError:
- error('_STORER_MAP')
- # existing node (and must be a table)
- if tt is None:
- # if we are a writer, determine the tt
- if value is not None:
- if pt == u'series_table':
- index = getattr(value, 'index', None)
- if index is not None:
- if index.nlevels == 1:
- tt = u'appendable_series'
- elif index.nlevels > 1:
- tt = u'appendable_multiseries'
- elif pt == u'frame_table':
- index = getattr(value, 'index', None)
- if index is not None:
- if index.nlevels == 1:
- tt = u'appendable_frame'
- elif index.nlevels > 1:
- tt = u'appendable_multiframe'
- elif pt == u'wide_table':
- tt = u'appendable_panel'
- elif pt == u'ndim_table':
- tt = u'appendable_ndim'
- else:
- # distiguish between a frame/table
- tt = u'legacy_panel'
- try:
- fields = group.table._v_attrs.fields
- if len(fields) == 1 and fields[0] == u'value':
- tt = u'legacy_frame'
- except IndexError:
- pass
- try:
- return globals()[_TABLE_MAP[tt]](self, group, **kwargs)
- except KeyError:
- error('_TABLE_MAP')
- def _write_to_group(self, key, value, format, index=True, append=False,
- complib=None, encoding=None, **kwargs):
- group = self.get_node(key)
- # remove the node if we are not appending
- if group is not None and not append:
- self._handle.remove_node(group, recursive=True)
- group = None
- # we don't want to store a table node at all if are object is 0-len
- # as there are not dtypes
- if getattr(value, 'empty', None) and (format == 'table' or append):
- return
- if group is None:
- paths = key.split('/')
- # recursively create the groups
- path = '/'
- for p in paths:
- if not len(p):
- continue
- new_path = path
- if not path.endswith('/'):
- new_path += '/'
- new_path += p
- group = self.get_node(new_path)
- if group is None:
- group = self._handle.create_group(path, p)
- path = new_path
- s = self._create_storer(group, format, value, append=append,
- encoding=encoding, **kwargs)
- if append:
- # raise if we are trying to append to a Fixed format,
- # or a table that exists (and we are putting)
- if (not s.is_table or
- (s.is_table and format == 'fixed' and s.is_exists)):
- raise ValueError('Can only append to Tables')
- if not s.is_exists:
- s.set_object_info()
- else:
- s.set_object_info()
- if not s.is_table and complib:
- raise ValueError(
- 'Compression not supported on Fixed format stores'
- )
- # write the object
- s.write(obj=value, append=append, complib=complib, **kwargs)
- if s.is_table and index:
- s.create_index(columns=index)
- def _read_group(self, group, **kwargs):
- s = self._create_storer(group)
- s.infer_axes()
- return s.read(**kwargs)
- class TableIterator(object):
- """ define the iteration interface on a table
- Parameters
- ----------
- store : the reference store
- s : the referred storer
- func : the function to execute the query
- where : the where of the query
- nrows : the rows to iterate on
- start : the passed start value (default is None)
- stop : the passed stop value (default is None)
- iterator : boolean, whether to use the default iterator
- chunksize : the passed chunking value (default is 50000)
- auto_close : boolean, automatically close the store at the end of
- iteration, default is False
- kwargs : the passed kwargs
- """
- def __init__(self, store, s, func, where, nrows, start=None, stop=None,
- iterator=False, chunksize=None, auto_close=False):
- self.store = store
- self.s = s
- self.func = func
- self.where = where
- # set start/stop if they are not set if we are a table
- if self.s.is_table:
- if nrows is None:
- nrows = 0
- if start is None:
- start = 0
- if stop is None:
- stop = nrows
- stop = min(nrows, stop)
- self.nrows = nrows
- self.start = start
- self.stop = stop
- self.coordinates = None
- if iterator or chunksize is not None:
- if chunksize is None:
- chunksize = 100000
- self.chunksize = int(chunksize)
- else:
- self.chunksize = None
- self.auto_close = auto_close
- def __iter__(self):
- # iterate
- current = self.start
- while current < self.stop:
- stop = min(current + self.chunksize, self.stop)
- value = self.func(None, None, self.coordinates[current:stop])
- current = stop
- if value is None or not len(value):
- continue
- yield value
- self.close()
- def close(self):
- if self.auto_close:
- self.store.close()
- def get_result(self, coordinates=False):
- # return the actual iterator
- if self.chunksize is not None:
- if not self.s.is_table:
- raise TypeError(
- "can only use an iterator or chunksize on a table")
- self.coordinates = self.s.read_coordinates(where=self.where)
- return self
- # if specified read via coordinates (necessary for multiple selections
- if coordinates:
- where = self.s.read_coordinates(where=self.where, start=self.start,
- stop=self.stop)
- else:
- where = self.where
- # directly return the result
- results = self.func(self.start, self.stop, where)
- self.close()
- return results
- class IndexCol(StringMixin):
- """ an index column description class
- Parameters
- ----------
- axis : axis which I reference
- values : the ndarray like converted values
- kind : a string description of this type
- typ : the pytables type
- pos : the position in the pytables
- """
- is_an_indexable = True
- is_data_indexable = True
- _info_fields = ['freq', 'tz', 'index_name']
- def __init__(self, values=None, kind=None, typ=None, cname=None,
- itemsize=None, name=None, axis=None, kind_attr=None,
- pos=None, freq=None, tz=None, index_name=None, **kwargs):
- self.values = values
- self.kind = kind
- self.typ = typ
- self.itemsize = itemsize
- self.name = name
- self.cname = cname
- self.kind_attr = kind_attr
- self.axis = axis
- self.pos = pos
- self.freq = freq
- self.tz = tz
- self.index_name = index_name
- self.table = None
- self.meta = None
- self.metadata = None
- if name is not None:
- self.set_name(name, kind_attr)
- if pos is not None:
- self.set_pos(pos)
- def set_name(self, name, kind_attr=None):
- """ set the name of this indexer """
- self.name = name
- self.kind_attr = kind_attr or "{name}_kind".format(name=name)
- if self.cname is None:
- self.cname = name
- return self
- def set_axis(self, axis):
- """ set the axis over which I index """
- self.axis = axis
- return self
- def set_pos(self, pos):
- """ set the position of this column in the Table """
- self.pos = pos
- if pos is not None and self.typ is not None:
- self.typ._v_pos = pos
- return self
- def set_table(self, table):
- self.table = table
- return self
- def __unicode__(self):
- temp = tuple(
- map(pprint_thing,
- (self.name,
- self.cname,
- self.axis,
- self.pos,
- self.kind)))
- return ','.join(("{key}->{value}".format(key=key, value=value)
- for key, value in zip(
- ['name', 'cname', 'axis', 'pos', 'kind'], temp)))
- def __eq__(self, other):
- """ compare 2 col items """
- return all(getattr(self, a, None) == getattr(other, a, None)
- for a in ['name', 'cname', 'axis', 'pos'])
- def __ne__(self, other):
- return not self.__eq__(other)
- @property
- def is_indexed(self):
- """ return whether I am an indexed column """
- try:
- return getattr(self.table.cols, self.cname).is_indexed
- except AttributeError:
- False
- def copy(self):
- new_self = copy.copy(self)
- return new_self
- def infer(self, handler):
- """infer this column from the table: create and return a new object"""
- table = handler.table
- new_self = self.copy()
- new_self.set_table(table)
- new_self.get_attr()
- new_self.read_metadata(handler)
- return new_self
- def convert(self, values, nan_rep, encoding, errors):
- """ set the values from this selection: take = take ownership """
- # values is a recarray
- if values.dtype.fields is not None:
- values = values[self.cname]
- values = _maybe_convert(values, self.kind, encoding, errors)
- kwargs = dict()
- if self.freq is not None:
- kwargs['freq'] = _ensure_decoded(self.freq)
- if self.index_name is not None:
- kwargs['name'] = _ensure_decoded(self.index_name)
- # making an Index instance could throw a number of different errors
- try:
- self.values = Index(values, **kwargs)
- except Exception: # noqa: E722
- # if the output freq is different that what we recorded,
- # it should be None (see also 'doc example part 2')
- if 'freq' in kwargs:
- kwargs['freq'] = None
- self.values = Index(values, **kwargs)
- self.values = _set_tz(self.values, self.tz)
- return self
- def take_data(self):
- """ return the values & release the memory """
- self.values, values = None, self.values
- return values
- @property
- def attrs(self):
- return self.table._v_attrs
- @property
- def description(self):
- return self.table.description
- @property
- def col(self):
- """ return my current col description """
- return getattr(self.description, self.cname, None)
- @property
- def cvalues(self):
- """ return my cython values """
- return self.values
- def __iter__(self):
- return iter(self.values)
- def maybe_set_size(self, min_itemsize=None):
- """ maybe set a string col itemsize:
- min_itemsize can be an integer or a dict with this columns name
- with an integer size """
- if _ensure_decoded(self.kind) == u'string':
- if isinstance(min_itemsize, dict):
- min_itemsize = min_itemsize.get(self.name)
- if min_itemsize is not None and self.typ.itemsize < min_itemsize:
- self.typ = _tables(
- ).StringCol(itemsize=min_itemsize, pos=self.pos)
- def validate(self, handler, append):
- self.validate_names()
- def validate_names(self):
- pass
- def validate_and_set(self, handler, append):
- self.set_table(handler.table)
- self.validate_col()
- self.validate_attr(append)
- self.validate_metadata(handler)
- self.write_metadata(handler)
- self.set_attr()
- def validate_col(self, itemsize=None):
- """ validate this column: return the compared against itemsize """
- # validate this column for string truncation (or reset to the max size)
- if _ensure_decoded(self.kind) == u'string':
- c = self.col
- if c is not None:
- if itemsize is None:
- itemsize = self.itemsize
- if c.itemsize < itemsize:
- raise ValueError(
- "Trying to store a string with len [{itemsize}] in "
- "[{cname}] column but\nthis column has a limit of "
- "[{c_itemsize}]!\nConsider using min_itemsize to "
- "preset the sizes on these columns".format(
- itemsize=itemsize, cname=self.cname,
- c_itemsize=c.itemsize))
- return c.itemsize
- return None
- def validate_attr(self, append):
- # check for backwards incompatibility
- if append:
- existing_kind = getattr(self.attrs, self.kind_attr, None)
- if existing_kind is not None and existing_kind != self.kind:
- raise TypeError(
- "incompatible kind in col [{existing} - "
- "{self_kind}]".format(
- existing=existing_kind, self_kind=self.kind))
- def update_info(self, info):
- """ set/update the info for this indexable with the key/value
- if there is a conflict raise/warn as needed """
- for key in self._info_fields:
- value = getattr(self, key, None)
- idx = _get_info(info, self.name)
- existing_value = idx.get(key)
- if key in idx and value is not None and existing_value != value:
- # frequency/name just warn
- if key in ['freq', 'index_name']:
- ws = attribute_conflict_doc % (key, existing_value, value)
- warnings.warn(ws, AttributeConflictWarning, stacklevel=6)
- # reset
- idx[key] = None
- setattr(self, key, None)
- else:
- raise ValueError(
- "invalid info for [{name}] for [{key}], "
- "existing_value [{existing_value}] conflicts with "
- "new value [{value}]".format(
- name=self.name, key=key,
- existing_value=existing_value, value=value))
- else:
- if value is not None or existing_value is not None:
- idx[key] = value
- return self
- def set_info(self, info):
- """ set my state from the passed info """
- idx = info.get(self.name)
- if idx is not None:
- self.__dict__.update(idx)
- def get_attr(self):
- """ set the kind for this column """
- self.kind = getattr(self.attrs, self.kind_attr, None)
- def set_attr(self):
- """ set the kind for this column """
- setattr(self.attrs, self.kind_attr, self.kind)
- def read_metadata(self, handler):
- """ retrieve the metadata for this columns """
- self.metadata = handler.read_metadata(self.cname)
- def validate_metadata(self, handler):
- """ validate that kind=category does not change the categories """
- if self.meta == 'category':
- new_metadata = self.metadata
- cur_metadata = handler.read_metadata(self.cname)
- if (new_metadata is not None and cur_metadata is not None and
- not array_equivalent(new_metadata, cur_metadata)):
- raise ValueError("cannot append a categorical with "
- "different categories to the existing")
- def write_metadata(self, handler):
- """ set the meta data """
- if self.metadata is not None:
- handler.write_metadata(self.cname, self.metadata)
- class GenericIndexCol(IndexCol):
- """ an index which is not represented in the data of the table """
- @property
- def is_indexed(self):
- return False
- def convert(self, values, nan_rep, encoding, errors):
- """ set the values from this selection: take = take ownership """
- self.values = Int64Index(np.arange(self.table.nrows))
- return self
- def get_attr(self):
- pass
- def set_attr(self):
- pass
- class DataCol(IndexCol):
- """ a data holding column, by definition this is not indexable
- Parameters
- ----------
- data : the actual data
- cname : the column name in the table to hold the data (typically
- values)
- meta : a string description of the metadata
- metadata : the actual metadata
- """
- is_an_indexable = False
- is_data_indexable = False
- _info_fields = ['tz', 'ordered']
- @classmethod
- def create_for_block(
- cls, i=None, name=None, cname=None, version=None, **kwargs):
- """ return a new datacol with the block i """
- if cname is None:
- cname = name or 'values_block_{idx}'.format(idx=i)
- if name is None:
- name = cname
- # prior to 0.10.1, we named values blocks like: values_block_0 an the
- # name values_0
- try:
- if version[0] == 0 and version[1] <= 10 and version[2] == 0:
- m = re.search(r"values_block_(\d+)", name)
- if m:
- name = "values_{group}".format(group=m.groups()[0])
- except IndexError:
- pass
- return cls(name=name, cname=cname, **kwargs)
- def __init__(self, values=None, kind=None, typ=None,
- cname=None, data=None, meta=None, metadata=None,
- block=None, **kwargs):
- super(DataCol, self).__init__(values=values, kind=kind, typ=typ,
- cname=cname, **kwargs)
- self.dtype = None
- self.dtype_attr = u'{name}_dtype'.format(name=self.name)
- self.meta = meta
- self.meta_attr = u'{name}_meta'.format(name=self.name)
- self.set_data(data)
- self.set_metadata(metadata)
- def __unicode__(self):
- temp = tuple(
- map(pprint_thing,
- (self.name,
- self.cname,
- self.dtype,
- self.kind,
- self.shape)))
- return ','.join(("{key}->{value}".format(key=key, value=value)
- for key, value in zip(
- ['name', 'cname', 'dtype', 'kind', 'shape'], temp)))
- def __eq__(self, other):
- """ compare 2 col items """
- return all(getattr(self, a, None) == getattr(other, a, None)
- for a in ['name', 'cname', 'dtype', 'pos'])
- def set_data(self, data, dtype=None):
- self.data = data
- if data is not None:
- if dtype is not None:
- self.dtype = dtype
- self.set_kind()
- elif self.dtype is None:
- self.dtype = data.dtype.name
- self.set_kind()
- def take_data(self):
- """ return the data & release the memory """
- self.data, data = None, self.data
- return data
- def set_metadata(self, metadata):
- """ record the metadata """
- if metadata is not None:
- metadata = np.array(metadata, copy=False).ravel()
- self.metadata = metadata
- def set_kind(self):
- # set my kind if we can
- if self.dtype is not None:
- dtype = _ensure_decoded(self.dtype)
- if dtype.startswith(u'string') or dtype.startswith(u'bytes'):
- self.kind = 'string'
- elif dtype.startswith(u'float'):
- self.kind = 'float'
- elif dtype.startswith(u'complex'):
- self.kind = 'complex'
- elif dtype.startswith(u'int') or dtype.startswith(u'uint'):
- self.kind = 'integer'
- elif dtype.startswith(u'date'):
- self.kind = 'datetime'
- elif dtype.startswith(u'timedelta'):
- self.kind = 'timedelta'
- elif dtype.startswith(u'bool'):
- self.kind = 'bool'
- else:
- raise AssertionError(
- "cannot interpret dtype of [{dtype}] in [{obj}]".format(
- dtype=dtype, obj=self))
- # set my typ if we need
- if self.typ is None:
- self.typ = getattr(self.description, self.cname, None)
- def set_atom(self, block, block_items, existing_col, min_itemsize,
- nan_rep, info, encoding=None, errors='strict'):
- """ create and setup my atom from the block b """
- self.values = list(block_items)
- # short-cut certain block types
- if block.is_categorical:
- return self.set_atom_categorical(block, items=block_items,
- info=info)
- elif block.is_datetimetz:
- return self.set_atom_datetime64tz(block, info=info)
- elif block.is_datetime:
- return self.set_atom_datetime64(block)
- elif block.is_timedelta:
- return self.set_atom_timedelta64(block)
- elif block.is_complex:
- return self.set_atom_complex(block)
- dtype = block.dtype.name
- inferred_type = lib.infer_dtype(block.values, skipna=False)
- if inferred_type == 'date':
- raise TypeError(
- "[date] is not implemented as a table column")
- elif inferred_type == 'datetime':
- # after 8260
- # this only would be hit for a mutli-timezone dtype
- # which is an error
- raise TypeError(
- "too many timezones in this block, create separate "
- "data columns"
- )
- elif inferred_type == 'unicode':
- raise TypeError(
- "[unicode] is not implemented as a table column")
- # this is basically a catchall; if say a datetime64 has nans then will
- # end up here ###
- elif inferred_type == 'string' or dtype == 'object':
- self.set_atom_string(
- block, block_items,
- existing_col,
- min_itemsize,
- nan_rep,
- encoding,
- errors)
- # set as a data block
- else:
- self.set_atom_data(block)
- def get_atom_string(self, block, itemsize):
- return _tables().StringCol(itemsize=itemsize, shape=block.shape[0])
- def set_atom_string(self, block, block_items, existing_col, min_itemsize,
- nan_rep, encoding, errors):
- # fill nan items with myself, don't disturb the blocks by
- # trying to downcast
- block = block.fillna(nan_rep, downcast=False)
- if isinstance(block, list):
- block = block[0]
- data = block.values
- # see if we have a valid string type
- inferred_type = lib.infer_dtype(data.ravel(), skipna=False)
- if inferred_type != 'string':
- # we cannot serialize this data, so report an exception on a column
- # by column basis
- for i, item in enumerate(block_items):
- col = block.iget(i)
- inferred_type = lib.infer_dtype(col.ravel(), skipna=False)
- if inferred_type != 'string':
- raise TypeError(
- "Cannot serialize the column [{item}] because\n"
- "its data contents are [{type}] object dtype".format(
- item=item, type=inferred_type)
- )
- # itemsize is the maximum length of a string (along any dimension)
- data_converted = _convert_string_array(data, encoding, errors)
- itemsize = data_converted.itemsize
- # specified min_itemsize?
- if isinstance(min_itemsize, dict):
- min_itemsize = int(min_itemsize.get(
- self.name) or min_itemsize.get('values') or 0)
- itemsize = max(min_itemsize or 0, itemsize)
- # check for column in the values conflicts
- if existing_col is not None:
- eci = existing_col.validate_col(itemsize)
- if eci > itemsize:
- itemsize = eci
- self.itemsize = itemsize
- self.kind = 'string'
- self.typ = self.get_atom_string(block, itemsize)
- self.set_data(data_converted.astype(
- '|S{size}'.format(size=itemsize), copy=False))
- def get_atom_coltype(self, kind=None):
- """ return the PyTables column class for this column """
- if kind is None:
- kind = self.kind
- if self.kind.startswith('uint'):
- col_name = "UInt{name}Col".format(name=kind[4:])
- else:
- col_name = "{name}Col".format(name=kind.capitalize())
- return getattr(_tables(), col_name)
- def get_atom_data(self, block, kind=None):
- return self.get_atom_coltype(kind=kind)(shape=block.shape[0])
- def set_atom_complex(self, block):
- self.kind = block.dtype.name
- itemsize = int(self.kind.split('complex')[-1]) // 8
- self.typ = _tables().ComplexCol(
- itemsize=itemsize, shape=block.shape[0])
- self.set_data(block.values.astype(self.typ.type, copy=False))
- def set_atom_data(self, block):
- self.kind = block.dtype.name
- self.typ = self.get_atom_data(block)
- self.set_data(block.values.astype(self.typ.type, copy=False))
- def set_atom_categorical(self, block, items, info=None, values=None):
- # currently only supports a 1-D categorical
- # in a 1-D block
- values = block.values
- codes = values.codes
- self.kind = 'integer'
- self.dtype = codes.dtype.name
- if values.ndim > 1:
- raise NotImplementedError("only support 1-d categoricals")
- if len(items) > 1:
- raise NotImplementedError("only support single block categoricals")
- # write the codes; must be in a block shape
- self.ordered = values.ordered
- self.typ = self.get_atom_data(block, kind=codes.dtype.name)
- self.set_data(_block_shape(codes))
- # write the categories
- self.meta = 'category'
- self.set_metadata(block.values.categories)
- # update the info
- self.update_info(info)
- def get_atom_datetime64(self, block):
- return _tables().Int64Col(shape=block.shape[0])
- def set_atom_datetime64(self, block, values=None):
- self.kind = 'datetime64'
- self.typ = self.get_atom_datetime64(block)
- if values is None:
- values = block.values.view('i8')
- self.set_data(values, 'datetime64')
- def set_atom_datetime64tz(self, block, info, values=None):
- if values is None:
- values = block.values
- # convert this column to i8 in UTC, and save the tz
- values = values.asi8.reshape(block.shape)
- # store a converted timezone
- self.tz = _get_tz(block.values.tz)
- self.update_info(info)
- self.kind = 'datetime64'
- self.typ = self.get_atom_datetime64(block)
- self.set_data(values, 'datetime64')
- def get_atom_timedelta64(self, block):
- return _tables().Int64Col(shape=block.shape[0])
- def set_atom_timedelta64(self, block, values=None):
- self.kind = 'timedelta64'
- self.typ = self.get_atom_timedelta64(block)
- if values is None:
- values = block.values.view('i8')
- self.set_data(values, 'timedelta64')
- @property
- def shape(self):
- return getattr(self.data, 'shape', None)
- @property
- def cvalues(self):
- """ return my cython values """
- return self.data
- def validate_attr(self, append):
- """validate that we have the same order as the existing & same dtype"""
- if append:
- existing_fields = getattr(self.attrs, self.kind_attr, None)
- if (existing_fields is not None and
- existing_fields != list(self.values)):
- raise ValueError("appended items do not match existing items"
- " in table!")
- existing_dtype = getattr(self.attrs, self.dtype_attr, None)
- if (existing_dtype is not None and
- existing_dtype != self.dtype):
- raise ValueError("appended items dtype do not match existing "
- "items dtype in table!")
- def convert(self, values, nan_rep, encoding, errors):
- """set the data from this selection (and convert to the correct dtype
- if we can)
- """
- # values is a recarray
- if values.dtype.fields is not None:
- values = values[self.cname]
- self.set_data(values)
- # use the meta if needed
- meta = _ensure_decoded(self.meta)
- # convert to the correct dtype
- if self.dtype is not None:
- dtype = _ensure_decoded(self.dtype)
- # reverse converts
- if dtype == u'datetime64':
- # recreate with tz if indicated
- self.data = _set_tz(self.data, self.tz, coerce=True)
- elif dtype == u'timedelta64':
- self.data = np.asarray(self.data, dtype='m8[ns]')
- elif dtype == u'date':
- try:
- self.data = np.asarray(
- [date.fromordinal(v) for v in self.data], dtype=object)
- except ValueError:
- self.data = np.asarray(
- [date.fromtimestamp(v) for v in self.data],
- dtype=object)
- elif dtype == u'datetime':
- self.data = np.asarray(
- [datetime.fromtimestamp(v) for v in self.data],
- dtype=object)
- elif meta == u'category':
- # we have a categorical
- categories = self.metadata
- codes = self.data.ravel()
- # if we have stored a NaN in the categories
- # then strip it; in theory we could have BOTH
- # -1s in the codes and nulls :<
- if categories is None:
- # Handle case of NaN-only categorical columns in which case
- # the categories are an empty array; when this is stored,
- # pytables cannot write a zero-len array, so on readback
- # the categories would be None and `read_hdf()` would fail.
- categories = Index([], dtype=np.float64)
- else:
- mask = isna(categories)
- if mask.any():
- categories = categories[~mask]
- codes[codes != -1] -= mask.astype(int).cumsum().values
- self.data = Categorical.from_codes(codes,
- categories=categories,
- ordered=self.ordered)
- else:
- try:
- self.data = self.data.astype(dtype, copy=False)
- except TypeError:
- self.data = self.data.astype('O', copy=False)
- # convert nans / decode
- if _ensure_decoded(self.kind) == u'string':
- self.data = _unconvert_string_array(
- self.data, nan_rep=nan_rep, encoding=encoding, errors=errors)
- return self
- def get_attr(self):
- """ get the data for this column """
- self.values = getattr(self.attrs, self.kind_attr, None)
- self.dtype = getattr(self.attrs, self.dtype_attr, None)
- self.meta = getattr(self.attrs, self.meta_attr, None)
- self.set_kind()
- def set_attr(self):
- """ set the data for this column """
- setattr(self.attrs, self.kind_attr, self.values)
- setattr(self.attrs, self.meta_attr, self.meta)
- if self.dtype is not None:
- setattr(self.attrs, self.dtype_attr, self.dtype)
- class DataIndexableCol(DataCol):
- """ represent a data column that can be indexed """
- is_data_indexable = True
- def validate_names(self):
- if not Index(self.values).is_object():
- raise ValueError("cannot have non-object label DataIndexableCol")
- def get_atom_string(self, block, itemsize):
- return _tables().StringCol(itemsize=itemsize)
- def get_atom_data(self, block, kind=None):
- return self.get_atom_coltype(kind=kind)()
- def get_atom_datetime64(self, block):
- return _tables().Int64Col()
- def get_atom_timedelta64(self, block):
- return _tables().Int64Col()
- class GenericDataIndexableCol(DataIndexableCol):
- """ represent a generic pytables data column """
- def get_attr(self):
- pass
- class Fixed(StringMixin):
- """ represent an object in my store
- facilitate read/write of various types of objects
- this is an abstract base class
- Parameters
- ----------
- parent : my parent HDFStore
- group : the group node where the table resides
- """
- pandas_kind = None
- obj_type = None
- ndim = None
- is_table = False
- def __init__(self, parent, group, encoding=None, errors='strict',
- **kwargs):
- self.parent = parent
- self.group = group
- self.encoding = _ensure_encoding(encoding)
- self.errors = errors
- self.set_version()
- @property
- def is_old_version(self):
- return (self.version[0] <= 0 and self.version[1] <= 10 and
- self.version[2] < 1)
- def set_version(self):
- """ compute and set our version """
- version = _ensure_decoded(
- getattr(self.group._v_attrs, 'pandas_version', None))
- try:
- self.version = tuple(int(x) for x in version.split('.'))
- if len(self.version) == 2:
- self.version = self.version + (0,)
- except AttributeError:
- self.version = (0, 0, 0)
- @property
- def pandas_type(self):
- return _ensure_decoded(getattr(self.group._v_attrs,
- 'pandas_type', None))
- @property
- def format_type(self):
- return 'fixed'
- def __unicode__(self):
- """ return a pretty representation of myself """
- self.infer_axes()
- s = self.shape
- if s is not None:
- if isinstance(s, (list, tuple)):
- s = "[{shape}]".format(
- shape=','.join(pprint_thing(x) for x in s))
- return "{type:12.12} (shape->{shape})".format(
- type=self.pandas_type, shape=s)
- return self.pandas_type
- def set_object_info(self):
- """ set my pandas type & version """
- self.attrs.pandas_type = str(self.pandas_kind)
- self.attrs.pandas_version = str(_version)
- self.set_version()
- def copy(self):
- new_self = copy.copy(self)
- return new_self
- @property
- def storage_obj_type(self):
- return self.obj_type
- @property
- def shape(self):
- return self.nrows
- @property
- def pathname(self):
- return self.group._v_pathname
- @property
- def _handle(self):
- return self.parent._handle
- @property
- def _filters(self):
- return self.parent._filters
- @property
- def _complevel(self):
- return self.parent._complevel
- @property
- def _fletcher32(self):
- return self.parent._fletcher32
- @property
- def _complib(self):
- return self.parent._complib
- @property
- def attrs(self):
- return self.group._v_attrs
- def set_attrs(self):
- """ set our object attributes """
- pass
- def get_attrs(self):
- """ get our object attributes """
- pass
- @property
- def storable(self):
- """ return my storable """
- return self.group
- @property
- def is_exists(self):
- return False
- @property
- def nrows(self):
- return getattr(self.storable, 'nrows', None)
- def validate(self, other):
- """ validate against an existing storable """
- if other is None:
- return
- return True
- def validate_version(self, where=None):
- """ are we trying to operate on an old version? """
- return True
- def infer_axes(self):
- """ infer the axes of my storer
- return a boolean indicating if we have a valid storer or not """
- s = self.storable
- if s is None:
- return False
- self.get_attrs()
- return True
- def read(self, **kwargs):
- raise NotImplementedError(
- "cannot read on an abstract storer: subclasses should implement")
- def write(self, **kwargs):
- raise NotImplementedError(
- "cannot write on an abstract storer: sublcasses should implement")
- def delete(self, where=None, start=None, stop=None, **kwargs):
- """
- support fully deleting the node in its entirety (only) - where
- specification must be None
- """
- if com._all_none(where, start, stop):
- self._handle.remove_node(self.group, recursive=True)
- return None
- raise TypeError("cannot delete on an abstract storer")
- class GenericFixed(Fixed):
- """ a generified fixed version """
- _index_type_map = {DatetimeIndex: 'datetime', PeriodIndex: 'period'}
- _reverse_index_map = {v: k for k, v in compat.iteritems(_index_type_map)}
- attributes = []
- # indexer helpders
- def _class_to_alias(self, cls):
- return self._index_type_map.get(cls, '')
- def _alias_to_class(self, alias):
- if isinstance(alias, type): # pragma: no cover
- # compat: for a short period of time master stored types
- return alias
- return self._reverse_index_map.get(alias, Index)
- def _get_index_factory(self, klass):
- if klass == DatetimeIndex:
- def f(values, freq=None, tz=None):
- # data are already in UTC, localize and convert if tz present
- result = DatetimeIndex._simple_new(values.values, name=None,
- freq=freq)
- if tz is not None:
- result = result.tz_localize('UTC').tz_convert(tz)
- return result
- return f
- elif klass == PeriodIndex:
- def f(values, freq=None, tz=None):
- return PeriodIndex._simple_new(values, name=None, freq=freq)
- return f
- return klass
- def validate_read(self, kwargs):
- """
- remove table keywords from kwargs and return
- raise if any keywords are passed which are not-None
- """
- kwargs = copy.copy(kwargs)
- columns = kwargs.pop('columns', None)
- if columns is not None:
- raise TypeError("cannot pass a column specification when reading "
- "a Fixed format store. this store must be "
- "selected in its entirety")
- where = kwargs.pop('where', None)
- if where is not None:
- raise TypeError("cannot pass a where specification when reading "
- "from a Fixed format store. this store must be "
- "selected in its entirety")
- return kwargs
- @property
- def is_exists(self):
- return True
- def set_attrs(self):
- """ set our object attributes """
- self.attrs.encoding = self.encoding
- self.attrs.errors = self.errors
- def get_attrs(self):
- """ retrieve our attributes """
- self.encoding = _ensure_encoding(getattr(self.attrs, 'encoding', None))
- self.errors = _ensure_decoded(getattr(self.attrs, 'errors', 'strict'))
- for n in self.attributes:
- setattr(self, n, _ensure_decoded(getattr(self.attrs, n, None)))
- def write(self, obj, **kwargs):
- self.set_attrs()
- def read_array(self, key, start=None, stop=None):
- """ read an array for the specified node (off of group """
- import tables
- node = getattr(self.group, key)
- attrs = node._v_attrs
- transposed = getattr(attrs, 'transposed', False)
- if isinstance(node, tables.VLArray):
- ret = node[0][start:stop]
- else:
- dtype = getattr(attrs, 'value_type', None)
- shape = getattr(attrs, 'shape', None)
- if shape is not None:
- # length 0 axis
- ret = np.empty(shape, dtype=dtype)
- else:
- ret = node[start:stop]
- if dtype == u'datetime64':
- # reconstruct a timezone if indicated
- ret = _set_tz(ret, getattr(attrs, 'tz', None), coerce=True)
- elif dtype == u'timedelta64':
- ret = np.asarray(ret, dtype='m8[ns]')
- if transposed:
- return ret.T
- else:
- return ret
- def read_index(self, key, **kwargs):
- variety = _ensure_decoded(
- getattr(self.attrs, '{key}_variety'.format(key=key)))
- if variety == u'multi':
- return self.read_multi_index(key, **kwargs)
- elif variety == u'block':
- return self.read_block_index(key, **kwargs)
- elif variety == u'sparseint':
- return self.read_sparse_intindex(key, **kwargs)
- elif variety == u'regular':
- _, index = self.read_index_node(getattr(self.group, key), **kwargs)
- return index
- else: # pragma: no cover
- raise TypeError(
- 'unrecognized index variety: {variety}'.format(
- variety=variety))
- def write_index(self, key, index):
- if isinstance(index, MultiIndex):
- setattr(self.attrs, '{key}_variety'.format(key=key), 'multi')
- self.write_multi_index(key, index)
- elif isinstance(index, BlockIndex):
- setattr(self.attrs, '{key}_variety'.format(key=key), 'block')
- self.write_block_index(key, index)
- elif isinstance(index, IntIndex):
- setattr(self.attrs, '{key}_variety'.format(key=key), 'sparseint')
- self.write_sparse_intindex(key, index)
- else:
- setattr(self.attrs, '{key}_variety'.format(key=key), 'regular')
- converted = _convert_index(index, self.encoding, self.errors,
- self.format_type).set_name('index')
- self.write_array(key, converted.values)
- node = getattr(self.group, key)
- node._v_attrs.kind = converted.kind
- node._v_attrs.name = index.name
- if isinstance(index, (DatetimeIndex, PeriodIndex)):
- node._v_attrs.index_class = self._class_to_alias(type(index))
- if hasattr(index, 'freq'):
- node._v_attrs.freq = index.freq
- if hasattr(index, 'tz') and index.tz is not None:
- node._v_attrs.tz = _get_tz(index.tz)
- def write_block_index(self, key, index):
- self.write_array('{key}_blocs'.format(key=key), index.blocs)
- self.write_array('{key}_blengths'.format(key=key), index.blengths)
- setattr(self.attrs, '{key}_length'.format(key=key), index.length)
- def read_block_index(self, key, **kwargs):
- length = getattr(self.attrs, '{key}_length'.format(key=key))
- blocs = self.read_array('{key}_blocs'.format(key=key), **kwargs)
- blengths = self.read_array('{key}_blengths'.format(key=key), **kwargs)
- return BlockIndex(length, blocs, blengths)
- def write_sparse_intindex(self, key, index):
- self.write_array('{key}_indices'.format(key=key), index.indices)
- setattr(self.attrs, '{key}_length'.format(key=key), index.length)
- def read_sparse_intindex(self, key, **kwargs):
- length = getattr(self.attrs, '{key}_length'.format(key=key))
- indices = self.read_array('{key}_indices'.format(key=key), **kwargs)
- return IntIndex(length, indices)
- def write_multi_index(self, key, index):
- setattr(self.attrs, '{key}_nlevels'.format(key=key), index.nlevels)
- for i, (lev, level_codes, name) in enumerate(zip(index.levels,
- index.codes,
- index.names)):
- # write the level
- level_key = '{key}_level{idx}'.format(key=key, idx=i)
- conv_level = _convert_index(lev, self.encoding, self.errors,
- self.format_type).set_name(level_key)
- self.write_array(level_key, conv_level.values)
- node = getattr(self.group, level_key)
- node._v_attrs.kind = conv_level.kind
- node._v_attrs.name = name
- # write the name
- setattr(node._v_attrs, '{key}_name{name}'.format(
- key=key, name=name), name)
- # write the labels
- label_key = '{key}_label{idx}'.format(key=key, idx=i)
- self.write_array(label_key, level_codes)
- def read_multi_index(self, key, **kwargs):
- nlevels = getattr(self.attrs, '{key}_nlevels'.format(key=key))
- levels = []
- codes = []
- names = []
- for i in range(nlevels):
- level_key = '{key}_level{idx}'.format(key=key, idx=i)
- name, lev = self.read_index_node(getattr(self.group, level_key),
- **kwargs)
- levels.append(lev)
- names.append(name)
- label_key = '{key}_label{idx}'.format(key=key, idx=i)
- level_codes = self.read_array(label_key, **kwargs)
- codes.append(level_codes)
- return MultiIndex(levels=levels, codes=codes, names=names,
- verify_integrity=True)
- def read_index_node(self, node, start=None, stop=None):
- data = node[start:stop]
- # If the index was an empty array write_array_empty() will
- # have written a sentinel. Here we relace it with the original.
- if ('shape' in node._v_attrs and
- self._is_empty_array(getattr(node._v_attrs, 'shape'))):
- data = np.empty(getattr(node._v_attrs, 'shape'),
- dtype=getattr(node._v_attrs, 'value_type'))
- kind = _ensure_decoded(node._v_attrs.kind)
- name = None
- if 'name' in node._v_attrs:
- name = _ensure_str(node._v_attrs.name)
- name = _ensure_decoded(name)
- index_class = self._alias_to_class(_ensure_decoded(
- getattr(node._v_attrs, 'index_class', '')))
- factory = self._get_index_factory(index_class)
- kwargs = {}
- if u'freq' in node._v_attrs:
- kwargs['freq'] = node._v_attrs['freq']
- if u'tz' in node._v_attrs:
- kwargs['tz'] = node._v_attrs['tz']
- if kind in (u'date', u'datetime'):
- index = factory(_unconvert_index(data, kind,
- encoding=self.encoding,
- errors=self.errors),
- dtype=object, **kwargs)
- else:
- index = factory(_unconvert_index(data, kind,
- encoding=self.encoding,
- errors=self.errors), **kwargs)
- index.name = name
- return name, index
- def write_array_empty(self, key, value):
- """ write a 0-len array """
- # ugly hack for length 0 axes
- arr = np.empty((1,) * value.ndim)
- self._handle.create_array(self.group, key, arr)
- getattr(self.group, key)._v_attrs.value_type = str(value.dtype)
- getattr(self.group, key)._v_attrs.shape = value.shape
- def _is_empty_array(self, shape):
- """Returns true if any axis is zero length."""
- return any(x == 0 for x in shape)
- def write_array(self, key, value, items=None):
- if key in self.group:
- self._handle.remove_node(self.group, key)
- # Transform needed to interface with pytables row/col notation
- empty_array = self._is_empty_array(value.shape)
- transposed = False
- if is_categorical_dtype(value):
- raise NotImplementedError('Cannot store a category dtype in '
- 'a HDF5 dataset that uses format='
- '"fixed". Use format="table".')
- if not empty_array:
- if hasattr(value, 'T'):
- # ExtensionArrays (1d) may not have transpose.
- value = value.T
- transposed = True
- if self._filters is not None:
- atom = None
- try:
- # get the atom for this datatype
- atom = _tables().Atom.from_dtype(value.dtype)
- except ValueError:
- pass
- if atom is not None:
- # create an empty chunked array and fill it from value
- if not empty_array:
- ca = self._handle.create_carray(self.group, key, atom,
- value.shape,
- filters=self._filters)
- ca[:] = value
- getattr(self.group, key)._v_attrs.transposed = transposed
- else:
- self.write_array_empty(key, value)
- return
- if value.dtype.type == np.object_:
- # infer the type, warn if we have a non-string type here (for
- # performance)
- inferred_type = lib.infer_dtype(value.ravel(), skipna=False)
- if empty_array:
- pass
- elif inferred_type == 'string':
- pass
- else:
- try:
- items = list(items)
- except TypeError:
- pass
- ws = performance_doc % (inferred_type, key, items)
- warnings.warn(ws, PerformanceWarning, stacklevel=7)
- vlarr = self._handle.create_vlarray(self.group, key,
- _tables().ObjectAtom())
- vlarr.append(value)
- else:
- if empty_array:
- self.write_array_empty(key, value)
- else:
- if is_datetime64_dtype(value.dtype):
- self._handle.create_array(
- self.group, key, value.view('i8'))
- getattr(
- self.group, key)._v_attrs.value_type = 'datetime64'
- elif is_datetime64tz_dtype(value.dtype):
- # store as UTC
- # with a zone
- self._handle.create_array(self.group, key,
- value.asi8)
- node = getattr(self.group, key)
- node._v_attrs.tz = _get_tz(value.tz)
- node._v_attrs.value_type = 'datetime64'
- elif is_timedelta64_dtype(value.dtype):
- self._handle.create_array(
- self.group, key, value.view('i8'))
- getattr(
- self.group, key)._v_attrs.value_type = 'timedelta64'
- else:
- self._handle.create_array(self.group, key, value)
- getattr(self.group, key)._v_attrs.transposed = transposed
- class LegacyFixed(GenericFixed):
- def read_index_legacy(self, key, start=None, stop=None):
- node = getattr(self.group, key)
- data = node[start:stop]
- kind = node._v_attrs.kind
- return _unconvert_index_legacy(data, kind, encoding=self.encoding,
- errors=self.errors)
- class LegacySeriesFixed(LegacyFixed):
- def read(self, **kwargs):
- kwargs = self.validate_read(kwargs)
- index = self.read_index_legacy('index')
- values = self.read_array('values')
- return Series(values, index=index)
- class LegacyFrameFixed(LegacyFixed):
- def read(self, **kwargs):
- kwargs = self.validate_read(kwargs)
- index = self.read_index_legacy('index')
- columns = self.read_index_legacy('columns')
- values = self.read_array('values')
- return DataFrame(values, index=index, columns=columns)
- class SeriesFixed(GenericFixed):
- pandas_kind = u'series'
- attributes = ['name']
- @property
- def shape(self):
- try:
- return len(getattr(self.group, 'values')),
- except (TypeError, AttributeError):
- return None
- def read(self, **kwargs):
- kwargs = self.validate_read(kwargs)
- index = self.read_index('index', **kwargs)
- values = self.read_array('values', **kwargs)
- return Series(values, index=index, name=self.name)
- def write(self, obj, **kwargs):
- super(SeriesFixed, self).write(obj, **kwargs)
- self.write_index('index', obj.index)
- self.write_array('values', obj.values)
- self.attrs.name = obj.name
- class SparseFixed(GenericFixed):
- def validate_read(self, kwargs):
- """
- we don't support start, stop kwds in Sparse
- """
- kwargs = super(SparseFixed, self).validate_read(kwargs)
- if 'start' in kwargs or 'stop' in kwargs:
- raise NotImplementedError("start and/or stop are not supported "
- "in fixed Sparse reading")
- return kwargs
- class SparseSeriesFixed(SparseFixed):
- pandas_kind = u'sparse_series'
- attributes = ['name', 'fill_value', 'kind']
- def read(self, **kwargs):
- kwargs = self.validate_read(kwargs)
- index = self.read_index('index')
- sp_values = self.read_array('sp_values')
- sp_index = self.read_index('sp_index')
- return SparseSeries(sp_values, index=index, sparse_index=sp_index,
- kind=self.kind or u'block',
- fill_value=self.fill_value,
- name=self.name)
- def write(self, obj, **kwargs):
- super(SparseSeriesFixed, self).write(obj, **kwargs)
- self.write_index('index', obj.index)
- self.write_index('sp_index', obj.sp_index)
- self.write_array('sp_values', obj.sp_values)
- self.attrs.name = obj.name
- self.attrs.fill_value = obj.fill_value
- self.attrs.kind = obj.kind
- class SparseFrameFixed(SparseFixed):
- pandas_kind = u'sparse_frame'
- attributes = ['default_kind', 'default_fill_value']
- def read(self, **kwargs):
- kwargs = self.validate_read(kwargs)
- columns = self.read_index('columns')
- sdict = {}
- for c in columns:
- key = 'sparse_series_{columns}'.format(columns=c)
- s = SparseSeriesFixed(self.parent, getattr(self.group, key))
- s.infer_axes()
- sdict[c] = s.read()
- return SparseDataFrame(sdict, columns=columns,
- default_kind=self.default_kind,
- default_fill_value=self.default_fill_value)
- def write(self, obj, **kwargs):
- """ write it as a collection of individual sparse series """
- super(SparseFrameFixed, self).write(obj, **kwargs)
- for name, ss in compat.iteritems(obj):
- key = 'sparse_series_{name}'.format(name=name)
- if key not in self.group._v_children:
- node = self._handle.create_group(self.group, key)
- else:
- node = getattr(self.group, key)
- s = SparseSeriesFixed(self.parent, node)
- s.write(ss)
- self.attrs.default_fill_value = obj.default_fill_value
- self.attrs.default_kind = obj.default_kind
- self.write_index('columns', obj.columns)
- class BlockManagerFixed(GenericFixed):
- attributes = ['ndim', 'nblocks']
- is_shape_reversed = False
- @property
- def shape(self):
- try:
- ndim = self.ndim
- # items
- items = 0
- for i in range(self.nblocks):
- node = getattr(self.group, 'block{idx}_items'.format(idx=i))
- shape = getattr(node, 'shape', None)
- if shape is not None:
- items += shape[0]
- # data shape
- node = getattr(self.group, 'block0_values')
- shape = getattr(node, 'shape', None)
- if shape is not None:
- shape = list(shape[0:(ndim - 1)])
- else:
- shape = []
- shape.append(items)
- # hacky - this works for frames, but is reversed for panels
- if self.is_shape_reversed:
- shape = shape[::-1]
- return shape
- except AttributeError:
- return None
- def read(self, start=None, stop=None, **kwargs):
- # start, stop applied to rows, so 0th axis only
- kwargs = self.validate_read(kwargs)
- select_axis = self.obj_type()._get_block_manager_axis(0)
- axes = []
- for i in range(self.ndim):
- _start, _stop = (start, stop) if i == select_axis else (None, None)
- ax = self.read_index('axis{idx}'.format(
- idx=i), start=_start, stop=_stop)
- axes.append(ax)
- items = axes[0]
- blocks = []
- for i in range(self.nblocks):
- blk_items = self.read_index('block{idx}_items'.format(idx=i))
- values = self.read_array('block{idx}_values'.format(idx=i),
- start=_start, stop=_stop)
- blk = make_block(values,
- placement=items.get_indexer(blk_items))
- blocks.append(blk)
- return self.obj_type(BlockManager(blocks, axes))
- def write(self, obj, **kwargs):
- super(BlockManagerFixed, self).write(obj, **kwargs)
- data = obj._data
- if not data.is_consolidated():
- data = data.consolidate()
- self.attrs.ndim = data.ndim
- for i, ax in enumerate(data.axes):
- if i == 0:
- if not ax.is_unique:
- raise ValueError(
- "Columns index has to be unique for fixed format")
- self.write_index('axis{idx}'.format(idx=i), ax)
- # Supporting mixed-type DataFrame objects...nontrivial
- self.attrs.nblocks = len(data.blocks)
- for i, blk in enumerate(data.blocks):
- # I have no idea why, but writing values before items fixed #2299
- blk_items = data.items.take(blk.mgr_locs)
- self.write_array('block{idx}_values'.format(idx=i),
- blk.values, items=blk_items)
- self.write_index('block{idx}_items'.format(idx=i), blk_items)
- class FrameFixed(BlockManagerFixed):
- pandas_kind = u'frame'
- obj_type = DataFrame
- class PanelFixed(BlockManagerFixed):
- pandas_kind = u'wide'
- obj_type = Panel
- is_shape_reversed = True
- def write(self, obj, **kwargs):
- obj._consolidate_inplace()
- return super(PanelFixed, self).write(obj, **kwargs)
- class Table(Fixed):
- """ represent a table:
- facilitate read/write of various types of tables
- Attrs in Table Node
- -------------------
- These are attributes that are store in the main table node, they are
- necessary to recreate these tables when read back in.
- index_axes : a list of tuples of the (original indexing axis and
- index column)
- non_index_axes: a list of tuples of the (original index axis and
- columns on a non-indexing axis)
- values_axes : a list of the columns which comprise the data of this
- table
- data_columns : a list of the columns that we are allowing indexing
- (these become single columns in values_axes), or True to force all
- columns
- nan_rep : the string to use for nan representations for string
- objects
- levels : the names of levels
- metadata : the names of the metadata columns
- """
- pandas_kind = u'wide_table'
- table_type = None
- levels = 1
- is_table = True
- is_shape_reversed = False
- def __init__(self, *args, **kwargs):
- super(Table, self).__init__(*args, **kwargs)
- self.index_axes = []
- self.non_index_axes = []
- self.values_axes = []
- self.data_columns = []
- self.metadata = []
- self.info = dict()
- self.nan_rep = None
- self.selection = None
- @property
- def table_type_short(self):
- return self.table_type.split('_')[0]
- @property
- def format_type(self):
- return 'table'
- def __unicode__(self):
- """ return a pretty representatgion of myself """
- self.infer_axes()
- dc = ",dc->[{columns}]".format(columns=(','.join(
- self.data_columns) if len(self.data_columns) else ''))
- ver = ''
- if self.is_old_version:
- ver = "[{version}]".format(
- version='.'.join(str(x) for x in self.version))
- return (
- "{pandas_type:12.12}{ver} (typ->{table_type},nrows->{nrows},"
- "ncols->{ncols},indexers->[{index_axes}]{dc})".format(
- pandas_type=self.pandas_type, ver=ver,
- table_type=self.table_type_short, nrows=self.nrows,
- ncols=self.ncols,
- index_axes=(','.join(a.name for a in self.index_axes)), dc=dc
- ))
- def __getitem__(self, c):
- """ return the axis for c """
- for a in self.axes:
- if c == a.name:
- return a
- return None
- def validate(self, other):
- """ validate against an existing table """
- if other is None:
- return
- if other.table_type != self.table_type:
- raise TypeError(
- "incompatible table_type with existing "
- "[{other} - {self}]".format(
- other=other.table_type, self=self.table_type))
- for c in ['index_axes', 'non_index_axes', 'values_axes']:
- sv = getattr(self, c, None)
- ov = getattr(other, c, None)
- if sv != ov:
- # show the error for the specific axes
- for i, sax in enumerate(sv):
- oax = ov[i]
- if sax != oax:
- raise ValueError(
- "invalid combinate of [{c}] on appending data "
- "[{sax}] vs current table [{oax}]".format(
- c=c, sax=sax, oax=oax))
- # should never get here
- raise Exception(
- "invalid combinate of [{c}] on appending data [{sv}] vs "
- "current table [{ov}]".format(c=c, sv=sv, ov=ov))
- @property
- def is_multi_index(self):
- """the levels attribute is 1 or a list in the case of a multi-index"""
- return isinstance(self.levels, list)
- def validate_metadata(self, existing):
- """ create / validate metadata """
- self.metadata = [
- c.name for c in self.values_axes if c.metadata is not None]
- def validate_multiindex(self, obj):
- """validate that we can store the multi-index; reset and return the
- new object
- """
- levels = [l if l is not None else "level_{0}".format(i)
- for i, l in enumerate(obj.index.names)]
- try:
- return obj.reset_index(), levels
- except ValueError:
- raise ValueError("duplicate names/columns in the multi-index when "
- "storing as a table")
- @property
- def nrows_expected(self):
- """ based on our axes, compute the expected nrows """
- return np.prod([i.cvalues.shape[0] for i in self.index_axes])
- @property
- def is_exists(self):
- """ has this table been created """
- return u'table' in self.group
- @property
- def storable(self):
- return getattr(self.group, 'table', None)
- @property
- def table(self):
- """ return the table group (this is my storable) """
- return self.storable
- @property
- def dtype(self):
- return self.table.dtype
- @property
- def description(self):
- return self.table.description
- @property
- def axes(self):
- return itertools.chain(self.index_axes, self.values_axes)
- @property
- def ncols(self):
- """ the number of total columns in the values axes """
- return sum(len(a.values) for a in self.values_axes)
- @property
- def is_transposed(self):
- return False
- @property
- def data_orientation(self):
- """return a tuple of my permutated axes, non_indexable at the front"""
- return tuple(itertools.chain([int(a[0]) for a in self.non_index_axes],
- [int(a.axis) for a in self.index_axes]))
- def queryables(self):
- """ return a dict of the kinds allowable columns for this object """
- # compute the values_axes queryables
- return dict(
- [(a.cname, a) for a in self.index_axes] +
- [(self.storage_obj_type._AXIS_NAMES[axis], None)
- for axis, values in self.non_index_axes] +
- [(v.cname, v) for v in self.values_axes
- if v.name in set(self.data_columns)]
- )
- def index_cols(self):
- """ return a list of my index cols """
- return [(i.axis, i.cname) for i in self.index_axes]
- def values_cols(self):
- """ return a list of my values cols """
- return [i.cname for i in self.values_axes]
- def _get_metadata_path(self, key):
- """ return the metadata pathname for this key """
- return "{group}/meta/{key}/meta".format(group=self.group._v_pathname,
- key=key)
- def write_metadata(self, key, values):
- """
- write out a meta data array to the key as a fixed-format Series
- Parameters
- ----------
- key : string
- values : ndarray
- """
- values = Series(values)
- self.parent.put(self._get_metadata_path(key), values, format='table',
- encoding=self.encoding, errors=self.errors,
- nan_rep=self.nan_rep)
- def read_metadata(self, key):
- """ return the meta data array for this key """
- if getattr(getattr(self.group, 'meta', None), key, None) is not None:
- return self.parent.select(self._get_metadata_path(key))
- return None
- def set_info(self):
- """ update our table index info """
- self.attrs.info = self.info
- def set_attrs(self):
- """ set our table type & indexables """
- self.attrs.table_type = str(self.table_type)
- self.attrs.index_cols = self.index_cols()
- self.attrs.values_cols = self.values_cols()
- self.attrs.non_index_axes = self.non_index_axes
- self.attrs.data_columns = self.data_columns
- self.attrs.nan_rep = self.nan_rep
- self.attrs.encoding = self.encoding
- self.attrs.errors = self.errors
- self.attrs.levels = self.levels
- self.attrs.metadata = self.metadata
- self.set_info()
- def get_attrs(self):
- """ retrieve our attributes """
- self.non_index_axes = getattr(
- self.attrs, 'non_index_axes', None) or []
- self.data_columns = getattr(
- self.attrs, 'data_columns', None) or []
- self.info = getattr(
- self.attrs, 'info', None) or dict()
- self.nan_rep = getattr(self.attrs, 'nan_rep', None)
- self.encoding = _ensure_encoding(
- getattr(self.attrs, 'encoding', None))
- self.errors = _ensure_decoded(getattr(self.attrs, 'errors', 'strict'))
- self.levels = getattr(
- self.attrs, 'levels', None) or []
- self.index_axes = [
- a.infer(self) for a in self.indexables if a.is_an_indexable
- ]
- self.values_axes = [
- a.infer(self) for a in self.indexables if not a.is_an_indexable
- ]
- self.metadata = getattr(
- self.attrs, 'metadata', None) or []
- def validate_version(self, where=None):
- """ are we trying to operate on an old version? """
- if where is not None:
- if (self.version[0] <= 0 and self.version[1] <= 10 and
- self.version[2] < 1):
- ws = incompatibility_doc % '.'.join(
- [str(x) for x in self.version])
- warnings.warn(ws, IncompatibilityWarning)
- def validate_min_itemsize(self, min_itemsize):
- """validate the min_itemisze doesn't contain items that are not in the
- axes this needs data_columns to be defined
- """
- if min_itemsize is None:
- return
- if not isinstance(min_itemsize, dict):
- return
- q = self.queryables()
- for k, v in min_itemsize.items():
- # ok, apply generally
- if k == 'values':
- continue
- if k not in q:
- raise ValueError(
- "min_itemsize has the key [{key}] which is not an axis or "
- "data_column".format(key=k))
- @property
- def indexables(self):
- """ create/cache the indexables if they don't exist """
- if self._indexables is None:
- self._indexables = []
- # index columns
- self._indexables.extend([
- IndexCol(name=name, axis=axis, pos=i)
- for i, (axis, name) in enumerate(self.attrs.index_cols)
- ])
- # values columns
- dc = set(self.data_columns)
- base_pos = len(self._indexables)
- def f(i, c):
- klass = DataCol
- if c in dc:
- klass = DataIndexableCol
- return klass.create_for_block(i=i, name=c, pos=base_pos + i,
- version=self.version)
- self._indexables.extend(
- [f(i, c) for i, c in enumerate(self.attrs.values_cols)])
- return self._indexables
- def create_index(self, columns=None, optlevel=None, kind=None):
- """
- Create a pytables index on the specified columns
- note: cannot index Time64Col() or ComplexCol currently;
- PyTables must be >= 3.0
- Parameters
- ----------
- columns : False (don't create an index), True (create all columns
- index), None or list_like (the indexers to index)
- optlevel: optimization level (defaults to 6)
- kind : kind of index (defaults to 'medium')
- Exceptions
- ----------
- raises if the node is not a table
- """
- if not self.infer_axes():
- return
- if columns is False:
- return
- # index all indexables and data_columns
- if columns is None or columns is True:
- columns = [a.cname for a in self.axes if a.is_data_indexable]
- if not isinstance(columns, (tuple, list)):
- columns = [columns]
- kw = dict()
- if optlevel is not None:
- kw['optlevel'] = optlevel
- if kind is not None:
- kw['kind'] = kind
- table = self.table
- for c in columns:
- v = getattr(table.cols, c, None)
- if v is not None:
- # remove the index if the kind/optlevel have changed
- if v.is_indexed:
- index = v.index
- cur_optlevel = index.optlevel
- cur_kind = index.kind
- if kind is not None and cur_kind != kind:
- v.remove_index()
- else:
- kw['kind'] = cur_kind
- if optlevel is not None and cur_optlevel != optlevel:
- v.remove_index()
- else:
- kw['optlevel'] = cur_optlevel
- # create the index
- if not v.is_indexed:
- if v.type.startswith('complex'):
- raise TypeError(
- 'Columns containing complex values can be stored '
- 'but cannot'
- ' be indexed when using table format. Either use '
- 'fixed format, set index=False, or do not include '
- 'the columns containing complex values to '
- 'data_columns when initializing the table.')
- v.create_index(**kw)
- def read_axes(self, where, **kwargs):
- """create and return the axes sniffed from the table: return boolean
- for success
- """
- # validate the version
- self.validate_version(where)
- # infer the data kind
- if not self.infer_axes():
- return False
- # create the selection
- self.selection = Selection(self, where=where, **kwargs)
- values = self.selection.select()
- # convert the data
- for a in self.axes:
- a.set_info(self.info)
- a.convert(values, nan_rep=self.nan_rep, encoding=self.encoding,
- errors=self.errors)
- return True
- def get_object(self, obj):
- """ return the data for this obj """
- return obj
- def validate_data_columns(self, data_columns, min_itemsize):
- """take the input data_columns and min_itemize and create a data
- columns spec
- """
- if not len(self.non_index_axes):
- return []
- axis, axis_labels = self.non_index_axes[0]
- info = self.info.get(axis, dict())
- if info.get('type') == 'MultiIndex' and data_columns:
- raise ValueError("cannot use a multi-index on axis [{0}] with "
- "data_columns {1}".format(axis, data_columns))
- # evaluate the passed data_columns, True == use all columns
- # take only valide axis labels
- if data_columns is True:
- data_columns = list(axis_labels)
- elif data_columns is None:
- data_columns = []
- # if min_itemsize is a dict, add the keys (exclude 'values')
- if isinstance(min_itemsize, dict):
- existing_data_columns = set(data_columns)
- data_columns.extend([
- k for k in min_itemsize.keys()
- if k != 'values' and k not in existing_data_columns
- ])
- # return valid columns in the order of our axis
- return [c for c in data_columns if c in axis_labels]
- def create_axes(self, axes, obj, validate=True, nan_rep=None,
- data_columns=None, min_itemsize=None, **kwargs):
- """ create and return the axes
- leagcy tables create an indexable column, indexable index,
- non-indexable fields
- Parameters:
- -----------
- axes: a list of the axes in order to create (names or numbers of
- the axes)
- obj : the object to create axes on
- validate: validate the obj against an existing object already
- written
- min_itemsize: a dict of the min size for a column in bytes
- nan_rep : a values to use for string column nan_rep
- encoding : the encoding for string values
- data_columns : a list of columns that we want to create separate to
- allow indexing (or True will force all columns)
- """
- # set the default axes if needed
- if axes is None:
- try:
- axes = _AXES_MAP[type(obj)]
- except KeyError:
- raise TypeError(
- "cannot properly create the storer for: [group->{group},"
- "value->{value}]".format(
- group=self.group._v_name, value=type(obj)))
- # map axes to numbers
- axes = [obj._get_axis_number(a) for a in axes]
- # do we have an existing table (if so, use its axes & data_columns)
- if self.infer_axes():
- existing_table = self.copy()
- existing_table.infer_axes()
- axes = [a.axis for a in existing_table.index_axes]
- data_columns = existing_table.data_columns
- nan_rep = existing_table.nan_rep
- self.encoding = existing_table.encoding
- self.errors = existing_table.errors
- self.info = copy.copy(existing_table.info)
- else:
- existing_table = None
- # currently support on ndim-1 axes
- if len(axes) != self.ndim - 1:
- raise ValueError(
- "currently only support ndim-1 indexers in an AppendableTable")
- # create according to the new data
- self.non_index_axes = []
- self.data_columns = []
- # nan_representation
- if nan_rep is None:
- nan_rep = 'nan'
- self.nan_rep = nan_rep
- # create axes to index and non_index
- index_axes_map = dict()
- for i, a in enumerate(obj.axes):
- if i in axes:
- name = obj._AXIS_NAMES[i]
- index_axes_map[i] = _convert_index(
- a, self.encoding, self.errors, self.format_type
- ).set_name(name).set_axis(i)
- else:
- # we might be able to change the axes on the appending data if
- # necessary
- append_axis = list(a)
- if existing_table is not None:
- indexer = len(self.non_index_axes)
- exist_axis = existing_table.non_index_axes[indexer][1]
- if not array_equivalent(np.array(append_axis),
- np.array(exist_axis)):
- # ahah! -> reindex
- if array_equivalent(np.array(sorted(append_axis)),
- np.array(sorted(exist_axis))):
- append_axis = exist_axis
- # the non_index_axes info
- info = _get_info(self.info, i)
- info['names'] = list(a.names)
- info['type'] = a.__class__.__name__
- self.non_index_axes.append((i, append_axis))
- # set axis positions (based on the axes)
- self.index_axes = [
- index_axes_map[a].set_pos(j).update_info(self.info)
- for j, a in enumerate(axes)
- ]
- j = len(self.index_axes)
- # check for column conflicts
- for a in self.axes:
- a.maybe_set_size(min_itemsize=min_itemsize)
- # reindex by our non_index_axes & compute data_columns
- for a in self.non_index_axes:
- obj = _reindex_axis(obj, a[0], a[1])
- def get_blk_items(mgr, blocks):
- return [mgr.items.take(blk.mgr_locs) for blk in blocks]
- # figure out data_columns and get out blocks
- block_obj = self.get_object(obj)._consolidate()
- blocks = block_obj._data.blocks
- blk_items = get_blk_items(block_obj._data, blocks)
- if len(self.non_index_axes):
- axis, axis_labels = self.non_index_axes[0]
- data_columns = self.validate_data_columns(
- data_columns, min_itemsize)
- if len(data_columns):
- mgr = block_obj.reindex(
- Index(axis_labels).difference(Index(data_columns)),
- axis=axis
- )._data
- blocks = list(mgr.blocks)
- blk_items = get_blk_items(mgr, blocks)
- for c in data_columns:
- mgr = block_obj.reindex([c], axis=axis)._data
- blocks.extend(mgr.blocks)
- blk_items.extend(get_blk_items(mgr, mgr.blocks))
- # reorder the blocks in the same order as the existing_table if we can
- if existing_table is not None:
- by_items = {tuple(b_items.tolist()): (b, b_items)
- for b, b_items in zip(blocks, blk_items)}
- new_blocks = []
- new_blk_items = []
- for ea in existing_table.values_axes:
- items = tuple(ea.values)
- try:
- b, b_items = by_items.pop(items)
- new_blocks.append(b)
- new_blk_items.append(b_items)
- except (IndexError, KeyError):
- raise ValueError(
- "cannot match existing table structure for [{items}] "
- "on appending data".format(
- items=(','.join(pprint_thing(item) for
- item in items))))
- blocks = new_blocks
- blk_items = new_blk_items
- # add my values
- self.values_axes = []
- for i, (b, b_items) in enumerate(zip(blocks, blk_items)):
- # shape of the data column are the indexable axes
- klass = DataCol
- name = None
- # we have a data_column
- if (data_columns and len(b_items) == 1 and
- b_items[0] in data_columns):
- klass = DataIndexableCol
- name = b_items[0]
- self.data_columns.append(name)
- # make sure that we match up the existing columns
- # if we have an existing table
- if existing_table is not None and validate:
- try:
- existing_col = existing_table.values_axes[i]
- except (IndexError, KeyError):
- raise ValueError(
- "Incompatible appended table [{blocks}]"
- "with existing table [{table}]".format(
- blocks=blocks,
- table=existing_table.values_axes))
- else:
- existing_col = None
- try:
- col = klass.create_for_block(
- i=i, name=name, version=self.version)
- col.set_atom(block=b, block_items=b_items,
- existing_col=existing_col,
- min_itemsize=min_itemsize,
- nan_rep=nan_rep,
- encoding=self.encoding,
- errors=self.errors,
- info=self.info)
- col.set_pos(j)
- self.values_axes.append(col)
- except (NotImplementedError, ValueError, TypeError) as e:
- raise e
- except Exception as detail:
- raise Exception(
- "cannot find the correct atom type -> "
- "[dtype->{name},items->{items}] {detail!s}".format(
- name=b.dtype.name, items=b_items, detail=detail))
- j += 1
- # validate our min_itemsize
- self.validate_min_itemsize(min_itemsize)
- # validate our metadata
- self.validate_metadata(existing_table)
- # validate the axes if we have an existing table
- if validate:
- self.validate(existing_table)
- def process_axes(self, obj, columns=None):
- """ process axes filters """
- # make a copy to avoid side effects
- if columns is not None:
- columns = list(columns)
- # make sure to include levels if we have them
- if columns is not None and self.is_multi_index:
- for n in self.levels:
- if n not in columns:
- columns.insert(0, n)
- # reorder by any non_index_axes & limit to the select columns
- for axis, labels in self.non_index_axes:
- obj = _reindex_axis(obj, axis, labels, columns)
- # apply the selection filters (but keep in the same order)
- if self.selection.filter is not None:
- for field, op, filt in self.selection.filter.format():
- def process_filter(field, filt):
- for axis_name in obj._AXIS_NAMES.values():
- axis_number = obj._get_axis_number(axis_name)
- axis_values = obj._get_axis(axis_name)
- # see if the field is the name of an axis
- if field == axis_name:
- # if we have a multi-index, then need to include
- # the levels
- if self.is_multi_index:
- filt = filt.union(Index(self.levels))
- takers = op(axis_values, filt)
- return obj.loc._getitem_axis(takers,
- axis=axis_number)
- # this might be the name of a file IN an axis
- elif field in axis_values:
- # we need to filter on this dimension
- values = ensure_index(getattr(obj, field).values)
- filt = ensure_index(filt)
- # hack until we support reversed dim flags
- if isinstance(obj, DataFrame):
- axis_number = 1 - axis_number
- takers = op(values, filt)
- return obj.loc._getitem_axis(takers,
- axis=axis_number)
- raise ValueError("cannot find the field [{field}] for "
- "filtering!".format(field=field))
- obj = process_filter(field, filt)
- return obj
- def create_description(self, complib=None, complevel=None,
- fletcher32=False, expectedrows=None):
- """ create the description of the table from the axes & values """
- # provided expected rows if its passed
- if expectedrows is None:
- expectedrows = max(self.nrows_expected, 10000)
- d = dict(name='table', expectedrows=expectedrows)
- # description from the axes & values
- d['description'] = {a.cname: a.typ for a in self.axes}
- if complib:
- if complevel is None:
- complevel = self._complevel or 9
- filters = _tables().Filters(
- complevel=complevel, complib=complib,
- fletcher32=fletcher32 or self._fletcher32)
- d['filters'] = filters
- elif self._filters is not None:
- d['filters'] = self._filters
- return d
- def read_coordinates(self, where=None, start=None, stop=None, **kwargs):
- """select coordinates (row numbers) from a table; return the
- coordinates object
- """
- # validate the version
- self.validate_version(where)
- # infer the data kind
- if not self.infer_axes():
- return False
- # create the selection
- self.selection = Selection(
- self, where=where, start=start, stop=stop, **kwargs)
- coords = self.selection.select_coords()
- if self.selection.filter is not None:
- for field, op, filt in self.selection.filter.format():
- data = self.read_column(
- field, start=coords.min(), stop=coords.max() + 1)
- coords = coords[
- op(data.iloc[coords - coords.min()], filt).values]
- return Index(coords)
- def read_column(self, column, where=None, start=None, stop=None):
- """return a single column from the table, generally only indexables
- are interesting
- """
- # validate the version
- self.validate_version()
- # infer the data kind
- if not self.infer_axes():
- return False
- if where is not None:
- raise TypeError("read_column does not currently accept a where "
- "clause")
- # find the axes
- for a in self.axes:
- if column == a.name:
- if not a.is_data_indexable:
- raise ValueError(
- "column [{column}] can not be extracted individually; "
- "it is not data indexable".format(column=column))
- # column must be an indexable or a data column
- c = getattr(self.table.cols, column)
- a.set_info(self.info)
- return Series(_set_tz(a.convert(c[start:stop],
- nan_rep=self.nan_rep,
- encoding=self.encoding,
- errors=self.errors
- ).take_data(),
- a.tz, True), name=column)
- raise KeyError(
- "column [{column}] not found in the table".format(column=column))
- class WORMTable(Table):
- """ a write-once read-many table: this format DOES NOT ALLOW appending to a
- table. writing is a one-time operation the data are stored in a format
- that allows for searching the data on disk
- """
- table_type = u'worm'
- def read(self, **kwargs):
- """ read the indices and the indexing array, calculate offset rows and
- return """
- raise NotImplementedError("WORMTable needs to implement read")
- def write(self, **kwargs):
- """ write in a format that we can search later on (but cannot append
- to): write out the indices and the values using _write_array
- (e.g. a CArray) create an indexing table so that we can search
- """
- raise NotImplementedError("WORKTable needs to implement write")
- class LegacyTable(Table):
- """ an appendable table: allow append/query/delete operations to a
- (possibly) already existing appendable table this table ALLOWS
- append (but doesn't require them), and stores the data in a format
- that can be easily searched
- """
- _indexables = [
- IndexCol(name='index', axis=1, pos=0),
- IndexCol(name='column', axis=2, pos=1, index_kind='columns_kind'),
- DataCol(name='fields', cname='values', kind_attr='fields', pos=2)
- ]
- table_type = u'legacy'
- ndim = 3
- def write(self, **kwargs):
- raise TypeError("write operations are not allowed on legacy tables!")
- def read(self, where=None, columns=None, **kwargs):
- """we have n indexable columns, with an arbitrary number of data
- axes
- """
- if not self.read_axes(where=where, **kwargs):
- return None
- lst_vals = [a.values for a in self.index_axes]
- labels, levels = _factorize_from_iterables(lst_vals)
- # labels and levels are tuples but lists are expected
- labels = list(labels)
- levels = list(levels)
- N = [len(lvl) for lvl in levels]
- # compute the key
- key = _factor_indexer(N[1:], labels)
- objs = []
- if len(unique(key)) == len(key):
- sorter, _ = algos.groupsort_indexer(
- ensure_int64(key), np.prod(N))
- sorter = ensure_platform_int(sorter)
- # create the objs
- for c in self.values_axes:
- # the data need to be sorted
- sorted_values = c.take_data().take(sorter, axis=0)
- if sorted_values.ndim == 1:
- sorted_values = sorted_values.reshape(
- (sorted_values.shape[0], 1))
- take_labels = [l.take(sorter) for l in labels]
- items = Index(c.values)
- block = _block2d_to_blocknd(
- values=sorted_values, placement=np.arange(len(items)),
- shape=tuple(N), labels=take_labels, ref_items=items)
- # create the object
- mgr = BlockManager([block], [items] + levels)
- obj = self.obj_type(mgr)
- # permute if needed
- if self.is_transposed:
- obj = obj.transpose(
- *tuple(Series(self.data_orientation).argsort()))
- objs.append(obj)
- else:
- warnings.warn(duplicate_doc, DuplicateWarning, stacklevel=5)
- # reconstruct
- long_index = MultiIndex.from_arrays(
- [i.values for i in self.index_axes])
- for c in self.values_axes:
- lp = DataFrame(c.data, index=long_index, columns=c.values)
- # need a better algorithm
- tuple_index = long_index.values
- unique_tuples = unique(tuple_index)
- unique_tuples = com.asarray_tuplesafe(unique_tuples)
- indexer = match(unique_tuples, tuple_index)
- indexer = ensure_platform_int(indexer)
- new_index = long_index.take(indexer)
- new_values = lp.values.take(indexer, axis=0)
- lp = DataFrame(new_values, index=new_index, columns=lp.columns)
- objs.append(lp.to_panel())
- # create the composite object
- if len(objs) == 1:
- wp = objs[0]
- else:
- wp = concat(objs, axis=0, verify_integrity=False)._consolidate()
- # apply the selection filters & axis orderings
- wp = self.process_axes(wp, columns=columns)
- return wp
- class LegacyFrameTable(LegacyTable):
- """ support the legacy frame table """
- pandas_kind = u'frame_table'
- table_type = u'legacy_frame'
- obj_type = Panel
- def read(self, *args, **kwargs):
- return super(LegacyFrameTable, self).read(*args, **kwargs)['value']
- class LegacyPanelTable(LegacyTable):
- """ support the legacy panel table """
- table_type = u'legacy_panel'
- obj_type = Panel
- class AppendableTable(LegacyTable):
- """ suppor the new appendable table formats """
- _indexables = None
- table_type = u'appendable'
- def write(self, obj, axes=None, append=False, complib=None,
- complevel=None, fletcher32=None, min_itemsize=None,
- chunksize=None, expectedrows=None, dropna=False, **kwargs):
- if not append and self.is_exists:
- self._handle.remove_node(self.group, 'table')
- # create the axes
- self.create_axes(axes=axes, obj=obj, validate=append,
- min_itemsize=min_itemsize,
- **kwargs)
- for a in self.axes:
- a.validate(self, append)
- if not self.is_exists:
- # create the table
- options = self.create_description(complib=complib,
- complevel=complevel,
- fletcher32=fletcher32,
- expectedrows=expectedrows)
- # set the table attributes
- self.set_attrs()
- # create the table
- self._handle.create_table(self.group, **options)
- else:
- pass
- # table = self.table
- # update my info
- self.set_info()
- # validate the axes and set the kinds
- for a in self.axes:
- a.validate_and_set(self, append)
- # add the rows
- self.write_data(chunksize, dropna=dropna)
- def write_data(self, chunksize, dropna=False):
- """ we form the data into a 2-d including indexes,values,mask
- write chunk-by-chunk """
- names = self.dtype.names
- nrows = self.nrows_expected
- # if dropna==True, then drop ALL nan rows
- masks = []
- if dropna:
- for a in self.values_axes:
- # figure the mask: only do if we can successfully process this
- # column, otherwise ignore the mask
- mask = isna(a.data).all(axis=0)
- if isinstance(mask, np.ndarray):
- masks.append(mask.astype('u1', copy=False))
- # consolidate masks
- if len(masks):
- mask = masks[0]
- for m in masks[1:]:
- mask = mask & m
- mask = mask.ravel()
- else:
- mask = None
- # broadcast the indexes if needed
- indexes = [a.cvalues for a in self.index_axes]
- nindexes = len(indexes)
- bindexes = []
- for i, idx in enumerate(indexes):
- # broadcast to all other indexes except myself
- if i > 0 and i < nindexes:
- repeater = np.prod(
- [indexes[bi].shape[0] for bi in range(0, i)])
- idx = np.tile(idx, repeater)
- if i < nindexes - 1:
- repeater = np.prod([indexes[bi].shape[0]
- for bi in range(i + 1, nindexes)])
- idx = np.repeat(idx, repeater)
- bindexes.append(idx)
- # transpose the values so first dimension is last
- # reshape the values if needed
- values = [a.take_data() for a in self.values_axes]
- values = [v.transpose(np.roll(np.arange(v.ndim), v.ndim - 1))
- for v in values]
- bvalues = []
- for i, v in enumerate(values):
- new_shape = (nrows,) + self.dtype[names[nindexes + i]].shape
- bvalues.append(values[i].reshape(new_shape))
- # write the chunks
- if chunksize is None:
- chunksize = 100000
- rows = np.empty(min(chunksize, nrows), dtype=self.dtype)
- chunks = int(nrows / chunksize) + 1
- for i in range(chunks):
- start_i = i * chunksize
- end_i = min((i + 1) * chunksize, nrows)
- if start_i >= end_i:
- break
- self.write_data_chunk(
- rows,
- indexes=[a[start_i:end_i] for a in bindexes],
- mask=mask[start_i:end_i] if mask is not None else None,
- values=[v[start_i:end_i] for v in bvalues])
- def write_data_chunk(self, rows, indexes, mask, values):
- """
- Parameters
- ----------
- rows : an empty memory space where we are putting the chunk
- indexes : an array of the indexes
- mask : an array of the masks
- values : an array of the values
- """
- # 0 len
- for v in values:
- if not np.prod(v.shape):
- return
- try:
- nrows = indexes[0].shape[0]
- if nrows != len(rows):
- rows = np.empty(nrows, dtype=self.dtype)
- names = self.dtype.names
- nindexes = len(indexes)
- # indexes
- for i, idx in enumerate(indexes):
- rows[names[i]] = idx
- # values
- for i, v in enumerate(values):
- rows[names[i + nindexes]] = v
- # mask
- if mask is not None:
- m = ~mask.ravel().astype(bool, copy=False)
- if not m.all():
- rows = rows[m]
- except Exception as detail:
- raise Exception(
- "cannot create row-data -> {detail}".format(detail=detail))
- try:
- if len(rows):
- self.table.append(rows)
- self.table.flush()
- except Exception as detail:
- raise TypeError(
- "tables cannot write this data -> {detail}".format(
- detail=detail))
- def delete(self, where=None, start=None, stop=None, **kwargs):
- # delete all rows (and return the nrows)
- if where is None or not len(where):
- if start is None and stop is None:
- nrows = self.nrows
- self._handle.remove_node(self.group, recursive=True)
- else:
- # pytables<3.0 would remove a single row with stop=None
- if stop is None:
- stop = self.nrows
- nrows = self.table.remove_rows(start=start, stop=stop)
- self.table.flush()
- return nrows
- # infer the data kind
- if not self.infer_axes():
- return None
- # create the selection
- table = self.table
- self.selection = Selection(
- self, where, start=start, stop=stop, **kwargs)
- values = self.selection.select_coords()
- # delete the rows in reverse order
- sorted_series = Series(values).sort_values()
- ln = len(sorted_series)
- if ln:
- # construct groups of consecutive rows
- diff = sorted_series.diff()
- groups = list(diff[diff > 1].index)
- # 1 group
- if not len(groups):
- groups = [0]
- # final element
- if groups[-1] != ln:
- groups.append(ln)
- # initial element
- if groups[0] != 0:
- groups.insert(0, 0)
- # we must remove in reverse order!
- pg = groups.pop()
- for g in reversed(groups):
- rows = sorted_series.take(lrange(g, pg))
- table.remove_rows(start=rows[rows.index[0]
- ], stop=rows[rows.index[-1]] + 1)
- pg = g
- self.table.flush()
- # return the number of rows removed
- return ln
- class AppendableFrameTable(AppendableTable):
- """ suppor the new appendable table formats """
- pandas_kind = u'frame_table'
- table_type = u'appendable_frame'
- ndim = 2
- obj_type = DataFrame
- @property
- def is_transposed(self):
- return self.index_axes[0].axis == 1
- def get_object(self, obj):
- """ these are written transposed """
- if self.is_transposed:
- obj = obj.T
- return obj
- def read(self, where=None, columns=None, **kwargs):
- if not self.read_axes(where=where, **kwargs):
- return None
- info = (self.info.get(self.non_index_axes[0][0], dict())
- if len(self.non_index_axes) else dict())
- index = self.index_axes[0].values
- frames = []
- for a in self.values_axes:
- # we could have a multi-index constructor here
- # ensure_index doesn't recognized our list-of-tuples here
- if info.get('type') == 'MultiIndex':
- cols = MultiIndex.from_tuples(a.values)
- else:
- cols = Index(a.values)
- names = info.get('names')
- if names is not None:
- cols.set_names(names, inplace=True)
- if self.is_transposed:
- values = a.cvalues
- index_ = cols
- cols_ = Index(index, name=getattr(index, 'name', None))
- else:
- values = a.cvalues.T
- index_ = Index(index, name=getattr(index, 'name', None))
- cols_ = cols
- # if we have a DataIndexableCol, its shape will only be 1 dim
- if values.ndim == 1 and isinstance(values, np.ndarray):
- values = values.reshape((1, values.shape[0]))
- block = make_block(values, placement=np.arange(len(cols_)))
- mgr = BlockManager([block], [cols_, index_])
- frames.append(DataFrame(mgr))
- if len(frames) == 1:
- df = frames[0]
- else:
- df = concat(frames, axis=1)
- # apply the selection filters & axis orderings
- df = self.process_axes(df, columns=columns)
- return df
- class AppendableSeriesTable(AppendableFrameTable):
- """ support the new appendable table formats """
- pandas_kind = u'series_table'
- table_type = u'appendable_series'
- ndim = 2
- obj_type = Series
- storage_obj_type = DataFrame
- @property
- def is_transposed(self):
- return False
- def get_object(self, obj):
- return obj
- def write(self, obj, data_columns=None, **kwargs):
- """ we are going to write this as a frame table """
- if not isinstance(obj, DataFrame):
- name = obj.name or 'values'
- obj = DataFrame({name: obj}, index=obj.index)
- obj.columns = [name]
- return super(AppendableSeriesTable, self).write(
- obj=obj, data_columns=obj.columns.tolist(), **kwargs)
- def read(self, columns=None, **kwargs):
- is_multi_index = self.is_multi_index
- if columns is not None and is_multi_index:
- for n in self.levels:
- if n not in columns:
- columns.insert(0, n)
- s = super(AppendableSeriesTable, self).read(columns=columns, **kwargs)
- if is_multi_index:
- s.set_index(self.levels, inplace=True)
- s = s.iloc[:, 0]
- # remove the default name
- if s.name == 'values':
- s.name = None
- return s
- class AppendableMultiSeriesTable(AppendableSeriesTable):
- """ support the new appendable table formats """
- pandas_kind = u'series_table'
- table_type = u'appendable_multiseries'
- def write(self, obj, **kwargs):
- """ we are going to write this as a frame table """
- name = obj.name or 'values'
- obj, self.levels = self.validate_multiindex(obj)
- cols = list(self.levels)
- cols.append(name)
- obj.columns = cols
- return super(AppendableMultiSeriesTable, self).write(obj=obj, **kwargs)
- class GenericTable(AppendableFrameTable):
- """ a table that read/writes the generic pytables table format """
- pandas_kind = u'frame_table'
- table_type = u'generic_table'
- ndim = 2
- obj_type = DataFrame
- @property
- def pandas_type(self):
- return self.pandas_kind
- @property
- def storable(self):
- return getattr(self.group, 'table', None) or self.group
- def get_attrs(self):
- """ retrieve our attributes """
- self.non_index_axes = []
- self.nan_rep = None
- self.levels = []
- self.index_axes = [a.infer(self)
- for a in self.indexables if a.is_an_indexable]
- self.values_axes = [a.infer(self)
- for a in self.indexables if not a.is_an_indexable]
- self.data_columns = [a.name for a in self.values_axes]
- @property
- def indexables(self):
- """ create the indexables from the table description """
- if self._indexables is None:
- d = self.description
- # the index columns is just a simple index
- self._indexables = [GenericIndexCol(name='index', axis=0)]
- for i, n in enumerate(d._v_names):
- dc = GenericDataIndexableCol(
- name=n, pos=i, values=[n], version=self.version)
- self._indexables.append(dc)
- return self._indexables
- def write(self, **kwargs):
- raise NotImplementedError("cannot write on an generic table")
- class AppendableMultiFrameTable(AppendableFrameTable):
- """ a frame with a multi-index """
- table_type = u'appendable_multiframe'
- obj_type = DataFrame
- ndim = 2
- _re_levels = re.compile(r"^level_\d+$")
- @property
- def table_type_short(self):
- return u'appendable_multi'
- def write(self, obj, data_columns=None, **kwargs):
- if data_columns is None:
- data_columns = []
- elif data_columns is True:
- data_columns = obj.columns.tolist()
- obj, self.levels = self.validate_multiindex(obj)
- for n in self.levels:
- if n not in data_columns:
- data_columns.insert(0, n)
- return super(AppendableMultiFrameTable, self).write(
- obj=obj, data_columns=data_columns, **kwargs)
- def read(self, **kwargs):
- df = super(AppendableMultiFrameTable, self).read(**kwargs)
- df = df.set_index(self.levels)
- # remove names for 'level_%d'
- df.index = df.index.set_names([
- None if self._re_levels.search(l) else l for l in df.index.names
- ])
- return df
- class AppendablePanelTable(AppendableTable):
- """ suppor the new appendable table formats """
- table_type = u'appendable_panel'
- ndim = 3
- obj_type = Panel
- def get_object(self, obj):
- """ these are written transposed """
- if self.is_transposed:
- obj = obj.transpose(*self.data_orientation)
- return obj
- @property
- def is_transposed(self):
- return self.data_orientation != tuple(range(self.ndim))
- def _reindex_axis(obj, axis, labels, other=None):
- ax = obj._get_axis(axis)
- labels = ensure_index(labels)
- # try not to reindex even if other is provided
- # if it equals our current index
- if other is not None:
- other = ensure_index(other)
- if (other is None or labels.equals(other)) and labels.equals(ax):
- return obj
- labels = ensure_index(labels.unique())
- if other is not None:
- labels = ensure_index(other.unique()).intersection(labels, sort=False)
- if not labels.equals(ax):
- slicer = [slice(None, None)] * obj.ndim
- slicer[axis] = labels
- obj = obj.loc[tuple(slicer)]
- return obj
- def _get_info(info, name):
- """ get/create the info for this name """
- try:
- idx = info[name]
- except KeyError:
- idx = info[name] = dict()
- return idx
- # tz to/from coercion
- def _get_tz(tz):
- """ for a tz-aware type, return an encoded zone """
- zone = timezones.get_timezone(tz)
- if zone is None:
- zone = tz.utcoffset().total_seconds()
- return zone
- def _set_tz(values, tz, preserve_UTC=False, coerce=False):
- """
- coerce the values to a DatetimeIndex if tz is set
- preserve the input shape if possible
- Parameters
- ----------
- values : ndarray
- tz : string/pickled tz object
- preserve_UTC : boolean,
- preserve the UTC of the result
- coerce : if we do not have a passed timezone, coerce to M8[ns] ndarray
- """
- if tz is not None:
- name = getattr(values, 'name', None)
- values = values.ravel()
- tz = timezones.get_timezone(_ensure_decoded(tz))
- values = DatetimeIndex(values, name=name)
- if values.tz is None:
- values = values.tz_localize('UTC').tz_convert(tz)
- if preserve_UTC:
- if tz == 'UTC':
- values = list(values)
- elif coerce:
- values = np.asarray(values, dtype='M8[ns]')
- return values
- def _convert_index(index, encoding=None, errors='strict', format_type=None):
- index_name = getattr(index, 'name', None)
- if isinstance(index, DatetimeIndex):
- converted = index.asi8
- return IndexCol(converted, 'datetime64', _tables().Int64Col(),
- freq=getattr(index, 'freq', None),
- tz=getattr(index, 'tz', None),
- index_name=index_name)
- elif isinstance(index, TimedeltaIndex):
- converted = index.asi8
- return IndexCol(converted, 'timedelta64', _tables().Int64Col(),
- freq=getattr(index, 'freq', None),
- index_name=index_name)
- elif isinstance(index, (Int64Index, PeriodIndex)):
- atom = _tables().Int64Col()
- # avoid to store ndarray of Period objects
- return IndexCol(index._ndarray_values, 'integer', atom,
- freq=getattr(index, 'freq', None),
- index_name=index_name)
- if isinstance(index, MultiIndex):
- raise TypeError('MultiIndex not supported here!')
- inferred_type = lib.infer_dtype(index, skipna=False)
- values = np.asarray(index)
- if inferred_type == 'datetime64':
- converted = values.view('i8')
- return IndexCol(converted, 'datetime64', _tables().Int64Col(),
- freq=getattr(index, 'freq', None),
- tz=getattr(index, 'tz', None),
- index_name=index_name)
- elif inferred_type == 'timedelta64':
- converted = values.view('i8')
- return IndexCol(converted, 'timedelta64', _tables().Int64Col(),
- freq=getattr(index, 'freq', None),
- index_name=index_name)
- elif inferred_type == 'datetime':
- converted = np.asarray([(time.mktime(v.timetuple()) +
- v.microsecond / 1E6) for v in values],
- dtype=np.float64)
- return IndexCol(converted, 'datetime', _tables().Time64Col(),
- index_name=index_name)
- elif inferred_type == 'date':
- converted = np.asarray([v.toordinal() for v in values],
- dtype=np.int32)
- return IndexCol(converted, 'date', _tables().Time32Col(),
- index_name=index_name)
- elif inferred_type == 'string':
- # atom = _tables().ObjectAtom()
- # return np.asarray(values, dtype='O'), 'object', atom
- converted = _convert_string_array(values, encoding, errors)
- itemsize = converted.dtype.itemsize
- return IndexCol(
- converted, 'string', _tables().StringCol(itemsize),
- itemsize=itemsize, index_name=index_name
- )
- elif inferred_type == 'unicode':
- if format_type == 'fixed':
- atom = _tables().ObjectAtom()
- return IndexCol(np.asarray(values, dtype='O'), 'object', atom,
- index_name=index_name)
- raise TypeError(
- "[unicode] is not supported as a in index type for [{0}] formats"
- .format(format_type)
- )
- elif inferred_type == 'integer':
- # take a guess for now, hope the values fit
- atom = _tables().Int64Col()
- return IndexCol(np.asarray(values, dtype=np.int64), 'integer', atom,
- index_name=index_name)
- elif inferred_type == 'floating':
- atom = _tables().Float64Col()
- return IndexCol(np.asarray(values, dtype=np.float64), 'float', atom,
- index_name=index_name)
- else: # pragma: no cover
- atom = _tables().ObjectAtom()
- return IndexCol(np.asarray(values, dtype='O'), 'object', atom,
- index_name=index_name)
- def _unconvert_index(data, kind, encoding=None, errors='strict'):
- kind = _ensure_decoded(kind)
- if kind == u'datetime64':
- index = DatetimeIndex(data)
- elif kind == u'timedelta64':
- index = TimedeltaIndex(data)
- elif kind == u'datetime':
- index = np.asarray([datetime.fromtimestamp(v) for v in data],
- dtype=object)
- elif kind == u'date':
- try:
- index = np.asarray(
- [date.fromordinal(v) for v in data], dtype=object)
- except (ValueError):
- index = np.asarray(
- [date.fromtimestamp(v) for v in data], dtype=object)
- elif kind in (u'integer', u'float'):
- index = np.asarray(data)
- elif kind in (u'string'):
- index = _unconvert_string_array(data, nan_rep=None, encoding=encoding,
- errors=errors)
- elif kind == u'object':
- index = np.asarray(data[0])
- else: # pragma: no cover
- raise ValueError('unrecognized index type {kind}'.format(kind=kind))
- return index
- def _unconvert_index_legacy(data, kind, legacy=False, encoding=None,
- errors='strict'):
- kind = _ensure_decoded(kind)
- if kind == u'datetime':
- index = to_datetime(data)
- elif kind in (u'integer'):
- index = np.asarray(data, dtype=object)
- elif kind in (u'string'):
- index = _unconvert_string_array(data, nan_rep=None, encoding=encoding,
- errors=errors)
- else: # pragma: no cover
- raise ValueError('unrecognized index type {kind}'.format(kind=kind))
- return index
- def _convert_string_array(data, encoding, errors, itemsize=None):
- """
- we take a string-like that is object dtype and coerce to a fixed size
- string type
- Parameters
- ----------
- data : a numpy array of object dtype
- encoding : None or string-encoding
- errors : handler for encoding errors
- itemsize : integer, optional, defaults to the max length of the strings
- Returns
- -------
- data in a fixed-length string dtype, encoded to bytes if needed
- """
- # encode if needed
- if encoding is not None and len(data):
- data = Series(data.ravel()).str.encode(
- encoding, errors).values.reshape(data.shape)
- # create the sized dtype
- if itemsize is None:
- ensured = ensure_object(data.ravel())
- itemsize = max(1, libwriters.max_len_string_array(ensured))
- data = np.asarray(data, dtype="S{size}".format(size=itemsize))
- return data
- def _unconvert_string_array(data, nan_rep=None, encoding=None,
- errors='strict'):
- """
- inverse of _convert_string_array
- Parameters
- ----------
- data : fixed length string dtyped array
- nan_rep : the storage repr of NaN, optional
- encoding : the encoding of the data, optional
- errors : handler for encoding errors, default 'strict'
- Returns
- -------
- an object array of the decoded data
- """
- shape = data.shape
- data = np.asarray(data.ravel(), dtype=object)
- # guard against a None encoding in PY3 (because of a legacy
- # where the passed encoding is actually None)
- encoding = _ensure_encoding(encoding)
- if encoding is not None and len(data):
- itemsize = libwriters.max_len_string_array(ensure_object(data))
- if compat.PY3:
- dtype = "U{0}".format(itemsize)
- else:
- dtype = "S{0}".format(itemsize)
- if isinstance(data[0], compat.binary_type):
- data = Series(data).str.decode(encoding, errors=errors).values
- else:
- data = data.astype(dtype, copy=False).astype(object, copy=False)
- if nan_rep is None:
- nan_rep = 'nan'
- data = libwriters.string_array_replace_from_nan_rep(data, nan_rep)
- return data.reshape(shape)
- def _maybe_convert(values, val_kind, encoding, errors):
- if _need_convert(val_kind):
- conv = _get_converter(val_kind, encoding, errors)
- # conv = np.frompyfunc(conv, 1, 1)
- values = conv(values)
- return values
- def _get_converter(kind, encoding, errors):
- kind = _ensure_decoded(kind)
- if kind == 'datetime64':
- return lambda x: np.asarray(x, dtype='M8[ns]')
- elif kind == 'datetime':
- return lambda x: to_datetime(x, cache=True).to_pydatetime()
- elif kind == 'string':
- return lambda x: _unconvert_string_array(x, encoding=encoding,
- errors=errors)
- else: # pragma: no cover
- raise ValueError('invalid kind {kind}'.format(kind=kind))
- def _need_convert(kind):
- kind = _ensure_decoded(kind)
- if kind in (u'datetime', u'datetime64', u'string'):
- return True
- return False
- class Selection(object):
- """
- Carries out a selection operation on a tables.Table object.
- Parameters
- ----------
- table : a Table object
- where : list of Terms (or convertible to)
- start, stop: indices to start and/or stop selection
- """
- def __init__(self, table, where=None, start=None, stop=None):
- self.table = table
- self.where = where
- self.start = start
- self.stop = stop
- self.condition = None
- self.filter = None
- self.terms = None
- self.coordinates = None
- if is_list_like(where):
- # see if we have a passed coordinate like
- try:
- inferred = lib.infer_dtype(where, skipna=False)
- if inferred == 'integer' or inferred == 'boolean':
- where = np.asarray(where)
- if where.dtype == np.bool_:
- start, stop = self.start, self.stop
- if start is None:
- start = 0
- if stop is None:
- stop = self.table.nrows
- self.coordinates = np.arange(start, stop)[where]
- elif issubclass(where.dtype.type, np.integer):
- if ((self.start is not None and
- (where < self.start).any()) or
- (self.stop is not None and
- (where >= self.stop).any())):
- raise ValueError(
- "where must have index locations >= start and "
- "< stop"
- )
- self.coordinates = where
- except ValueError:
- pass
- if self.coordinates is None:
- self.terms = self.generate(where)
- # create the numexpr & the filter
- if self.terms is not None:
- self.condition, self.filter = self.terms.evaluate()
- def generate(self, where):
- """ where can be a : dict,list,tuple,string """
- if where is None:
- return None
- q = self.table.queryables()
- try:
- return Expr(where, queryables=q, encoding=self.table.encoding)
- except NameError:
- # raise a nice message, suggesting that the user should use
- # data_columns
- raise ValueError(
- "The passed where expression: {0}\n"
- " contains an invalid variable reference\n"
- " all of the variable references must be a "
- "reference to\n"
- " an axis (e.g. 'index' or 'columns'), or a "
- "data_column\n"
- " The currently defined references are: {1}\n"
- .format(where, ','.join(q.keys()))
- )
- def select(self):
- """
- generate the selection
- """
- if self.condition is not None:
- return self.table.table.read_where(self.condition.format(),
- start=self.start,
- stop=self.stop)
- elif self.coordinates is not None:
- return self.table.table.read_coordinates(self.coordinates)
- return self.table.table.read(start=self.start, stop=self.stop)
- def select_coords(self):
- """
- generate the selection
- """
- start, stop = self.start, self.stop
- nrows = self.table.nrows
- if start is None:
- start = 0
- elif start < 0:
- start += nrows
- if self.stop is None:
- stop = nrows
- elif stop < 0:
- stop += nrows
- if self.condition is not None:
- return self.table.table.get_where_list(self.condition.format(),
- start=start, stop=stop,
- sort=True)
- elif self.coordinates is not None:
- return self.coordinates
- return np.arange(start, stop)
- # utilities ###
- def timeit(key, df, fn=None, remove=True, **kwargs):
- if fn is None:
- fn = 'timeit.h5'
- store = HDFStore(fn, mode='w')
- store.append(key, df, **kwargs)
- store.close()
- if remove:
- os.remove(fn)
|