  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Tue Apr 28 12:01:07 2020
  5. @author: tanya
  6. @description: a class to reshape a pandas dataframe to a dataframe
  7. where rows are (possibly nested) documents
  8. according to a mongodb JsonSchema
  9. """
  10. import pandas as pd
  11. import numpy as np
  12. import os
  13. import sys
  14. from copy import deepcopy
  15. from cdplib.log import Log
  16. sys.path.append(os.getcwd())
  17. class DataFrameToCollection():
  18. def __init__(self,
  19. grp_fields: list,
  20. schema_path: str = None,
  21. schema: dict = None):
  22. '''
  23. Method to_documents transforms a dataframe
  24. to a dataframe where each row is a (possibly nested) document
  25. columns of the dataframe represent nested fields of the schema
  26. separated by "."
  27. Example: customer.name.first, customer.name.last
  28. :param grp_fields: the unique identifiers of the documents
  29. :param schema: JsonSchema for the mongodb collection the data should
  30. be conform with
  31. '''
  32. import json
  33. self._logger = Log("DataFrameToCollection")
  34. self._grp_fields = grp_fields
  35. if (schema_path is None) and (schema is None):
  36. err = "Specify either schema or schema_path"
  37. self._logger.log_and_raise_error(err)
  38. elif schema is not None:
  39. self.schema = schema
  40. else:
  41. if not os.path.isfile(schema_path):
  42. err = "JsonSchema not found"
  43. self._logger.log_and_raise_error(err)
  44. # load schema to dictionary if it is a valid json file
  45. try:
  46. with open(schema_path, "r") as f:
  47. self.schema = json.load(f)
  48. except Exception as e:
  49. err = ("Failed to load the schema,"
  50. "exit with error: {}".format(e))
  51. self._logger.log_and_raise_error(err)
  52. def to_documents(self,
  53. data: pd.DataFrame,
  54. grp_fields: list = None,
  55. schema: dict = None):
  56. '''
  57. Transforms a dataframe to a dataframe where each
  58. row is a (possibly nested) document.
  59. columns of the dataframe represent nested fields of the schema
  60. separated by "."
  61. Example: customer.name.first, customer.name.last
  62. '''
  63. data = self._concatenate_duplicated_columns(data)
  64. if grp_fields is None:
  65. grp_fields = self._grp_fields
  66. grp_fields = [c for c in grp_fields if c in data.columns]
  67. if schema is None:
  68. schema = self.schema
  69. reshaped_df = data[grp_fields].drop_duplicates().set_index(grp_fields)
  70. for field in schema["properties"]:
  71. field_type = self._field_type(field=field, schema=schema)
  72. if not self._field_is_a_column:
  73. continue
  74. args = {"field": field, "schema": schema,
  75. "data": data, "grp_fields": grp_fields}
  76. if field_type == "object":
  77. reshaped_column = self._reshape_object_column(**args)
  78. elif field_type == "array":
  79. reshaped_column = self._reshape_array_column(**args)
  80. else:
  81. # has simple type
  82. reshaped_column = self._reshape_simple_column(**args)
  83. reshaped_df = self._bring_to_same_index(
  84. df1=reshaped_df,
  85. df2=reshaped_column)
  86. reshaped_df = pd.concat([reshaped_df, reshaped_column],
  87. axis=1, ignore_index=False)
  88. # reset the index
  89. reshaped_df = reshaped_df\
  90. .drop(list(reshaped_df.index.names), axis=1, errors="ignore")\
  91. .reset_index(drop=False)
  92. return reshaped_df
  93. def _reshape_simple_column(self, field: str,
  94. data: pd.DataFrame,
  95. grp_fields: list,
  96. schema: dict) -> pd.DataFrame:
  97. '''
  98. Example: grp_fields = [product_id, store_id],
  99. field = price,
  100. some_other_field = customer_id
  101. product_id | store_id | price | customer_id
  102. p1 s1 5.99 c1
  103. p1 s1 5.99 c2
  104. p2 s1 7.99 c3
  105. result:
  106. index price
  107. product_id store_id
  108. p1 s1 5.99
  109. p2 s1 7.99
  110. '''
  111. self._assert_is_one_to_one(field=field,
  112. data=data,
  113. grp_fields=grp_fields)
  114. if field not in grp_fields:
  115. fields = grp_fields + [field]
  116. else:
  117. fields = grp_fields
  118. return data[fields].drop_duplicates(subset=grp_fields)\
  119. .set_index(grp_fields, drop=False)[field]
    def _reshape_object_column(self, field: str,
                               data: pd.DataFrame,
                               grp_fields: list,
                               schema: dict) -> pd.Series:
        '''
        Collapse the flat "." separated columns belonging to an object
        field into a single Series of (possibly nested) dictionaries.

        Example: grp_fields = ["transaction_id"], field = "commodity";
        columns "commodity.id", "commodity.name" become one column
        "commodity" holding {"id": ..., "name": ...} per document.
        Nested objects are handled recursively, so "customer.name.first"
        ends up as {"name": {"first": ...}, ...}.

        :param field: name of the object field in the schema
        :param data: flat input dataframe
        :param grp_fields: unique identifiers of the documents
        :param schema: (sub)schema in which the field is declared
        :return: Series named after the field, indexed by grp_fields,
            whose entries are dictionaries
        '''
        data = data.copy(deep=True)

        # sub-schema of the field, with its property names prefixed by
        # "<field>." so they match the dataframe's column names
        field_subschema = self._object_subschema(
                field=field,
                schema=schema)

        # if sub_schema is nested then recursively apply the function
        # to reshape its sub-fields first
        reshaped_df = self.to_documents(
                data=data,
                grp_fields=grp_fields,
                schema=field_subschema)

        reshaped_df = reshaped_df\
            .set_index(grp_fields, drop=False)\
            .drop(self._grp_fields, axis=1, errors="ignore")

        # NOTE(review): to_documents never returns None, and methods were
        # already called on reshaped_df above, so the else branch below
        # looks unreachable — confirm before relying on it.
        if reshaped_df is not None:
            # one dict per row; "<field>." prefixes are stripped from the
            # keys inside _row_to_dict
            reshaped_column = reshaped_df.apply(self._row_to_dict, axis=1)
            reshaped_column.name = field
        else:
            reshaped_column = None

        return reshaped_column
  193. def _reshape_array_column(self, field: str,
  194. data: pd.DataFrame,
  195. grp_fields: list,
  196. schema: dict) -> pd.DataFrame:
  197. '''
  198. Example
  199. grp_fields = [customer]
  200. schema = {"bsonType": object,
  201. "properties": {
  202. "customer": str,
  203. "product": {
  204. "bsonType": array,
  205. "items": {
  206. "bsonType": "str"
  207. }
  208. },
  209. "commodity": {
  210. "bsonType": array,
  211. "items": {
  212. "bsonType": "object",
  213. "properties": {
  214. "id": String,
  215. "name": "fruits",
  216. }
  217. }
  218. }
  219. }
  220. }
  221. data:
  222. =====================================================
  223. customer | product | commodity.id | commodity.name
  224. c1 p1 com1 fruits
  225. c1 p2 com1 fruits
  226. c1 p3 com2 meat
  227. c1 p4 com1 fruits
  228. c2 p1 com2 meat
  229. ===================================================
  230. field 1: product
  231. result 1:
  232. Index product
  233. customer
  234. c1 [p1, p2, p3, p4]
  235. c2 [p1]
  236. ====================================================
  237. field 2: commodity
  238. result 2:
  239. Index commodity
  240. customer
  241. c1 [{id: com1, name: fruits}, {id: com2, name: meat}]
  242. c2 [{id: com2, name: meant}]
  243. '''
  244. data = data.copy(deep=True)
  245. items_type = self._items_type(field=field, schema=schema)
  246. if items_type == "array":
  247. reshaped_column = data.groupby(grp_fields, sort=False)[field]\
  248. .apply(self._column_to_uniques, axis=0)
  249. elif items_type == "object":
  250. items_subschema = self._items_subschema(field=field, schema=schema)
  251. simple_subfields = self._simple_subfields(field=field,
  252. schema=items_subschema)
  253. reshaped_column = self._reshape_object_column(
  254. field=field,
  255. data=data,
  256. grp_fields=grp_fields + simple_subfields,
  257. schema=items_subschema,)
  258. reshaped_column = reshaped_column\
  259. .reset_index(grp_fields)\
  260. .groupby(grp_fields, sort=False)[field]\
  261. .apply(self._column_to_uniques)
  262. else:
  263. # items type is simple type
  264. reshaped_column = data\
  265. .groupby(grp_fields, sort=False)[field]\
  266. .apply(self._column_to_uniques_flattened)
  267. return reshaped_column
  268. def _concatenate_duplicated_columns(self,
  269. data: pd.DataFrame) -> pd.DataFrame:
  270. '''
  271. Example:
  272. data:
  273. =====================================================
  274. customer | product | product
  275. c1 p1 p2
  276. c2 p1 p3
  277. result
  278. customer | product
  279. c1 [p1, p2]
  280. c2 [p1, p3]
  281. '''
  282. data = data.copy(deep=True)
  283. def row_to_list(row: pd.Series) -> list:
  284. return list(row)
  285. for c in set(data.columns):
  286. if isinstance(data[c], pd.DataFrame):
  287. data["temp"] = data[c].apply(row_to_list, axis=1)
  288. data.drop(c, axis=1, inplace=True)
  289. data = data.rename(columns={"temp": c})
  290. return data
  291. def _field_is_a_column(self, field: str,
  292. data: pd.DataFrame,
  293. schema: dict) -> bool:
  294. '''
  295. Example:
  296. schema = {"bsonType": object,
  297. "properties": {
  298. "customer": str,
  299. "commodity": {
  300. "bsonType": array,
  301. "items": {
  302. "bsonType": "object",
  303. "properties": {
  304. "id": String,
  305. "name": "fruits",
  306. }
  307. }
  308. }
  309. }
  310. }
  311. data:
  312. =====================================================
  313. customer | commodity.id | commodity.name
  314. c1 com1 fruits
  315. c1 com1 fruits
  316. c1 com2 meat
  317. c1 com1 fruits
  318. c2 com2 meat
  319. ===================================================
  320. Returns True for customer, commodity, commodity.id, commodity.name
  321. '''
  322. unrolled_names = []
  323. for c in data.columns:
  324. splitted = c.split(".")
  325. for i in range(len(splitted)):
  326. unrolled_names.append(".".join(splitted[:i+1]))
  327. return (field in unrolled_names)
  328. def _field_type(self, field: str, schema: dict) -> str:
  329. '''
  330. Can be simple type, object, or array
  331. '''
  332. return schema["properties"][field]["bsonType"]
  333. def _items_type(self, field: str, schema: dict) -> str:
  334. '''
  335. type of items of an array field (can be simple, object, or array)
  336. '''
  337. return schema["properties"][field]["items"]["bsonType"]
  338. def _has_simple_type(self, field: str, schema: dict) -> bool:
  339. '''
  340. True if is not array and is not object
  341. '''
  342. field_type = self._field_type(field=field, schema=schema)
  343. return field_type not in ["array", "object"]
  344. def _simple_subfields(self, field: str, schema: dict) -> list:
  345. '''
  346. Returns list of fields that have a simple type
  347. Example:
  348. "customer": {
  349. "bsonType": "object",
  350. "properties": {
  351. "name": {
  352. "bsonType": "object",
  353. "properties": {
  354. "first": str,
  355. "last": str
  356. },
  357. }
  358. "bank": str
  359. }
  360. }
  361. returns [customer.bank]
  362. '''
  363. subschema = schema["properties"][field]
  364. return [field + "." + f for f in subschema["properties"]
  365. if self._has_simple_type(field=f, schema=subschema)]
  366. def _assert_is_one_to_one(self, field: str,
  367. data: pd.DataFrame,
  368. grp_fields: list):
  369. '''
  370. A column with simple or object type should not have duplicated entries
  371. per combination of grp columns.
  372. Example: grp_fields = [product_id, store_id],
  373. field = price,
  374. some_other_field = customer_id
  375. Good example:
  376. product_id | store_id | price | customer_id
  377. p1 s1 5.99 c1
  378. p1 s1 5.99 c2
  379. p2 s1 7.99 c3
  380. Bad example:
  381. product_id | store_id | price | customer_id
  382. p1 s1 5.99 c1
  383. p1 s1 6.99 c2
  384. p2 s1 7.99 c3
  385. '''
  386. is_one_to_one = \
  387. (len(data[grp_fields + [field]].drop_duplicates()) ==
  388. len(data[grp_fields].drop_duplicates()))
  389. if not is_one_to_one:
  390. print(data[grp_fields + [field]])
  391. err = ("Column {0} should be one to one "
  392. "with {1}").format(field, grp_fields)
  393. self._logger.log_and_raise_error(err)
  394. def _bring_to_same_index(self,
  395. df1: pd.DataFrame,
  396. df2: pd.DataFrame):
  397. '''
  398. if index names of df2 are in index or column names of df1,
  399. sets the index of df1 to the index of df2,
  400. else raises an error
  401. '''
  402. def has_same_index(df1, df2):
  403. expr = df1.index.names != df2.index.names
  404. if hasattr(expr, "any"):
  405. return expr.any()
  406. else:
  407. return expr
  408. try:
  409. if has_same_index(df1, df2):
  410. df2_index_names = list(df2.index.names)
  411. df1 = df2\
  412. .reset_index(drop=False)\
  413. .drop("index", axis=1, errors="ignore")\
  414. .set_index(df2_index_names)
  415. return df1
  416. except Exception as e:
  417. err = ("Error in the index of the reshaped dataframe."
  418. " Exit with error: {}".format(e))
  419. self._logger.log_and_raise_error(err)
  420. def _object_subschema(self, field: str, schema: dict) -> dict:
  421. '''
  422. If a field is an object, its value is a sub-schema.
  423. This method completes the sub-schema key names
  424. to the key names of the initial schema.
  425. Example: field: commodity
  426. schema = {"bsonType": object,
  427. "properties": {
  428. "commodity": {
  429. "bsonType": object,
  430. "properties": {
  431. "id": "com1",
  432. "name": "fruits",
  433. }
  434. },
  435. "product_id": p1,
  436. "store_id": s1
  437. }
  438. }
  439. result
  440. {commodity.id : com1, commodity.name : fruits}
  441. '''
  442. subschema = deepcopy(schema["properties"][field])
  443. subschema["properties"] = {".".join([field, k]): v
  444. for k, v
  445. in subschema["properties"].items()}
  446. return subschema
  447. def _items_subschema(self, field: str, schema: dict) -> dict:
  448. '''
  449. schema = {"bsonType": object,
  450. "properties": {
  451. "commodity": {
  452. "bsonType": array,
  453. "items": {
  454. "bsonType": "object",
  455. "properties": {
  456. "id": String,
  457. "name": "fruits",
  458. }
  459. }
  460. }
  461. }
  462. }
  463. field = commodity
  464. returns {"bsonType": "object",
  465. "properties": {
  466. "id": String,
  467. "name": "fruits",
  468. }
  469. }
  470. '''
  471. subschema = deepcopy(schema["properties"][field]["items"])
  472. return {"bsonType": "object",
  473. "properties": {field: subschema}}
  474. def _row_to_dict(self, row: pd.Series) -> dict:
  475. '''
  476. Transforms pandas series to a dictionary
  477. is meant to be applied to a dataframe in axis = 1
  478. Example:
  479. row:
  480. Index commodity.id | commodity.name
  481. transaction_id
  482. t1 com1 fruits
  483. result:
  484. Index commodity
  485. transaction_id
  486. t1 {id: com1, name: fruits}
  487. '''
  488. def drop_prefix(field: str) -> str:
  489. return field.split(".")[-1]
  490. def row_not_null(entry) -> str:
  491. '''
  492. entry can be dict, list (or array), scalar
  493. check these three cases
  494. '''
  495. if isinstance(entry, dict):
  496. # not null, if one of the keys is not null
  497. return pd.notnull(list(entry.values())).any()
  498. # list
  499. elif hasattr(pd.notnull(entry), "any"):
  500. return pd.notnull(entry).any()
  501. # scalar
  502. else:
  503. return pd.notnull(entry)
  504. return {drop_prefix(field): row[field] for field in row.index
  505. if row_not_null(row[field])}
    def _column_to_uniques(self, column: pd.Series) -> list:
        '''
        Return the list of unique values of a Series whose entries may be
        arbitrary objects (scalars, lists, dicts); pandas' unique() does
        not work when entries are unhashable, hence the manual route via
        string representations. Empty lists and empty dicts are dropped
        from the result.

        Example: [1, 1, 2, [1, 2], [1, 2], {}] -> [1, 2, [1, 2]]

        :param column: Series with entries of arbitrary (mixed) type
        :return: list of unique, non-empty entries
        '''
        # NOTE(review): passing a Series (rather than its dtype) to
        # np.issubdtype relies on np.dtype reading the object's .dtype
        # attribute, and (dtype == str) is effectively never True for
        # object columns — confirm against the numpy/pandas versions in use.
        is_simple_dtype = (np.issubdtype(pd.Series(column), np.number)) or\
            (pd.Series(column).dtype == str)
        if is_simple_dtype:
            uniques = list(pd.Series(column).unique())
        else:
            # deduplicate via the string representation of each entry,
            # which works for unhashable values such as lists and dicts
            uniques = pd.DataFrame({"temp": column.tolist()})\
                .assign(temp_str=lambda y: y["temp"].astype(str))\
                .drop_duplicates(subset=["temp_str"])\
                .drop("temp_str", axis=1).iloc[:, 0].tolist()

        def is_empty(y):
            # empty dicts and empty lists carry no information
            is_empty_dict = (isinstance(y, dict) and (len(y) == 0))
            is_empty_list = (isinstance(y, list) and (len(y) == 0))
            return is_empty_dict or is_empty_list

        uniques = [el for el in uniques if not is_empty(el)]
        return uniques
  544. def _column_to_uniques_flattened(self, column: pd.Series) -> pd.Series:
  545. '''
  546. Does the same as colum_to_uniques, but flattens lists of lists
  547. Example:
  548. column:
  549. 1
  550. 1
  551. 2
  552. [1,2]
  553. [1,2],
  554. [1,2,3]
  555. [[1,2], [1,2]]
  556. {"a": 1, "b": 2}
  557. Result:
  558. 1
  559. 2
  560. [1,2]
  561. [1,2,3],
  562. {"a": 1, "b": 2}
  563. '''
  564. uniques = self._column_to_uniques(column)
  565. def flatten_list(l):
  566. from itertools import chain
  567. return list(chain.from_iterable(l))
  568. is_list_of_lists = (isinstance(uniques, list)) \
  569. and (isinstance(uniques[0], list))
  570. if is_list_of_lists:
  571. uniques = flatten_list(uniques)
  572. return uniques
def test():
    '''
    Smoke test: build a dataframe covering simple fields, nested
    objects, arrays of scalars and arrays of objects (including a
    duplicated column name), reshape it with DataFrameToCollection and
    print the result for visual inspection.
    '''
    # grouping key is "a"; every other column is a document field,
    # with "." separators encoding nesting
    df = pd.DataFrame({
        "a": [5]*8 + [6]*8,
        "b": [10]*8 + [20]*8,
        "c1": [100, 200]*8,
        "c": [100, 200]*8,
        "d.da": [11]*8 + [22]*8,
        "d.db": [33]*8 + [34]*8,
        "e.ea.eaa": [5]*8 + [55]*8,
        "e.ea.eab": [6]*8 + [66]*8,
        "e.eb": [2, 2, 3, 3]*4,
        "e.ec.eca": [1, 2, 3, 4]*4,
        "e.ec.ecb": [5, 6, 7, 8]*4,
        "f.fa": [1]*4 + [3]*4 + [11]*4 + [33]*4,
        "f.fb": [2]*4 + [3]*2 + [4]*2 + [22]*4 + [44]*4,
        "g.ga": [1, 2, 3, 4]*4,
    })

    print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
    print("DataFrame looks like")
    print(df)
    print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")

    # add a second column named "c" to exercise the handling of
    # duplicated column names
    duplicate = pd.DataFrame({"c": [300, 400]*8})
    df = pd.concat([df, duplicate], axis=1)

    # schema covering every reshaping branch: simple ints (a, b),
    # arrays of ints (c, c1), objects (d), nested objects with arrays
    # (e), and arrays of objects (f, g)
    schm = {"bsonType": "object",
            "required": ["a"],
            "properties": {
                "a": {
                    "bsonType": "integer"
                },
                "b": {
                    "bsonType": "integer"
                },
                "c": {
                    "bsonType": "array",
                    "items": {
                        "bsonType": "integer"
                    }
                },
                "c1": {
                    "bsonType": "array",
                    "items": {
                        "bsonType": "integer"
                    }
                },
                "d": {
                    "bsonType": "object",
                    "properties": {
                        "da": {"bsonType": "integer"},
                        "db": {"bsonType": "integer"}
                    }
                },
                "e": {
                    "bsonType": "object",
                    "properties": {
                        "ea": {
                            "bsonType": "object",
                            "properties": {
                                "eaa": {"bsonType": "integer"},
                                "eab": {"bsonType": "integer"}
                            }
                        },
                        "eb": {
                            "bsonType": "array",
                            "items": {
                                "bsonType": "integer"
                            }
                        },
                        "ec": {
                            "bsonType": "array",
                            "items": {
                                "bsonType": "object",
                                "properties": {
                                    "eca": {"bsonType": "integer"},
                                    "ecb": {"bsonType": "integer"}
                                }
                            }
                        }
                    }
                },
                "f": {
                    "bsonType": "array",
                    "items": {
                        "bsonType": "object",
                        "properties": {
                            "fa": {"bsonType": "integer"},
                            "fb": {
                                "bsonType": "array",
                                "items": {"bsonType": "integer"}
                            }
                        }
                    }
                },
                "g": {
                    "bsonType": "array",
                    "items": {
                        "bsonType": "object",
                        "properties": {
                            "ga": {"bsonType": "integer"}
                        }
                    }
                }
            }
            }

    grp_fields = ["a"]

    reshaper = DataFrameToCollection(grp_fields=grp_fields, schema=schm)
    reshaped_df = reshaper.to_documents(df)

    print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
    print("DataFrame looks like")
    print(reshaped_df)
    print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")


if __name__ == "__main__":
    test()