# Extract the contents of GV_12.pdf with textract and keep the result in
# the module-level name `text`.
#
# Disabled alternative that uses the `tika` package, kept for reference:
#
#     from tika import parser
#
#     raw = parser.from_file("GV_12.pdf")
#     raw = str(raw)
#
#     safe_text = raw.encode('utf-8', errors='ignore')
#
#     # safe_text = str(safe_text).replace("\n", "").replace("\\", "")
#     print('--- safe text ---')
#     print(safe_text)
import textract

# NOTE(review): per the textract documentation, process() returns the
# extracted content as an encoded byte string rather than str -- confirm,
# and decode it before doing any string processing on `text`.
text = textract.process("GV_12.pdf")