read_pdf.py 336 B

1234567891011121314151617
  # from tika import parser
  #
  # raw = parser.from_file("GV_12.pdf")
  # raw = str(raw)
  #
  # safe_text = raw.encode('utf-8', errors='ignore')
  #
  # #safe_text = str(safe_text).replace("\n", "").replace("\\", "")
  # print('--- safe text ---' )
  # print(safe_text
  #
  # )
  import textract

  # Extract the text content of the drawing PDF and print it.
  # The path is relative to the current working directory.
  # textract.process() returns the extracted text as an encoded byte string
  # (UTF-8 by default per the textract docs), so it is decoded before
  # printing; printing the bytes object directly would show its b'...' repr
  # with escaped newlines instead of readable text.
  text = textract.process("../drawings/GV_12.PDF")
  print(text.decode("utf-8"))