regex_test.py 853 B

123456789101112131415161718192021222324252627
  1. import re
  2. reg_all = re.compile(r"(^[A-Z]{1}-?[A-Z]?\s*$)|([A-Z]\W?[A-Z]?\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)|((?!\d)(?!Rpk)[a-zA-Z]{3,}?\W)|(?!0)(^\d{1}\s*$|A\d{1}|\d/\d)")
  3. regex = r"(\S+\s{1,3}?\S*\s?\S*\S*\s?\S*\S*\s?\S*\S*\s?\S*\S*\s+)"
  4. file = open('/home/bscheibel/PycharmProjects/dxf_reader/drawings/5129275_Rev01-GV12.txt', 'r')
  5. text = file.read()
  6. file.close()
  7. extraction = []
  8. matches = re.findall(regex, text, re.MULTILINE)
  9. for match in matches:
  10. #print(match)
  11. extraction.append(match.strip())
  12. i = 0
  13. new_matches = []
  14. for match in extraction:
  15. #print(match)
  16. #print("blub")
  17. match = match.split('\n')[0]
  18. #if len(match)>1:
  19. # extraction.append(match[1])
  20. #print(match[1])
  21. #print([match])
  22. if not re.search(reg_all, match):
  23. #print("blub")
  24. new_matches.append(match)
  25. i += 1
  26. print(new_matches)