# regex_extraction.py (522 B)

  1. # coding=utf8
  2. import re
  3. regex = r"(\S+\s{1,3}?\S*\s?\S*\S*\s?\S*\S*\s?\S*\S*\s?\S*\S*\s+)"
  4. regex1 = r"([A-Z]\s?\W\s?\d\d?\s?\s?\W\s?\d\d?\s?\W)" #ti get the bezeichnungen raus
  5. regex2= r"([a-zA-Z]{3,})" #alle wörter raus???
  6. extracted_dimensions = []
  7. file=open('/home/bscheibel/PycharmProjects/dxf_reader/drawings/5152166_Rev04.txt', 'r')
  8. text= file.read()
  9. file.close()
  10. matches = re.findall(regex, text, re.MULTILINE)
  11. for match in matches:
  12. extracted_dimensions.append(match.strip())
  13. print(extracted_dimensions)