# regex_extraction.py (2.2 KB)
  1. # coding=utf8
  2. import re
  3. regex = r"(\S+\s{1,3}?\S*\s?\S*\S*\s?\S*\S*\s?\S*\S*\s?\S*\S*\s+)" #alle gruppen von zahlen raus
  4. regex1 = r"([A-Z]\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)" #ti get the bezeichnungen raus
  5. regex2= r"([a-zA-Z]{3,})" #alle wörter raus???
  6. regex_isos = r"(ISO\s\d\d\d\d?\W?\d?\W?\d?\W?\d?)" #get iso standards
  7. reg = r"(^\d{1}$)" #einzelne Zahlen raus
  8. reg1 = r"(^[A-Z]-?[A-Z]?$)" #einzelne Buchstaben raus
  9. extracted_dimensions = []
  10. file = open('/home/bscheibel/PycharmProjects/dxf_reader/drawings/5129275_Rev01-GV12.txt', 'r')
  11. text = file.read()
  12. file.close()
  13. matches = re.findall(regex, text, re.MULTILINE)
  14. for match in matches:
  15. extracted_dimensions.append(match.strip())
  16. isos = []
  17. new_dims = []
  18. #next part replaces everything you do not need with whitespace
  19. for dim in extracted_dimensions:
  20. if re.search(regex_isos, dim):
  21. match = re.findall(regex_isos,dim)
  22. isos.append(match[0])
  23. dim = re.sub(regex_isos,'' ,dim)
  24. if re.search(regex1, dim):
  25. dim = re.sub(regex1, '', dim)
  26. if re.search(regex2, dim):
  27. dim = re.sub(regex2,'' ,dim)
  28. if re.search(reg, dim):
  29. dim = re.sub(reg,'' ,dim)
  30. if re.search(reg1, dim):
  31. dim = re.sub(reg1,'' ,dim)
  32. if dim != '':
  33. new_dims.append(dim)
  34. print(isos)
  35. for dim in new_dims:
  36. if b:
  37. print("Rechtwinkligkeit")
  38. print(dim)
  39. if g:
  40. print("Zylinderform")
  41. print(dim)
  42. if f:
  43. print("Parallelität")
  44. print(dim)
  45. if c:
  46. print("Zylinderform")
  47. print(dim)
  48. if r:
  49. print("Konzentrizität oder Durchmesser?")
  50. print(dim)
  51. if i:
  52. print("Symmetrie")
  53. print(dim)
  54. if j:
  55. print("Ortstoleranz/Mittelpunkt")
  56. print(dim)
  57. if n:
  58. print("Durchmesser")
  59. print(dim)
  60. if É:
  61. print("Modifikator")
  62. print(dim)
  63. ####nicht dabei: neigungswinkel und lauftoleranzen
  64. if R:
  65. print("Radius")
  66. if °:
  67. print("Grad")