# coding=utf8
import re
def clean(extracted_dimensions):
    """Split ISO references off the extracted entries and drop noise entries.

    Every entry that starts with an ISO reference (module-level pattern
    ``regex_isos``) has that reference appended to the module-level ``isos``
    list and is removed from ``extracted_dimensions``.  Every other entry
    that matches the module-level ``reg_all`` pattern at its start is
    removed as well.

    The entries are filtered into a fresh list instead of calling
    ``list.remove()`` on the list being iterated: removing during iteration
    makes the loop skip the element that follows each removed one, so
    adjacent ISO or noise entries were previously left behind.

    Args:
        extracted_dimensions: list of strings; it is filtered in place.

    Returns:
        A tuple ``(isos, extracted_dimensions)``: the module-level ISO list
        and the same (now filtered) list object that was passed in.
    """
    kept = []
    for dim in extracted_dimensions:
        iso_match = re.match(regex_isos, dim)
        if iso_match:
            # The whole match is the ISO reference at the start of the entry.
            isos.append(iso_match.group(0))
        elif not re.match(reg_all, dim):
            kept.append(dim)
    # Slice assignment updates the caller's list object, as remove() did.
    extracted_dimensions[:] = kept
    return isos, extracted_dimensions
def print_clean(extracted_dimensions):
    """Print a label for every known marker character found in each entry.

    Each entry is checked against the markers below in the listed order.
    For every marker contained in the entry the associated label is printed;
    for all markers except ``°`` and ``Ø`` the entry itself is printed on the
    following line.  An entry containing several markers produces one label
    per marker.

    Args:
        extracted_dimensions: iterable of strings to inspect.
    """
    # (marker, label, print the entry after the label?) in checking order.
    # Original note: angularity and run-out tolerances are not covered.
    marker_table = (
        ("b", "Rechtwinkligkeit", True),
        ("g", "Zylinderform", True),
        ("f", "Parallelität", True),
        ("c", "Zylinderform", True),
        ("r", "Konzentrizität?", True),
        ("i", "Symmetrie", True),
        ("j", "Ortstoleranz/Mittelpunkt", True),
        ("n", "Durchmesser", True),
        ("É", "Modifikator", True),
        ("R", "Radius", True),
        ("°", "Grad", False),
        ("Ø", "Durchmesser", False),
    )
    for entry in extracted_dimensions:
        for marker, label, echo_entry in marker_table:
            if marker not in entry:
                continue
            print(label)
            if echo_entry:
                print(entry)
# --- Patterns -------------------------------------------------------------
# Captures groups of whitespace-separated tokens from the drawing text; used
# below with re.findall to collect the candidate entries.
regex = r"(\S+\s{1,3}?\S*\s?\S*\S*\s?\S*\S*\s?\S*\S*\s?\S*\S*\s+)"
# Designations of the form "<letters> <sep> <n>:<n> <sep>" (to be removed).
regex1 = r"([A-Z]\W?[A-Z]?\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)"
# Words of three or more letters, except those starting with "Rpk".
regex2 = r"((?!\d)(?!Rpk)[a-zA-Z]{3,})"
# ISO standard references, e.g. "ISO" followed by a 3-4 digit number.
regex_isos = r"(ISO\s\d\d\d\d?\W?\d?\W?\d?\W?\d?)"
# A single digit on its own (checked).
reg = r"(^\d{1}$)"
# A single capital letter, optionally followed by "-" and/or a second one
# (checked).
reg1 = r"(^[A-Z]{1}-?[A-Z]?$)"
# Combined noise filter used by clean(); unlike `reg`, its single-digit
# branch does not match a lone "0".
reg_all = r"(^(?!0)\d{1}$)|(^[A-Z]{1}-?[A-Z]?$)|(^[A-Z]\W?[A-Z]?\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)|((?!\d)(?!Rpk)[a-zA-Z]{3,})"

# --- Read the drawing text --------------------------------------------------
# The context manager closes the file even if reading raises.
with open('/home/bscheibel/PycharmProjects/dxf_reader/drawings/5129275_Rev01-GV12.txt', 'r') as drawing_file:
    text = drawing_file.read()

# --- Extract and filter the entries ----------------------------------------
matches = re.findall(regex, text, re.MULTILINE)
extracted_dimensions = [match.strip() for match in matches]

# clean() appends to this module-level list, so it must exist before the call.
isos = []
isos, dims = clean(extracted_dimensions)

for dim in dims:
    print(dim)
print_clean(dims)
|