|
@@ -1,67 +1,101 @@
|
|
|
# coding=utf8
|
|
|
import re
|
|
|
+import csv_to_pandadf
|
|
|
|
|
|
def clean(extracted_dimensions):
    """Separate ISO references from the extracted text and filter out noise.

    Entries that start with an ISO standard reference (``regex_isos``) are
    appended to the module-level ``isos`` list and removed from
    ``extracted_dimensions`` (the caller's list is updated in place).
    Every other entry is cut down to its first line and kept only if
    ``reg_all`` finds nothing in it.

    Args:
        extracted_dimensions: list of text snippets; modified in place so
            that it no longer contains the ISO entries.

    Returns:
        A tuple ``(isos, new_matches)``: the module-level ISO list and a new
        list with the first line of every entry that passed the filter.
    """
    # Build a new list instead of calling remove() while iterating: removing
    # during iteration skips the element that follows every ISO hit.
    non_iso = []
    for dim in extracted_dimensions:
        iso = re.match(regex_isos, dim)
        if iso:
            # regex_isos consists of a single capturing group, so group(1)
            # is the full ISO reference found at the start of the entry.
            isos.append(iso.group(1))
        else:
            non_iso.append(dim)
    # Keep the historical side effect: the caller's list loses its ISO entries.
    extracted_dimensions[:] = non_iso

    new_matches = []
    for dim in non_iso:
        # Only the first line of a multi-line snippet is considered.
        first_line = dim.split('\n')[0]
        if not re.search(reg_all, first_line):
            new_matches.append(first_line)
    return isos, new_matches
|
|
|
|
|
|
|
|
|
def print_clean(dims):
    """Print a category label followed by the text for every recognised entry.

    Each entry is checked against an ordered list of rules. A matching rule
    prints its label and then the entry itself. Most rules end the checks
    for that entry; the ``j`` and ``n`` rules do not, so an entry matched by
    one of them is still tested against the rules that follow. Entries that
    match no rule print nothing.

    Args:
        dims: iterable of dimension strings.

    Returns:
        None. Output goes to stdout.
    """
    # (predicate, label, stop): regex predicates are anchored at the start of
    # the entry (re.match semantics); the two lambdas are substring checks.
    # presumably the leading letters are symbol codes of a tolerance font -
    # TODO confirm against the drawings' font mapping.
    rules = (
        (re.compile(r"b\s\d*\W?\d*\s.").match, "Rechtwinkligkeit", True),
        (re.compile(r"g\s\d*\W?\d*").match, "Zylinderform", True),
        # The f / c / r rules previously repeated the "g" pattern and were
        # unreachable; the letters are the ones the labels were keyed on
        # before the regex-based checks were introduced.
        (re.compile(r"f\s\d*\W?\d*").match, "Parallelität", True),
        # NOTE(review): this label duplicates the "g" rule - confirm that
        # "c" is really meant to be reported as "Zylinderform".
        (re.compile(r"c\s\d*\W?\d*").match, "Zylinderform", True),
        (re.compile(r"r\s\d*\W?\d*").match, "Konzentrizität", True),
        (re.compile(r"i\s\d*\W?\d*").match, "Symmetrie", True),
        (re.compile(r"j\s\d*\W?\d*").match, "Ortstoleranz/Mittelpunkt", False),
        (re.compile(r"n\d*").match, "Durchmesser", False),
        (lambda text: "É" in text, "Modifikator", True),
        # Not covered: angularity and run-out tolerances.
        (re.compile(r"R\d*$").match, "Radius", True),
        (lambda text: "°" in text, "Grad", True),
        (re.compile(r"Ø\s*\d*\W?\d*").match, "Durchmesser", True),
    )
    for dim in dims:
        for applies, label, stop in rules:
            if applies(dim):
                print(label)
                print(dim)
                if stop:
                    break
|
|
|
+
|
|
|
def merge(dims):
    """Join neighbouring text fragments that belong together.

    Every entry first has 'È' replaced by 'GG'; the normalised text is also
    written back into ``dims`` (in-place effect of the earlier version kept).
    A fragment is combined with the one before it when:

    * the previous fragment is a bare multiplier such as "2x" or "x":
      result is "<previous> <current>";
    * the current fragment starts with "R0": result is "<current><previous>";
    * the previous fragment is a lone "°": result is "<current><previous>".

    Both fragments of a merged pair are consumed; all other fragments are
    passed through unchanged and in order.

    NOTE(review): earlier this function built the merged list but returned
    ``dims``, so no merge ever reached the caller, and the merged list itself
    was shifted by one element. The pairing rules are taken over unchanged;
    confirm they match the order in which the fragments are extracted.

    Args:
        dims: list of dimension strings; updated in place with 'È' -> 'GG'.

    Returns:
        A new list with the merged fragments.
    """
    merged = []
    pending = None  # previous fragment that has not been emitted yet
    for index, raw in enumerate(dims):
        dim = raw.replace('È', 'GG')
        dims[index] = dim
        if pending is None:
            pending = dim
            continue
        if re.match(r"\d?x$", pending):
            merged.append(pending + " " + dim)
            pending = None
        elif re.match(r"R0", dim) or re.match(r"^°$", pending):
            # These fragments arrive after the text they belong in front of.
            merged.append(dim + pending)
            pending = None
        else:
            merged.append(pending)
            pending = dim
    if pending is not None:
        # Flush the last fragment, which has no successor to merge with.
        merged.append(pending)
    return merged
|
|
|
+
|
|
|
|
|
|
# Regular expressions used to pick dimension text apart.

regex = r"(\S+\s{1,3}?\S*\s?\S*\S*\s?\S*\S*\s?\S*\S*\s?\S*\S*\s+)" # pull out all groups of numbers
regex1 = r"([A-Z]\W?[A-Z]?\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)" # pull out the designations (letter label followed by an "n : m" ratio)
regex2 = r"((?!\d)(?!Rpk)[a-zA-Z]{3,})" # all words out??? except Rpk
regex_isos = r"(ISO\s\d\d\d\d?\W?\d?\W?\d?\W?\d?)" # get ISO standards
reg = r"(^\d{1}$)" # drop single digits (checked)
reg1 = r"(^[A-Z]{1}-?[A-Z]?$)" # drop single letters (checked)
# Combined noise filter; clean() discards every entry in which this pattern
# is found anywhere (re.search). Alternatives, in order:
#   1. a lone capital letter, optionally followed by "-" and/or a second capital;
#   2. a letter label followed by an "n : m" ratio (same shape as regex1);
#   3. three or more letters followed by a non-word character, unless the
#      match position starts with "Rpk";
#   4. not starting with "0": a lone digit, "A" plus a digit, or "digit/digit".
reg_all = re.compile(r"(^[A-Z]{1}-?[A-Z]?\s*$)|([A-Z]\W?[A-Z]?\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)|((?!\d)(?!Rpk)[a-zA-Z]{3,}?\W)|(?!0)(^\d{1}\s*$|A\d{1}|\d/\d)")
|
|
|
# Load the merged text entries and strip surrounding whitespace from each.
# (An earlier variant read a plain text file and split it with `regex`.)
text = csv_to_pandadf.read_csv('/home/bscheibel/PycharmProjects/dxf_reader/temporary/text_merged_GV12.csv')
extracted_dimensions = [entry.strip() for entry in text]

# clean() appends to this module-level list, so it has to exist before the call.
isos = []
isos, dims = clean(extracted_dimensions)

# Show the filtered entries one per line, then the merged list.
for dim in dims:
    print(dim)

new_dims = merge(dims)
print(new_dims)
|