bscheibel 5 éve
szülő
commit
75aceddfe8
2 módosított fájl, 191 hozzáadás és 59 törlés
  1. 75 59
      regex_extraction.py
  2. 116 0
      regex_online_tester.py

+ 75 - 59
regex_extraction.py

@@ -1,71 +1,87 @@
 # coding=utf8
 import re
 
+def clean(extracted_dimensions):
+    """Split ISO references out of extracted_dimensions and drop noise entries.
+
+    Entries that start with an ISO reference (regex_isos) have that reference
+    appended to the module-level isos list and are dropped. Entries that
+    match reg_all at their start are dropped as well. The input list is
+    filtered in place.
+
+    Returns:
+        The tuple (isos, extracted_dimensions).
+    """
+    kept = []
+    # Collect survivors in a new list instead of calling remove() inside the
+    # loop: removing while iterating shifts the list and skips the entry that
+    # follows every removed one, so some matches used to be left behind.
+    for dim in extracted_dimensions:
+        if re.match(regex_isos, dim):
+            isos.append(re.findall(regex_isos, dim)[0])
+        elif not re.match(reg_all, dim):
+            kept.append(dim)
+    extracted_dimensions[:] = kept  # keep the caller's list object in sync
+    return isos, extracted_dimensions
+
+
+def print_clean(extracted_dimensions):
+    """Print a German category label for each marker found in every entry.
+
+    Every entry is tested against all markers in the order listed below, so
+    one entry can produce several labels. The label is followed by the entry
+    itself, except for the "°" and "Ø" markers, where only the label is
+    printed.
+
+    Args:
+        extracted_dimensions: iterable of strings to classify.
+
+    Returns:
+        None; all output goes to stdout.
+    """
+    # (marker, label, whether the entry is echoed after the label)
+    markers = (
+        ("b", "Rechtwinkligkeit", True),
+        ("g", "Zylinderform", True),
+        ("f", "Parallelität", True),
+        ("c", "Zylinderform", True),
+        ("r", "Konzentrizität?", True),
+        ("i", "Symmetrie", True),
+        ("j", "Ortstoleranz/Mittelpunkt", True),
+        ("n", "Durchmesser", True),
+        ("É", "Modifikator", True),
+        # Not covered: inclination-angle and run-out tolerances.
+        ("R", "Radius", True),
+        ("°", "Grad", False),
+        ("Ø", "Durchmesser", False),
+    )
+    for entry in extracted_dimensions:
+        for marker, label, echo_entry in markers:
+            if marker not in entry:
+                continue
+            print(label)
+            if echo_entry:
+                print(entry)
+
 regex = r"(\S+\s{1,3}?\S*\s?\S*\S*\s?\S*\S*\s?\S*\S*\s?\S*\S*\s+)" #alle gruppen von zahlen raus
-regex1 = r"([A-Z]\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)" #ti get the bezeichnungen raus
-regex2= r"([a-zA-Z]{3,})" #alle wörter raus???
+regex1 = r"([A-Z]\W?[A-Z]?\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)" # to strip out the designations
+regex2 = r"((?!\d)(?!Rpk)[a-zA-Z]{3,})" # remove all words??? except Rpk
 regex_isos = r"(ISO\s\d\d\d\d?\W?\d?\W?\d?\W?\d?)" #get iso standards
-reg = r"(^\d{1}$)" #einzelne Zahlen raus
-reg1 = r"(^[A-Z]-?[A-Z]?$)" #einzelne Buchstaben raus
+reg = r"(^\d{1}$)" # remove single digits #checked
+reg1 = r"(^[A-Z]{1}-?[A-Z]?$)" # remove single letters #checked
+reg_all = r"(^(?!0)\d{1}$)|(^[A-Z]{1}-?[A-Z]?$)|(^[A-Z]\W?[A-Z]?\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)|((?!\d)(?!Rpk)[a-zA-Z]{3,})" # reg | reg1 | regex1 | regex2 combined; the digit branch additionally excludes 0 and the regex1 branch is anchored with ^
 extracted_dimensions = []
 file = open('/home/bscheibel/PycharmProjects/dxf_reader/drawings/5129275_Rev01-GV12.txt', 'r')
 text = file.read()
 file.close()
 matches = re.findall(regex, text, re.MULTILINE)
 for match in matches:
-        extracted_dimensions.append(match.strip())
+    extracted_dimensions.append(match.strip())
+#print(extracted_dimensions)
 isos = []
-new_dims = []
-#next part replaces everything you do not need with whitespace
-for dim in extracted_dimensions:
-        if re.search(regex_isos, dim):
-                match = re.findall(regex_isos,dim)
-                isos.append(match[0])
-                dim = re.sub(regex_isos,'' ,dim)
-        if re.search(regex1, dim):
-                dim = re.sub(regex1, '', dim)
-        if re.search(regex2, dim):
-                dim = re.sub(regex2,'' ,dim)
-        if re.search(reg, dim):
-                dim = re.sub(reg,'' ,dim)
-        if re.search(reg1, dim):
-                dim = re.sub(reg1,'' ,dim)
-        if dim != '':
-                new_dims.append(dim)
-
-print(isos)
-for dim in new_dims:
-        if b:
-                print("Rechtwinkligkeit")
-                print(dim)
-        if g:
-                print("Zylinderform")
-                print(dim)
-        if f:
-                print("Parallelität")
-                print(dim)
-        if c:
-                print("Zylinderform")
-                print(dim)
-        if r:
-                print("Konzentrizität oder Durchmesser?")
-                print(dim)
-        if i:
-                print("Symmetrie")
-                print(dim)
-        if j:
-                print("Ortstoleranz/Mittelpunkt")
-                print(dim)
-        if n:
-                print("Durchmesser")
-                print(dim)
-        if É:
-                print("Modifikator")
-                print(dim)
-        ####nicht dabei: neigungswinkel und lauftoleranzen
-        if R:
-                print("Radius")
-
-        if °:
-                print("Grad")
-
+isos, dims = clean(extracted_dimensions)
+#print(isos)
+#dims = clean(dims)
+for dim in dims:
+    print(dim)
+print_clean(dims)

+ 116 - 0
regex_online_tester.py

@@ -0,0 +1,116 @@
+# coding=utf8
+# the above tag defines encoding for this document and is for Python 2.x compatibility
+
+import re
+
+regex = r"(^(?!0)\d{1}$)|(^[A-Z]{1}-?[A-Z]?$)|(^[A-Z]\W?[A-Z]?\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)|((?!\d)(?!Rpk)[a-zA-Z]{3,})"
+
+test_str = ("2"
+            "4"
+            "6"
+            "8"
+            "Z ( 20 : 1 )"
+            "b 0,01 A\n"
+            "2x\n"
+            "r Ø0,05 CZ B\n"
+            "g 0,01\n"
+            "R2\n"
+            "5°\n"
+            ",8\n"
+            "R0\n"
+            "Ø 4,1\n"
+            "B\n"
+            "-0,1\n"
+            "2x Ø19,2 +- 00,05 CT\n"
+            "-0,05\n"
+            "Ø15,15 +- 0,05\n"
+            "1,65\n"
+            "-0,1\n"
+            "0\n"
+            "n11\n"
+            ",5\n"
+            "R0,\n"
+            "R0\n"
+            "C\n"
+            "C\n"
+            "Rz 1,6\n"
+            "Rpk 0,2\n"
+            "°\n"
+            "45\n"
+            "E #F\n"
+            "Ø4,5 x7 (++ 0,028) È\n"
+            "f 0,01 A\n"
+            "0,7\n"
+            "b 0,01 A\n"
+            "c 0,01\n"
+            "r Ø0,05 B\n"
+            "0,04\n"
+            "F\n"
+            "1,5 +- 0,1\n"
+            "D\n"
+            "0\n"
+            "dimensions apply to the finished part\n"
+            "2 +- 0,1\n"
+            "Material: 1.4021\n"
+            "c 0,005\n"
+            "9,3 +- 0,1\n"
+            "Ra 0,8\n"
+            "4,8 `0,05\n"
+            "0\n"
+            "Raw part No:\n"
+            "-\n"
+            "Rz 0,25\n"
+            "Edge finish:\n"
+            "-0,2\n"
+            "Rz 16\n"
+            "-0,05\n"
+            "for machined surface\n"
+            "B\n"
+            "principales and rules according to:\n"
+            "1   Start drawing\n"
+            "-\n"
+            "of form, orientation, location and run-out:\n"
+            "DESCRIPTION\n"
+            "DRAWN BY INSP. DATE\n"
+            "Edge finish according to:\n"
+            "REVISION HISTORY\n"
+            "Copyright\n"
+            "    16%\n"
+            "not applicable\n"
+            "this document as well as the\n"
+            "without\n"
+            "    Nominal\n"
+            "file:\n"
+            "be held liable for the payment\n"
+            "the event\n"
+            "A\n"
+            "A\n"
+            "of the grant of a patent,\n"
+            "Scale:\n"
+            "Projection:\n"
+            "Valid for all untoleranced dimensions:\n"
+            "d 0,2 A B\n"
+            "-\n"
+            "2:1\n"
+            "mm\n"
+            "Description:\n"
+            "F   Cleanliness:\n"
+            "11.07.2018\n"
+            "F\n"
+            "    Part\n"
+            "of deposits, detachable\n"
+            "Design Freeze Release B1 -Sample"
+            "Page:\n"
+            "    Delivery\n"
+            "tbd\n"
+            "ENGINE Division"
+            "5129275\n"
+            "1/1\n"
+            "2\n"
+            "4\n"
+            "6\n"
+            "A3\n")
+
+# The ^...$ alternatives are meant per entry (one per line), so enable MULTILINE.
+matches = re.sub(regex, "", test_str, flags=re.MULTILINE)
+print(matches)  # show the substituted text; printing test_str hid the result