Browse Source

fixed reading

bscheibel 4 years ago
parent
commit
65b0255771

+ 1 - 1
clustering_precomputed_dbscan.py

@@ -137,6 +137,6 @@ def cluster_and_preprocess(result,eps):
         result = list(readCSV)
 
     dm = np.asarray([[dist(p1, p2) for p2 in result] for p1 in result])
-    clustering_result = clustering(dm,eps)
+    clustering_result = clustering(dm,float(eps))
     return clustering_result
 

+ 234 - 72
drawings/5129275_Rev01-GV12.txt

@@ -1,117 +1,279 @@
-                 1                                        2                               3                                 4                                                5                                                                   6                                     7                                 8
+1
 
-                                                                                                                    A-A ( 5 : 1 )
-A                                                                                                                                                                                                                                                                                                                                               A
-                                         Z ( 20 : 1 )                                                                                        b 0,01 A
-                                                                                                                                                                                                                                        2x
-                                                                                                                    X                                                                                                                   r Ø0,05 CZ B
-                                                                                                                                           g 0,01
+3
 
+2
 
+4
 
-                                         R2                                                                                                                                                                                                                                  X ( 10 : 1 )
+5
 
+6
 
+Z ( 20 : 1 )
+R2
 
+X ( 10 : 1 )
 
-
+n11
 
+1,65
 
+Y ( 10 : 1 )
 
+F
 
-                                                                                                                       ,8
-                                                                                                                     R0
+2 +- 0,1
+0
 
+Edge finish:
 
-                                                                                                                                   Ø 4,1
-B                                                                                                                                            Z                                                                                                                                         -0,1                                                     B
+-0,2
+-0,05
 
+Rz 16
+valid for machined surface
+according to DIN ISO 1302
 
++0,2
 
+9,3 +- 0,1
+0
 
-                                                                                                                                                                                                             2x Ø19,2 +- 00,05 CT
-                                                                                                                                                                                                                                                                                                          -0,05
+4,8 `0,05
 
+D
 
+B
 
+according to DIN ISO 13715
 
-                                                                                                                                                                                            Ø15,15 +- 0,05
-                                                   1,65                                                                                                                                                                                                                                -0,1
+Concepts, principales and rules according to:
+ISO 8015
+Dimensions according to:
+ISO 14405 1-3
+Tolerances of form, orientation, location and run-out: ISO 1101
+Edge finish according to:
+ISO 13715
+Surface texture according to:
+ISO 1302
+Limits according to:
+ISO 286-2
+not applicable
+16% - rule
+Nominal geometry (theoretically exact) defined by CAD file:
 
+1
 
+04.04.2018
+DESCRIPTION
 
+SECTION
 
-                                                                                                                                                                                                      0
-                                                                                                                                                                                 n11
-                                   Y ( 10 : 1 )                                                                                                                                                                                                                      ,5
-                                                                                                                                                                                                                                                                                                              R0,
-                                                                                                                                                                                                                                                                                                                  5
-                                                                                                                                                                                                                                                                   R0
-C                                              E                                                                                                                                                                                                                                                                                                C
-                                                               Rz 1,6
-                                                               Rpk 0,2
-                                                                                            °
-                                                                                          45
+A
 
-                                                    E #F
+A
 
+Weight:
 
+d 0,2 A B
 
+Scale:
 
-                                                                                                                                                      Ø4,5 x7 (++ 0,028) È
-                                                    f 0,01 A                              0,7                             Y                                                                                  b 0,01 A
-                                                    c 0,01                                                                                                                                                    r Ø0,05 B
+-
 
+2:1
 
+Unit:
 
+Projection:
+ISO 128
+Method 1
 
-                                                                                                                                                      0,04
-                                                                                      A
-                                               F                                       1,5 +- 0,1
-D                                                                                             0                                                                                                                                                                                                                                                 D
-    All dimensions apply to the finished part including surface/material                                                2 +- 0,1
-                                                                                                                             0
-    treatment.
-    Material: 1.4021                  Quality standard: EN 10095                   c 0,005                                      9,3 +- 0,1
+mm
 
+Document type:
+Description:
 
+HUM
 
+11.07.2018
 
-                                                                                                                                                    Ra 0,8
-    Surface/material treatment:
-                                                                                                        4,8 `0,05                      0
-                                       Hardened to 48 HRC +2/-0
-    Raw part No:                       -
-                                                                                              Rz 0,25
+-
 
+LKA
 
+-
 
+DRAWN BY INSP. DATE
 
-    Surface texture:                            Edge finish:
-                                                        -0,2
-              Rz 16                                     -0,05         +0,2
+INSP. BY
+
+4
+
+5
+
+Valve Lifter GV12.2
+
+6
+
+7
+
+F
+Page:
+
+5129275
+
+ENGINE Division
+
+Drawing Approval Stamp
+
+3
+
+Component Drawing
+
+Part Number:
+
+Design Freeze Release B1 -Sample
+
+tbd
+
+2
+
+DATE
+
+Copyright
+The reproduction, distribution and utilization of this document as well as the communication of its contents to others without
+express authorization is prohibited. Offenders will be held liable for the payment of damages. All rights reserved in the event
+of the grant of a patent, utility model or design. (ISO 16016)
+
+Cleanliness:
+
+1
+
+Start drawing
+
+REVISION HISTORY
+
+Part has to be degreased and free of deposits, detachable
+burrs and visual detectable particles.
+Delivery conditions:
+
+E
+
+REV.
+
+5129275_Rev01.stp
+Valid for all untoleranced dimensions:
+
+r Ø0,05 B
+
+Ra 0,8
+
+c 0,005
+
+Hardened to 48 HRC +2/-0
+-
+
+Rz 0,25
+
+Raw part No:
+
+b 0,01 A
+
+0,04
+
+1,5 +- 0,1
+0
+
+All dimensions apply to the finished part including surface/material
+treatment.
+Material: 1.4021
+Quality standard: EN 10095
+
+Surface texture:
+
+R0,
+5
+
+45
+
+Y
+
+0,7
+A
+
+Surface/material treatment:
+
+-0,05
+
+-0,1
+
+C
+
+Rz 1,6
+Rpk 0,2
+E #F
+f 0,01 A
+c 0,01
+
+D
+
+B
+
+,5
+R0
+
+Ø4,5 x7 (++ 0,028) È
+
+E
+
+-0,1
+
+2x Ø19,2 +- 00,05 CT
+
+Z
+Ø15,15 +- 0,05
+0
+
+Ø 4,1
+
+R0
+,8
+
+5°
+
+r Ø0,05 CZ B
+
+g 0,01
+
+B
+
+C
+
+A
+
+2x
+
+b 0,01 A
+
+X
+
+F
+
+8
+
+A-A ( 5 : 1 )
+
+A
+
+E
+
+7
+
+1/1
+
+A3
 
-E   valid for machined surface
-    according to DIN ISO 1302                         according to DIN ISO 13715
-                                                                                                                                                                         B                                                                                                                                                                      E
-    Concepts, principales and rules according to:          ISO 8015
-    Dimensions according to:                               ISO 14405 1-3                                                                                                     1   Start drawing                                                                                                   04.04.2018     LKA          -          -
-    Tolerances of form, orientation, location and run-out: ISO 1101                                                                                                    REV.                                                           DESCRIPTION                                      SECTION     DATE       DRAWN BY INSP. DATE    INSP. BY
-    Edge finish according to:                              ISO 13715
-    Surface texture according to:                          ISO 1302                                                                                                                                                                                                 REVISION HISTORY
-    Limits according to:                                   ISO 286-2                                                                                                    Copyright
-    16% - rule                                             not applicable                                                                                               The reproduction, distribution and utilization of this document as well as the communication of its contents to others without
-    Nominal geometry (theoretically exact) defined by CAD file:                                                                                                         express authorization is prohibited. Offenders will be held liable for the payment of damages. All rights reserved in the event
-                                 5129275_Rev01.stp                                                         A                                 A                          of the grant of a patent, utility model or design. (ISO 16016)
-                                                                                                                                                                        Weight:             Scale:                                  Unit:            Projection:                Document type:
-    Valid for all untoleranced dimensions:                                                                                                                                                                                                                                                       Component Drawing
-                                                d 0,2 A B                                                                                                                         -                      2:1                                mm       ISO 128
-                                                                                                                                                                                                                                                     Method 1
-                                                                                                                                                                                                                                                                                Description:
-F   Cleanliness:
-                                                                                                                                                                                   HUM                  11.07.2018                                                                               Valve Lifter GV12.2                            F
-    Part has to be degreased and free of deposits, detachable
-    burrs and visual detectable particles.                                                                                                                                   Design Freeze Release B1 -Sample                                                                   Part Number:                                        Page:
-    Delivery conditions:         tbd                                                                                                                                                   Drawing Approval Stamp                                         ENGINE Division                             5129275                            1/1
-                 1                                        2                               3                                 4                                                5                                                                   6                                     7                                A3
 

+ 12 - 2
main.py

@@ -16,21 +16,31 @@ def write_redis(uuid, result, db_params):
 def main(uuid, filepath, db, eps):
     filename = order_bounding_boxes_in_each_block.pdf_to_html(uuid, filepath)
     print(filename)
-    result = order_bounding_boxes_in_each_block.get_bound_box(filename)  ##get coordinates+text out of html file into array of arrays
+    result, number_blocks, number_words= order_bounding_boxes_in_each_block.get_bound_box(filename)  ##get coordinates+text out of html file into array of arrays
+    if eps == '0':
+        if number_words > 500:
+            eps = 7
+        else:
+            eps = 0.001
+    print(eps)
     isos = order_bounding_boxes_in_each_block.extract_isos(result)
     res = clustering_precomputed_dbscan.cluster_and_preprocess(result,eps)
     clean_arrays = read_from_clustered_merged.read("/home/bscheibel/PycharmProjects/dxf_reader/temporary/values_clusteredfrom_precomputed_dbscan.csv")
     pretty = regex_clean_new.print_clean(clean_arrays)
-    res = organize_drawing_according_to_details_new.main_function(pretty)
+    res, details_dict = organize_drawing_according_to_details_new.main_function(pretty)
     #print(res)
 
     json_isos = json.dumps(isos)
     json_result = json.dumps(res)
+    json_details =json.dumps(details_dict)
     write_redis(uuid+"dims", json_result, db)
     write_redis(uuid+"isos",json_isos, db)
+    write_redis(uuid+"eps", str(number_blocks)+","+str(number_words), db)
+    write_redis(uuid+"details",json_details ,db)
     #print(redis.Redis('localhost').get(uuid+"dims"))
     #print(result)
 
+
 if __name__ == "__main__":
     uuid = sys.argv[1]
     filename = sys.argv[2]

+ 7 - 5
regex_extraction.py

@@ -27,9 +27,11 @@ def clean(extracted_dimensions):
 def print_clean(dims):
     dims_new = []
     dimss = []
-    for dim in dims:
-        dim = re.split("CT",dim)
-        dimss.extend(dim)
+    #or dim in dims:
+    #    if "CT" in dim:
+    #        dim = re.split("CT",dim)
+    #        for di in dim:
+    #            dimss.extend(di)
     #print(dimss)
     for dim in dimss:
         if re.search(r"b\s\d*\W?\d*\s.",dim):
@@ -73,7 +75,7 @@ def print_clean(dims):
 
 
         ####nicht dabei: neigungswinkel und lauftoleranzen
-
+    print(dimms)
     return dimms
 
 
@@ -100,7 +102,7 @@ def extract_pretty(input):
         text_combined = ""
         #new_arr = ""
        # print(element)
-        element=eval(element)
+        element = eval(element)
         for x in element:
             text_combined += x[4] + " "
             #print(x[4])

+ 4 - 1
order_bounding_boxes_in_each_block.py

@@ -12,9 +12,12 @@ def get_bound_box(file):
 
     all_elements = []
     blocks = html_file.findAll('block')
+    number_blocks = len(blocks)
+    number_words = 0
     for block in blocks:
         list_elements = []
         words = block.findAll('word')
+        number_words += len(words)
         for word in words:
             word_list = []
             word_list.append(word["xmin"])
@@ -49,7 +52,7 @@ def get_bound_box(file):
 
         #print("\n")"""
 
-    return new_all_elements
+    return new_all_elements, number_blocks, number_words
 
 def pdf_to_html(uuid,filepath):
     filename = str(uuid)+"out.html"

+ 12 - 9
organize_drawing_according_to_details_new.py

@@ -4,11 +4,11 @@ import csv
 import clustering_precomputed_dbscan
 
 def get_details(result): #search for all details in drawing and store it in list details, first need to append all text elements of one line and then check if regular expression is found in this text element
-    reg = r"([A-Z])-\1|([A-Z]\W?[A-Z]?\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)"
+    reg = r"([A-Z])-\1|([A-Z]\W?[A-Z]?\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)|(Start drawing)|(All dimensions apply to the finished part including surface\/material treatment)"
     details = []
     for element in result:
         new = []
-        if re.match(reg,element):
+        if re.search(reg,element):
             new.extend(result[element])
             new.append(element)
             details.append(new)
@@ -95,19 +95,22 @@ def intersects(detail, rectangle): #using the separating axis theorem
 def main_function(result):
     reg = r"([A-Z])-\1|([A-Z]\W?[A-Z]?\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)"
     details, number= get_details(result)
+    print(details)
     details = sorted(details, key=lambda x: x[0]) #sort by distance from 0,0
     sections = get_borders(details)
     section = []
+    details_dict = {}
 
     for sect in sections:
         coord = []
         coord_name = sect[0][4]
         for sec in sect[1:]:
             coord.append(sec)
+        details_dict[coord_name] = coord
         section.append(list((coord_name,coord)))
     #print(section)
-    if number == 0 | len(section)==0:
-        section.append(list(("No details",list((000.000,000.000,100000000.000,10000000.000)))))
+    if number == 0 | len(section) == 0:
+            section.append(list(("No details",list((000.000,000.000,100000000.000,10000000.000)))))
      #   print(section)
 
 
@@ -119,12 +122,12 @@ def main_function(result):
         for det in section:
             help_array = []
             help_dict = {}
-            if re.match(reg, res):
-                continue
+            if re.match(reg, res): ###damit nicht details zu details zugeordnet werden!!!
+                break
             if intersects(det,result[res]):
                 name = det[0]
-                help_array.append(res)
-                help_array.extend(result[res])
+                #help_array.append(res)
+                #help_array.extend(result[res])
                 help_dict[res] = result[res]
                 #print(name)
                 if name in dict:
@@ -138,5 +141,5 @@ def main_function(result):
     #    for d in dict[dic]:
     #        print(d)
 
-    return dict
+    return dict, details_dict
 

+ 8 - 6
regex_clean_new.py

@@ -37,20 +37,22 @@ def print_clean(dims): ##alles raus was nicht relevant ist! und zeichen ersetzen
                 dim = dim.replace('⌀', "Ø")
             reg12 = re.compile(r"(.*\d{1,4}\W?\d{0,4})\s?\+\s-\s?(\d{1,4}\W?\d{0,4})\s?(\d{1,4}\W?\d{0,3})") ##???? was machst du?? nach toleranzen suchen, mit +/- blabla
             reg13 = re.compile(r"(.*)\+\s\+\s(\d*\W\d*)\s(\d*\W\d*)(.*)")
-            reg14 = re.compile(r"(\+\s\d*\.?\d*)\s(\d*\.?\d*)\s(\+?\s?\-?\d*\.*\d*)")
+            reg14 = re.compile(r"(\+\s\d*,?\d*)\s(\d*,?\d*)\s(\+?\s?\-?\d*,?\d*)")
             g = re.search(reg12, dim)
-            f = re.search(reg13,dim)
-            e = re.search(reg14,dim)
+            f = re.search(reg13, dim)
+            e = re.search(reg14, dim)
             if g:
                 dim = re.sub(reg12, g.group(1) + " +" + g.group(2) + " -" + g.group(3), dim) # +/- toleranzen schön darstellen
                 #print(dim)
-            if f:
+            elif f:
                 dim = f.group(1) + "+" + f.group(2) + " +" + f.group(3) + f.group(4)
-            if e:
-                dim= e.group(2) +  + e.group(1) +  + e.group(3)
+            elif e:
+                dim = e.group(2) + " " + e.group(1) + " " + e.group(3)
+
             dim = dim.replace(" ,",".").replace(", ",".").replace(",",".")
             dims_new[dim] = coords
 
     #for dim in dims_new:
     #    print(dim)
+    print(dims_new)
     return dims_new

File diff suppressed because it is too large
+ 126 - 44
temporary/list_to_csv_with_corner_points.csv


File diff suppressed because it is too large
+ 125 - 44
temporary/values_clusteredfrom_precomputed_dbscan.csv


+ 2 - 0
test_extract_pdf_dims.py

@@ -0,0 +1,2 @@
+
+