Browse Source

fixed reading

bscheibel 4 years ago
parent
commit
428424bea0

+ 6 - 5
main.py

@@ -15,19 +15,20 @@ def write_redis(uuid, result, db_params):
 
 def main(uuid, filepath, db, eps):
     filename = order_bounding_boxes_in_each_block.pdf_to_html(uuid, filepath)
-    print(filename)
+    #print(filename)
     result, number_blocks, number_words= order_bounding_boxes_in_each_block.get_bound_box(filename)  ##get coordinates+text out of html file into array of arrays
     if eps == '0':
         if number_words > 500:
             eps = 7
         else:
             eps = 0.001
-    print(eps)
+    #print(eps)
     isos = order_bounding_boxes_in_each_block.extract_isos(result)
     res = clustering_precomputed_dbscan.cluster_and_preprocess(result,eps)
     clean_arrays = read_from_clustered_merged.read("/home/bscheibel/PycharmProjects/dxf_reader/temporary/values_clusteredfrom_precomputed_dbscan.csv")
+    tables = order_bounding_boxes_in_each_block.get_tables(clean_arrays)
     pretty = regex_clean_new.print_clean(clean_arrays)
-    res, details_dict = organize_drawing_according_to_details_new.main_function(pretty)
+    res, details_dict = organize_drawing_according_to_details_new.main_function(pretty, tables)
     #print(res)
 
     json_isos = json.dumps(isos)
@@ -37,10 +38,10 @@ def main(uuid, filepath, db, eps):
     write_redis(uuid+"isos",json_isos, db)
     write_redis(uuid+"eps", str(number_blocks)+","+str(number_words), db)
     write_redis(uuid+"details",json_details ,db)
+    print(json_details)
     #print(redis.Redis('localhost').get(uuid+"dims"))
     #print(result)
 
-
 if __name__ == "__main__":
     uuid = sys.argv[1]
     filename = sys.argv[2]
@@ -48,4 +49,4 @@ if __name__ == "__main__":
     eps = sys.argv[4]
     main(uuid,filename, db, eps)
 
-#main("33333", "/home/bscheibel/PycharmProjects/dxf_reader/drawings/GV_12.PDF", "localhost")
+#main("33333", "/home/bscheibel/PycharmProjects/dxf_reader/drawings/5129275_Rev01-GV12.pdf", "localhost",3)

+ 16 - 0
order_bounding_boxes_in_each_block.py

@@ -62,6 +62,8 @@ def pdf_to_html(uuid,filepath):
 
 def extract_isos(result):
     reg = r"(ISO\s\d\d\d\d*\W?\d?\W?\d?)|(EN\s\d*)"
+    #reg1 = r""
+    #reg2 = r""
     details_ = []
     for element in result:
         new_arr = ""
@@ -80,6 +82,20 @@ def extract_isos(result):
     return details_
 
 
def get_tables(result):
    """Collect the entries of *result* whose key text marks a table block.

    An entry is selected when its key contains "Start drawing" or
    "All dimensions" anywhere in the text.

    Args:
        result: Mapping from a text string to an iterable of values.
            NOTE(review): the values look like bounding-box coordinates
            (x_min, y_min, x_max, y_max) judging by how the rows are
            consumed elsewhere -- confirm against the caller.

    Returns:
        A list with one new list per matching entry, in the iteration
        order of *result*: the entry's values followed by the key text as
        the last item. Empty when nothing matches.
    """
    # Compile once instead of handing the raw pattern to re.search per key.
    table_marker = re.compile(r"(Start drawing)|(All dimensions)")
    # Each row is a fresh list, so callers may modify rows in place without
    # touching the values stored in *result*.
    return [
        [*result[text], text]
        for text in result
        if table_marker.search(text)
    ]
+
 
 #file="/home/bscheibel/PycharmProjects/dxf_reader/drawings/5152166_Rev04.html"
 #get_bound_box(file)

+ 58 - 21
organize_drawing_according_to_details_new.py

@@ -4,7 +4,7 @@ import csv
 import clustering_precomputed_dbscan
 
 def get_details(result): #search for all details in drawing and store it in list details, first need to append all text elements of one line and then check if regular expression is found in this text element
-    reg = r"([A-Z])-\1|([A-Z]\W?[A-Z]?\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)|(Start drawing)|(All dimensions apply to the finished part including surface\/material treatment)"
+    reg = r"([A-Z])-\1|([A-Z]\W?[A-Z]?\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)"
     details = []
     for element in result:
         new = []
@@ -15,7 +15,10 @@ def get_details(result): #search for all details in drawing and store it in list
     number = len(details)
     return details, number
 
-def get_borders(details):
+
+
+
+def get_borders(details, tables):
     sections = []
     #print(coords)
     for first in details:
@@ -49,17 +52,19 @@ def get_borders(details):
                     #print(first, second)
                     distance_xmin = abs(secondx_min - firstx_max)/2
                     x_max = firstx_max + distance_xmin
-            if firsty_min > secondy_max and abs(firstx_min-secondx_min) < 40 and first != second: ####check above
+            if firsty_min > secondy_max and abs(firstx_min-secondx_min) < 80 and first != second: ####check above
                 if abs(firsty_min - secondy_max)/2 < distance_ymin:
                     #print(first, second)
                     distance_ymin = abs(firsty_min - secondy_max)/2
                     y_min = firsty_min
-            if firsty_max < secondy_min and abs(firstx_min-secondx_min) < 40 and first != second: ####check below
+            if firsty_max < secondy_min and abs(firstx_min-secondx_min) < 80 and first != second: ####check below
                 if abs(firsty_max - secondy_min)/2 < distance_ymax:
                     #print(first, second)
                     distance_ymax = abs(firsty_max - secondy_min)/2
                     y_max = secondy_min
 
+
+
         if y_min == -1:
             y_min = firsty_min
         if x_min == -1:
@@ -68,20 +73,39 @@ def get_borders(details):
             x_max = firstx_max + distance_xmax
         if y_max == -1:
             y_max = 1000000000
+
+        ##check if it intersects with tables
+        for table in tables:
+            #print(table)
+            table_xmin = table[0]
+            if "Start drawing" in table[4]:
+                table_xmax = 100000000
+            else:
+                table_xmax = table[2]
+            table_ymin = table[1]
+            #table_ymax = table[3]
+            if y_max > table_ymin:
+                if firstx_min > table_xmin and firstx_min < table_xmax:
+                    #print("blub",first,table, table_xmax)
+                    y_max = table_ymin
+                elif x_max > table_xmin and x_max < table_xmax:
+                    #print(first,table)
+                    y_max = table_ymin
+
         sections.append((first,x_min, y_min,x_max,y_max))
 
-    #for section in sections:
-    #    print(section)
+    """for section in sections:
+        print(section)"""
     return sections
 
 def intersects(detail, rectangle): #using the separating axis theorem
     #print(detail)
 
 
-    rect1_bottom_left_x = detail[1][0]
-    rect1_top_right_x = detail[1][2]
-    rect1_bottom_left_y = detail[1][3]
-    rect1_top_right_y = detail[1][1]
+    rect1_bottom_left_x = float(detail[1][0])
+    rect1_top_right_x = float(detail[1][2])
+    rect1_bottom_left_y = float(detail[1][3])
+    rect1_top_right_y = float(detail[1][1])
 
     rect2_bottom_left_x = float(rectangle[0])
     rect2_top_right_x = float(rectangle[2])
@@ -92,12 +116,16 @@ def intersects(detail, rectangle): #using the separating axis theorem
     return not (rect1_top_right_x < rect2_bottom_left_x or rect1_bottom_left_x > rect2_top_right_x or rect1_top_right_y > rect2_bottom_left_y or rect1_bottom_left_y < rect2_top_right_y)
 
 
-def main_function(result):
+def main_function(result, tables):
     reg = r"([A-Z])-\1|([A-Z]\W?[A-Z]?\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)"
-    details, number= get_details(result)
-    print(details)
+    details, number = get_details(result)
+    #details.extend(tables)
+    #print(tables)
+    #print(details)
     details = sorted(details, key=lambda x: x[0]) #sort by distance from 0,0
-    sections = get_borders(details)
+    sections = get_borders(details, tables)
+
+    #sections.append(tables)
     section = []
     details_dict = {}
 
@@ -109,19 +137,28 @@ def main_function(result):
         details_dict[coord_name] = coord
         section.append(list((coord_name,coord)))
     #print(section)
+    for table in tables:
+        table[3] = 10000000
+        coord = []
+        name = "Table"
+        for tab in table[:4]:
+            coord.append(tab)
+        details_dict[name] = coord
+        section.append(list((name,coord)))
+    #print(section)
+
     if number == 0 | len(section) == 0:
             section.append(list(("No details",list((000.000,000.000,100000000.000,10000000.000)))))
-     #   print(section)
-
 
 
     dict = {}
 
-
     for res in result:
+        #print(res)
         for det in section:
             help_array = []
             help_dict = {}
+            #print(det)
             if re.match(reg, res): ###damit nicht details zu details zugeordnet werden!!!
                 break
             if intersects(det,result[res]):
@@ -136,10 +173,10 @@ def main_function(result):
                     dict[name] = help_dict
                 break
 
-    #for dic in dict:
-    #    print(dic)
-    #    for d in dict[dic]:
-    #        print(d)
+    """for dic in dict:
+        print(dic)
+        for d in dict[dic]:
+            print(d)"""
 
     return dict, details_dict
 

+ 2 - 1
regex_clean_new.py

@@ -1,6 +1,7 @@
 # coding=utf8
 import re
 
+
 def print_clean(dims): ##alles raus was nicht relevant ist! und zeichen ersetzen!
     dims_new = {}
     reg_clean = r"[a-zA-Z]{4,}|^\d\s\d$|^[a-zA-Z]{2,}\d.*$|^[A-Z]{1}$|^mm$|^\d{2}\.\d{2}\.\d{4}|^-$|A\d|^\d{1}$|^[A-Za-z]{3,}\.?$|^\d{5}|^\d{1}\s\W\s\d"
@@ -54,5 +55,5 @@ def print_clean(dims): ##alles raus was nicht relevant ist! und zeichen ersetzen
 
     #for dim in dims_new:
     #    print(dim)
-    print(dims_new)
+    #print(dims_new)
     return dims_new

+ 0 - 2
test_extract_pdf_dims.py

@@ -1,2 +0,0 @@
-
-