пре 5 година · 428424bea0
--- a/main.py
+++ b/main.py
@@ -15,19 +15,20 @@ def write_redis(uuid, result, db_params):
 
				 
			
 
				 def main(uuid, filepath, db, eps):
			
 
				     filename = order_bounding_boxes_in_each_block.pdf_to_html(uuid, filepath)
			
 
				-    print(filename)
			
 
				+    #print(filename)
			
 
				     result, number_blocks, number_words= order_bounding_boxes_in_each_block.get_bound_box(filename)  ##get coordinates+text out of html file into array of arrays
			
 
				     if eps == '0':
			
 
				         if number_words > 500:
			
 
				             eps = 7
			
 
				         else:
			
 
				             eps = 0.001
			
 
				-    print(eps)
			
 
				+    #print(eps)
			
 
				     isos = order_bounding_boxes_in_each_block.extract_isos(result)
			
 
				     res = clustering_precomputed_dbscan.cluster_and_preprocess(result,eps)
			
 
				     clean_arrays = read_from_clustered_merged.read("/home/bscheibel/PycharmProjects/dxf_reader/temporary/values_clusteredfrom_precomputed_dbscan.csv")
			
 
				+    tables = order_bounding_boxes_in_each_block.get_tables(clean_arrays)
			
 
				     pretty = regex_clean_new.print_clean(clean_arrays)
			
 
				-    res, details_dict = organize_drawing_according_to_details_new.main_function(pretty)
			
 
				+    res, details_dict = organize_drawing_according_to_details_new.main_function(pretty, tables)
			
 
				     #print(res)
			
 
				 
			
 
				     json_isos = json.dumps(isos)
			
@@ -37,10 +38,10 @@ def main(uuid, filepath, db, eps):
 
				     write_redis(uuid+"isos",json_isos, db)
			
 
				     write_redis(uuid+"eps", str(number_blocks)+","+str(number_words), db)
			
 
				     write_redis(uuid+"details",json_details ,db)
			
 
				+    print(json_details)
			
 
				     #print(redis.Redis('localhost').get(uuid+"dims"))
			
 
				     #print(result)
			
 
				 
			
 
				-
			
 
				 if __name__ == "__main__":
			
 
				     uuid = sys.argv[1]
			
 
				     filename = sys.argv[2]
			
@@ -48,4 +49,4 @@ if __name__ == "__main__":
 
				     eps = sys.argv[4]
			
 
				     main(uuid,filename, db, eps)
			
 
				 
			
 
				-#main("33333", "/home/bscheibel/PycharmProjects/dxf_reader/drawings/GV_12.PDF", "localhost")
			
 
				+#main("33333", "/home/bscheibel/PycharmProjects/dxf_reader/drawings/5129275_Rev01-GV12.pdf", "localhost",3)
			
--- a/order_bounding_boxes_in_each_block.py
+++ b/order_bounding_boxes_in_each_block.py
@@ -62,6 +62,8 @@ def pdf_to_html(uuid,filepath):
 
				 
			
 
				 def extract_isos(result):
			
 
				     reg = r"(ISO\s\d\d\d\d*\W?\d?\W?\d?)|(EN\s\d*)"
			
 
				+    #reg1 = r""
			
 
				+    #reg2 = r""
			
 
				     details_ = []
			
 
				     for element in result:
			
 
				         new_arr = ""
			
@@ -80,6 +82,20 @@ def extract_isos(result):
 
				     return details_
			
 
				 
			
 
				 
			
 
				+def get_tables(result):
				+    """Collect the entries of *result* whose key text marks a table.
				+
				+    A key matches when it contains "Start drawing" or "All dimensions"; each
				+    match becomes ``[*value_items, key_text]``, kept in *result*'s iteration
				+    order.  NOTE(review): values look like 4 box coordinates - confirm.
				+    """
				+    reg = r"(Start drawing)|(All dimensions)"
				+    return [
				+        [*result[text], text]  # value items first, matched text last
				+        for text in result
				+        if re.search(reg, text)
				+    ]
			
 
				+
			
 
				 
			
 
				 #file="/home/bscheibel/PycharmProjects/dxf_reader/drawings/5152166_Rev04.html"
			
 
				 #get_bound_box(file)
			
--- a/organize_drawing_according_to_details_new.py
+++ b/organize_drawing_according_to_details_new.py
@@ -4,7 +4,7 @@ import csv
 
				 import clustering_precomputed_dbscan
			
 
				 
			
 
				 def get_details(result): #search for all details in drawing and store it in list details, first need to append all text elements of one line and then check if regular expression is found in this text element
			
 
				-    reg = r"([A-Z])-\1|([A-Z]\W?[A-Z]?\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)|(Start drawing)|(All dimensions apply to the finished part including surface\/material treatment)"
			
 
				+    reg = r"([A-Z])-\1|([A-Z]\W?[A-Z]?\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)"
			
 
				     details = []
			
 
				     for element in result:
			
 
				         new = []
			
@@ -15,7 +15,10 @@ def get_details(result): #search for all details in drawing and store it in list
 
				     number = len(details)
			
 
				     return details, number
			
 
				 
			
 
				-def get_borders(details):
			
 
				+
			
 
				+
			
 
				+
			
 
				+def get_borders(details, tables):
			
 
				     sections = []
			
 
				     #print(coords)
			
 
				     for first in details:
			
@@ -49,17 +52,19 @@ def get_borders(details):
 
				                     #print(first, second)
			
 
				                     distance_xmin = abs(secondx_min - firstx_max)/2
			
 
				                     x_max = firstx_max + distance_xmin
			
 
				-            if firsty_min > secondy_max and abs(firstx_min-secondx_min) < 40 and first != second: ####check above
			
 
				+            if firsty_min > secondy_max and abs(firstx_min-secondx_min) < 80 and first != second: ####check above
			
 
				                 if abs(firsty_min - secondy_max)/2 < distance_ymin:
			
 
				                     #print(first, second)
			
 
				                     distance_ymin = abs(firsty_min - secondy_max)/2
			
 
				                     y_min = firsty_min
			
 
				-            if firsty_max < secondy_min and abs(firstx_min-secondx_min) < 40 and first != second: ####check below
			
 
				+            if firsty_max < secondy_min and abs(firstx_min-secondx_min) < 80 and first != second: ####check below
			
 
				                 if abs(firsty_max - secondy_min)/2 < distance_ymax:
			
 
				                     #print(first, second)
			
 
				                     distance_ymax = abs(firsty_max - secondy_min)/2
			
 
				                     y_max = secondy_min
			
 
				 
			
 
				+
			
 
				+
			
 
				         if y_min == -1:
			
 
				             y_min = firsty_min
			
 
				         if x_min == -1:
			
@@ -68,20 +73,39 @@ def get_borders(details):
 
				             x_max = firstx_max + distance_xmax
			
 
				         if y_max == -1:
			
 
				             y_max = 1000000000
			
 
				+
			
 
				+        ##check if it intersects with tables
			
 
				+        for table in tables:
			
 
				+            #print(table)
			
 
				+            table_xmin = table[0]
			
 
				+            if "Start drawing" in table[4]:
			
 
				+                table_xmax = 100000000
			
 
				+            else:
			
 
				+                table_xmax = table[2]
			
 
				+            table_ymin = table[1]
			
 
				+            #table_ymax = table[3]
			
 
				+            if y_max > table_ymin:
			
 
				+                if firstx_min > table_xmin and firstx_min < table_xmax:
			
 
				+                    #print("blub",first,table, table_xmax)
			
 
				+                    y_max = table_ymin
			
 
				+                elif x_max > table_xmin and x_max < table_xmax:
			
 
				+                    #print(first,table)
			
 
				+                    y_max = table_ymin
			
 
				+
			
 
				         sections.append((first,x_min, y_min,x_max,y_max))
			
 
				 
			
 
				-    #for section in sections:
			
 
				-    #    print(section)
			
 
				+    """for section in sections:
			
 
				+        print(section)"""
			
 
				     return sections
			
 
				 
			
 
				 def intersects(detail, rectangle): #using the separating axis theorem
			
 
				     #print(detail)
			
 
				 
			
 
				 
			
 
				-    rect1_bottom_left_x = detail[1][0]
			
 
				-    rect1_top_right_x = detail[1][2]
			
 
				-    rect1_bottom_left_y = detail[1][3]
			
 
				-    rect1_top_right_y = detail[1][1]
			
 
				+    rect1_bottom_left_x = float(detail[1][0])
			
 
				+    rect1_top_right_x = float(detail[1][2])
			
 
				+    rect1_bottom_left_y = float(detail[1][3])
			
 
				+    rect1_top_right_y = float(detail[1][1])
			
 
				 
			
 
				     rect2_bottom_left_x = float(rectangle[0])
			
 
				     rect2_top_right_x = float(rectangle[2])
			
@@ -92,12 +116,16 @@ def intersects(detail, rectangle): #using the separating axis theorem
 
				     return not (rect1_top_right_x < rect2_bottom_left_x or rect1_bottom_left_x > rect2_top_right_x or rect1_top_right_y > rect2_bottom_left_y or rect1_bottom_left_y < rect2_top_right_y)
			
 
				 
			
 
				 
			
 
				-def main_function(result):
			
 
				+def main_function(result, tables):
			
 
				     reg = r"([A-Z])-\1|([A-Z]\W?[A-Z]?\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)"
			
 
				-    details, number= get_details(result)
			
 
				-    print(details)
			
 
				+    details, number = get_details(result)
			
 
				+    #details.extend(tables)
			
 
				+    #print(tables)
			
 
				+    #print(details)
			
 
				     details = sorted(details, key=lambda x: x[0]) #sort by distance from 0,0
			
 
				-    sections = get_borders(details)
			
 
				+    sections = get_borders(details, tables)
			
 
				+
			
 
				+    #sections.append(tables)
			
 
				     section = []
			
 
				     details_dict = {}
			
 
				 
			
@@ -109,19 +137,28 @@ def main_function(result):
 
				         details_dict[coord_name] = coord
			
 
				         section.append(list((coord_name,coord)))
			
 
				     #print(section)
			
 
				+    for table in tables:
			
 
				+        table[3] = 10000000
			
 
				+        coord = []
			
 
				+        name = "Table"
			
 
				+        for tab in table[:4]:
			
 
				+            coord.append(tab)
			
 
				+        details_dict[name] = coord
			
 
				+        section.append(list((name,coord)))
			
 
				+    #print(section)
			
 
				+
			
 
				     if number == 0 | len(section) == 0:
			
 
				             section.append(list(("No details",list((000.000,000.000,100000000.000,10000000.000)))))
			
 
				-     #   print(section)
			
 
				-
			
 
				 
			
 
				 
			
 
				     dict = {}
			
 
				 
			
 
				-
			
 
				     for res in result:
			
 
				+        #print(res)
			
 
				         for det in section:
			
 
				             help_array = []
			
 
				             help_dict = {}
			
 
				+            #print(det)
			
 
				             if re.match(reg, res): ###damit nicht details zu details zugeordnet werden!!!
			
 
				                 break
			
 
				             if intersects(det,result[res]):
			
@@ -136,10 +173,10 @@ def main_function(result):
 
				                     dict[name] = help_dict
			
 
				                 break
			
 
				 
			
 
				-    #for dic in dict:
			
 
				-    #    print(dic)
			
 
				-    #    for d in dict[dic]:
			
 
				-    #        print(d)
			
 
				+    """for dic in dict:
			
 
				+        print(dic)
			
 
				+        for d in dict[dic]:
			
 
				+            print(d)"""
			
 
				 
			
 
				     return dict, details_dict
			
 
				 
			
--- a/regex_clean_new.py
+++ b/regex_clean_new.py
@@ -1,6 +1,7 @@
 
				 # coding=utf8
			
 
				 import re
			
 
				 
			
 
				+
			
 
				 def print_clean(dims): ##alles raus was nicht relevant ist! und zeichen ersetzen!
			
 
				     dims_new = {}
			
 
				     reg_clean = r"[a-zA-Z]{4,}|^\d\s\d$|^[a-zA-Z]{2,}\d.*$|^[A-Z]{1}$|^mm$|^\d{2}\.\d{2}\.\d{4}|^-$|A\d|^\d{1}$|^[A-Za-z]{3,}\.?$|^\d{5}|^\d{1}\s\W\s\d"
			
@@ -54,5 +55,5 @@ def print_clean(dims): ##alles raus was nicht relevant ist! und zeichen ersetzen
 
				 
			
 
				     #for dim in dims_new:
			
 
				     #    print(dim)
			
 
				-    print(dims_new)
			
 
				+    #print(dims_new)
			
 
				     return dims_new
			
--- a/test_extract_pdf_dims.py
+++ b/test_extract_pdf_dims.py
@@ -1,2 +0,0 @@
 
				-
			
 
				-