5 years ago · 5d9b349350
--- a/clustering_precomputed_dbscan.py
+++ b/clustering_precomputed_dbscan.py
@@ -5,9 +5,9 @@ import csv
 
				 from math import sqrt
			
 
				 from sklearn.cluster import DBSCAN
			
 
				 
			
 
				-def get_average_xy(list_input):
			
 
				-    csv_name = "/home/centurio/Projects/engineering_drawings_extraction/temporary/ist_to_csv_with_corner_points.csv"
			
 
				-    resultFile = open(csv_name, 'w')
			
 
				+def get_average_xy(list_input, path):
			
 
				+    csv_name = path+"/temporary/list_to_csv_with_corner_points.csv"
			
 
				+    resultFile = open(csv_name, 'w+')
			
 
				     wr = csv.writer(resultFile, delimiter=";")
			
 
				     wr.writerow(["element", "xmin","ymin","xmax","ymax", "ausrichtung","point_xmi_ymi","point_xma_ymi","point_xmi_yma","point_xma_yma"])
			
 
				     result_df = pandas.DataFrame(columns=["point_xmi_ymi","point_xma_ymi","point_xmi_yma","point_xma_yma","ausrichtung"])
			
@@ -112,28 +112,28 @@ def dist(rectangle1, rectangle2):
 
				             #print(rectangle1)
			
 
				     return distance
			
 
				 
			
 
				-def clustering(dm,eps):
			
 
				+def clustering(dm,eps,path):
			
 
				     db = DBSCAN(eps=eps, min_samples=1, metric="precomputed").fit(dm)                                                                                        ##3.93 until now, bei 5 shon mehr erkannt, 7 noch mehr erkannt aber auch schon zu viel; GV12 ist 4.5 gut für LH zu wenig
			
 
				     labels = db.labels_
			
 
				     n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
			
 
				 
			
 
				     print('Estimated number of clusters: %d' % n_clusters_)
			
 
				-    data_df = pandas.read_csv("/home/centurio/Projects/engineering_drawings_extraction/temporary/list_to_csv_with_corner_points.csv", sep=";")
			
 
				+    data_df = pandas.read_csv(path +"/temporary/list_to_csv_with_corner_points.csv", sep=";")
			
 
				     data_df["cluster"] = labels
			
 
				-    data_df.groupby(['cluster', 'ausrichtung'])['element'].apply(','.join).reset_index().to_csv("//home/centurio/Projects/engineering_drawings_extraction/temporary/values_clusteredfrom_precomputed_dbscan.csv",sep=";", header=False, index=False)
			
 
				+    data_df.groupby(['cluster', 'ausrichtung'])['element'].apply(','.join).reset_index().to_csv(path+"/temporary/values_clusteredfrom_precomputed_dbscan.csv",sep=";", header=False, index=False)
			
 
				     return data_df
			
 
				 
			
 
				-def cluster_and_preprocess(result,eps):
			
 
				-    result = get_average_xy(result) #input: array of arrays, output: either csv file or array of arrays
			
 
				+def cluster_and_preprocess(result,eps,path):
			
 
				+    result = get_average_xy(result, path) #input: array of arrays, output: either csv file or array of arrays
			
 
				 
			
 
				     #data = pandas.read_csv("/home/bscheibel/PycharmProjects/dxf_reader/temporary/list_to_csv_with_corner_points.csv", sep=";")
			
 
				     #data = data[["point_xmi_ymi","point_xma_ymi","point_xmi_yma","point_xma_yma","ausrichtung"]]
			
 
				-    result.to_csv("/home/centurio/Projects/engineering_drawings_extraction/temporary/blub.csv", sep=";", index=False, header=None)
			
 
				-    with open('/home/centurio/Projects/engineering_drawings_extraction/temporary/blub.csv') as csvfile:
			
 
				+    result.to_csv(path+"/temporary/blub.csv", sep=";", index=False, header=None)
			
 
				+    with open(path+"/temporary/blub.csv") as csvfile:
			
 
				         readCSV = csv.reader(csvfile, delimiter=';')
			
 
				         result = list(readCSV)
			
 
				 
			
 
				     dm = np.asarray([[dist(p1, p2) for p2 in result] for p1 in result])
			
 
				-    clustering_result = clustering(dm,float(eps))
			
 
				+    clustering_result = clustering(dm,float(eps), path)
			
 
				     return clustering_result
			
 
				 
			
--- a/main.py
+++ b/main.py
@@ -14,7 +14,9 @@ def write_redis(uuid, result, db_params):
 
				 
			
 
				 
			
 
				 def main(uuid, filepath, db, eps):
			
 
				-    filename = order_bounding_boxes_in_each_block.pdf_to_html(uuid, filepath)
			
 
				+    #path_centurio = "/home/centurio/Projects/engineering_drawings_extraction"
			
 
				+    path = "/home/bscheibel/PycharmProjects/dxf_reader"
			
 
				+    filename = order_bounding_boxes_in_each_block.pdf_to_html(uuid, filepath, path)
			
 
				     #print(filename)
			
 
				     result, number_blocks, number_words= order_bounding_boxes_in_each_block.get_bound_box(filename)  ##get coordinates+text out of html file into array of arrays
			
 
				     if eps == '0':
			
@@ -25,8 +27,8 @@ def main(uuid, filepath, db, eps):
 
				     #print(eps)
			
 
				     isos, general_tol = order_bounding_boxes_in_each_block.extract_isos(result)
			
 
				     print(general_tol)
			
 
				-    res = clustering_precomputed_dbscan.cluster_and_preprocess(result,eps)
			
 
				-    clean_arrays = read_from_clustered_merged.read("/home/centurio/Projects/engineering_drawings_extraction/temporary/values_clusteredfrom_precomputed_dbscan.csv")
			
 
				+    res = clustering_precomputed_dbscan.cluster_and_preprocess(result,eps, path)
			
 
				+    clean_arrays = read_from_clustered_merged.read(path+"/temporary/values_clusteredfrom_precomputed_dbscan.csv")
			
 
				     tables = order_bounding_boxes_in_each_block.get_tables(clean_arrays)
			
 
				     pretty = regex_clean_new.print_clean(clean_arrays)
			
 
				     res, details_dict = organize_drawing_according_to_details_new.main_function(pretty, tables)
			
@@ -44,11 +46,10 @@ def main(uuid, filepath, db, eps):
 
				     #print(redis.Redis('localhost').get(uuid+"dims"))
			
 
				     #print(result)
			
 
				 
			
 
				-"""if __name__ == "__main__":
			
 
				+"""#if __name__ == "__main__":
			
 
				     uuid = sys.argv[1]
			
 
				     filename = sys.argv[2]
			
 
				     db = sys.argv[3]
			
 
				     eps = sys.argv[4]
			
 
				-    main(uuid,filename, db, eps)
			
 
				-"""
			
 
				+    main(uuid,filename, db, eps)"""
			
 
				 main("33333", "/home/bscheibel/PycharmProjects/dxf_reader/drawings/5152166_Rev04.pdf", "localhost",3)
			
--- a/order_bounding_boxes_in_each_block.py
+++ b/order_bounding_boxes_in_each_block.py
@@ -54,8 +54,8 @@ def get_bound_box(file):
 
				 
			
 
				     return new_all_elements, number_blocks, number_words
			
 
				 
			
 
				-def pdf_to_html(uuid,filepath):
			
 
				-    filename = "temporary/" +str(uuid)+"out.html" #to app/temporary later
			
 
				+def pdf_to_html(uuid,filepath, path):
			
 
				+    filename = path +"/temporary/" +str(uuid)+"out.html" #to app/temporary later
			
 
				     subprocess.call(['pdftotext', '-bbox-layout',
			
 
				                      filepath, filename])
			
 
				     return filename