6 years ago · 8045451f61
--- a/clustering_precomputed_dbscan.py
+++ b/clustering_precomputed_dbscan.py
@@ -112,8 +112,8 @@ def dist(rectangle1, rectangle2):
 
				             #print(rectangle1)
			
 
				     return distance
			
 
				 
			
 
				-def clustering(dm):
			
 
				-    db = DBSCAN(eps=2, min_samples=1, metric="precomputed").fit(dm)  ##3.93 until now, bei 5 shon mehr erkannt, 7 noch mehr erkannt aber auch schon zu viel; GV12 ist 4.5 gut für LH zu wenig
			
 
				+def clustering(dm,eps):
			
 
				+    db = DBSCAN(eps=eps, min_samples=1, metric="precomputed").fit(dm)  ##3.93 until now, bei 5 shon mehr erkannt, 7 noch mehr erkannt aber auch schon zu viel; GV12 ist 4.5 gut für LH zu wenig
			
 
				     #db = OPTICS(min_samples=1,xi=0.1, metric="precomputed").fit(dm)
			
 
				     labels = db.labels_
			
 
				     # Number of clusters in labels
			
@@ -126,7 +126,7 @@ def clustering(dm):
 
				     data_df.groupby(['cluster', 'ausrichtung'])['element'].apply(','.join).reset_index().to_csv("/home/bscheibel/PycharmProjects/dxf_reader/temporary/values_clusteredfrom_precomputed_dbscan.csv",sep=";", header=False, index=False)
			
 
				     return data_df
			
 
				 
			
 
				-def cluster_and_preprocess(result):
			
 
				+def cluster_and_preprocess(result,eps):
			
 
				     result = get_average_xy(result) #input: array of arrays, output: either csv file or array of arrays
			
 
				 
			
 
				     #data = pandas.read_csv("/home/bscheibel/PycharmProjects/dxf_reader/temporary/list_to_csv_with_corner_points.csv", sep=";")
			
@@ -137,6 +137,6 @@ def cluster_and_preprocess(result):
 
				         result = list(readCSV)
			
 
				 
			
 
				     dm = np.asarray([[dist(p1, p2) for p2 in result] for p1 in result])
			
 
				-    clustering_result = clustering(dm)
			
 
				+    clustering_result = clustering(dm,eps)
			
 
				     return clustering_result
			
 
				 
			
--- a/main.py
+++ b/main.py
@@ -13,12 +13,12 @@ def write_redis(uuid, result, db_params):
 
				     db.set(uuid, result)
			
 
				 
			
 
				 
			
 
				-def main(uuid, filepath, db):
			
 
				+def main(uuid, filepath, db, eps):
			
 
				     filename = order_bounding_boxes_in_each_block.pdf_to_html(uuid, filepath)
			
 
				     print(filename)
			
 
				     result = order_bounding_boxes_in_each_block.get_bound_box(filename)  ##get coordinates+text out of html file into array of arrays
			
 
				     isos = order_bounding_boxes_in_each_block.extract_isos(result)
			
 
				-    res = clustering_precomputed_dbscan.cluster_and_preprocess(result)
			
 
				+    res = clustering_precomputed_dbscan.cluster_and_preprocess(result,eps)
			
 
				     clean_arrays = read_from_clustered_merged.read("/home/bscheibel/PycharmProjects/dxf_reader/temporary/values_clusteredfrom_precomputed_dbscan.csv")
			
 
				     pretty = regex_clean_new.print_clean(clean_arrays)
			
 
				     res = organize_drawing_according_to_details_new.main_function(pretty)
			
@@ -35,6 +35,7 @@ if __name__ == "__main__":
 
				     uuid = sys.argv[1]
			
 
				     filename = sys.argv[2]
			
 
				     db = sys.argv[3]
			
 
				-    main(uuid,filename, db)
			
 
				+    eps = float(sys.argv[4])  # argv entries are str; the value is forwarded to DBSCAN(eps=...), which needs a number
			
 
				+    main(uuid,filename, db, eps)
			
 
				 
			
 
				 #main("33333", "/home/bscheibel/PycharmProjects/dxf_reader/drawings/GV_12.PDF", "localhost")
			
--- a/organize_drawing_according_to_details_new.py
+++ b/organize_drawing_according_to_details_new.py
@@ -107,7 +107,7 @@ def main_function(result):
 
				         section.append(list((coord_name,coord)))
			
 
				     #print(section)
			
 
				     if number == 0 | len(section)==0:
			
 
				-        section.append(list(("No details",list((0,0,0,0)))))
			
 
				+        section.append(list(("No details",list((000.000,000.000,100000000.000,10000000.000)))))
			
 
				      #   print(section)
			
 
				 
			
 
				 
			
--- a/regex_clean_new.py
+++ b/regex_clean_new.py
@@ -37,13 +37,18 @@ def print_clean(dims): ##alles raus was nicht relevant ist! und zeichen ersetzen
 
				                 dim = dim.replace('⌀', "Ø")
			
 
				             reg12 = re.compile(r"(.*\d{1,4}\W?\d{0,4})\s?\+\s-\s?(\d{1,4}\W?\d{0,4})\s?(\d{1,4}\W?\d{0,3})") ##???? was machst du?? nach toleranzen suchen, mit +/- blabla
			
 
				             reg13 = re.compile(r"(.*)\+\s\+\s(\d*\W\d*)\s(\d*\W\d*)(.*)")
			
 
				+            reg14 = re.compile(r"(\+\s\d*\.?\d*)\s(\d*\.?\d*)\s(\+?\s?\-?\d*\.*\d*)")
			
 
				             g = re.search(reg12, dim)
			
 
				             f = re.search(reg13,dim)
			
 
				+            e = re.search(reg14,dim)
			
 
				             if g:
			
 
				                 dim = re.sub(reg12, g.group(1) + " +" + g.group(2) + " -" + g.group(3), dim) # +/- toleranzen schön darstellen
			
 
				                 #print(dim)
			
 
				             if f:
			
 
				                 dim = f.group(1) + "+" + f.group(2) + " +" + f.group(3) + f.group(4)
			
 
				+            if e:
			
 
				+                dim = e.group(2) + " " + e.group(1) + " " + e.group(3)  # was `+  +` (unary plus on str -> TypeError); NOTE(review): single-space separator assumed - confirm
			
 
				+            dim = dim.replace(" ,",".").replace(", ",".").replace(",",".")
			
 
				             dims_new[dim] = coords
			
 
				 
			
 
				     #for dim in dims_new: