|
@@ -1,254 +0,0 @@
|
|
|
-# coding: utf8
|
|
|
-import numpy as np
|
|
|
-import pandas
|
|
|
-import csv
|
|
|
-from math import sqrt
|
|
|
-from sklearn.cluster import DBSCAN
|
|
|
-
|
|
|
-from sklearn import metrics
|
|
|
-from sklearn.metrics import davies_bouldin_score
|
|
|
-import time
|
|
|
-
|
|
|
def get_average_xy(list_input, path):
    """Compute bounding box, orientation and corner points for every element.

    Each entry of ``list_input`` is either a single 5-item record or a list of
    such records (a record may itself be wrapped in a one-item list).  Of each
    record, positions 0-3 are read as x-min, y-min, x-max and y-max; the fifth
    position is not used here.

    For every entry the enclosing box over all its records is determined and
    classified:

    * ``0`` - horizontal: width is more than 1.3 times the height
    * ``1`` - vertical: height is more than 1.3 times the width
    * ``3`` - anything else

    One row per entry is written to
    ``<path>/temporary/list_to_csv_with_corner_points.csv`` (``;`` separated,
    with a header row).

    Args:
        list_input: Iterable of entries as described above.
        path: Base directory; its ``temporary`` sub-directory must exist.

    Returns:
        pandas.DataFrame with the columns ``point_xmi_ymi``, ``point_xma_ymi``,
        ``point_xmi_yma``, ``point_xma_yma`` (each an ``[x, y]`` list) and
        ``ausrichtung``; one row per entry, in input order.
    """
    csv_name = path + "/temporary/list_to_csv_with_corner_points.csv"
    columns = ["point_xmi_ymi", "point_xma_ymi", "point_xmi_yma", "point_xma_yma", "ausrichtung"]
    rows = []

    # The context manager closes the file even if a record fails to parse;
    # newline="" is what the csv module asks for when handing it a file object.
    with open(csv_name, "w", newline="") as result_file:
        wr = csv.writer(result_file, delimiter=";")
        wr.writerow(["element", "xmin", "ymin", "xmax", "ymax", "ausrichtung",
                     "point_xmi_ymi", "point_xma_ymi", "point_xmi_yma", "point_xma_yma"])

        for element in list_input:
            # A bare record is wrapped so that it can be handled like a block.
            if len(element) == 5 and not isinstance(element[0], list):
                element = [element]

            # NOTE(review): the maxima start at 0 and the minima at 1e8, so the
            # box is only correct for coordinates inside that range -
            # presumably they are non-negative page coordinates; confirm.
            xmin = ymin = 100000000
            xmax = ymax = 0

            # Smallest and largest x and y value over the whole block.
            for blub in element:
                if isinstance(blub[0], list) and len(blub[0]) == 5:
                    blub = blub[0]
                ymin = min(ymin, float(blub[1]))
                xmin = min(xmin, float(blub[0]))
                ymax = max(ymax, float(blub[3]))
                xmax = max(xmax, float(blub[2]))

            width = float(xmax) - float(xmin)
            height = float(ymax) - float(ymin)
            if width > 1.3 * height:
                ausrichtung = 0  # horizontal
            elif 1.3 * width < height:
                ausrichtung = 1  # vertical
            else:
                ausrichtung = 3  # other

            # Corner points of the enclosing box.
            point_xmi_ymi = [xmin, ymin]
            point_xma_ymi = [xmax, ymin]
            point_xmi_yma = [xmin, ymax]
            point_xma_yma = [xmax, ymax]

            wr.writerow([element, xmin, ymin, xmax, ymax, ausrichtung,
                         point_xmi_ymi, point_xma_ymi, point_xmi_yma, point_xma_yma])
            rows.append([point_xmi_ymi, point_xma_ymi, point_xmi_yma, point_xma_yma, ausrichtung])

    # Building the frame once avoids growing it row by row.
    return pandas.DataFrame(rows, columns=columns)
|
|
|
-
|
|
|
def intersects(rectangle1, rectangle2):
    """Tell whether two axis-aligned rectangles overlap (touching counts).

    Both arguments are sequences in which item 0 is the string form of the
    ``[xmin, ymin]`` corner and item 3 the string form of the ``[xmax, ymax]``
    corner.  The rectangles are separated as soon as a gap exists along the
    x axis or along the y axis; otherwise they intersect.

    Returns:
        bool: True if the rectangles intersect, False otherwise.
    """
    # NOTE(review): eval() runs whatever expression the string holds; this is
    # only acceptable while the strings come from data this program wrote.
    low_1 = eval(rectangle1[0])
    high_1 = eval(rectangle1[3])
    low_2 = eval(rectangle2[0])
    high_2 = eval(rectangle2[3])

    gap_along_x = high_1[0] < low_2[0] or low_1[0] > high_2[0]
    gap_along_y = low_1[1] > high_2[1] or high_1[1] < low_2[1]

    return not (gap_along_x or gap_along_y)
|
|
|
-
|
|
|
-
|
|
|
def get_ausrichtung(rectangle1, rectangle2):
    """Classify two rectangles as stacked (``"above"``) or adjacent (``"side"``).

    Only item 0 of each rectangle - the string form of its ``[xmin, ymin]``
    corner - is read.  The corners' horizontal and vertical offsets are
    compared by magnitude, so the answer does not depend on the order in which
    the two rectangles are passed.  (Comparing the signed offsets, as was done
    before, reported a rectangle lying to the left on the same row as
    ``"above"`` and made the pairwise result asymmetric.)

    Returns:
        str: ``"above"`` when the vertical offset is the larger one,
        ``"side"`` otherwise (including equal offsets).
    """
    # NOTE(review): eval() runs whatever expression the string holds; this is
    # only acceptable while the strings come from data this program wrote.
    min_1 = eval(rectangle1[0])
    min_2 = eval(rectangle2[0])

    diff_x = abs(min_1[0] - min_2[0])
    diff_y = abs(min_1[1] - min_2[1])

    return "above" if diff_x < diff_y else "side"
|
|
|
-
|
|
|
-
|
|
|
def get_parallel(rectangle1, rectangle2):
    """Tell whether two rectangles lie with their long sides next to each other.

    Item 4 of each rectangle is the string form of its orientation code
    (0 = horizontal, 1 = vertical, per the values compared below).  Two
    horizontal rectangles count as parallel when ``get_ausrichtung`` places
    them ``"above"`` one another; two vertical ones when it places them
    ``"side"`` by side.  Every other combination is not parallel.  Such pairs
    are not meant to be clustered together.

    Returns:
        bool: True for a parallel pair, False otherwise.
    """
    # NOTE(review): eval() runs whatever expression the string holds; this is
    # only acceptable while the strings come from data this program wrote.
    ausrichtung_1 = eval(rectangle1[4])
    ausrichtung_2 = eval(rectangle2[4])

    # Only the orientation codes decide the outcome; the corner points are
    # parsed by get_ausrichtung when (and only when) they are needed.
    if not ausrichtung_1 == ausrichtung_2:
        return False
    if ausrichtung_1 == 0:
        return get_ausrichtung(rectangle1, rectangle2) == "above"
    if ausrichtung_1 == 1:
        return get_ausrichtung(rectangle1, rectangle2) == "side"
    return False
|
|
|
-
|
|
|
-
|
|
|
def dist(rectangle1, rectangle2):
    """Return the heuristic distance between two rectangles.

    Items 0-3 of each rectangle are the string forms of its four corner
    points.  All 16 corner-to-corner Euclidean distances are scanned in order;
    ``distance`` tracks the running minimum and ``second_dist`` the value that
    minimum replaced most recently (which is not necessarily the second
    smallest distance overall).

    Afterwards:

    * if ``get_parallel`` reports the pair as parallel, 1000 is added to both
      values so that such pairs are pushed apart;
    * if ``intersects`` reports an overlap, both values become 0.

    Returns:
        float: the mean of the two tracked values.
    """
    # Parse every corner once instead of re-parsing rectangle2 for each corner
    # of rectangle1.
    # NOTE(review): eval() runs whatever expression the string holds; this is
    # only acceptable while the strings come from data this program wrote.
    corners_1 = [eval(point) for point in rectangle1[:4]]
    corners_2 = [eval(point) for point in rectangle2[:4]]

    distance = 100000000
    second_dist = 100000

    for point1 in corners_1:
        x1 = float(point1[0])
        y1 = float(point1[1])
        for point2 in corners_2:
            gap = sqrt((float(point2[0]) - x1) ** 2 + (float(point2[1]) - y1) ** 2)
            if gap < distance:
                second_dist = distance
                distance = gap

    if get_parallel(rectangle1, rectangle2):
        distance += 1000
        second_dist += 1000

    if intersects(rectangle1, rectangle2):
        distance = 0
        second_dist = 0

    return (distance + second_dist) / 2
|
|
|
-
|
|
|
def clustering(dm, eps, path):
    """Cluster elements with DBSCAN on a precomputed distance matrix.

    The labels are attached to the rows of
    ``<path>/temporary/list_to_csv_with_corner_points.csv``; the ``element``
    strings of every cluster are then joined with ``,`` and written to
    ``<path>/temporary/values_clusteredfrom_precomputed_dbscan.csv``
    (``;`` separated, no header, no index).

    Args:
        dm: Square matrix of pairwise distances, one row per CSV row.
        eps: DBSCAN neighbourhood radius.
        path: Base directory containing the ``temporary`` sub-directory.

    Returns:
        tuple: ``(data_df, n_clusters_, dbs, chs, silhoutte)`` - the grouped
        frame, the number of clusters (noise label -1 excluded), and the
        Davies-Bouldin, Calinski-Harabasz and silhouette scores.  If any score
        cannot be computed, all three are reported as ``1``.
    """
    db = DBSCAN(eps=eps, min_samples=1, metric="precomputed").fit(dm)
    labels = db.labels_
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

    print('Estimated number of clusters: %d' % n_clusters_)
    data_df = pandas.read_csv(path + "/temporary/list_to_csv_with_corner_points.csv", sep=";")
    data_df["cluster"] = labels

    try:
        dbs = davies_bouldin_score(dm, labels)
        chs = metrics.calinski_harabasz_score(dm, labels)
        silhoutte = metrics.silhouette_score(dm, labels, metric='precomputed')
    except Exception as exc:
        # The scores are best-effort (e.g. they are undefined when every
        # sample ends up in one cluster or in its own cluster).  Keep the
        # placeholder values, but say why, and let KeyboardInterrupt /
        # SystemExit propagate instead of swallowing them.
        print("cluster scores not available, using 1: ", exc)
        dbs = 1
        chs = 1
        silhoutte = 1
    else:
        print("DBscore: ", dbs)
        print("calsinski: ", chs)
        print("silhoutte: ", silhoutte)

    # The orientation column is overwritten with a constant so that the
    # grouping below is effectively by cluster only.
    data_df["ausrichtung"] = 1
    data_df = data_df.groupby(['cluster', 'ausrichtung'])['element'].apply(','.join).reset_index()
    data_df.to_csv(path + "/temporary/values_clusteredfrom_precomputed_dbscan.csv", sep=";", header=False, index=False)

    return data_df, n_clusters_, dbs, chs, silhoutte
|
|
|
-
|
|
|
def cluster_and_preprocess(result, eps, path):
    """Run the whole pipeline: bounding boxes, distance matrix, clustering.

    Steps (the wall-clock time of each is printed):

    1. ``get_average_xy`` turns the raw entries into corner points.
    2. The corner-point frame is written to ``<path>/temporary/blub.csv`` and
       read back with ``csv.reader``, so that every cell arrives as a string.
    3. ``dist`` is evaluated for every ordered pair of rows to build the
       distance matrix.
    4. ``clustering`` groups the rows using ``float(eps)`` as radius.

    Returns:
        tuple: ``(clustering_result, n_clusters_, dbs, chs, silhoutte, dm)``.
    """
    blub_csv = path + "/temporary/blub.csv"

    # Step 1: input is an array of arrays, output is a frame of corner points.
    t0 = time.time()
    corner_df = get_average_xy(result, path)
    t1 = time.time()
    print("time get average: ", t1 - t0)

    # Step 2: round trip through CSV.
    t0 = time.time()
    corner_df.to_csv(blub_csv, sep=";", index=False, header=None)
    t1 = time.time()
    print("time to csv:", t1 - t0)

    with open(blub_csv) as csvfile:
        rectangles = list(csv.reader(csvfile, delimiter=';'))

    # Step 3: full pairwise distance matrix.
    t0 = time.time()
    matrix_rows = []
    for p1 in rectangles:
        matrix_rows.append([dist(p1, p2) for p2 in rectangles])
    dm = np.asarray(matrix_rows)
    t1 = time.time()
    print("time dm:", t1 - t0)

    # Step 4: clustering on the precomputed matrix.
    t0 = time.time()
    clustering_result, n_clusters_, dbs, chs, silhoutte = clustering(dm, float(eps), path)
    t1 = time.time()
    print("time clustering:", t1 - t0)

    return clustering_result, n_clusters_, dbs, chs, silhoutte, dm
|
|
|
-
|