clustering_precomputed_dbscan_noParallels.py

# coding: utf8
import ast
import csv
import time
from math import sqrt

import numpy as np
import pandas
from sklearn import metrics
from sklearn.cluster import DBSCAN
from sklearn.metrics import davies_bouldin_score


def get_average_xy(list_input, path):
    """Compute the bounding box and orientation of every text block, write them to a CSV file
    and return them as a DataFrame of corner points."""
    csv_name = path + "/temporary/list_to_csv_with_corner_points.csv"
    resultFile = open(csv_name, 'w', newline='')
    wr = csv.writer(resultFile, delimiter=";")
    wr.writerow(["element", "xmin", "ymin", "xmax", "ymax", "ausrichtung",
                 "point_xmi_ymi", "point_xma_ymi", "point_xmi_yma", "point_xma_yma"])
    result_df = pandas.DataFrame(columns=["point_xmi_ymi", "point_xma_ymi",
                                          "point_xmi_yma", "point_xma_yma", "ausrichtung"])
    for element in list_input:
        ymin = 100000000
        ymax = 0
        xmin = 100000000
        xmax = 0
        # a single box is wrapped in a list so the loop below handles both cases
        if len(element) == 5 and not isinstance(element[0], list):
            element = [element]
        for blub in element:  # get the smallest and largest x and y value for the whole block
            if isinstance(blub[0], list) and len(blub[0]) == 5:
                blub = blub[0]
            if float(blub[1]) < ymin:
                ymin = float(blub[1])
            if float(blub[0]) < xmin:
                xmin = float(blub[0])
            if float(blub[3]) > ymax:
                ymax = float(blub[3])
            if float(blub[2]) > xmax:
                xmax = float(blub[2])
        if xmax - xmin > 1.3 * (ymax - ymin):
            ausrichtung = 0  # horizontal
        elif 1.3 * (xmax - xmin) < ymax - ymin:
            ausrichtung = 1  # vertical
        else:
            ausrichtung = 3  # other
        # corner points of the bounding box
        point_xmi_ymi = [xmin, ymin]
        point_xma_ymi = [xmax, ymin]
        point_xmi_yma = [xmin, ymax]
        point_xma_yma = [xmax, ymax]
        wr.writerow([element, xmin, ymin, xmax, ymax, ausrichtung,
                     point_xmi_ymi, point_xma_ymi, point_xmi_yma, point_xma_yma])
        result_df.loc[len(result_df)] = [point_xmi_ymi, point_xma_ymi,
                                         point_xmi_yma, point_xma_yma, ausrichtung]
    resultFile.close()
    return result_df
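
# Expected input shape (an assumption based on how the fields are indexed above): each
# element of list_input is either one box [xmin, ymin, xmax, ymax, text] or a list of such
# boxes, the fifth field being unused here, e.g.
#   get_average_xy([[[10, 10, 60, 20, "a"], [10, 22, 60, 32, "b"]]], path=".")
# returns a DataFrame with one row per element holding the four corner points of the
# combined bounding box and its orientation code (0 horizontal, 1 vertical, 3 other).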
def intersects(rectangle1, rectangle2):
    """Separating-axis test for two axis-aligned boxes; returns True if they overlap.

    Each rectangle is a CSV row whose first and fourth fields are the string
    representations of the min and max corner points. The variable names follow image
    coordinates (y grows downwards), but the test is a plain interval-overlap check
    in x and y.
    """
    rect_1_min = ast.literal_eval(rectangle1[0])
    rect_1_max = ast.literal_eval(rectangle1[3])
    rect1_bottom_left_x = rect_1_min[0]
    rect1_top_right_x = rect_1_max[0]
    rect1_bottom_left_y = rect_1_max[1]
    rect1_top_right_y = rect_1_min[1]
    rect_2_min = ast.literal_eval(rectangle2[0])
    rect_2_max = ast.literal_eval(rectangle2[3])
    rect2_bottom_left_x = rect_2_min[0]
    rect2_top_right_x = rect_2_max[0]
    rect2_bottom_left_y = rect_2_max[1]
    rect2_top_right_y = rect_2_min[1]
    return not (rect1_top_right_x < rect2_bottom_left_x
                or rect1_bottom_left_x > rect2_top_right_x
                or rect1_top_right_y > rect2_bottom_left_y
                or rect1_bottom_left_y < rect2_top_right_y)
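
# Illustrative check (hypothetical rows, shaped like the ones read back from the CSV,
# i.e. every field is a string):
#   r1 = ["[0.0, 0.0]", "[10.0, 0.0]", "[0.0, 5.0]", "[10.0, 5.0]", "0"]
#   r2 = ["[8.0, 2.0]", "[20.0, 2.0]", "[8.0, 9.0]", "[20.0, 9.0]", "0"]
#   intersects(r1, r2)  -> True, the x ranges (0..10 vs 8..20) and y ranges (0..5 vs 2..9) overlap
#   dist(r1, r2)        -> 0.0, because intersecting boxes are forced to distance 0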
def get_ausrichtung(rectangle1, rectangle2):
    """Decide whether rectangle2 lies above/below ("above") or beside ("side") rectangle1,
    based on the signed differences of the min corners."""
    min_1 = ast.literal_eval(rectangle1[0])
    min_2 = ast.literal_eval(rectangle2[0])
    diff_y = min_1[1] - min_2[1]
    diff_x = min_1[0] - min_2[0]
    if diff_x < diff_y:
        ausrichtung = "above"
    else:
        ausrichtung = "side"
    return ausrichtung


def get_parallel(rectangle1, rectangle2):
    """Check whether the long sides of the two boxes face each other; such pairs
    should not be clustered together."""
    parallel = False
    ausrichtung_1 = ast.literal_eval(rectangle1[4])
    ausrichtung_2 = ast.literal_eval(rectangle2[4])
    if ausrichtung_1 == ausrichtung_2 and ausrichtung_1 == 0:
        # both horizontal: boxes stacked on top of each other count as parallel
        if get_ausrichtung(rectangle1, rectangle2) == "above":
            parallel = True
    if ausrichtung_1 == ausrichtung_2 and ausrichtung_1 == 1:
        # both vertical: boxes standing side by side count as parallel
        if get_ausrichtung(rectangle1, rectangle2) == "side":
            parallel = True
    return parallel
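
# Illustrative calls (hypothetical rows): two horizontal boxes (ausrichtung "0"), b lying
# below a in image coordinates:
#   a = ["[0.0, 0.0]", "[100.0, 0.0]", "[0.0, 10.0]", "[100.0, 10.0]", "0"]
#   b = ["[0.0, 20.0]", "[100.0, 20.0]", "[0.0, 30.0]", "[100.0, 30.0]", "0"]
#   get_parallel(b, a)  -> True   (b's min corner is below a's, so the pair counts as stacked)
#   get_parallel(a, b)  -> False  (the signed differences in get_ausrichtung flip with the order)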
def dist(rectangle1, rectangle2):
    """Distance between two boxes: the mean of the smallest corner-to-corner distance and
    the minimum seen before it, penalised by +1000 for parallel boxes and forced to 0 for
    intersecting ones."""
    distance = 100000000
    second_dist = 100000
    for point1 in rectangle1[:4]:
        point1 = ast.literal_eval(point1)
        for point2 in rectangle2[:4]:
            point2 = ast.literal_eval(point2)
            d = sqrt((float(point2[0]) - float(point1[0])) ** 2
                     + (float(point2[1]) - float(point1[1])) ** 2)
            if d < distance:
                second_dist = distance
                distance = d
    if get_parallel(rectangle1, rectangle2):
        # push pairs whose long sides face each other apart so they land in different clusters
        distance += 1000
        second_dist += 1000
    if intersects(rectangle1, rectangle2):
        distance = 0
        second_dist = 0
    distance = (distance + second_dist) / 2
    return distance
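
# This function is what DBSCAN sees as its "precomputed" metric: cluster_and_preprocess
# below evaluates dist() for every pair of rows and feeds the resulting matrix to DBSCAN.
# Note that the value can be slightly asymmetric (the parallel penalty and the running
# second-smallest distance depend on the argument order), which is worth keeping in mind
# when tuning eps and interpreting the clustering.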
def clustering(dm, eps, path):
    """Run DBSCAN on the precomputed distance matrix and attach the cluster labels to the box data."""
    db = DBSCAN(eps=eps, min_samples=1, metric="precomputed").fit(dm)
    labels = db.labels_
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    print('Estimated number of clusters: %d' % n_clusters_)
    data_df = pandas.read_csv(path + "/temporary/list_to_csv_with_corner_points.csv", sep=";")
    data_df["cluster"] = labels
    try:
        dbs = davies_bouldin_score(dm, labels)
        chs = metrics.calinski_harabasz_score(dm, labels)
        silhouette = metrics.silhouette_score(dm, labels, metric='precomputed')
        print("Davies-Bouldin score: ", dbs)
        print("Calinski-Harabasz score: ", chs)
        print("silhouette score: ", silhouette)
    except Exception:
        # the scores are undefined for degenerate labelings (e.g. one cluster per sample)
        dbs = 1
        chs = 1
        silhouette = 1
    # neutralise the orientation column so the groupby below effectively groups by cluster only
    data_df["ausrichtung"] = 1
    data_df = data_df.groupby(['cluster', 'ausrichtung'])['element'].apply(','.join).reset_index()
    data_df.to_csv(path + "/temporary/values_clusteredfrom_precomputed_dbscan.csv",
                   sep=";", header=False, index=False)
    return data_df, n_clusters_, dbs, chs, silhouette


def cluster_and_preprocess(result, eps, path):
    """Full pipeline: bounding boxes -> CSV -> pairwise distance matrix -> DBSCAN clustering."""
    start_time = time.time()
    result = get_average_xy(result, path)  # input: list of blocks, output: CSV file plus DataFrame of corner points
    end_time = time.time()
    time_taken_get_average = end_time - start_time
    print("time get average: ", time_taken_get_average)

    start_time = time.time()
    result.to_csv(path + "/temporary/blub.csv", sep=";", index=False, header=False)
    end_time = time.time()
    time_taken_tocsv = end_time - start_time
    print("time to csv: ", time_taken_tocsv)

    # read the corner points back as strings; dist()/intersects() parse them with ast.literal_eval
    with open(path + "/temporary/blub.csv") as csvfile:
        readCSV = csv.reader(csvfile, delimiter=';')
        result = list(readCSV)

    start_time = time.time()
    dm = np.asarray([[dist(p1, p2) for p2 in result] for p1 in result])
    end_time = time.time()
    time_taken_dm = end_time - start_time
    print("time dm: ", time_taken_dm)

    start_time = time.time()
    clustering_result, n_clusters_, dbs, chs, silhouette = clustering(dm, float(eps), path)
    end_time = time.time()
    time_taken_clustering = end_time - start_time
    print("time clustering: ", time_taken_clustering)

    return clustering_result, n_clusters_, dbs, chs, silhouette, dm
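
# Minimal usage sketch (hypothetical input, not part of the original pipeline): the caller
# is assumed to pass a list of text blocks, each block being a list of
# [xmin, ymin, xmax, ymax, text] boxes, plus a working directory containing a "temporary/"
# folder. eps is the DBSCAN neighbourhood radius in the same units as the page coordinates.
if __name__ == "__main__":
    import os

    demo_path = "."
    os.makedirs(demo_path + "/temporary", exist_ok=True)
    demo_blocks = [
        [[10, 10, 60, 20, "word_a"], [10, 22, 60, 32, "word_b"]],  # one block of two boxes
        [[200, 10, 260, 20, "word_c"]],                            # a single, distant box
    ]
    result, n_clusters, dbs, chs, silhouette, dm = cluster_and_preprocess(demo_blocks, eps=50, path=demo_path)
    print(result)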