clustering_precomputed_dbscan.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. # coding: utf8
  2. import numpy as np
  3. import pandas
  4. import csv
  5. from math import sqrt
  6. from sklearn.cluster import DBSCAN
  7. def get_average_xy(list_input):
  8. csv_name = "/home/bscheibel/PycharmProjects/dxf_reader/temporary/list_to_csv_with_corner_points.csv"
  9. resultFile = open(csv_name, 'w')
  10. wr = csv.writer(resultFile, delimiter=";")
  11. wr.writerow(["element", "xmin","ymin","xmax","ymax", "ausrichtung","point_xmi_ymi","point_xma_ymi","point_xmi_yma","point_xma_yma"])
  12. result_df = pandas.DataFrame(columns=["point_xmi_ymi","point_xma_ymi","point_xmi_yma","point_xma_yma","ausrichtung"])
  13. for element in list_input:
  14. #print(len(element))
  15. xavg_elem = 0
  16. yavg_elem = 0
  17. ymin = 100000000
  18. ymax = 0
  19. xmin = 100000000
  20. xmax = 0
  21. #print(element)
  22. newList = []
  23. check = False
  24. if len(element) == 5 and not isinstance(element[0], list):
  25. #print("bb")
  26. newList.append(element)
  27. #print(len(newList))
  28. element = newList
  29. """if len(element) != 5 and isinstance(element[0], list):
  30. for el in element:
  31. check = isinstance(el[0], list)
  32. if len(el) != 5:
  33. print(el)
  34. #if check:
  35. # print(el)"""
  36. for blub in element: #get the smallest and largest x and y value for whole block
  37. if isinstance(blub[0],list) and len(blub[0])==5:
  38. blub = blub [0]
  39. if float(blub[1]) < ymin:
  40. ymin = float(blub[1])
  41. #print("y_min:",y_min)
  42. if float(blub[0]) < xmin:
  43. xmin = float(blub[0])
  44. if float(blub[3]) > ymax:
  45. ymax = float(blub[3])
  46. if float(blub[2]) > xmax:
  47. xmax = float(blub[2])
  48. if float(xmax)-float(xmin) > 1.3*(float(ymax)-float(ymin)):
  49. ausrichtung = 0 #horizontal
  50. if 1.5*(float(xmax)-float(xmin)) < float(ymax)-float(ymin):
  51. ausrichtung = 1 #vertikal
  52. else:
  53. ausrichtung = 3 #sonstiges
  54. ##### GET CORNER POINTS
  55. point_xmi_ymi = [xmin,ymin]
  56. point_xma_ymi = [xmax,ymin]
  57. point_xmi_yma = [xmin,ymax]
  58. point_xma_yma = [xmax,ymax]
  59. wr.writerow([element,xmin,ymin,xmax,ymax, ausrichtung,point_xmi_ymi,point_xma_ymi,point_xmi_yma,point_xma_yma])
  60. result_df.loc[len(result_df)]=[point_xmi_ymi,point_xma_ymi, point_xmi_yma, point_xma_yma,ausrichtung]
  61. resultFile.close()
  62. #print(result_df)
  63. return result_df
  64. def intersects(rectangle1, rectangle2): #using the separating axis theorem, returns true if they intersect, otherwise false
  65. #print(rectangle2[0])
  66. #for rect in rectangle1:
  67. rect_1_min = eval(rectangle1[0])
  68. rect_1_max = eval(rectangle1[3])
  69. rect1_bottom_left_x= rect_1_min[0]
  70. rect1_top_right_x=rect_1_max[0]
  71. rect1_bottom_left_y= rect_1_max[1]
  72. rect1_top_right_y= rect_1_min[1]
  73. rect_2_min = eval(rectangle2[0])
  74. rect_2_max = eval(rectangle2[3])
  75. rect2_bottom_left_x= rect_2_min[0]
  76. rect2_top_right_x=rect_2_max[0]
  77. rect2_bottom_left_y= rect_2_max[1]
  78. rect2_top_right_y=rect_2_min[1]
  79. return not (rect1_top_right_x < rect2_bottom_left_x or rect1_bottom_left_x > rect2_top_right_x or rect1_top_right_y > rect2_bottom_left_y or rect1_bottom_left_y < rect2_top_right_y)
  80. def dist(rectangle1, rectangle2):
  81. #get minimal distance between two rectangles
  82. distance = 100000000
  83. #print(rectangle1)
  84. for point1 in rectangle1[:4]:
  85. point1 = eval(point1)
  86. #print(point1)
  87. for point2 in rectangle2[:4]:
  88. #print(point2)
  89. point2 = eval(point2)
  90. #dist1 = (float(point2[0]) - float(point1[0])) + ((float(point2[1]) - float(point1[1])))
  91. dist = sqrt(((float(point2[0]) - float(point1[0])))**2 + ((float(point2[1]) - float(point1[1])))**2)
  92. #print(dist)
  93. if dist < distance:
  94. distance = dist
  95. if rectangle1[4] != rectangle2[4]:
  96. distance = dist + 100
  97. #print(intersects(rectangle1,rectangle2))
  98. if intersects(rectangle1, rectangle2):
  99. distance = 0
  100. #print(rectangle1)
  101. return distance
  102. def clustering(dm,eps):
  103. db = DBSCAN(eps=eps, min_samples=1, metric="precomputed").fit(dm) ##3.93 until now, bei 5 shon mehr erkannt, 7 noch mehr erkannt aber auch schon zu viel; GV12 ist 4.5 gut für LH zu wenig
  104. labels = db.labels_
  105. n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
  106. print('Estimated number of clusters: %d' % n_clusters_)
  107. data_df = pandas.read_csv("/home/bscheibel/PycharmProjects/dxf_reader/temporary/list_to_csv_with_corner_points.csv", sep=";")
  108. data_df["cluster"] = labels
  109. data_df.groupby(['cluster', 'ausrichtung'])['element'].apply(','.join).reset_index().to_csv("/home/bscheibel/PycharmProjects/dxf_reader/temporary/values_clusteredfrom_precomputed_dbscan.csv",sep=";", header=False, index=False)
  110. return data_df
  111. def cluster_and_preprocess(result,eps):
  112. result = get_average_xy(result) #input: array of arrays, output: either csv file or array of arrays
  113. #data = pandas.read_csv("/home/bscheibel/PycharmProjects/dxf_reader/temporary/list_to_csv_with_corner_points.csv", sep=";")
  114. #data = data[["point_xmi_ymi","point_xma_ymi","point_xmi_yma","point_xma_yma","ausrichtung"]]
  115. result.to_csv("/home/bscheibel/PycharmProjects/dxf_reader/temporary/blub.csv", sep=";", index=False, header=None)
  116. with open('/home/bscheibel/PycharmProjects/dxf_reader/temporary/blub.csv') as csvfile:
  117. readCSV = csv.reader(csvfile, delimiter=';')
  118. result = list(readCSV)
  119. dm = np.asarray([[dist(p1, p2) for p2 in result] for p1 in result])
  120. clustering_result = clustering(dm,float(eps))
  121. return clustering_result