clustering_precomputed_dbscan.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. import numpy as np
  2. import pandas
  3. import csv
  4. from math import sqrt
  5. from sklearn.cluster import DBSCAN
  6. import order_bounding_boxes_in_each_block
  def get_average_xy(list_input):
      """Write the bounding rectangle of every block to a semicolon-separated CSV.

      For each block the smallest and largest x and y values over all of its
      boxes are determined. The block is then classified as horizontal (0,
      strictly wider than tall) or vertical (1), and one row is written that
      holds the block itself, its extent, the orientation and the four corner
      points of the extent.

      Despite the function name, no averages are written or returned.

      Args:
          list_input: Iterable of blocks. Each block is a sized sequence of
              boxes; each box is indexable and positions 0-3 hold values
              accepted by ``float``, read as xmin, ymin, xmax, ymax.

      Returns:
          The (relative) path of the CSV file that was written.
      """
      import os  # local import: os is not among the imports at the top of this file

      csv_name = "temporary/list_to_csv_with_corner_points.csv"
      header = ["element", "xmin", "ymin", "xmax", "ymax", "ausrichtung",
                "point_xmi_ymi", "point_xma_ymi", "point_xmi_yma", "point_xma_yma"]

      # Create the target directory on demand instead of failing on a fresh checkout.
      os.makedirs(os.path.dirname(csv_name), exist_ok=True)

      # newline="" is what the csv module expects for files it writes; the
      # context manager closes the file even if a row cannot be written.
      with open(csv_name, "w", newline="", encoding="utf-8") as result_file:
          wr = csv.writer(result_file, delimiter=";")
          wr.writerow(header)
          for element in list_input:
              if len(element) == 0:
                  # A block without boxes has no extent; skip it.
                  continue

              boxes = [(float(box[0]), float(box[1]), float(box[2]), float(box[3]))
                       for box in element]

              # Real min/max instead of sentinel start values, so negative or
              # very large coordinates give a correct extent too.
              xmin = min(box[0] for box in boxes)
              ymin = min(box[1] for box in boxes)
              xmax = max(box[2] for box in boxes)
              ymax = max(box[3] for box in boxes)

              # 0 = horizontal (wider than tall), 1 = vertical.
              ausrichtung = 0 if xmax - xmin > ymax - ymin else 1

              wr.writerow([element, xmin, ymin, xmax, ymax, ausrichtung,
                           [xmin, ymin], [xmax, ymin], [xmin, ymax], [xmax, ymax]])
      return csv_name
  def dist(rectangle1, rectangle2):
      """Return the smallest Euclidean distance between the points of two rectangles.

      Every point of ``rectangle1`` is compared with every point of
      ``rectangle2``. Only the given points are compared, not the edges
      between them.

      Args:
          rectangle1: Iterable of points. A point is either a sequence whose
              first two items are x and y, or the text of a Python literal for
              such a sequence (for example ``"[1.0, 2.0]"`` as read from CSV).
          rectangle2: Same format as ``rectangle1``.

      Returns:
          The smallest point-to-point distance. 100000000 is returned when
          there is no pair to compare or no pair is closer than that.

      Raises:
          ValueError: If a text point is not a plain Python literal.
      """
      # Parse each point once up front instead of re-parsing rectangle2 for
      # every point of rectangle1.
      points1 = [_parse_point(point) for point in rectangle1]
      points2 = [_parse_point(point) for point in rectangle2]

      distance = 100000000
      for x1, y1 in points1:
          for x2, y2 in points2:
              candidate = sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
              if candidate < distance:
                  distance = candidate
      return distance


  def _parse_point(point):
      """Return the (x, y) floats of a point given as a sequence or as literal text."""
      import ast  # local import: ast is not among the imports at the top of this file

      if isinstance(point, str):
          # literal_eval accepts Python literals only, so text coming from a
          # file cannot run code the way the previous eval() call could.
          point = ast.literal_eval(point)
      return float(point[0]), float(point[1])
  def clustering(distance_matrix, eps=5, min_samples=1,
                 csv_path="/home/bscheibel/PycharmProjects/dxf_reader/temporary/list_to_csv_with_corner_points.csv",
                 output_path="values_clusteredfrom_precomputed_dbscan.csv"):
      """Cluster blocks with DBSCAN on a precomputed distance matrix and save the groups.

      The labels found by DBSCAN are attached as a ``cluster`` column to the
      table read from ``csv_path``. The ``element`` texts of each cluster are
      joined with a space and the result is written to ``output_path``.

      Args:
          distance_matrix: Square matrix of pairwise distances, passed to
              ``DBSCAN(metric="precomputed").fit``. It needs one row per data
              row of ``csv_path``, because the labels become a column of that
              table.
          eps: DBSCAN ``eps`` value. The default 5 comes from manual tuning:
              3.93 was used before, 5 recognised more, 7 recognised even more
              but already too much.
          min_samples: DBSCAN ``min_samples`` value.
          csv_path: Semicolon-separated CSV that has an ``element`` column.
          output_path: Destination of the clustered, semicolon-separated CSV.

      Returns:
          None. The number of clusters is printed and the result is written
          to ``output_path``.
      """
      # Fit the matrix that was passed in; previously the parameter was
      # ignored and a module-level variable named `dm` was used instead.
      db = DBSCAN(eps=eps, min_samples=min_samples, metric="precomputed").fit(distance_matrix)
      labels = db.labels_

      # Number of clusters in labels; the label -1 is not counted as a cluster.
      n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
      print('Estimated number of clusters: %d' % n_clusters_)

      data_df = pandas.read_csv(csv_path, sep=";")
      data_df["cluster"] = labels
      grouped = data_df.groupby('cluster')['element'].apply(' '.join).reset_index()
      grouped.to_csv(output_path, sep=";")
  # ---------------------------------------------------------------------------
  # Script section (executed at module level).
  # Steps: read the bounding boxes of one drawing, write their extents to CSV,
  # build the pairwise distance matrix of the corner points, cluster it.
  # ---------------------------------------------------------------------------
  file = "/home/bscheibel/PycharmProjects/dxf_reader/drawings/5152166_Rev04.html"
  result = order_bounding_boxes_in_each_block.get_bound_box(file)
  get_average_xy(result)  # the returned path is not used; the CSV is re-read below

  data = pandas.read_csv(
      "/home/bscheibel/PycharmProjects/dxf_reader/temporary/list_to_csv_with_corner_points.csv",
      sep=";",
  )
  corner_columns = ["point_xmi_ymi", "point_xma_ymi", "point_xmi_yma", "point_xma_yma"]
  # NOTE(review): without regex=True, replace() matches whole cell values only,
  # so this presumably does not strip quotes inside a cell - confirm the intent.
  data = data[corner_columns].replace("'", "")

  # Round-trip through a header-less CSV so every corner point comes back as text.
  data.to_csv("blub.csv", sep=";", index=False, header=None)
  with open('blub.csv') as csvfile:
      result = list(csv.reader(csvfile, delimiter=';'))

  # Pairwise distance between every two rows of corner points.
  dm = np.asarray([[dist(first, second) for second in result] for first in result])
  clustering(dm)