clustering_precomputed_dbscan_noParallels.py

# coding: utf8
import ast
import csv
import time
from math import sqrt

import numpy as np
import pandas
from sklearn import metrics
from sklearn.cluster import DBSCAN
from sklearn.metrics import davies_bouldin_score


def get_average_xy(list_input, path):
    """Compute the bounding box and orientation of every text block, write them to a CSV file
    and return them as a DataFrame of corner points."""
    csv_name = path + "/temporary/list_to_csv_with_corner_points.csv"
    resultFile = open(csv_name, 'w', newline='')
    wr = csv.writer(resultFile, delimiter=";")
    wr.writerow(["element", "xmin", "ymin", "xmax", "ymax", "ausrichtung",
                 "point_xmi_ymi", "point_xma_ymi", "point_xmi_yma", "point_xma_yma"])
    result_df = pandas.DataFrame(columns=["point_xmi_ymi", "point_xma_ymi",
                                          "point_xmi_yma", "point_xma_yma", "ausrichtung"])
    for element in list_input:
        ymin = 100000000
        ymax = 0
        xmin = 100000000
        xmax = 0
        # a single box is wrapped in a list so the loop below handles both cases
        if len(element) == 5 and not isinstance(element[0], list):
            element = [element]
        for blub in element:  # get the smallest and largest x and y value for the whole block
            if isinstance(blub[0], list) and len(blub[0]) == 5:
                blub = blub[0]
            if float(blub[1]) < ymin:
                ymin = float(blub[1])
            if float(blub[0]) < xmin:
                xmin = float(blub[0])
            if float(blub[3]) > ymax:
                ymax = float(blub[3])
            if float(blub[2]) > xmax:
                xmax = float(blub[2])
        if xmax - xmin > 1.3 * (ymax - ymin):
            ausrichtung = 0  # horizontal
        elif 1.3 * (xmax - xmin) < ymax - ymin:
            ausrichtung = 1  # vertical
        else:
            ausrichtung = 3  # other
        # corner points of the bounding box
        point_xmi_ymi = [xmin, ymin]
        point_xma_ymi = [xmax, ymin]
        point_xmi_yma = [xmin, ymax]
        point_xma_yma = [xmax, ymax]
        wr.writerow([element, xmin, ymin, xmax, ymax, ausrichtung,
                     point_xmi_ymi, point_xma_ymi, point_xmi_yma, point_xma_yma])
        result_df.loc[len(result_df)] = [point_xmi_ymi, point_xma_ymi,
                                         point_xmi_yma, point_xma_yma, ausrichtung]
    resultFile.close()
    return result_df
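
# Expected input shape (an assumption based on how the fields are indexed above): each
# element of list_input is either one box [xmin, ymin, xmax, ymax, text] or a list of such
# boxes, the fifth field being unused here, e.g.
#   get_average_xy([[[10, 10, 60, 20, "a"], [10, 22, 60, 32, "b"]]], path=".")
# returns a DataFrame with one row per element holding the four corner points of the
# combined bounding box and its orientation code (0 horizontal, 1 vertical, 3 other).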
def intersects(rectangle1, rectangle2):
    """Separating-axis test for two axis-aligned boxes; returns True if they overlap.

    Each rectangle is a CSV row whose first and fourth fields are the string
    representations of the min and max corner points. The variable names follow image
    coordinates (y grows downwards), but the test is a plain interval-overlap check
    in x and y.
    """
    rect_1_min = ast.literal_eval(rectangle1[0])
    rect_1_max = ast.literal_eval(rectangle1[3])
    rect1_bottom_left_x = rect_1_min[0]
    rect1_top_right_x = rect_1_max[0]
    rect1_bottom_left_y = rect_1_max[1]
    rect1_top_right_y = rect_1_min[1]
    rect_2_min = ast.literal_eval(rectangle2[0])
    rect_2_max = ast.literal_eval(rectangle2[3])
    rect2_bottom_left_x = rect_2_min[0]
    rect2_top_right_x = rect_2_max[0]
    rect2_bottom_left_y = rect_2_max[1]
    rect2_top_right_y = rect_2_min[1]
    return not (rect1_top_right_x < rect2_bottom_left_x
                or rect1_bottom_left_x > rect2_top_right_x
                or rect1_top_right_y > rect2_bottom_left_y
                or rect1_bottom_left_y < rect2_top_right_y)
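
# Illustrative check (hypothetical rows, shaped like the ones read back from the CSV,
# i.e. every field is a string):
#   r1 = ["[0.0, 0.0]", "[10.0, 0.0]", "[0.0, 5.0]", "[10.0, 5.0]", "0"]
#   r2 = ["[8.0, 2.0]", "[20.0, 2.0]", "[8.0, 9.0]", "[20.0, 9.0]", "0"]
#   intersects(r1, r2)  -> True, the x ranges (0..10 vs 8..20) and y ranges (0..5 vs 2..9) overlap
#   dist(r1, r2)        -> 0.0, because intersecting boxes are forced to distance 0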
def get_ausrichtung(rectangle1, rectangle2):
    """Decide whether rectangle2 lies above/below ("above") or beside ("side") rectangle1,
    based on the signed differences of the min corners."""
    min_1 = ast.literal_eval(rectangle1[0])
    min_2 = ast.literal_eval(rectangle2[0])
    diff_y = min_1[1] - min_2[1]
    diff_x = min_1[0] - min_2[0]
    if diff_x < diff_y:
        ausrichtung = "above"
    else:
        ausrichtung = "side"
    return ausrichtung


def get_parallel(rectangle1, rectangle2):
    """Check whether the long sides of the two boxes face each other; such pairs
    should not be clustered together."""
    parallel = False
    ausrichtung_1 = ast.literal_eval(rectangle1[4])
    ausrichtung_2 = ast.literal_eval(rectangle2[4])
    if ausrichtung_1 == ausrichtung_2 and ausrichtung_1 == 0:
        # both horizontal: boxes stacked on top of each other count as parallel
        if get_ausrichtung(rectangle1, rectangle2) == "above":
            parallel = True
    if ausrichtung_1 == ausrichtung_2 and ausrichtung_1 == 1:
        # both vertical: boxes standing side by side count as parallel
        if get_ausrichtung(rectangle1, rectangle2) == "side":
            parallel = True
    return parallel
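
# Illustrative calls (hypothetical rows): two horizontal boxes (ausrichtung "0"), b lying
# below a in image coordinates:
#   a = ["[0.0, 0.0]", "[100.0, 0.0]", "[0.0, 10.0]", "[100.0, 10.0]", "0"]
#   b = ["[0.0, 20.0]", "[100.0, 20.0]", "[0.0, 30.0]", "[100.0, 30.0]", "0"]
#   get_parallel(b, a)  -> True   (b's min corner is below a's, so the pair counts as stacked)
#   get_parallel(a, b)  -> False  (the signed differences in get_ausrichtung flip with the order)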
def dist(rectangle1, rectangle2):
    """Distance between two boxes: the mean of the smallest corner-to-corner distance and
    the minimum seen before it, penalised by +1000 for parallel boxes and forced to 0 for
    intersecting ones."""
    distance = 100000000
    second_dist = 100000
    for point1 in rectangle1[:4]:
        point1 = ast.literal_eval(point1)
        for point2 in rectangle2[:4]:
            point2 = ast.literal_eval(point2)
            d = sqrt((float(point2[0]) - float(point1[0])) ** 2
                     + (float(point2[1]) - float(point1[1])) ** 2)
            if d < distance:
                second_dist = distance
                distance = d
    if get_parallel(rectangle1, rectangle2):
        # push pairs whose long sides face each other apart so they land in different clusters
        distance += 1000
        second_dist += 1000
    if intersects(rectangle1, rectangle2):
        distance = 0
        second_dist = 0
    distance = (distance + second_dist) / 2
    return distance
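
# This function is what DBSCAN sees as its "precomputed" metric: cluster_and_preprocess
# below evaluates dist() for every pair of rows and feeds the resulting matrix to DBSCAN.
# Note that the value can be slightly asymmetric (the parallel penalty and the running
# second-smallest distance depend on the argument order), which is worth keeping in mind
# when tuning eps and interpreting the clustering.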
def clustering(dm, eps, path):
    """Run DBSCAN on the precomputed distance matrix and attach the cluster labels to the box data."""
    db = DBSCAN(eps=eps, min_samples=1, metric="precomputed").fit(dm)
    labels = db.labels_
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    print('Estimated number of clusters: %d' % n_clusters_)
    data_df = pandas.read_csv(path + "/temporary/list_to_csv_with_corner_points.csv", sep=";")
    data_df["cluster"] = labels
    try:
        dbs = davies_bouldin_score(dm, labels)
        chs = metrics.calinski_harabasz_score(dm, labels)
        silhouette = metrics.silhouette_score(dm, labels, metric='precomputed')
        print("Davies-Bouldin score: ", dbs)
        print("Calinski-Harabasz score: ", chs)
        print("silhouette score: ", silhouette)
    except Exception:
        # the scores are undefined for degenerate labelings (e.g. one cluster per sample)
        dbs = 1
        chs = 1
        silhouette = 1
    # neutralise the orientation column so the groupby below effectively groups by cluster only
    data_df["ausrichtung"] = 1
    data_df = data_df.groupby(['cluster', 'ausrichtung'])['element'].apply(','.join).reset_index()
    data_df.to_csv(path + "/temporary/values_clusteredfrom_precomputed_dbscan.csv",
                   sep=";", header=False, index=False)
    return data_df, n_clusters_, dbs, chs, silhouette


def cluster_and_preprocess(result, eps, path):
    """Full pipeline: bounding boxes -> CSV -> pairwise distance matrix -> DBSCAN clustering."""
    start_time = time.time()
    result = get_average_xy(result, path)  # input: list of blocks, output: CSV file plus DataFrame of corner points
    end_time = time.time()
    time_taken_get_average = end_time - start_time
    print("time get average: ", time_taken_get_average)

    start_time = time.time()
    result.to_csv(path + "/temporary/blub.csv", sep=";", index=False, header=False)
    end_time = time.time()
    time_taken_tocsv = end_time - start_time
    print("time to csv: ", time_taken_tocsv)

    # read the corner points back as strings; dist()/intersects() parse them with ast.literal_eval
    with open(path + "/temporary/blub.csv") as csvfile:
        readCSV = csv.reader(csvfile, delimiter=';')
        result = list(readCSV)

    start_time = time.time()
    dm = np.asarray([[dist(p1, p2) for p2 in result] for p1 in result])
    end_time = time.time()
    time_taken_dm = end_time - start_time
    print("time dm: ", time_taken_dm)

    start_time = time.time()
    clustering_result, n_clusters_, dbs, chs, silhouette = clustering(dm, float(eps), path)
    end_time = time.time()
    time_taken_clustering = end_time - start_time
    print("time clustering: ", time_taken_clustering)

    return clustering_result, n_clusters_, dbs, chs, silhouette, dm
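
# Minimal usage sketch (hypothetical input, not part of the original pipeline): the caller
# is assumed to pass a list of text blocks, each block being a list of
# [xmin, ymin, xmax, ymax, text] boxes, plus a working directory containing a "temporary/"
# folder. eps is the DBSCAN neighbourhood radius in the same units as the page coordinates.
if __name__ == "__main__":
    import os

    demo_path = "."
    os.makedirs(demo_path + "/temporary", exist_ok=True)
    demo_blocks = [
        [[10, 10, 60, 20, "word_a"], [10, 22, 60, 32, "word_b"]],  # one block of two boxes
        [[200, 10, 260, 20, "word_c"]],                            # a single, distant box
    ]
    result, n_clusters, dbs, chs, silhouette, dm = cluster_and_preprocess(demo_blocks, eps=50, path=demo_path)
    print(result)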