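# Pipeline entry point: convert an engineering drawing (PDF) to HTML, extract
# the text bounding boxes, cluster them with DBSCAN using an automatically
# determined eps, and store the extracted dimensions, ISO references, and
# detail-view assignments in Redis.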
import csv
import json
import sys

import numpy as np
import redis

import clustering_precomputed_dbscan as dbscan
import order_bounding_boxes_in_each_block
import organize_drawing_according_to_details_new
import read_from_clustered_merged
# The config file holds the working-directory path used for temporary files;
# strip the trailing newline so the path can be concatenated safely.
with open('/home/bscheibel/PycharmProjects/clustering/config.txt', 'r') as file:
    config_path = file.read().strip()
print("Path: ", config_path)
def distance_knn(dm):
    # Collect, for every point, its two smallest non-zero distances,
    # i.e. the distances to its two nearest neighbours.
    knn = []
    for row in dm:
        row = row[row != 0]  # drop the zero self-distance
        row = row.round(decimals=2)
        row = sorted(row)
        knn.extend(row[:2])
    return knn
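# A quick sanity check of the neighbour extraction (distance values are made
# up for illustration): each row of the symmetric matrix contributes its two
# smallest non-zero entries, so
#
#   distance_knn(np.array([[0.0, 1.0, 2.0],
#                          [1.0, 0.0, 1.5],
#                          [2.0, 1.5, 0.0]]))
#
# returns [1.0, 2.0, 1.0, 1.5, 1.5, 2.0]; get_min_nn() below dedupes and
# sorts this into the candidate eps list [1.0, 1.5, 2.0].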
def distance_btw_blocks(result, path):
    # Round-trip the averaged block coordinates through a CSV file,
    # presumably so they arrive in the same string-based row format that
    # dbscan.dist expects, then build the full pairwise distance matrix.
    result.to_csv(path + "/temporary/blub_distances.csv", sep=";", index=False, header=None)
    with open(path + "/temporary/blub_distances.csv") as csvfile:
        read_csv = csv.reader(csvfile, delimiter=';')
        result = list(read_csv)
    dm = np.asarray([[dbscan.dist(p1, p2) for p2 in result] for p1 in result])
    return dm
def get_min_nn(result, path):
    # Turn the nearest-neighbour distances into a sorted, duplicate-free
    # list of candidate eps values for DBSCAN.
    dm = distance_btw_blocks(result, path)
    knn = distance_knn(dm)
    knn = sorted(set(knn))
    return knn
def find_nearest_above(my_array, target):
    # Return the index of the smallest value strictly greater than target,
    # or None if no such value exists. Cast to an ndarray first, since the
    # caller passes a plain Python list.
    diff = np.asarray(my_array) - target
    mask = np.ma.less_equal(diff, 0)
    if np.all(mask):
        return None
    masked_diff = np.ma.masked_array(diff, mask)
    return masked_diff.argmin()
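# For example (illustrative values): find_nearest_above([0.5, 1.0, 2.0], 0.5)
# masks 0.5 (its diff is <= 0) and returns 1, the index of 1.0, so knn[1] is
# the next-larger candidate eps. With target 2.0 every diff is masked and the
# function returns None, which the caller treats as "highest nn value reached".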
def write_redis(uuid, result, db_params):
    # db_params is the Redis database number; redis.Redis() takes the
    # database as the db keyword (its first positional argument is the
    # host), so it must not be passed positionally.
    # Alternative connection: redis.Redis(unix_socket_path='/tmp/redis.sock', db=7)
    db = redis.Redis(db=int(db_params))
    db.set(uuid, result)
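# Results are keyed by drawing UUID plus a suffix; e.g. (hypothetical values)
# write_redis("1234" + "dims", json_result, "7") stores the extracted
# dimensions for drawing 1234 in Redis database 7.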
def main(uuid, filepath, db, eps_manual):
    # Note: eps_manual allows a manually chosen eps but is currently unused;
    # eps is derived from the nearest-neighbour distances instead.
    path = config_path

    # Convert the PDF to HTML and pull the text bounding boxes
    # (coordinates + text) out of the HTML file into an array of arrays.
    filename = order_bounding_boxes_in_each_block.pdf_to_html(uuid, filepath, path)
    result, number_blocks, number_words = order_bounding_boxes_in_each_block.get_bound_box(filename)
    isos, general_tol = order_bounding_boxes_in_each_block.extract_isos(result)

    # Derive the candidate eps values and start with the smallest one.
    result_df = dbscan.get_average_xy(result, path)
    knn = get_min_nn(result_df, path)
    eps = min(knn)
    res, number_clusters, dbs, chs_old, silhouette, dm = dbscan.cluster_and_preprocess(result, eps, path)

    # Re-cluster with increasing eps until the silhouette score stops
    # improving or the candidate list is exhausted.
    stopping_criterion = False
    while not stopping_criterion:
        print("cluster, eps: ", eps)
        silhouette_old = silhouette
        res, number_clusters, dbs, chs, silhouette = dbscan.clustering(dm, eps, path)
        read_from_clustered_merged.read(path + "/temporary/values_clusteredfrom_precomputed_dbscan.csv")
        old_eps = eps
        if not silhouette >= silhouette_old:
            print("stopping threshold reached")
            stopping_criterion = True
        try:
            # Advance eps to the next-larger candidate value; knn[None]
            # raises TypeError when no larger candidate remains.
            eps = knn[find_nearest_above(knn, eps)]
        except TypeError:
            print("highest nn value reached")
            break

    # Final clustering with the last used eps, then post-processing.
    res, number_clusters, dbs, chs, silhouette = dbscan.clustering(dm, old_eps, path)
    clean_arrays = read_from_clustered_merged.read(path + "/temporary/values_clusteredfrom_precomputed_dbscan.csv")
    tables = order_bounding_boxes_in_each_block.get_tables(clean_arrays)
    pretty = read_from_clustered_merged.print_clean(clean_arrays)
    res, details_dict = organize_drawing_according_to_details_new.main_function(pretty, tables)

    # Serialise the results and store them under UUID-prefixed keys.
    json_isos = json.dumps(isos)
    json_result = json.dumps(res)
    json_details = json.dumps(details_dict)
    write_redis(uuid + "tol", general_tol, db)
    write_redis(uuid + "dims", json_result, db)
    write_redis(uuid + "isos", json_isos, db)
    write_redis(uuid + "eps", str(number_blocks) + "," + str(number_words), db)
    write_redis(uuid + "details", json_details, db)
if __name__ == "__main__":
    uuid = sys.argv[1]
    filename = sys.argv[2]
    db = sys.argv[3]
    eps = sys.argv[4]
    main(uuid, filename, db, eps)
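# Example invocation (hypothetical script name and argument values: drawing
# UUID, input PDF, Redis database number, manual eps):
#   python main.py 1234 /path/to/drawing.pdf 7 0.5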