main.py 3.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
import json
import sys

import numpy as np
import redis

import clustering_precomputed_dbscan_noParallels as dbscan
import get_distances
import order_bounding_boxes_in_each_block
import organize_drawing_according_to_details_new
import read_from_clustered_merged
import regex_clean_new
#import algoritm_knn
# Working directory used for intermediate artifacts (HTML conversion output,
# temporary CSV files written/read by the clustering steps).
config_path = "/home/bscheibel/technical_drawings_extraction"
  12. def get_min_nn(result, path):
  13. dm = get_distances.distance_btw_blocks(result, path)
  14. knn = get_distances.distance_knn(dm)
  15. knn = list(set(knn))
  16. knn = sorted(knn)
  17. return knn
  18. def find_nearest_above(my_array, target):
  19. diff = my_array - target
  20. mask = np.ma.less_equal(diff, 0)
  21. if np.all(mask):
  22. return None # returns None if target is greater than any value
  23. masked_diff = np.ma.masked_array(diff, mask)
  24. return masked_diff.argmin()
  25. def write_redis(uuid, result, db_params):
  26. db = redis.Redis(db_params)
  27. #db = db = redis.Redis(unix_socket_path='/tmp/redis.sock',db=7)
  28. print(db_params)
  29. db.set(uuid, result)
def main(uuid, filepath, db, eps):
    """Extract dimension/tolerance data from the technical drawing PDF at
    *filepath* and write the results to Redis (host *db*) under keys derived
    from *uuid*.

    NOTE(review): the *eps* parameter is immediately overwritten by min(knn)
    below and is therefore never used — confirm whether callers may drop it.
    """
    print("TEEEEST")  # leftover debug output
    print(filepath)
    path = config_path
    # Convert the PDF to HTML, then pull word bounding boxes out of the HTML.
    filename = order_bounding_boxes_in_each_block.pdf_to_html(uuid, filepath, path)
    result, number_blocks, number_words= order_bounding_boxes_in_each_block.get_bound_box(filename) ##get coordinates+text out of html file into array of arrays
    isos, general_tol = order_bounding_boxes_in_each_block.extract_isos(result)
    result_df = get_distances.get_average_xy(result, path)
    # Candidate eps values: sorted unique nearest-neighbour distances.
    knn = get_min_nn(result_df, path)
    eps = min(knn)  # start clustering at the smallest knn distance
    # Initial clustering pass; dm is the precomputed distance matrix reused
    # by every dbscan.clustering call in the loop below.
    res, number_clusters, dbs, chs_old, silhoutte, dm = dbscan.cluster_and_preprocess(result, eps, path)
    stopping_criterion = False
    while(not stopping_criterion): # this condition has to be changed to the breaking condition
        print("cluster, eps: ", eps)
        silhoutte_old = silhoutte
        res, number_clusters, dbs, chs, silhoutte = dbscan.clustering(dm, eps, path)
        # NOTE(review): return value discarded — presumably called for a side
        # effect on the temporary CSV; confirm before removing.
        read_from_clustered_merged.read(path + "/temporary/values_clusteredfrom_precomputed_dbscan.csv")
        old_eps = eps
        # Stop once the silhouette score gets worse than the previous round.
        if (not silhoutte >= silhoutte_old): #and avg_words_block_new > avg_words_block:
            print("stopping threshold reached")
            stopping_criterion = True
        try:
            # Step eps up to the next larger knn distance for the next round.
            eps = find_nearest_above(knn, eps)
            eps = knn[eps]
        except:
            # Reached when no larger knn value exists (find_nearest_above
            # returned None). NOTE(review): the bare except also hides real
            # errors, e.g. the missing numpy import inside find_nearest_above.
            print("highest nn value reached")
            break
    # Final clustering with the last eps reached. NOTE(review): `silhouette`
    # (correct spelling) is assigned here but never read — elsewhere the
    # misspelled `silhoutte` is used; confirm this is intentional.
    res, number_clusters, dbs, chs, silhouette = dbscan.clustering(dm, eps, path)
    clean_arrays = read_from_clustered_merged.read(path+"/temporary/values_clusteredfrom_precomputed_dbscan.csv")
    tables = order_bounding_boxes_in_each_block.get_tables(clean_arrays)
    pretty = regex_clean_new.print_clean(clean_arrays)
    res, details_dict = organize_drawing_according_to_details_new.main_function(pretty, tables)
    # Serialize and persist all results under uuid-derived Redis keys.
    json_isos = json.dumps(isos)
    json_result = json.dumps(res)
    json_details =json.dumps(details_dict)
    write_redis(uuid+"tol", general_tol,db)
    write_redis(uuid+"dims", json_result, db)
    write_redis(uuid+"isos",json_isos, db)
    # NOTE(review): key says "eps" but the value stored is block/word counts.
    write_redis(uuid+"eps", str(number_blocks)+","+str(number_words), db)
    write_redis(uuid+"details",json_details ,db)
if __name__ == "__main__":
    # CLI entry point: main.py <uuid> <pdf-path> <redis-host> <eps>
    uuid = sys.argv[1]
    filename = sys.argv[2]
    db = sys.argv[3]
    # NOTE(review): eps arrives as a string and is recomputed inside main(),
    # so it is effectively unused — confirm before dropping the argument.
    eps = sys.argv[4]
    main(uuid,filename, db, eps)
    #main("33333", "/home/bscheibel/PycharmProjects/clustering/drawings/5129275_Rev01-GV12.pdf", "localhost",3)