Browse Source

cleaned up

bscheibel 3 years ago
parent
commit
cf7c10567f

BIN
__pycache__/algoritm_knn.cpython-37.pyc


BIN
__pycache__/clustering_precomputed_dbscan.cpython-37.pyc


BIN
__pycache__/clustering_precomputed_dbscan_noParallels.cpython-37.pyc


BIN
__pycache__/csv_to_text.cpython-37.pyc


BIN
__pycache__/get_distances.cpython-37.pyc


BIN
__pycache__/merge_pandas.cpython-37.pyc


BIN
__pycache__/order_bounding_boxes_in_each_block.cpython-37.pyc


BIN
__pycache__/organize_drawing_according_to_details_new.cpython-37.pyc


BIN
__pycache__/read_from_clustered_merged.cpython-37.pyc


BIN
__pycache__/regex_clean_new.cpython-37.pyc


+ 0 - 171
algoritm_knn.py

@@ -1,171 +0,0 @@
-import subprocess
-import PyPDF2
-import numpy as np
-#import csv
-import order_bounding_boxes_in_each_block
-import read_from_clustered_merged
-import get_distances
-import clustering_precomputed_dbscan_noParallels as dbscan
-
-with open('/home/bscheibel/PycharmProjects/clustering/config.txt', 'r') as myfile:
-    config_path = myfile.read()
-    print("Path: ", config_path)
-
-def calculate_inner_distance(result):
-    min_size_x,max_size_x, min_size_y, max_size_y, diagonal = get_distances.size_blocks(result)
-    #print("inner distance: ", diagonal)
-
-def find_nearest_above(my_array, target):
-    diff = my_array - target
-    mask = np.ma.less_equal(diff, 0)
-    # We need to mask the negative differences and zero
-    # since we are looking for values above
-    if np.all(mask):
-        return None # returns None if target is greater than any value
-    masked_diff = np.ma.masked_array(diff, mask)
-    return masked_diff.argmin()
-
-def avg_words_block_clustered(words, cluster):
-    blocks = cluster
-    avg_words = words/blocks
-    return avg_words
-
-def convert_pdf_img(filename):
-    subprocess.call(['pdftoppm', '-jpeg', '-singlefile',
-                     filename, config_path + '/temporary/out'])
-
-def read_pdf(filename):
-    pdf = PyPDF2.PdfFileReader(filename, strict=False)
-    p = pdf.getPage(0)
-    w = p.mediaBox.getWidth()
-    h = p.mediaBox.getHeight()
-    orientation = p.get('/Rotate')
-    return w, h, orientation
-
-def read_webpage(filename):
-    return "test"
-
-
-def get_min_nn(result, path):
-    dm = get_distances.distance_btw_blocks(result, path)
-    knn = get_distances.distance_knn(dm)
-    knn = list(set(knn))
-    knn = sorted(knn)
-    return knn
-
-def show_boxes(filepath, clean_arrays, eps):
-    img_path = config_path + '/temporary/out.jpg'
-    w, h, orientation = read_pdf(filepath)
-    convert_pdf_img(filepath)
-    filename = filepath.split("/")[-1]
-    filename = filename.split(".pdf")[0]
-    read_from_clustered_merged.highlight_image(clean_arrays, img_path, w, h, orientation, eps, filename)
-    return filename
-
-def main(uuid=123, filepath=config_path+"/"+ "drawings/Stahl_Adapterplatte.PDF"):
-    path = config_path
-    filename = order_bounding_boxes_in_each_block.pdf_to_html(uuid, filepath, path)
-    result, number_blocks, number_words = order_bounding_boxes_in_each_block.get_bound_box(filename)  #get coordinates+text out of html file into array of arrays
-    print("number_blocks:", number_blocks)
-    print("number_words:", number_words)
-    avg_words_block = number_words / number_blocks
-    print("avg words/blocks", avg_words_block)
-    result_df = get_distances.get_average_xy(result, path)
-
-    # get min_nn
-    knn = get_min_nn(result_df, path)
-    #print("knn: ", knn)
-    eps = min(knn)
-    print("min_knn: ", eps)
-
-
-
-    #try one clustering iteration, with eps=smallest value
-    res, number_clusters, dbs, chs_old, silhoutte, dm = dbscan.cluster_and_preprocess(result, eps, path)
-    # res = res.drop(res.columns[[0, 1]], axis=1).to_csv("test.csv", header=False)
-    # #res = res.reset_index().tolist()
-    # with open(path+"/test.csv") as csvfile:
-    #     readCSV = csv.reader(csvfile, delimiter=';')
-    #     res = list(readCSV)
-    # print(res)
-    #get_distances.get_average_xy(res, path)
-    res, number_clusters, dbs, chs_old, silhoutte, dm = dbscan.cluster_and_preprocess(result, eps, path)
-
-    #read default value
-    clean_arrays = read_from_clustered_merged.read_default(path + "/temporary/list_to_csv_with_corner_points.csv")
-    show_boxes(filepath, clean_arrays, "default")
-
-    #print(number_clusters)
-
-    clean_arrays = read_from_clustered_merged.read(path + "/temporary/values_clusteredfrom_precomputed_dbscan.csv")
-
-    # show results
-    show_boxes(filepath, clean_arrays, eps)
-    #look at stop criterion
-    avg_words_block = avg_words_block_clustered(number_words, number_clusters)
-    print("a/w first clustering eps=1: ", avg_words_block)
-    #cluster as long as stop criterion is not met
-    print("cluster, eps: ", eps)
-    chs = chs_old
-
-    while(1):    # this condition has to be changed to the breaking condition
-
-        print("cluster, eps: ", eps)
-        dbs_old = dbs
-        chs_old = chs
-        silhoutte_old = silhoutte
-        res, number_clusters, dbs, chs, silhoutte = dbscan.clustering(dm, eps, path)
-
-        # stop criterion, has to be established (silhoutte, davis-bouldin, c...?), or combination of these three
-        avg_words_block_new = avg_words_block_clustered(number_words, number_clusters)
-        print("avg_words_blocks:", avg_words_block_new)
-        #stop_criterion = avg_words_block_new-avg_words_block
-
-        read_from_clustered_merged.read(path + "/temporary/values_clusteredfrom_precomputed_dbscan.csv")
-        # show results
-        #show_boxes(filepath, clean_arrays, eps)
-
-        print(dbs <= dbs_old)
-        print(chs >= chs_old)
-        print(silhoutte >= silhoutte_old)
-
-        old_eps = eps
-
-        #block to see which conditions apply first, mostly silhoutte and dbs
-        if not dbs<=dbs_old and avg_words_block_new>avg_words_block:
-            print("stopping threshold reached dbs")
-            clean_arrays = read_from_clustered_merged.read(
-            path + "/temporary/values_clusteredfrom_precomputed_dbscan.csv")
-            show_boxes(filepath, clean_arrays, "threshold dbs")
-        if not chs>=chs_old and avg_words_block_new>avg_words_block:
-            print("stopping threshold reached chs")
-            clean_arrays = read_from_clustered_merged.read(
-            path + "/temporary/values_clusteredfrom_precomputed_dbscan.csv")
-            show_boxes(filepath, clean_arrays, "threshold chs")
-        if not silhoutte>=silhoutte_old and avg_words_block_new > avg_words_block:
-            print("stopping threshold reached silhoutte")
-            clean_arrays = read_from_clustered_merged.read(
-            path + "/temporary/values_clusteredfrom_precomputed_dbscan.csv")
-            show_boxes(filepath, clean_arrays, "threshold silhoutte")
-
-        # and/or, or does not cluster too much, but can also be not enough
-        if (not dbs <= dbs_old or not chs >= chs_old or not silhoutte >= silhoutte_old) and avg_words_block_new > avg_words_block:
-            print("stopping threshold reached")
-            break
-        try:
-            eps = find_nearest_above(knn, eps)
-            eps = knn[eps]
-        except:
-            print("highest nn value reached")
-            break
-    res, number_clusters, dbs, chs, silhoutte = dbscan.clustering(dm, old_eps, path)
-    print("Last EPS: ", old_eps)
-    print("Last W/B: ", avg_words_block)
-    clean_arrays = read_from_clustered_merged.read_default(path + "/temporary/list_to_csv_with_corner_points.csv")
-    show_boxes(filepath, clean_arrays, "default")
-    clean_arrays = read_from_clustered_merged.read(path + "/temporary/values_clusteredfrom_precomputed_dbscan.csv")
-    #show results
-    show_boxes(filepath, clean_arrays, eps)
-
-if __name__ == "__main__":
-    main()

+ 103 - 35
clustering_precomputed_dbscan.py

@@ -4,40 +4,32 @@ import pandas
 import csv
 from math import sqrt
 from sklearn.cluster import DBSCAN
+from sklearn import metrics
+from sklearn.metrics import davies_bouldin_score
+import time
 
 def get_average_xy(list_input, path):
     csv_name = path+"/temporary/list_to_csv_with_corner_points.csv"
-    resultFile = open(csv_name, 'w+')
+    resultFile = open(csv_name, 'w')
     wr = csv.writer(resultFile, delimiter=";")
     wr.writerow(["element", "xmin","ymin","xmax","ymax", "ausrichtung","point_xmi_ymi","point_xma_ymi","point_xmi_yma","point_xma_yma"])
     result_df = pandas.DataFrame(columns=["point_xmi_ymi","point_xma_ymi","point_xmi_yma","point_xma_yma","ausrichtung"])
+
     for element in list_input:
-        xavg_elem = 0
-        yavg_elem = 0
         ymin = 100000000
         ymax = 0
         xmin = 100000000
         xmax = 0
         newList = []
-        check = False
         if len(element) == 5 and not isinstance(element[0], list):
             newList.append(element)
             element = newList
-        """if len(element) != 5 and isinstance(element[0], list):
-            for el in element:
-                check = isinstance(el[0], list)
-                if len(el) != 5:
-                    print(el)
-                #if check:
-                #    print(el)"""
-
         for blub in element: #get the smallest and largest x and y value for whole block
 
-            if isinstance(blub[0],list) and len(blub[0])==5:
+            if isinstance(blub[0],list) and len(blub[0]) == 5:
                 blub = blub [0]
             if float(blub[1]) < ymin:
                 ymin = float(blub[1])
-                #print("y_min:",y_min)
             if float(blub[0]) < xmin:
                 xmin = float(blub[0])
             if float(blub[3]) > ymax:
@@ -45,12 +37,12 @@ def get_average_xy(list_input, path):
             if float(blub[2]) > xmax:
                 xmax = float(blub[2])
         if float(xmax)-float(xmin) > 1.3*(float(ymax)-float(ymin)):
-            ausrichtung = 0 #horizontal
-        if 1.5*(float(xmax)-float(xmin)) < float(ymax)-float(ymin):
-            ausrichtung = 1 #vertikal
+            ausrichtung = 0  # horizontal
+        #elif
+        elif 1.3*(float(xmax)-float(xmin)) < float(ymax)-float(ymin):
+            ausrichtung = 1   # vertikal
         else:
-            ausrichtung = 3 #sonstiges
-
+            ausrichtung = 3   # sonstiges
 
         ##### GET CORNER POINTS
         point_xmi_ymi = [xmin,ymin]
@@ -67,35 +59,70 @@ def intersects(rectangle1, rectangle2): #using the separating axis theorem, retu
 
     rect_1_min = eval(rectangle1[0])
     rect_1_max = eval(rectangle1[3])
-    rect1_bottom_left_x= rect_1_min[0]
-    rect1_top_right_x=rect_1_max[0]
-    rect1_bottom_left_y= rect_1_max[1]
-    rect1_top_right_y= rect_1_min[1]
+    rect1_bottom_left_x = rect_1_min[0]
+    rect1_top_right_x = rect_1_max[0]
+    rect1_bottom_left_y = rect_1_max[1]
+    rect1_top_right_y = rect_1_min[1]
 
     rect_2_min = eval(rectangle2[0])
     rect_2_max = eval(rectangle2[3])
-    rect2_bottom_left_x= rect_2_min[0]
-    rect2_top_right_x=rect_2_max[0]
-    rect2_bottom_left_y= rect_2_max[1]
-    rect2_top_right_y=rect_2_min[1]
+    rect2_bottom_left_x = rect_2_min[0]
+    rect2_top_right_x = rect_2_max[0]
+    rect2_bottom_left_y = rect_2_max[1]
+    rect2_top_right_y = rect_2_min[1]
 
     return not (rect1_top_right_x < rect2_bottom_left_x or rect1_bottom_left_x > rect2_top_right_x or rect1_top_right_y > rect2_bottom_left_y or rect1_bottom_left_y < rect2_top_right_y)
 
 
+def get_ausrichtung(rectangle1,rectangle2):
+    """Classify how two rectangles are positioned relative to each other.
+
+    Only the first corner of each rectangle (index 0) is compared. Each corner
+    arrives as the string form of an ``[x, y]`` list and is parsed with eval.
+
+    Returns "above" when the horizontal offset between the two corners is
+    smaller than the vertical offset, otherwise "side". The result does not
+    depend on the order of the two arguments.
+    """
+    # NOTE(review): eval() runs on cells read back from a CSV file; if that
+    # file can ever contain untrusted text, switch to ast.literal_eval.
+    min_1 = eval(rectangle1[0])
+    min_2 = eval(rectangle2[0])
+    # Compare magnitudes: with signed offsets, swapping the arguments flipped
+    # the answer, which made the penalty added in dist() asymmetric.
+    diff_y = abs(min_1[1] - min_2[1])
+    diff_x = abs(min_1[0] - min_2[0])
+    if diff_x < diff_y:
+        return "above"
+    return "side"
+
+
+def get_parallel(rectangle1, rectangle2):
+    """Tell whether two rectangles with the same orientation lie in parallel.
+
+    The orientation code is read from index 4 of each rectangle (a string that
+    is parsed with eval). The result is True only when both codes are equal and
+    either
+      * the code is 0 and get_ausrichtung() reports "above", or
+      * the code is 1 and get_ausrichtung() reports "side".
+    Every other combination yields False.
+    """
+    orientation_1 = eval(rectangle1[4])
+    orientation_2 = eval(rectangle2[4])
+
+    # Differently oriented rectangles are never treated as parallel.
+    if orientation_1 != orientation_2:
+        return False
+
+    if orientation_1 == 0:
+        return get_ausrichtung(rectangle1, rectangle2) == "above"
+    if orientation_1 == 1:
+        return get_ausrichtung(rectangle1, rectangle2) == "side"
+    return False
+
+
 def dist(rectangle1, rectangle2):
  #get minimal distance between two rectangles
     distance = 100000000
+    second_dist = 100000
     for point1 in rectangle1[:4]:
         point1 = eval(point1)
         for point2 in rectangle2[:4]:
             point2 = eval(point2)
-            dist = sqrt(((float(point2[0]) - float(point1[0])))**2 + ((float(point2[1]) - float(point1[1])))**2)
+            dist = sqrt((float(point2[0]) - float(point1[0]))**2 + ((float(point2[1]) - float(point1[1])))**2)
             if dist < distance:
+                second_dist = distance
                 distance = dist
-        if rectangle1[4] != rectangle2[4]:
-            distance = dist + 100
+        if get_parallel(rectangle1,rectangle2):
+            distance += 1000
+            second_dist += 1000
         if intersects(rectangle1, rectangle2):
-            distance = 0
+          distance = 0
+          second_dist = 0
+    distance = (distance+second_dist)/2
     return distance
 
 def clustering(dm,eps,path):
@@ -106,17 +133,58 @@ def clustering(dm,eps,path):
     print('Estimated number of clusters: %d' % n_clusters_)
     data_df = pandas.read_csv(path +"/temporary/list_to_csv_with_corner_points.csv", sep=";")
     data_df["cluster"] = labels
-    data_df.groupby(['cluster', 'ausrichtung'])['element'].apply(','.join).reset_index().to_csv(path+"/temporary/values_clusteredfrom_precomputed_dbscan.csv",sep=";", header=False, index=False)
-    return data_df
+    try:
+        dbs = davies_bouldin_score(dm, labels)
+        #dbs = "1"
+        chs = metrics.calinski_harabasz_score(dm, labels)
+        #chs = 1
+        silhoutte = metrics.silhouette_score(dm, labels, metric='precomputed')
+        #silhoutte = 2
+        print("DBscore: ", dbs)
+        print("calsinski: ", chs)
+        print("silhoutte: ", silhoutte)
+
+    except:
+        dbs=1
+        chs=1
+        silhoutte=1
+
+    data_df["ausrichtung"] = 1
+    data_df = data_df.groupby(['cluster', 'ausrichtung'])['element'].apply(','.join).reset_index()
+    data_df.to_csv(path+"/temporary/values_clusteredfrom_precomputed_dbscan.csv",sep=";", header=False, index=False)
+
+    return data_df, n_clusters_, dbs, chs, silhoutte
 
 def cluster_and_preprocess(result,eps,path):
+    start_time = time.time()
     result = get_average_xy(result, path) #input: array of arrays, output: either csv file or array of arrays
+    end_time = time.time()
+    time_taken_get_average = end_time - start_time
+    print("time get average: ", time_taken_get_average)
+
+    start_time = time.time()
     result.to_csv(path+"/temporary/blub.csv", sep=";", index=False, header=None)
+    end_time = time.time()
+    time_taken_tocsv = end_time - start_time
+    print("time to csv:" , time_taken_tocsv)
+
     with open(path+"/temporary/blub.csv") as csvfile:
         readCSV = csv.reader(csvfile, delimiter=';')
         result = list(readCSV)
 
+
+    start_time = time.time()
     dm = np.asarray([[dist(p1, p2) for p2 in result] for p1 in result])
-    clustering_result = clustering(dm,float(eps), path)
-    return clustering_result
+    end_time = time.time()
+    time_taken_dm = end_time - start_time
+    print("time dm:" , time_taken_dm)
+
+
+    start_time = time.time()
+    clustering_result, n_clusters_, dbs, chs, silhoutte = clustering(dm,float(eps), path)
+    end_time = time.time()
+    time_taken_clustering = end_time - start_time
+    print("time clustering:" , time_taken_clustering)
+
+    return clustering_result, n_clusters_, dbs, chs, silhoutte, dm
 

+ 0 - 254
clustering_precomputed_dbscan_noParallels.py

@@ -1,254 +0,0 @@
-# coding: utf8
-import numpy as np
-import pandas
-import csv
-from math import sqrt
-from sklearn.cluster import DBSCAN
-
-from sklearn import metrics
-from sklearn.metrics import davies_bouldin_score
-import time
-
-def get_average_xy(list_input, path):
-    csv_name = path+"/temporary/list_to_csv_with_corner_points.csv"
-    resultFile = open(csv_name, 'w')
-    wr = csv.writer(resultFile, delimiter=";")
-    wr.writerow(["element", "xmin","ymin","xmax","ymax", "ausrichtung","point_xmi_ymi","point_xma_ymi","point_xmi_yma","point_xma_yma"])
-
-    #result_df = pandas.DataFrame(columns=["point_xmi_ymi","point_xma_ymi","point_xmi_yma","point_xma_yma"])
-    result_df = pandas.DataFrame(columns=["point_xmi_ymi","point_xma_ymi","point_xmi_yma","point_xma_yma","ausrichtung"])
-
-    for element in list_input:
-        xavg_elem = 0
-        yavg_elem = 0
-        ymin = 100000000
-        ymax = 0
-        xmin = 100000000
-        xmax = 0
-        newList = []
-        check = False
-        if len(element) == 5 and not isinstance(element[0], list):
-            newList.append(element)
-            element = newList
-        """if len(element) != 5 and isinstance(element[0], list):
-            for el in element:
-                check = isinstance(el[0], list)
-                if len(el) != 5:
-                    print(el)
-                #if check:
-                #    print(el)"""
-
-        for blub in element: #get the smallest and largest x and y value for whole block
-
-            if isinstance(blub[0],list) and len(blub[0]) == 5:
-                blub = blub [0]
-            if float(blub[1]) < ymin:
-                ymin = float(blub[1])
-                #print("y_min:",y_min)
-            if float(blub[0]) < xmin:
-                xmin = float(blub[0])
-            if float(blub[3]) > ymax:
-                ymax = float(blub[3])
-            if float(blub[2]) > xmax:
-                xmax = float(blub[2])
-        if float(xmax)-float(xmin) > 1.3*(float(ymax)-float(ymin)):
-            ausrichtung = 0  # horizontal
-        #elif
-        elif 1.3*(float(xmax)-float(xmin)) < float(ymax)-float(ymin):
-            ausrichtung = 1   # vertikal
-        else:
-            ausrichtung = 3   # sonstiges
-
-
-        ##### GET CORNER POINTS
-        point_xmi_ymi = [xmin,ymin]
-        point_xma_ymi = [xmax,ymin]
-        point_xmi_yma = [xmin,ymax]
-        point_xma_yma = [xmax,ymax]
-        wr.writerow([element,xmin,ymin,xmax,ymax, ausrichtung,point_xmi_ymi,point_xma_ymi,point_xmi_yma,point_xma_yma])
-        result_df.loc[len(result_df)]=[point_xmi_ymi,point_xma_ymi, point_xmi_yma, point_xma_yma,ausrichtung]
-        #wr.writerow([element, xmin,ymin,xmax,ymax])
-        #result_df.loc[len(result_df)]=[xmin,xmax, xmin, ymax, ausrichtung]
-
-    resultFile.close()
-    return result_df
-
-def intersects(rectangle1, rectangle2): #using the separating axis theorem, returns true if they intersect, otherwise false
-
-    rect_1_min = eval(rectangle1[0])
-    rect_1_max = eval(rectangle1[3])
-    rect1_bottom_left_x = rect_1_min[0]
-    rect1_top_right_x = rect_1_max[0]
-    rect1_bottom_left_y = rect_1_max[1]
-    rect1_top_right_y = rect_1_min[1]
-
-    rect_2_min = eval(rectangle2[0])
-    rect_2_max = eval(rectangle2[3])
-    rect2_bottom_left_x = rect_2_min[0]
-    rect2_top_right_x = rect_2_max[0]
-    rect2_bottom_left_y = rect_2_max[1]
-    rect2_top_right_y = rect_2_min[1]
-
-    return not (rect1_top_right_x < rect2_bottom_left_x or rect1_bottom_left_x > rect2_top_right_x or rect1_top_right_y > rect2_bottom_left_y or rect1_bottom_left_y < rect2_top_right_y)
-
-
-def get_ausrichtung(rectangle1,rectangle2):
-    #check if rect 1 and rect 2 are above or beside, r,l, a,b
-
-    min_1 = eval(rectangle1[0])
-    #max_1 = eval(rectangle1[3])
-    min_2 = eval(rectangle2[0])
-    #max_2 = eval(rectangle2[3])
-
-    diff_y = min_1[1] - min_2[1] #
-    diff_x = min_1[0] - min_2[0]
-
-    if diff_x < diff_y:
-        ausrichtung = "above"
-        #print(rectangle1, rectangle2, "above")
-    else:
-        ausrichtung = "side"
-        #print(rectangle1,rectangle2, "side")
-
-    return ausrichtung
-
-
-def get_parallel(rectangle1, rectangle2):
-    #check if long sides are parallel, then we do not want to cluster these
-    #check if x or y axis is longer, then get_ausrichtung
-    parallel = False
-    #x_longer_1 = False
-    #x_longer_2 = False
-    #print(rectangle1, rectangle1[0])
-    min_1 = eval(rectangle1[0])
-    max_1 = eval(rectangle1[3])
-    min_2 = eval(rectangle2[0])
-    max_2 = eval(rectangle2[3])
-    ausrichtung_1 = eval(rectangle1[4])
-    ausrichtung_2 = eval(rectangle2[4])
-    x_axis_rect1 = float(max_1[0])-float(min_1[0])
-    x_axis_rect2 = float(max_2[0])-float(min_2[0])
-
-    y_axis_rect1 = float(max_1[1])-float(min_1[1])
-    y_axis_rect2 = float(max_2[1])-float(min_2[1])
-
-
-
-    if ausrichtung_1 == ausrichtung_2 and ausrichtung_1 == 0:
-        ausrichtung = get_ausrichtung(rectangle1, rectangle2)
-        if ausrichtung == "above":
-            parallel = True
-
-    if ausrichtung_1 == ausrichtung_2 and ausrichtung_1 == 1:
-        ausrichtung = get_ausrichtung(rectangle1, rectangle2)
-        if ausrichtung == "side":
-            parallel = True
-
-
-
-
-    return parallel
-
-
-def dist(rectangle1, rectangle2):
- #get minimal distance between two rectangles
-    distance = 100000000
-    second_dist = 100000
-    dist_x = 100000
-    dist_y = 100000
-    #print(rectangle1, rectangle2)
-    #get_parallel(rectangle1, rectangle2)
-    for point1 in rectangle1[:4]:
-        point1 = eval(point1)
-        for point2 in rectangle2[:4]:
-            point2 = eval(point2)
-            dist = sqrt((float(point2[0]) - float(point1[0]))**2 + ((float(point2[1]) - float(point1[1])))**2)
-            if dist < distance:
-                second_dist = distance
-                distance = dist
-                dist_x = float(point2[0]) - float(point1[0])
-                #dist_y = (float(point2[1]) - float(point1[1]))
-        if get_parallel(rectangle1,rectangle2):
-            #print("parallel", rectangle2, rectangle1)
-            distance += 1000
-            second_dist += 1000
-            #continue
-        # if rectangle1[4] == rectangle2[4]:
-        #     if rectangle1[4] == "0" and dist_x < 10:
-        #           #print(rectangle1, rectangle2)
-        #         distance = dist + 100
-        #     elif rectangle1[4] == "1" and dist_y < 10:
-        #         distance = dist + 100
-        #          #print(rectangle1, rectangle2)
-
-        if intersects(rectangle1, rectangle2):
-          #print(rectangle1, rectangle2, " intersect")
-          distance = 0
-          second_dist = 0
-    #print("distanz_zwei: ", second_dist, "distanz: ", distance)
-    distance = (distance+second_dist)/2
-    return distance
-
-def clustering(dm,eps,path):
-    db = DBSCAN(eps=eps, min_samples=1, metric="precomputed").fit(dm)
-    labels = db.labels_
-    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
-
-    print('Estimated number of clusters: %d' % n_clusters_)
-    data_df = pandas.read_csv(path +"/temporary/list_to_csv_with_corner_points.csv", sep=";")
-    data_df["cluster"] = labels
-    try:
-        dbs = davies_bouldin_score(dm, labels)
-        #dbs = "1"
-        chs = metrics.calinski_harabasz_score(dm, labels)
-        #chs = 1
-        silhoutte = metrics.silhouette_score(dm, labels, metric='precomputed')
-        #silhoutte = 2
-        print("DBscore: ", dbs)
-        print("calsinski: ", chs)
-        print("silhoutte: ", silhoutte)
-
-    except:
-        dbs=1
-        chs=1
-        silhoutte=1
-
-    data_df["ausrichtung"] = 1
-    data_df = data_df.groupby(['cluster', 'ausrichtung'])['element'].apply(','.join).reset_index()
-    data_df.to_csv(path+"/temporary/values_clusteredfrom_precomputed_dbscan.csv",sep=";", header=False, index=False)
-
-    return data_df, n_clusters_, dbs, chs, silhoutte
-
-def cluster_and_preprocess(result,eps,path):
-    start_time = time.time()
-    result = get_average_xy(result, path) #input: array of arrays, output: either csv file or array of arrays
-    end_time = time.time()
-    time_taken_get_average = end_time - start_time
-    print("time get average: ", time_taken_get_average)
-
-    start_time = time.time()
-    result.to_csv(path+"/temporary/blub.csv", sep=";", index=False, header=None)
-    end_time = time.time()
-    time_taken_tocsv = end_time - start_time
-    print("time to csv:" , time_taken_tocsv)
-
-    with open(path+"/temporary/blub.csv") as csvfile:
-        readCSV = csv.reader(csvfile, delimiter=';')
-        result = list(readCSV)
-
-
-    start_time = time.time()
-    dm = np.asarray([[dist(p1, p2) for p2 in result] for p1 in result])
-    end_time = time.time()
-    time_taken_dm = end_time - start_time
-    print("time dm:" , time_taken_dm)
-
-
-    start_time = time.time()
-    clustering_result, n_clusters_, dbs, chs, silhoutte = clustering(dm,float(eps), path)
-    end_time = time.time()
-    time_taken_clustering = end_time - start_time
-    print("time clustering:" , time_taken_clustering)
-
-    return clustering_result, n_clusters_, dbs, chs, silhoutte, dm
-

+ 1 - 1
config.txt

@@ -1 +1 @@
-/home/bscheibel/technical_drawings_extraction
+/home/bscheibel/technical_drawings_extraction

+ 0 - 227
get_distances.py

@@ -1,227 +0,0 @@
-from math import sqrt
-import numpy as np
-import pandas
-import csv
-#import math
-from clustering_precomputed_dbscan_noParallels import intersects
-from scipy import stats
-
-
-def get_average_xy(list_input, path):
-    csv_name = path+"/temporary/list_to_csv_with_corner_points_distances.csv"
-    resultFile = open(csv_name, 'w')
-    wr = csv.writer(resultFile, delimiter=";")
-    wr.writerow(["element","point_xmi_ymi","point_xma_ymi","point_xmi_yma","point_xma_yma"])
-
-    result_df = pandas.DataFrame(columns=["point_xmi_ymi","point_xma_ymi","point_xmi_yma","point_xma_yma"])
-    #result_df = pandas.DataFrame(columns=["xmin","ymin","xmax","ymax"])
-
-    for element in list_input:
-        xavg_elem = 0
-        yavg_elem = 0
-        ymin = 100000000
-        ymax = 0
-        xmin = 100000000
-        xmax = 0
-        newList = []
-        check = False
-        if len(element) == 5 and not isinstance(element[0], list):
-            newList.append(element)
-            element = newList
-
-        for blub in element: #get the smallest and largest x and y value for whole block
-
-            if isinstance(blub[0],list) and len(blub[0])==5:
-                blub = blub [0]
-            if float(blub[1]) < ymin:
-                ymin = float(blub[1])
-                #print("y_min:",y_min)
-            if float(blub[0]) < xmin:
-                xmin = float(blub[0])
-            if float(blub[3]) > ymax:
-                ymax = float(blub[3])
-            if float(blub[2]) > xmax:
-                xmax = float(blub[2])
-        point_xmi_ymi = [xmin,ymin]
-        point_xma_ymi = [xmax,ymin]
-        point_xmi_yma = [xmin,ymax]
-        point_xma_yma = [xmax,ymax]
-        wr.writerow([element, point_xmi_ymi, point_xma_ymi, point_xmi_yma, point_xma_yma])
-        result_df.loc[len(result_df)] = [ point_xmi_ymi, point_xma_ymi, point_xmi_yma, point_xma_yma]
-
-    resultFile.close()
-    #result_df.to_csv(path+"/temporary/blub.csv", sep=";", index=False, header=None)
-
-    return result_df
-
-
-def dist(rectangle1, rectangle2):
- #get minimal distance between two rectangles
-    distance = 100000000
-    #print(rectangle2,rectangle1)
-    for point1 in rectangle1[:4]:
-        #print(point1)
-        point1 = eval(point1) #necessary to convert [] to real tuple
-        for point2 in rectangle2[:4]:
-            #print(point2)
-            point2 = eval(point2)
-            dist = sqrt(((float(point2[0]) - float(point1[0])))**2 + ((float(point2[1]) - float(point1[1])))**2)
-            if dist < distance:
-                distance = dist
-        if intersects(rectangle1, rectangle2):
-            distance = 0
-    return distance
-
-def size_blocks(list_input):  #x, y distance of blocks (not regarding words)
-    #print(list_input)
-    min_size_x = 1000000000
-    max_size_x = 0
-    min_size_y = 1000000000
-    max_size_y = 0
-    x_size = []
-    y_size = []
-    diagonal = []
-    for element in list_input:
-        newList = []
-        ymin = 1000000000
-        ymax = 0
-        xmin = 1000000000
-        xmax = 0
-        if len(element) == 5 and not isinstance(element[0], list):
-            newList.append(element)
-            element = newList
-
-        for blub in element:  # get the smallest and largest x and y value for whole block, block sizes
-            print(blub)
-            if isinstance(blub[0], list) and len(blub[0]) == 5:
-                blub = blub[0]
-            if float(blub[1]) < ymin:
-                ymin = float(blub[1])
-                # print("y_min:",y_min)
-            if float(blub[0]) < xmin:
-                xmin = float(blub[0])
-            if float(blub[3]) > ymax:
-                ymax = float(blub[3])
-            if float(blub[2]) > xmax:
-                xmax = float(blub[2])
-
-        distance_x = xmax-xmin
-        distance_y = ymax-ymin
-        diagonal_ = math.sqrt(distance_x ** 2 + distance_y ** 2)  # satz der pythogoras
-        diagonal.append(diagonal_)
-        x_size.append(distance_x)
-        #print(distance_x, blub[4])
-        #print(distance_y, blub[4])
-        y_size.append(distance_y)
-        if distance_x < min_size_x:
-            min_size_x = distance_x
-        if distance_x > max_size_x:
-            max_size_x = distance_x
-        if distance_y < min_size_y:
-            min_size_y = distance_y
-        if distance_y > max_size_y:
-            max_size_y = distance_y
-
-    x_size = np.array(x_size)
-
-    x_size = x_size.round(decimals=0)
-    #print(x_size)
-    median_size_x = np.median(x_size)
-    modus_size_x = stats.mode(x_size)
-
-    y_size = np.array(y_size)
-
-    y_size = y_size.round(decimals=0)
-    #print(y_size)
-    median_size_y = np.median(y_size)
-    modus_size_y = stats.mode(y_size)
-
-
-
-    #print(min_size_x,max_size_x, min_size_y, max_size_y)
-    print("Size_Median_x:", median_size_x)
-    print("Size_Median_y:", median_size_y)
-    print("Size_Modus_x:", modus_size_x)
-    print("Size_Modus_y:", modus_size_y)
-    return min_size_x,max_size_x, min_size_y, max_size_y, diagonal
-
-
-def distance_btw_blocks(result, path):
-    result.to_csv(path+"/temporary/blub_distances.csv", sep=";", index=False, header=None)
-    with open(path+"/temporary/blub_distances.csv") as csvfile:
-        readCSV = csv.reader(csvfile, delimiter=';')
-        result = list(readCSV)
-    dm = np.asarray([[dist(p1, p2) for p2 in result] for p1 in result])
-
-    dm_flattened = dm.flatten()
-    dm_flattened = dm_flattened[dm_flattened != 0]
-    dm_flattened = dm_flattened.round(decimals=0)
-    #dm_ordered = sorted(dm_flattened)
-    #print(dm_ordered)
-    median= np.median(dm_flattened)
-
-
-
-    mode = stats.mode(dm_flattened)
-    most_often= pandas.value_counts(dm_flattened)
-    #x = itemfreq(dm_flattened)
-    #print(pandas.DataFrame(most_often, columns=["first"]).columns)
-    #print(x)
-    print("Distance Mode:", mode)
-    #print(most_often)
-    #largest = most_often.nsmallest(15, "first")
-    #print(largest)
-    print("Distance Median:", median)
-    #print(max_dist, min_dist)
-
-    return dm
-
-def distance_knn(dm):
-    knn = []
-    for row in dm:
-        row = row[row != 0]
-        row = row.round(decimals=2)
-        row = sorted(row)
-        knn.extend(row[:2])
-    return knn
-
-def avg_words_block_clustered(words, cluster):
-    blocks = cluster
-    avg_words = words/blocks
-    #print(avg_words)
-    return avg_words
-
-
-# def main(uuid, filepath):
-#     path = "/home/bscheibel/PycharmProjects/clustering"
-#     filename = order_bounding_boxes_in_each_block.pdf_to_html(uuid, filepath, path)
-#     result, number_blocks, number_words = order_bounding_boxes_in_each_block.get_bound_box(filename)
-#     print("number_blocks:", number_blocks)
-#     print("number_words:", number_words)
-#     print("avg words/blocks", number_words/number_blocks)
-#     size_blocks(result)
-#
-#     result_df = get_average_xy(result, path)
-#
-#
-#     result = clustering_precomputed_dbscan_og_without.get_average_xy(result, path) #input: array of arrays, output: either csv file or array of arrays
-#     #print(result)
-#     result.to_csv(path+"/temporary/blub.csv", sep=";", index=False, header=None)
-#     #with open(path+"/temporary/blub.csv") as csvfile:
-#     #    readCSV = csv.reader(csvfile, delimiter=';')
-#     #    result = list(readCSV)
-#     dm = distance_btw_blocks(result_df, path)
-#     knn = distance_knn(dm)
-#     median= np.median(knn)
-#     mode = stats.mode(knn)
-#     min_nn = min(knn)
-#     avg = np.sum(knn)/len(knn)
-#     print("min_knn:", min_nn)
-#     print("knn_mean:", avg)
-#     print("knn_median:", median)
-#     print("knn_mode:", mode)
-
-    #knn.nearestNeighbors(dm)
-
-
-#main("33333", "/home/bscheibel/PycharmProjects/clustering/drawings/Werkstattzeichnung Zwischenwelle.pdf")

+ 49 - 30
main.py

@@ -1,51 +1,73 @@
 import order_bounding_boxes_in_each_block
-import clustering_precomputed_dbscan_noParallels as dbscan
+import clustering_precomputed_dbscan as dbscan
 import read_from_clustered_merged
-import regex_clean_new
 import organize_drawing_according_to_details_new
-import json
 import redis
+import json
 import sys
-import get_distances
-#import algoritm_knn
+import csv
+import numpy as np
+
+with open('/home/bscheibel/PycharmProjects/clustering/config.txt', 'r') as file:
+    config_path = file.read()
+    print("Path: ", config_path)
+
+
+def distance_knn(dm):
+    knn = []
+    for row in dm:
+        row = row[row != 0]
+        row = row.round(decimals=2)
+        row = sorted(row)
+        knn.extend(row[:2])
+    return knn
+
+
+def distance_btw_blocks(result, path):
+    result.to_csv(path+"/temporary/blub_distances.csv", sep=";", index=False, header=None)
+    with open(path+"/temporary/blub_distances.csv") as csvfile:
+        read_csv = csv.reader(csvfile, delimiter=';')
+        result = list(read_csv)
+    dm = np.asarray([[dbscan.dist(p1, p2) for p2 in result] for p1 in result])
+    return dm
 
-config_path = "/home/bscheibel/technical_drawings_extraction"
 
 def get_min_nn(result, path):
-    dm = get_distances.distance_btw_blocks(result, path)
-    knn = get_distances.distance_knn(dm)
+    dm = distance_btw_blocks(result, path)
+    knn = distance_knn(dm)
     knn = list(set(knn))
     knn = sorted(knn)
     return knn
 
+
 def find_nearest_above(my_array, target):
     diff = my_array - target
     mask = np.ma.less_equal(diff, 0)
     if np.all(mask):
-        return None # returns None if target is greater than any value
+        return None
     masked_diff = np.ma.masked_array(diff, mask)
     return masked_diff.argmin()
 
+
 def write_redis(uuid, result, db_params):
-    db = redis.Redis(db_params)
+    db_params = redis.Redis(db_params)
     #db = db = redis.Redis(unix_socket_path='/tmp/redis.sock',db=7)
     print(db_params)
-    db.set(uuid, result)
+    db_params.set(uuid, result)
 
-def main(uuid, filepath, db, eps):
-    print("TEEEEST")
-    print(filepath)
+
+def main(uuid, filepath, db, eps_manual):
     path = config_path
     filename = order_bounding_boxes_in_each_block.pdf_to_html(uuid, filepath, path)
     result, number_blocks, number_words= order_bounding_boxes_in_each_block.get_bound_box(filename)  ##get coordinates+text out of html file into array of arrays
     isos, general_tol = order_bounding_boxes_in_each_block.extract_isos(result)
-    result_df = get_distances.get_average_xy(result, path)
+    result_df = dbscan.get_average_xy(result, path)
     knn = get_min_nn(result_df, path)
     eps = min(knn)
     res, number_clusters, dbs, chs_old, silhoutte, dm = dbscan.cluster_and_preprocess(result, eps, path)
     stopping_criterion = False
 
-    while(not stopping_criterion):    # this condition has to be changed to the breaking condition
+    while not stopping_criterion:
 
         print("cluster, eps: ", eps)
         silhoutte_old = silhoutte
@@ -53,10 +75,9 @@ def main(uuid, filepath, db, eps):
 
         read_from_clustered_merged.read(path + "/temporary/values_clusteredfrom_precomputed_dbscan.csv")
         old_eps = eps
-
-        if (not silhoutte >= silhoutte_old): #and avg_words_block_new > avg_words_block:
-             print("stopping threshold reached")
-             stopping_criterion = True
+        if not silhoutte >= silhoutte_old:
+            print("stopping threshold reached")
+            stopping_criterion = True
         try:
             eps = find_nearest_above(knn, eps)
             eps = knn[eps]
@@ -64,27 +85,25 @@ def main(uuid, filepath, db, eps):
             print("highest nn value reached")
             break
 
-    res, number_clusters, dbs, chs, silhouette = dbscan.clustering(dm, eps, path)
+    res, number_clusters, dbs, chs, silhouette = dbscan.clustering(dm, old_eps, path)
     clean_arrays = read_from_clustered_merged.read(path+"/temporary/values_clusteredfrom_precomputed_dbscan.csv")
     tables = order_bounding_boxes_in_each_block.get_tables(clean_arrays)
-    pretty = regex_clean_new.print_clean(clean_arrays)
+    pretty = read_from_clustered_merged.print_clean(clean_arrays)
     res, details_dict = organize_drawing_according_to_details_new.main_function(pretty, tables)
 
     json_isos = json.dumps(isos)
     json_result = json.dumps(res)
-    json_details =json.dumps(details_dict)
-    write_redis(uuid+"tol", general_tol,db)
+    json_details = json.dumps(details_dict)
+    write_redis(uuid+"tol", general_tol, db)
     write_redis(uuid+"dims", json_result, db)
     write_redis(uuid+"isos",json_isos, db)
-    write_redis(uuid+"eps", str(number_blocks)+","+str(number_words), db)
-    write_redis(uuid+"details",json_details ,db)
+    write_redis(uuid+"eps", str(number_blocks)+"," + str(number_words), db)
+    write_redis(uuid+"details", json_details, db)
+
 
 if __name__ == "__main__":
     uuid = sys.argv[1]
     filename = sys.argv[2]
     db = sys.argv[3]
     eps = sys.argv[4]
-    main(uuid,filename, db, eps)
-
-
-#main("33333", "/home/bscheibel/PycharmProjects/clustering/drawings/5129275_Rev01-GV12.pdf", "localhost",3)
+    main(uuid, filename, db, eps)

+ 1 - 16
order_bounding_boxes_in_each_block.py

@@ -1,5 +1,3 @@
-### FIRST READ EACH BLOCK IN AN ARRAY
-
 from bs4 import BeautifulSoup
 import subprocess
 import re
@@ -19,25 +17,13 @@ def get_bound_box(file):
         words = block.findAll('word')
         number_words += len(words)
         for word in words:
-            word_list = []
-            word_list.append(word["xmin"])
-            word_list.append(word["ymin"])
-            word_list.append(word["xmax"])
-            word_list.append(word["ymax"])
-            word_list.append(word.string)
+            word_list = [word["xmin"], word["ymin"], word["xmax"], word["ymax"], word.string]
             list_elements.append(word_list)
         all_elements.append(list_elements)
-
-
-    #### NEXT SORT ELEMENTS IN EACH BLOCK BY THEIR X AND Y COORDINATES
-    #### FIRST TRYING XMIN und YMAX
-    ###FIRST CHECKING IF THE ELEMENTS ARE VERTICAL, IF YES THEN NO SORTING
     new_all_elements = []
 
     for element in all_elements:
         later_bigger = (float(element[-1][0])-(float(element[0][0]))) #check if xmin from first element is bigger than xmin from last element
-        abstand_x = abs(float(element[-1][0])-(float(element[0][2])))
-        abstand_y = abs(float(element[-1][3])-float(element[0][1]))
         if later_bigger >= -5:
             new_all_elements.append(element)
         else:
@@ -74,7 +60,6 @@ def extract_isos(result):
 
     return details_, str(general_tol)
 
-
 def get_tables(result):
     reg = r"(Start drawing)|(All dimensions)"
     tables = []

+ 7 - 10
organize_drawing_according_to_details_new.py

@@ -12,9 +12,6 @@ def get_details(result): #search for all details in drawing and store it in list
     number = len(details)
     return details, number
 
-
-
-
 def get_borders(details, tables):
     sections = []
     #print(coords)
@@ -76,9 +73,9 @@ def get_borders(details, tables):
                 table_xmax = table[2]
             table_ymin = table[1]
             if y_max > table_ymin:
-                if firstx_min > table_xmin and firstx_min < table_xmax:
+                if table_xmin < firstx_min < table_xmax:
                     y_max = table_ymin
-                elif x_max > table_xmin and x_max < table_xmax:
+                elif table_xmin < x_max < table_xmax:
                     y_max = table_ymin
 
         sections.append((first,x_min, y_min,x_max,y_max))
@@ -133,7 +130,7 @@ def main_function(result, tables):
             section.append(list(("No details",list((000.000,000.000,100000000.000,10000000.000)))))
 
 
-    dict = {}
+    dict_help = {}
 
     for res in result:
         for det in section:
@@ -143,11 +140,11 @@ def main_function(result, tables):
             if intersects(det,result[res]):
                 name = det[0]
                 help_dict[res] = result[res]
-                if name in dict:
-                    dict[name].update(help_dict)
+                if name in dict_help:
+                    dict_help[name].update(help_dict)
                 else:
-                    dict[name] = help_dict
+                    dict_help[name] = help_dict
                 break
 
-    return dict, details_dict
+    return dict_help, details_dict
 

+ 54 - 0
read_from_clustered_merged.py

@@ -1,3 +1,5 @@
+# coding=utf8
+import re
 import csv
 
 def read(file):
@@ -76,3 +78,55 @@ def read(file):
             else:
                 dict[element] = coords
     return dict
+
+
+def print_clean(dims): ##alles raus was nicht relevant ist! und zeichen ersetzen!
+    dims_new = {}
+    reg_clean = r"ISO|[a-zA-Z]{4,}|^\d\s\d$|^[a-zA-Z]{2,}\d.*$|^[A-Z]{1}$|^mm$|^\d{2}\.\d{2}\.\d{4}|^-$|A\d|^\d{1}$|^[A-Za-z]{3,}\.?$|^\d{5}|^\d{1}\s\W\s\d"
+    for dim in dims:
+        if re.search(reg_clean, dim):
+            continue
+        else:
+            coords = dims[dim]
+            if re.search(r"b\s\d*\W?\d*\s.",dim):
+                dim = dim.replace('b', u"\u27C2")
+            if re.search(r"g\s\d*\W?\d*", dim):
+                dim = dim.replace('g', u"\u232D")
+            if re.search(r"f\s\d*\W?\d*", dim):
+                dim = dim.replace('f',  u"\u2225")
+            if re.search(r"r\s\d*\W?\d*", dim):
+                dim = dim.replace('r', u"\u25CE")
+            if re.search(r"i\s\d*\W?\d*", dim):
+                dim = dim.replace('i', u"\u232F")
+            if re.search(r"j\s\d*\W?\d*", dim):
+                dim = dim.replace('j', u"\u2316")
+            if re.search(r"d\s\d*\W?\d*", dim):
+                dim = dim.replace('d', u"\u2313")
+            if re.search(r"c\s+\d*", dim):
+                dim = dim.replace('c', u"\u23E5")
+            if re.search(r"n\s+\d*", dim):
+                dim = dim.replace('n', u"\u2300")
+            if "È" in dim:
+                dim = dim.replace('È', 'GG')
+            if "`" in dim:
+                dim = dim.replace('`', u"\u00B1")
+            if "#" in dim:
+                dim = dim.replace('#', "↔")
+            if "⌀" in dim:
+                dim = dim.replace('⌀', "Ø")
+            reg12 = re.compile(r"(.*\d{1,4}\W?\d{0,4})\s?\+\s-\s?(\d{1,4}\W?\d{0,4})\s?(\d{1,4}\W?\d{0,3})") ##???? was machst du?? nach toleranzen suchen, mit +/- blabla
+            reg13 = re.compile(r"(.*)\+\s\+\s(\d*\W\d*)\s(\d*\W\d*)(.*)")
+            reg14 = re.compile(r"(\+\s?\d*,?.?\d*)\s*(\d*,?.?\d*)\s*(\+?\s?\-?\s?\d*,?.?\d*)")
+            g = re.search(reg12, dim)
+            f = re.search(reg13, dim)
+            e = re.search(reg14, dim)
+            if g:
+                dim = re.sub(reg12, g.group(1) + " +" + g.group(2) + " -" + g.group(3), dim) # +/- toleranzen schön darstellen
+            elif f:
+                dim = f.group(1) + "+" + f.group(2) + " +" + f.group(3) + f.group(4)
+            elif e:
+                dim = e.group(2) + " " + e.group(1) + " " + e.group(3)
+
+            dim = dim.replace(" ,",".").replace(", ",".").replace(",",".")
+            dims_new[dim] = coords
+    return dims_new

+ 0 - 59
regex_clean_new.py

@@ -1,59 +0,0 @@
-# coding=utf8
-import re
-
-
-def print_clean(dims): ##alles raus was nicht relevant ist! und zeichen ersetzen!
-    dims_new = {}
-    reg_clean = r"ISO|[a-zA-Z]{4,}|^\d\s\d$|^[a-zA-Z]{2,}\d.*$|^[A-Z]{1}$|^mm$|^\d{2}\.\d{2}\.\d{4}|^-$|A\d|^\d{1}$|^[A-Za-z]{3,}\.?$|^\d{5}|^\d{1}\s\W\s\d"
-    for dim in dims:
-        if re.search(reg_clean, dim):
-            continue
-        else:
-            coords = dims[dim]
-            if re.search(r"b\s\d*\W?\d*\s.",dim):
-                dim = dim.replace('b', u"\u27C2")
-            if re.search(r"g\s\d*\W?\d*", dim):
-                dim = dim.replace('g', u"\u232D")
-            if re.search(r"f\s\d*\W?\d*", dim):
-                dim = dim.replace('f',  u"\u2225")
-            if re.search(r"r\s\d*\W?\d*", dim):
-                dim = dim.replace('r', u"\u25CE")
-            if re.search(r"i\s\d*\W?\d*", dim):
-                dim = dim.replace('i', u"\u232F")
-            if re.search(r"j\s\d*\W?\d*", dim):
-                dim = dim.replace('j', u"\u2316")
-            if re.search(r"d\s\d*\W?\d*", dim):
-                dim = dim.replace('d', u"\u2313")
-            if re.search(r"c\s+\d*", dim):
-                dim = dim.replace('c', u"\u23E5")
-            if re.search(r"n\s+\d*", dim):
-                dim = dim.replace('n', u"\u2300")
-            if "È" in dim:
-                dim = dim.replace('È', 'GG')
-            if "`" in dim:
-                dim = dim.replace('`', u"\u00B1")
-            if "#" in dim:
-                dim = dim.replace('#', "↔")
-            if "⌀" in dim:
-                dim = dim.replace('⌀', "Ø")
-            reg12 = re.compile(r"(.*\d{1,4}\W?\d{0,4})\s?\+\s-\s?(\d{1,4}\W?\d{0,4})\s?(\d{1,4}\W?\d{0,3})") ##???? was machst du?? nach toleranzen suchen, mit +/- blabla
-            reg13 = re.compile(r"(.*)\+\s\+\s(\d*\W\d*)\s(\d*\W\d*)(.*)")
-            reg14 = re.compile(r"(\+\s?\d*,?.?\d*)\s*(\d*,?.?\d*)\s*(\+?\s?\-?\s?\d*,?.?\d*)")
-            g = re.search(reg12, dim)
-            f = re.search(reg13, dim)
-            e = re.search(reg14, dim)
-            if g:
-                dim = re.sub(reg12, g.group(1) + " +" + g.group(2) + " -" + g.group(3), dim) # +/- toleranzen schön darstellen
-                #print(dim)
-            elif f:
-                dim = f.group(1) + "+" + f.group(2) + " +" + f.group(3) + f.group(4)
-            elif e:
-                dim = e.group(2) + " " + e.group(1) + " " + e.group(3)
-
-            dim = dim.replace(" ,",".").replace(", ",".").replace(",",".")
-            dims_new[dim] = coords
-
-    #for dim in dims_new:
-    #    print(dim)
-    #print(dims_new)
-    return dims_new

File diff suppressed because it is too large
+ 0 - 1226
temporary/33333out.html


File diff suppressed because it is too large
+ 0 - 1226
temporary/8409194out.html