Browse Source

added files

bscheibel 4 years ago
parent
commit
c2fb5842fc

+ 27 - 8
dbscan_clustering.py

@@ -12,13 +12,13 @@ def cluster(file_in, file_out):
     # #############################################################################
     data_df = pandas.read_csv("/home/bscheibel/PycharmProjects/dxf_reader/temporary/list_to_csv_with_avg_points.csv", sep=";")
     data_df.head(3)
-    data = data_df[["xavg_elem","yavg_elem"]]
+    data = data_df[["xavg_elem","yavg_elem","ausrichtung"]]
     print(data)
     data = StandardScaler().fit_transform(data)
 
     # #############################################################################
     # Compute DBSCAN
-    db = DBSCAN(eps=0.1, min_samples=1).fit(data)
+    db = DBSCAN(eps=0.06, min_samples=1).fit(data)
     core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
     core_samples_mask[db.core_sample_indices_] = True
     labels = db.labels_
@@ -62,28 +62,45 @@ def cluster(file_in, file_out):
 
     #print(data_df.head(3))
     #data_df.to_csv("values_clusteredfromPDF_GV12.csv")
-    data_df.groupby('cluster')['element'].apply(' '.join).reset_index().to_csv("values_clusteredfromHTML_layout_LH.csv", delimiter=";")
+    data_df.groupby('cluster')['element'].apply(' '.join).reset_index().to_csv("values_clusteredfromHTML_layout_LH.csv",sep=";")
 
 
 def get_average_xy(list_input):
     csv_name = "temporary/list_to_csv_with_avg_points.csv"
     new_list = []
-    resultFile = open(csv_name, 'a')
+    resultFile = open(csv_name, 'w')
     wr = csv.writer(resultFile, delimiter=";")
-    wr.writerow(["element", "xavg_elem","yavg_elem"])
+    wr.writerow(["element", "xavg_elem","yavg_elem", "ausrichtung"])
     for element in list_input:
         xavg_elem = 0
         yavg_elem = 0
+        y_min = 1000000
+        y_max = 0
+        x_min = 1000000
+        x_max = 0
         for blub in element:
             xavg_elem += (float(blub[0]) + float(blub[2]))/2
             yavg_elem += (float(blub[1]) + float(blub[3]))/2
+            if float(blub[1]) < y_min:
+                y_min = float(blub[1])
+                print("y_min:",y_min)
+            if float(blub[0]) < x_min:
+                x_min = float(blub[0])
+            if float(blub[3]) > y_max:
+                y_max = float(blub[3])
+            if float(blub[2]) > x_max:
+                x_max = float(blub[2])
+        if x_max-x_min > y_max-y_min:
+            ausrichtung = 0
+        else:
+            ausrichtung = 1
         xavg_elem = xavg_elem/len(element)
         #print(xavg_elem)
         yavg_elem = yavg_elem/len(element)
         #element.extend([xavg_elem, yavg_elem])
         #print(element)
         #new_list.append(element)
-        wr.writerow([element,xavg_elem,yavg_elem])
+        wr.writerow([element,xavg_elem,yavg_elem, ausrichtung])
 
     resultFile.close()
     #print(new_list)
@@ -91,6 +108,8 @@ def get_average_xy(list_input):
 
 
 #cluster(33,33)
-#result = order_bounding_boxes_in_each_block.get_bound_box()
-#get_average_xy(result)
+file = "/home/bscheibel/PycharmProjects/dxf_reader/drawings/5152166_Rev04.html"
+#file = "/home/bscheibel/PycharmProjects/dxf_reader/drawings/5129275_Rev01-GV12.html"
+result = order_bounding_boxes_in_each_block.get_bound_box(file)
+get_average_xy(result)
 cluster(33,33)

+ 6 - 3
order_bounding_boxes_in_each_block.py

@@ -2,8 +2,8 @@
 
 from bs4 import BeautifulSoup
 
-def get_bound_box():
-    response = open('/home/bscheibel/PycharmProjects/dxf_reader/drawings/5152166_Rev04.html')
+def get_bound_box(file):
+    response = open(file)
     html_doc = response.read()
     response.close()
     html_file = BeautifulSoup(html_doc, 'html.parser')
@@ -47,4 +47,7 @@ def get_bound_box():
 
         print("\n")
 
-    return new_all_elements
+    return new_all_elements
+
+file="/home/bscheibel/PycharmProjects/dxf_reader/drawings/5152166_Rev04.html"
+get_bound_box(file)

+ 6 - 3
read_from_clustered_merged.py

@@ -1,7 +1,10 @@
 import csv
-
+import ast
+import re
 
 with open("/home/bscheibel/PycharmProjects/dxf_reader/values_clusteredfromHTML_layout_LH.csv", "r") as f:
-    reader = csv.reader(f, delimiter=",")
+    reader = csv.reader(f, delimiter=";")
     for row in reader:
-        print(row[2])
+        reg = r",\s*'(\w*\W*.\d*)']"
+        reg_search = re.findall(reg, row[2])
+        print(reg_search)

File diff suppressed because it is too large
+ 331 - 331
temporary/list_to_csv_with_avg_points.csv


File diff suppressed because it is too large
+ 270 - 181
values_clusteredfromHTML_layout_LH.csv