Browse Source

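Fixed reading: pass the DBSCAN eps in from the command line, normalize decimal commas to dots in cleaned dimensions, and widen the default "No details" bounding box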

bscheibel 4 years ago
parent
commit
8045451f61
4 changed files with 14 additions and 8 deletions
  1. 4 4
      clustering_precomputed_dbscan.py
  2. 4 3
      main.py
  3. 1 1
      organize_drawing_according_to_details_new.py
  4. 5 0
      regex_clean_new.py

+ 4 - 4
clustering_precomputed_dbscan.py

@@ -112,8 +112,8 @@ def dist(rectangle1, rectangle2):
             #print(rectangle1)
     return distance
 
-def clustering(dm):
-    db = DBSCAN(eps=2, min_samples=1, metric="precomputed").fit(dm)  ##3.93 until now, bei 5 shon mehr erkannt, 7 noch mehr erkannt aber auch schon zu viel; GV12 ist 4.5 gut für LH zu wenig
+def clustering(dm,eps):
+    db = DBSCAN(eps=eps, min_samples=1, metric="precomputed").fit(dm)  ##3.93 until now, bei 5 shon mehr erkannt, 7 noch mehr erkannt aber auch schon zu viel; GV12 ist 4.5 gut für LH zu wenig
     #db = OPTICS(min_samples=1,xi=0.1, metric="precomputed").fit(dm)
     labels = db.labels_
     # Number of clusters in labels
@@ -126,7 +126,7 @@ def clustering(dm):
     data_df.groupby(['cluster', 'ausrichtung'])['element'].apply(','.join).reset_index().to_csv("/home/bscheibel/PycharmProjects/dxf_reader/temporary/values_clusteredfrom_precomputed_dbscan.csv",sep=";", header=False, index=False)
     return data_df
 
-def cluster_and_preprocess(result):
+def cluster_and_preprocess(result,eps):
     result = get_average_xy(result) #input: array of arrays, output: either csv file or array of arrays
 
     #data = pandas.read_csv("/home/bscheibel/PycharmProjects/dxf_reader/temporary/list_to_csv_with_corner_points.csv", sep=";")
@@ -137,6 +137,6 @@ def cluster_and_preprocess(result):
         result = list(readCSV)
 
     dm = np.asarray([[dist(p1, p2) for p2 in result] for p1 in result])
-    clustering_result = clustering(dm)
+    clustering_result = clustering(dm,eps)
     return clustering_result
 

+ 4 - 3
main.py

@@ -13,12 +13,12 @@ def write_redis(uuid, result, db_params):
     db.set(uuid, result)
 
 
-def main(uuid, filepath, db):
+def main(uuid, filepath, db, eps):
     filename = order_bounding_boxes_in_each_block.pdf_to_html(uuid, filepath)
     print(filename)
     result = order_bounding_boxes_in_each_block.get_bound_box(filename)  ##get coordinates+text out of html file into array of arrays
     isos = order_bounding_boxes_in_each_block.extract_isos(result)
-    res = clustering_precomputed_dbscan.cluster_and_preprocess(result)
+    res = clustering_precomputed_dbscan.cluster_and_preprocess(result,eps)
     clean_arrays = read_from_clustered_merged.read("/home/bscheibel/PycharmProjects/dxf_reader/temporary/values_clusteredfrom_precomputed_dbscan.csv")
     pretty = regex_clean_new.print_clean(clean_arrays)
     res = organize_drawing_according_to_details_new.main_function(pretty)
@@ -35,6 +35,7 @@ if __name__ == "__main__":
     uuid = sys.argv[1]
     filename = sys.argv[2]
     db = sys.argv[3]
-    main(uuid,filename, db)
+    eps = sys.argv[4]
+    main(uuid,filename, db, eps)
 
 #main("33333", "/home/bscheibel/PycharmProjects/dxf_reader/drawings/GV_12.PDF", "localhost")

+ 1 - 1
organize_drawing_according_to_details_new.py

@@ -107,7 +107,7 @@ def main_function(result):
         section.append(list((coord_name,coord)))
     #print(section)
     if number == 0 | len(section)==0:
-        section.append(list(("No details",list((0,0,0,0)))))
+        section.append(list(("No details",list((000.000,000.000,100000000.000,10000000.000)))))
      #   print(section)
 
 

+ 5 - 0
regex_clean_new.py

@@ -37,13 +37,18 @@ def print_clean(dims): ##alles raus was nicht relevant ist! und zeichen ersetzen
                 dim = dim.replace('⌀', "Ø")
             reg12 = re.compile(r"(.*\d{1,4}\W?\d{0,4})\s?\+\s-\s?(\d{1,4}\W?\d{0,4})\s?(\d{1,4}\W?\d{0,3})") ##???? was machst du?? nach toleranzen suchen, mit +/- blabla
             reg13 = re.compile(r"(.*)\+\s\+\s(\d*\W\d*)\s(\d*\W\d*)(.*)")
+            reg14 = re.compile(r"(\+\s\d*\.?\d*)\s(\d*\.?\d*)\s(\+?\s?\-?\d*\.*\d*)")
             g = re.search(reg12, dim)
             f = re.search(reg13,dim)
+            e = re.search(reg14,dim)
             if g:
                 dim = re.sub(reg12, g.group(1) + " +" + g.group(2) + " -" + g.group(3), dim) # +/- toleranzen schön darstellen
                 #print(dim)
             if f:
                 dim = f.group(1) + "+" + f.group(2) + " +" + f.group(3) + f.group(4)
+            if e:
+                dim= e.group(2) +  + e.group(1) +  + e.group(3)
+            dim = dim.replace(" ,",".").replace(", ",".").replace(",",".")
             dims_new[dim] = coords
 
     #for dim in dims_new: