Browse Source

general tolerances included

bscheibel 5 years ago
parent
commit
0875c745cb
3 changed files with 9 additions and 3 deletions
  1. 3 1
      main.py
  2. 5 1
      order_bounding_boxes_in_each_block.py
  3. 1 1
      regex_clean_new.py

+ 3 - 1
main.py

@@ -23,7 +23,8 @@ def main(uuid, filepath, db, eps):
         else:
             eps = 1
     #print(eps)
-    isos = order_bounding_boxes_in_each_block.extract_isos(result)
+    isos, general_tol = order_bounding_boxes_in_each_block.extract_isos(result)
+    print(general_tol)
     res = clustering_precomputed_dbscan.cluster_and_preprocess(result,eps)
     clean_arrays = read_from_clustered_merged.read("/home/bscheibel/PycharmProjects/dxf_reader/temporary/values_clusteredfrom_precomputed_dbscan.csv")
     tables = order_bounding_boxes_in_each_block.get_tables(clean_arrays)
@@ -34,6 +35,7 @@ def main(uuid, filepath, db, eps):
     json_isos = json.dumps(isos)
     json_result = json.dumps(res)
     json_details =json.dumps(details_dict)
+    write_redis(uuid+"tol", general_tol,db)
     write_redis(uuid+"dims", json_result, db)
     write_redis(uuid+"isos",json_isos, db)
     write_redis(uuid+"eps", str(number_blocks)+","+str(number_words), db)

+ 5 - 1
order_bounding_boxes_in_each_block.py

@@ -63,6 +63,8 @@ def pdf_to_html(uuid,filepath):
 def extract_isos(result):
     reg = r"(ISO\s\d\d\d\d*\W?\d?\W?\d?)|(EN\s\d*)"
     details_ = []
+    reg_general = r"ISO\s?\d*\s*\W\s*[fmcv][HKL]"
+    general_tol = ""
     for element in result:
         new_arr = ""
         for x in element:
@@ -74,8 +76,10 @@ def extract_isos(result):
                     details_.append(f[0].replace(")",""))
                 if len(f[1]) != 0:
                     details_.append(f[1])
+        if re.search(reg_general, new_arr):
+            general_tol = new_arr
 
-    return details_
+    return details_, str(general_tol)
 
 
 def get_tables(result):

+ 1 - 1
regex_clean_new.py

@@ -4,7 +4,7 @@ import re
 
 def print_clean(dims): ##alles raus was nicht relevant ist! und zeichen ersetzen!
     dims_new = {}
-    reg_clean = r"[a-zA-Z]{4,}|^\d\s\d$|^[a-zA-Z]{2,}\d.*$|^[A-Z]{1}$|^mm$|^\d{2}\.\d{2}\.\d{4}|^-$|A\d|^\d{1}$|^[A-Za-z]{3,}\.?$|^\d{5}|^\d{1}\s\W\s\d"
+    reg_clean = r"ISO|[a-zA-Z]{4,}|^\d\s\d$|^[a-zA-Z]{2,}\d.*$|^[A-Z]{1}$|^mm$|^\d{2}\.\d{2}\.\d{4}|^-$|A\d|^\d{1}$|^[A-Za-z]{3,}\.?$|^\d{5}|^\d{1}\s\W\s\d"
     for dim in dims:
         if re.search(reg_clean, dim):
             continue