Browse Source

Included "." in the regex for showing dimensions

bscheibel 4 years ago
parent
commit
c7cc7a28b1
2 changed files with 2 additions and 2 deletions
  1. 1 1
      order_bounding_boxes_in_each_block.py
  2. 1 1
      regex_clean_new.py

+ 1 - 1
order_bounding_boxes_in_each_block.py

@@ -55,7 +55,7 @@ def get_bound_box(file):
     return new_all_elements, number_blocks, number_words
 
 def pdf_to_html(uuid,filepath):
-    filename = str(uuid)+"out.html"
+    filename = "app/temporary/" +str(uuid)+"out.html"
     subprocess.call(['pdftotext', '-bbox-layout',
                      filepath, filename])
     return filename

+ 1 - 1
regex_clean_new.py

@@ -38,7 +38,7 @@ def print_clean(dims): ##alles raus was nicht relevant ist! und zeichen ersetzen
                 dim = dim.replace('⌀', "Ø")
             reg12 = re.compile(r"(.*\d{1,4}\W?\d{0,4})\s?\+\s-\s?(\d{1,4}\W?\d{0,4})\s?(\d{1,4}\W?\d{0,3})") ##???? was machst du?? nach toleranzen suchen, mit +/- blabla
             reg13 = re.compile(r"(.*)\+\s\+\s(\d*\W\d*)\s(\d*\W\d*)(.*)")
-            reg14 = re.compile(r"(\+\s\d*,?\d*)\s(\d*,?\d*)\s(\+?\s?\-?\d*,?\d*)")
+            reg14 = re.compile(r"(\+\s?\d*,?.?\d*)\s*(\d*,?.?\d*)\s*(\+?\s?\-?\s?\d*,?.?\d*)")
             g = re.search(reg12, dim)
             f = re.search(reg13, dim)
             e = re.search(reg14, dim)