Browse Source

started with regex extraction

bscheibel 4 years ago
parent
commit
9b527a49c8
1 changed files with 3 additions and 0 deletions
  1. 3 0
      regex_extraction.py

+ 3 - 0
regex_extraction.py

@@ -2,6 +2,8 @@
 import re
 
 regex = r"(\S+\s{1,3}?\S*\s?\S*\S*\s?\S*\S*\s?\S*\S*\s?\S*\S*\s+)"
+regex1 = r"([A-Z]\s?\W\s?\d\d?\s?\s?\W\s?\d\d?\s?\W)" # to get the designations (Bezeichnungen) out
+regex2= r"([a-zA-Z]{3,})" # pull out all words???
 extracted_dimensions = []
 file=open('/home/bscheibel/PycharmProjects/dxf_reader/drawings/5152166_Rev04.txt', 'r')
 text= file.read()
@@ -11,4 +13,5 @@ for match in matches:
         extracted_dimensions.append(match.strip())
 
 
+
 print(extracted_dimensions)