read_tables.py 2.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
  #import tabula
  #tables = tabula.read_pdf("iso_documents/ISO1101.PDF", multiple_tables=True)
  #for table in tables:
  # print(table)
  #pdftotext - layout!!!!
  #tabula.convert_into("iso_documents/ISO1101.PDF", "output.csv", output_format="csv", pages='all', multiple_tables=True)
  #df = tabula.read_pdf("iso_documents/ISO1101.PDF", pages='all', multiple_tables=True)
  #print(df)
  """def file_read(fname):
      content_array = []
      with open(fname) as f:
          # Content_list is the list that contains the read lines.
          for line in f:
              content_array.append(line.strip().replace(" ",""))
          print(content_array)"""
  #file_read('drawings/5129275_Rev01-GV12.txt')
  class UnionFind:
      """Disjoint-set (union-find) structure over arbitrary hashable elements.

      Elements are never registered explicitly: any element without an entry
      in ``parent`` is treated as the leader of its own single-element group.

      Attributes:
          rank: Maps a leader to its rank; a leader with no entry has rank 1.
          parent: Maps each non-leader element to its parent. Leaders are
              deliberately absent from this dict.
      """

      def __init__(self):
          self.rank = {}
          self.parent = {}

      def find(self, element):
          """Return the leader of the group that contains ``element``.

          The parent links are followed iteratively (so long chains cannot
          exhaust the recursion limit), and every element visited on the way
          is then re-pointed directly at the leader (path compression).
          """
          # First pass: walk up until an element with no parent is reached.
          leader = element
          while leader in self.parent:
              leader = self.parent[leader]

          # Second pass: compress the path just walked.
          while element in self.parent:
              next_element = self.parent[element]
              self.parent[element] = leader
              element = next_element
          return leader

      def union(self, leader1, leader2):
          """Merge the groups that contain ``leader1`` and ``leader2``.

          Both arguments are resolved to their current leaders first, so any
          member of a group may be passed. Merging a group with itself is a
          no-op; without that guard a leader would become its own parent and
          ``find`` would never terminate.
          """
          leader1 = self.find(leader1)
          leader2 = self.find(leader2)
          if leader1 == leader2:
              return

          rank1 = self.rank.get(leader1, 1)
          rank2 = self.rank.get(leader2, 1)
          if rank1 > rank2:  # union by rank: attach the shallower tree
              self.parent[leader2] = leader1
          elif rank2 > rank1:
              self.parent[leader1] = leader2
          else:  # ranks are equal
              self.parent[leader2] = leader1  # favor leader1 arbitrarily
              self.rank[leader1] = rank1 + 1  # merged tree got one level deeper
  # Text file whose non-blank characters are grouped into connected components.
  # NOTE(review): presumably a layout-preserving text export of a drawing
  # (an earlier comment in this file mentions "pdftotext - layout") - confirm.
  DRAWING_TEXT_PATH = '/home/bscheibel/PycharmProjects/dxf_reader/drawings/5129275_Rev01-GV12.txt'

  nodes = set()  # (x, y) coordinates of every non-blank character seen so far
  groups = UnionFind()
  with open(DRAWING_TEXT_PATH) as f:
      for y, line in enumerate(f):  # y = line index
          for x, char in enumerate(line):  # x = column index within the line
              if char.isspace():
                  # Blanks are not content. This also skips the trailing
                  # newline that file iteration keeps on every line, which
                  # would otherwise become a node and could join text across
                  # empty lines.
                  continue
              nodes.add((x, y))
              # Only neighbors that have already been read can exist: the
              # three cells on the previous line and the cell to the left.
              neighbors = [(x - 1, y - 1),  # up-left
                           (x, y - 1),      # up
                           (x + 1, y - 1),  # up-right
                           (x - 1, y)]      # left
              for neighbor in neighbors:
                  if neighbor not in nodes:
                      continue
                  # Re-resolve on every iteration: a previous union may have
                  # changed the leader of the current cell.
                  my_group = groups.find((x, y))
                  neighbor_group = groups.find(neighbor)
                  if my_group != neighbor_group:
                      groups.union(my_group, neighbor_group)

  # finally, count the number of unique groups (one leader per group)
  number_of_groups = len({groups.find(n) for n in nodes})