SQLQueries.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Dec 5 10:26:27 2019
  5. @author: tanya
  6. """
  7. import os
  8. import sys
  9. sys.path.append(os.getcwd())
  10. class SQLQueries:
  11. '''
  12. Summary of useful complicated sql queries
  13. '''
  14. def __init__(self):
  15. '''
  16. '''
  17. from cdplib.log import Log
  18. self._log = Log("SQLQueries")
  19. def get_combinations(self, instances: dict, table: str, columns: list = None) -> str:
  20. '''
  21. Returns a query to read data corresponding to combinations of given values for given columns
  22. specified in the argument instances.
  23. :param instances: a dictionary of form {"column_name_1": [val_1, val2, val3], "column_name_2": [val]}
  24. :param table: table name or subquery
  25. :param columns: list of column names to read from the table
  26. '''
  27. from copy import deepcopy
  28. instances = deepcopy(instances)
  29. if len(instances) == 0:
  30. return table[1:-1] # removed external paranthesis
  31. else:
  32. column, values = list(instances.items())[0]
  33. instances.pop(column)
  34. if table.startswith("(SELECT"):
  35. prefix = column + "_subquery."
  36. sub_query_name = " " + prefix[:-1] # removed the dot in the end
  37. else:
  38. prefix = ""
  39. sub_query_name = prefix
  40. if columns is None:
  41. columns_subquery = "*"
  42. else:
  43. columns_with_prefix = [prefix + c for c in columns]
  44. columns_subquery = ", ".join(columns_with_prefix)
  45. if len(values) > 1:
  46. table = "(SELECT DISTINCT {0} FROM {1}{2} WHERE {3}{4} IN {5})"\
  47. .format(columns_subquery, table, sub_query_name, prefix, column, tuple(values))
  48. else:
  49. table = "(SELECT DISTINCT {0} FROM {1}{2} WHERE {3}{4} = {5})"\
  50. .format(columns_subquery, table, sub_query_name, prefix, column, values[0])
  51. return self.get_combinations(instances, table, columns)
  52. def get_cooccurances(self, instances: dict, table: str, columns: list = None) -> str:
  53. '''
  54. Returns a query to read data corresponding to cooccurances of given values for given columns
  55. specified in the argument instances.
  56. :param instances: a dictionary of form {"column_name_1": [val_11, val12, val13],
  57. "column_name_2": [val_21, val22, val23]}
  58. (all the lists in the instances need to be of the same length)
  59. :param table: table name or subquery
  60. :param columns: list of column names to read from the table
  61. '''
  62. value_length = len(list(instances.values())[0])
  63. if not all([len(vals) == value_length for vals in instances.values()]):
  64. self._log.log_and_raise_error("All the values in instances should be of the same length")
  65. if columns is None:
  66. columns_subquery = "*"
  67. else:
  68. columns_subquery = ", ".join(columns)
  69. if len(instances) == 1:
  70. column = list(instances.keys())[0]
  71. values = instances[column]
  72. where_clause = "{0} IN {1}".format(column, values)
  73. else:
  74. and_statements = [" AND ".join(["{0} = {1}".format(column, instances[column][i])
  75. for column in instances.keys()]) for i in range(value_length)]
  76. and_statements = ["({})".format(s) for s in and_statements]
  77. where_clause = " OR ".join(and_statements)
  78. return "SELECT DISTINCT {0} FROM {1} WHERE {2}".format(columns_subquery, table, where_clause)