|
@@ -0,0 +1,108 @@
|
|
|
|
+#!/usr/bin/env python3
|
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
|
+"""
|
|
|
|
+Created on Thu Dec 5 10:26:27 2019
|
|
|
|
+
|
|
|
|
+@author: tanya
|
|
|
|
+"""
|
|
|
|
+
|
|
|
|
+import os
|
|
|
|
+import sys
|
|
|
|
+
|
|
|
|
+sys.path.append(os.getcwd())
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+class SQLQueries:
|
|
|
|
+ '''
|
|
|
|
+ Summary of useful complicated sql queries
|
|
|
|
+ '''
|
|
|
|
+ def __init__(self):
|
|
|
|
+ '''
|
|
|
|
+ '''
|
|
|
|
+ from cdplib.log import Log
|
|
|
|
+
|
|
|
|
+ self._log = Log("SQLQueries")
|
|
|
|
+
|
|
|
|
+ def get_combinations(self, instances: dict, table: str, columns: list = None) -> str:
|
|
|
|
+ '''
|
|
|
|
+ Returns a query to read data corresponding to combinations of given values for given columns
|
|
|
|
+ specified in the argument instances.
|
|
|
|
+
|
|
|
|
+ :param instances: a dictionary of form {"column_name_1": [val_1, val2, val3], "column_name_2": [val]}
|
|
|
|
+ :param table: table name or subquery
|
|
|
|
+ :param columns: list of column names to read from the table
|
|
|
|
+ '''
|
|
|
|
+ from copy import deepcopy
|
|
|
|
+
|
|
|
|
+ instances = deepcopy(instances)
|
|
|
|
+
|
|
|
|
+ if len(instances) == 0:
|
|
|
|
+ return table[1:-1] # removed external paranthesis
|
|
|
|
+
|
|
|
|
+ else:
|
|
|
|
+
|
|
|
|
+ column, values = list(instances.items())[0]
|
|
|
|
+
|
|
|
|
+ instances.pop(column)
|
|
|
|
+
|
|
|
|
+ if table.startswith("(SELECT"):
|
|
|
|
+ prefix = column + "_subquery."
|
|
|
|
+ sub_query_name = " " + prefix[:-1] # removed the dot in the end
|
|
|
|
+ else:
|
|
|
|
+ prefix = ""
|
|
|
|
+ sub_query_name = prefix
|
|
|
|
+
|
|
|
|
+ if columns is None:
|
|
|
|
+ columns_subquery = "*"
|
|
|
|
+ else:
|
|
|
|
+ columns_with_prefix = [prefix + c for c in columns]
|
|
|
|
+
|
|
|
|
+ columns_subquery = ", ".join(columns_with_prefix)
|
|
|
|
+
|
|
|
|
+ if len(values) > 1:
|
|
|
|
+ table = "(SELECT DISTINCT {0} FROM {1}{2} WHERE {3}{4} IN {5})"\
|
|
|
|
+ .format(columns_subquery, table, sub_query_name, prefix, column, tuple(values))
|
|
|
|
+
|
|
|
|
+ else:
|
|
|
|
+ table = "(SELECT DISTINCT {0} FROM {1}{2} WHERE {3}{4} = {5})"\
|
|
|
|
+ .format(columns_subquery, table, sub_query_name, prefix, column, values[0])
|
|
|
|
+
|
|
|
|
+ return self.get_combinations(instances, table, columns)
|
|
|
|
+
|
|
|
|
+ def get_cooccurances(self, instances: dict, table: str, columns: list = None) -> str:
|
|
|
|
+ '''
|
|
|
|
+ Returns a query to read data corresponding to cooccurances of given values for given columns
|
|
|
|
+ specified in the argument instances.
|
|
|
|
+
|
|
|
|
+ :param instances: a dictionary of form {"column_name_1": [val_11, val12, val13],
|
|
|
|
+ "column_name_2": [val_21, val22, val23]}
|
|
|
|
+ (all the lists in the instances need to be of the same length)
|
|
|
|
+ :param table: table name or subquery
|
|
|
|
+ :param columns: list of column names to read from the table
|
|
|
|
+ '''
|
|
|
|
+ value_length = len(list(instances.values())[0])
|
|
|
|
+
|
|
|
|
+ if not all([len(vals) == value_length for vals in instances.values()]):
|
|
|
|
+ self._log.log_and_raise_error("All the values in instances should be of the same length")
|
|
|
|
+
|
|
|
|
+ if columns is None:
|
|
|
|
+ columns_subquery = "*"
|
|
|
|
+ else:
|
|
|
|
+ columns_subquery = ", ".join(columns)
|
|
|
|
+
|
|
|
|
+ if len(instances) == 1:
|
|
|
|
+ column = list(instances.keys())[0]
|
|
|
|
+
|
|
|
|
+ values = instances[column]
|
|
|
|
+
|
|
|
|
+ where_clause = "{0} IN {1}".format(column, values)
|
|
|
|
+
|
|
|
|
+ else:
|
|
|
|
+ and_statements = [" AND ".join(["{0} = {1}".format(column, instances[column][i])
|
|
|
|
+ for column in instances.keys()]) for i in range(value_length)]
|
|
|
|
+
|
|
|
|
+ and_statements = ["({})".format(s) for s in and_statements]
|
|
|
|
+
|
|
|
|
+ where_clause = " OR ".join(and_statements)
|
|
|
|
+
|
|
|
|
+ return "SELECT DISTINCT {0} FROM {1} WHERE {2}".format(columns_subquery, table, where_clause)
|