123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Created on Thu Dec 5 10:26:27 2019
- @author: tanya
- """
- import os
- import sys
- sys.path.append(os.getcwd())
- class SQLQueries:
- '''
- Summary of useful complicated sql queries
- '''
- def __init__(self):
- '''
- '''
- from cdplib.log import Log
- self._log = Log("SQLQueries")
- def get_combinations(self, instances: dict, table: str, columns: list = None) -> str:
- '''
- Returns a query to read data corresponding to combinations of given values for given columns
- specified in the argument instances.
- :param instances: a dictionary of form {"column_name_1": [val_1, val2, val3], "column_name_2": [val]}
- :param table: table name or subquery
- :param columns: list of column names to read from the table
- '''
- from copy import deepcopy
- instances = deepcopy(instances)
- if len(instances) == 0:
- return table[1:-1] # removed external paranthesis
- else:
- column, values = list(instances.items())[0]
- instances.pop(column)
- if table.startswith("(SELECT"):
- prefix = column + "_subquery."
- sub_query_name = " " + prefix[:-1] # removed the dot in the end
- else:
- prefix = ""
- sub_query_name = prefix
- if columns is None:
- columns_subquery = "*"
- else:
- columns_with_prefix = [prefix + c for c in columns]
- columns_subquery = ", ".join(columns_with_prefix)
- if len(values) > 1:
- table = "(SELECT DISTINCT {0} FROM {1}{2} WHERE {3}{4} IN {5})"\
- .format(columns_subquery, table, sub_query_name, prefix, column, tuple(values))
- else:
- table = "(SELECT DISTINCT {0} FROM {1}{2} WHERE {3}{4} = {5})"\
- .format(columns_subquery, table, sub_query_name, prefix, column, values[0])
- return self.get_combinations(instances, table, columns)
- def get_cooccurances(self, instances: dict, table: str, columns: list = None) -> str:
- '''
- Returns a query to read data corresponding to cooccurances of given values for given columns
- specified in the argument instances.
- :param instances: a dictionary of form {"column_name_1": [val_11, val12, val13],
- "column_name_2": [val_21, val22, val23]}
- (all the lists in the instances need to be of the same length)
- :param table: table name or subquery
- :param columns: list of column names to read from the table
- '''
- value_length = len(list(instances.values())[0])
- if not all([len(vals) == value_length for vals in instances.values()]):
- self._log.log_and_raise_error("All the values in instances should be of the same length")
- if columns is None:
- columns_subquery = "*"
- else:
- columns_subquery = ", ".join(columns)
- if len(instances) == 1:
- column = list(instances.keys())[0]
- values = instances[column]
- where_clause = "{0} IN {1}".format(column, values)
- else:
- and_statements = [" AND ".join(["{0} = {1}".format(column, instances[column][i])
- for column in instances.keys()]) for i in range(value_length)]
- and_statements = ["({})".format(s) for s in and_statements]
- where_clause = " OR ".join(and_statements)
- return "SELECT DISTINCT {0} FROM {1} WHERE {2}".format(columns_subquery, table, where_clause)
|