import sys
import os
import yaml
import shutil
import pprint
import lxml.etree as ET
import lxml.builder
import datetime
from datetime import datetime as DT
import time as t
import json
import sqlite3
from sqlite3 import Error
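
# This script sorts the CPEE YAML event logs of the lowerhousing production process
# into level folders (level1 = order processing, level2 = production, level3 =
# machine-specific production, level4 = machining) and then replays them into an
# SQLite database (tables Instances, LogEntries, Machining) to reconstruct the
# call hierarchy between process instances (Instances.called_by).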

def create_connection(db):
    try:
        conn = sqlite3.connect(db)
        return conn
    except Error as e:
        print(e)
        return None

def check_folder(target_path, folder):
    if not os.path.exists(os.path.join(target_path, folder)):
        os.makedirs(os.path.join(target_path, folder))
    return os.path.join(target_path, folder)

def get_target_path(prod_step, root_path):
    switch = {
        "Lowerhousing Order Processing": "level1",
        "Lowerhousing Production": "level2",
        "Lowerhousing Turn 1 Production": "level3",
        "Lowerhousing Turn 2 Production": "level3",
        "Lowerhousing Mill 1 Production": "level3",
        "Lowerhousing Mill 2 Production": "level3",
        "Lowerhousing Turn 1 Machining": "level4",
        "Lowerhousing Turn 2 Machining": "level4",
        "Lowerhousing Mill 1 Machining": "level4",
        "Lowerhousing Mill 2 Machining": "level4"
    }
    return check_folder(os.path.join(root_path, "task2/sortedTemplates"), switch[prod_step])
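
# Example: get_target_path("Lowerhousing Turn 1 Production", root_path) creates
# (if necessary) and returns <root_path>/task2/sortedTemplates/level3.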

def check_timestamp(time, rootNode, search):
    # convert the timestamp string to a datetime object
    call_node_list = []
    time_conv = time.split("+")[0]
    datetime_object = DT.strptime(time_conv, '%Y-%m-%dT%H:%M:%S')
    time_upper = str(datetime_object + datetime.timedelta(0, 2)) + "+02:00"
    time_lower = str(datetime_object - datetime.timedelta(0, 2)) + "+02:00"
    time_lower = time_lower.split(" ")[0] + "T" + time_lower.split(" ")[1]
    time_upper = time_upper.split(" ")[0] + "T" + time_upper.split(" ")[1]
    # find a suitable timestamp among all timestamps at the call level (+/- 2 seconds)
    time_range = rootNode.xpath("//" + search + "/@time")
    time_range.sort()
    for ts in time_range:
        if time_lower <= ts <= time_upper:
            call_node_list = rootNode.xpath("//" + search + "[@time = '" + ts + "']")
            break
        if ts > time_upper:
            break
    return call_node_list
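
# Illustrative use (element name "call" is only an example): given an lxml root with
# <call time="..."> children, check_timestamp("2018-06-06T12:00:00+02:00", rootNode, "call")
# returns the call nodes whose @time attribute lies within +/- 2 seconds of the timestamp.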

def create_json(root_path, data, file):
    with open(os.path.join(root_path, 'task2/' + file), 'w') as fp:
        json.dump(data, fp, indent=4)

def categorize_logs(root_path, logs_path):
    origin_path = logs_path
    for root, dirs, files in os.walk(origin_path):
        for file in files:
            with open(os.path.join(root, file), "r") as input_file:
                results = yaml.safe_load_all(input_file)
                target_path = ""
                for value in results:
                    if 'event' in value:
                        try:
                            val = value['event']['list']['data_values']['info']
                            target_path = get_target_path(val, root_path)
                            break
                        except (KeyError, AttributeError):
                            continue
            if target_path:
                shutil.move(os.path.join(root, file), os.path.join(target_path, file))
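
# categorize_logs() decides the target level from the 'info' entry in an event's
# data_values (e.g. "Lowerhousing Production"), which matches the keys used in
# get_target_path() above; logs without such an entry are left in place.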

def xml_check(xml_file):
    if not os.path.isfile(xml_file):
        rootNode = ET.Element("ProcessInstances")
        tree = ET.ElementTree(rootNode)
        level1 = ET.SubElement(rootNode, "level1")
    else:
        tree = ET.parse(xml_file, ET.XMLParser(remove_blank_text=True))
        rootNode = tree.getroot()
        if tree.find('level1') is None:
            level1 = ET.SubElement(rootNode, "level1")
        else:
            level1 = tree.find('level1')
    return [tree, rootNode, level1]
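
# Usage sketch (hypothetical file name): tree, rootNode, level1 = xml_check("processes.xml")
# returns the parsed tree, its root element and the <level1> child, creating the
# <ProcessInstances> root and the <level1> element when they do not exist yet.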

def recreate_process(root_path, conn, level):
    # variables
    cur = conn.cursor()
    origin_path = os.path.join(root_path, "task2/sortedTemplates/level" + str(level))
    counter = 0
    # traverse through all logs
    for root, dirs, files in os.walk(origin_path):
        for file in files:
            added = False
            print(file)
            with open(os.path.join(root, file), "r") as input_file:
                results = yaml.safe_load_all(input_file)
                # traverse through a single log
                for value in results:
                    if 'log' in value:
                        logname = value['log']['trace']['cpee:name']
                        uuid = value['log']['trace']['cpee:uuid']
                        instance = value['log']['trace']['concept:name']
                    if 'event' in value and value['event']['id:id'] != "external":
                        try:
                            time = value['event']['time:timestamp']
                            activity = value['event']['cpee:lifecycle:transition']
                            name = value['event']['concept:name']
                            step_id = value['event']['id:id']
                            val = ""
                            if value['event']['lifecycle:transition'] == "start":
                                val = json.dumps(value['event']['list']['data_send'])
                            if 'data_receiver' in value['event']['list'].keys() and name != 'Fetch':
                                val = json.dumps(value['event']['list']['data_receiver'])
                            if not added:
                                cur.execute("Select called_by from instances where instance = '" + instance + "'")
                                parent_inst = cur.fetchone()
                                if parent_inst is None:
                                    # no known parent: look for a calling entry one level up within +/- 2 seconds
                                    datetime_object = DT.strptime(time.split('+')[0], '%Y-%m-%dT%H:%M:%S')
                                    time_upper = str(datetime_object + datetime.timedelta(0, 2)) + "+02:00"
                                    time_lower = str(datetime_object - datetime.timedelta(0, 2)) + "+02:00"
                                    time_lower = time_lower.split(" ")[0] + "T" + time_lower.split(" ")[1]
                                    time_upper = time_upper.split(" ")[0] + "T" + time_upper.split(" ")[1]
                                    query = "Select log.instance from LogEntries log join instances i on i.instance = log.instance where i.level='" + str(level - 1) + "' and log.activity ='activity/calling' and log.timestamp>='" + time_lower + "' and log.timestamp<='" + time_upper + "'"
                                    cur.execute(query)
                                    parent_inst = cur.fetchone()
                                    if parent_inst is None:
                                        cur.execute("Insert into Instances VALUES ('" + instance + "','" + uuid + "', '" + logname + "', '" + str(level) + "','Null')")
                                    else:
                                        cur.execute("Insert into Instances VALUES ('" + instance + "','" + uuid + "', '" + logname + "', '" + str(level) + "','" + parent_inst[0] + "')")
                                else:
                                    query = "Update Instances set uuid = '" + uuid + "', name = '" + logname + "' where called_by = '" + parent_inst[0] + "' and instance='" + instance + "'"
                                    cur.execute(query)
                                added = True
                            # to add called (child) instances
                            if 'data_receiver' in value['event']['list'].keys() and activity == 'activity/receiving':
                                for attr in value['event']['list']['data_receiver']:
                                    if type(attr['data']) is dict and 'CPEE-INSTANCE' in attr['data']:
                                        c_instance = attr['data']['CPEE-INSTANCE'].split('/')[-1]
                                        query = "Insert into Instances VALUES ('" + c_instance + "','Null', 'Null', '" + str(level + 1) + "','" + instance + "')"
                                        cur.execute(query)
                                        conn.commit()
                                        break
                                    elif attr['name'] == 'instance':
                                        c_instance = attr['data']
                                        query = "Insert into Instances VALUES ('" + c_instance + "','Null', 'Null', '" + str(level + 1) + "','" + instance + "')"
                                        cur.execute(query)
                                        conn.commit()
                                        break
                            # to avoid adding machine logs after the final status has been received
                            # (lets_continue == False): store this entry and stop processing the log
                            if step_id == 'a3' and name == 'Status' and activity == 'dataelements/change':
                                if value['event']['list']['data_values']['lets_continue'] == False:
                                    query = "Insert into LogEntries VALUES ('" + step_id + "','" + time + "', '" + name + "','" + activity + "', '" + val + "','" + instance + "' )"
                                    cur.execute(query)
                                    conn.commit()
                                    break
                            query = "Insert into LogEntries VALUES ('" + step_id + "','" + time + "', '" + name + "','" + activity + "', '" + val + "','" + instance + "' )"
                            cur.execute(query)
                            conn.commit()
                        except KeyError:
                            counter += 1
                            continue
                        except sqlite3.IntegrityError:
                            counter += 1
                        except Error as g:
                            counter += 1
                            print(g)
                            print("Unexpected error!!")
                        try:
                            # to handle machining data delivered by the Fetch step
                            if step_id == 'a1' and name == 'Fetch' and activity == 'activity/receiving':
                                for attr in value['event']['list']['data_receiver']:
                                    for entry in attr['data']:
                                        m_id = entry['ID']
                                        name = entry['name']
                                        val = entry['value']
                                        clientHandle = entry['meta']['ClientHandle']
                                        statusCode = entry['meta']['StatusCode']
                                        server_timestamp = entry['timestamp']
                                        query = "insert into Machining (timestamp, clientHandle, m_id, status, name, value, level4_step_id, level4_activity, level4_timestamp) VALUES('" + server_timestamp + "','" + clientHandle + "','" + m_id + "','" + statusCode + "','" + name + "','" + val + "','" + step_id + "','" + activity + "','" + time + "')"
                                        cur.execute(query)
                                        conn.commit()
                        except KeyError:
                            counter += 1
                            continue
                        except sqlite3.IntegrityError:
                            print(query)
                            counter += 1
                        except Exception as e:
                            counter += 1
                            print(e)
                            print("Unexpected error!!")
                # print(counter)
                counter = 0
            target_path = check_folder(os.path.join(root_path, 'task2/processed'), 'level' + str(level))
            shutil.move(os.path.join(origin_path, file), target_path)
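
# The inserts above build SQL by string concatenation, which can fail whenever a
# value contains a single quote. A minimal sketch of the same LogEntries insert
# using sqlite3 parameter binding instead (illustrative only, assuming the same
# column order as in the concatenated VALUES list above):
#
#     cur.execute(
#         "INSERT INTO LogEntries VALUES (?, ?, ?, ?, ?, ?)",
#         (step_id, time, name, activity, val, instance),
#     )
#     conn.commit()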

def recreate_process_level1(root_path, conn):
    # variables
    cur = conn.cursor()
    origin_path = os.path.join(root_path, "task2/sortedTemplates/level1")
    # traverse through all logs
    for root, dirs, files in os.walk(origin_path):
        for file in files:
            first = True
            print(file)
            with open(os.path.join(root, file), "r") as input_file:
                results = yaml.safe_load_all(input_file)
                # traverse through a single log
                for value in results:
                    try:
                        if 'log' in value:
                            logname = value['log']['trace']['cpee:name']
                            uuid = value['log']['trace']['cpee:uuid']
                            instance = value['log']['trace']['concept:name']
                            query = "Insert into Instances VALUES ('" + instance + "','" + uuid + "', '" + logname + "', '1','Null')"
                            cur.execute(query)
                        if 'event' in value:
                            time = value['event']['time:timestamp']
                            val = ""
                            if value['event']['lifecycle:transition'] == "start":
                                val = json.dumps(value['event']['list']['data_send'])
                            if 'data_receiver' in value['event']['list'].keys():
                                val = json.dumps(value['event']['list']['data_receiver'])
                            step_id = value['event']['id:id']
                            activity = value['event']['cpee:lifecycle:transition']
                            name = value['event']['concept:name']
                            query = "Insert into LogEntries VALUES ('" + step_id + "','" + time + "', '" + name + "','" + activity + "', '" + val + "','" + instance + "' )"
                            cur.execute(query)
                            # register called level-2 instances referenced in the receiver data
                            for attr in value['event']['list']['data_receiver']:
                                if attr['name'] == "url":
                                    c_instance = attr['data'].split('/')[-1]
                                    query = "Insert into Instances VALUES ('" + c_instance + "','Null', 'Null', '2','" + instance + "')"
                                    cur.execute(query)
                            conn.commit()
                    except KeyError:
                        continue
            target_path = check_folder(os.path.join(root_path, 'task2/processed'), 'level1')
            shutil.move(os.path.join(origin_path, file), target_path)
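
# Note: the script below processes the levels in order (1 to 4), because each
# recreate_process(level) call looks up parent instances among the 'activity/calling'
# entries that the previous level has already written to the database.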

root_path = os.getcwd()
db = os.path.join(root_path, "BIII.db")
conn = create_connection(db)
# conn.execute('delete from Machining')
# conn.execute('delete from LogEntries')
# conn.execute('delete from Instances')
logs_path = os.path.join(root_path, "lowerhousing/logs/production")
categorize_logs(root_path, logs_path)
recreate_process_level1(root_path, conn)
recreate_process(root_path, conn, 2)
recreate_process(root_path, conn, 3)
recreate_process(root_path, conn, 4)
# recreate_process_level3(root_path, conn, "level3")
# recreate_process_level4(root_path, conn, "level4")
conn.close()

'''
Legacy per-level variants, kept in this string for reference; the generic
recreate_process() above replaces them.

def recreate_process_level3(root_path, conn, level):
    # variables
    cur = conn.cursor()
    origin_path = os.path.join(root_path, "task2/sortedTemplates/" + level)
    # traverse through all logs
    for root, dirs, files in os.walk(origin_path):
        for file in files:
            added = False
            print(file)
            with open(os.path.join(root, file), "r") as input_file:
                results = yaml.load_all(input_file)
                # traverse through a single log
                for value in results:
                    try:
                        if 'log' in value:
                            logname = value['log']['trace']['cpee:name']
                            uuid = value['log']['trace']['cpee:uuid']
                            instance = value['log']['trace']['concept:name']
                        if 'event' in value and not value['event']['id:id'] == "external":
                            time = value['event']['time:timestamp']
                            activity = value['event']['cpee:lifecycle:transition']
                            name = value['event']['concept:name']
                            step_id = value['event']['id:id']
                            val = ""
                            if value['event']['lifecycle:transition'] == "start":
                                val = json.dumps(value['event']['list']['data_send'])
                            if 'data_receiver' in value['event']['list'].keys():
                                val = json.dumps(value['event']['list']['data_receiver'])
                            # print(val)
                            if not added:
                                # print("Select called_by from instances where instance = '" + instance + "'")
                                cur.execute("Select called_by from instances where instance = '" + instance + "'")
                                parent_inst = cur.fetchone()
                                if parent_inst is None:
                                    datetime_object = DT.strptime(time.split('+')[0], '%Y-%m-%dT%H:%M:%S')
                                    time_upper = str(datetime_object + datetime.timedelta(0, 2)) + "+02:00"
                                    time_lower = str(datetime_object - datetime.timedelta(0, 2)) + "+02:00"
                                    time_lower = time_lower.split(" ")[0] + "T" + time_lower.split(" ")[1]
                                    time_upper = time_upper.split(" ")[0] + "T" + time_upper.split(" ")[1]
                                    cur.execute("Select instance from LogEntries where timestamp>='" + time_lower + "' and timestamp<='" + time_upper + "'")
                                    if cur.fetchone() is None:
                                        cur.execute("Insert into Instances VALUES ('" + instance + "','" + uuid + "', '" + logname + "', '3','Null')")
                                        conn.commit()
                                else:
                                    cur.execute("Update Instances set uuid = '" + uuid + "', name = '" + logname + "' where called_by = '" + parent_inst[0] + "'")
                                    conn.commit()
                                added = True
                            cur.execute("Insert into LogEntries VALUES ('" + step_id + "','" + time + "', '" + name + "','" + activity + "', '" + val + "','" + instance + "' )")
                            conn.commit()
                            if step_id == 'a1':
                                for attr in value['event']['list']['data_receiver']:
                                    if type(attr['data']) is dict:
                                        c_instance = attr['data']['CPEE-INSTANCE'].split('/')[-1]
                                    else:
                                        c_instance = attr['data']
                                    cur.execute("Insert into Instances VALUES ('" + c_instance + "','Null', 'Null', '4','" + instance + "')")
                                    conn.commit()
                                    break
                input_file.close()
            target_path = check_folder(os.path.join(root_path, 'task2/processed'), 'level3')
            # shutil.move(os.path.join(origin_path, file), target_path)
def recreate_process_level2(root_path, conn, level):
    # variables
    cur = conn.cursor()
    origin_path = os.path.join(root_path, "task2/sortedTemplates/" + level)
    # traverse through all logs
    for root, dirs, files in os.walk(origin_path):
        for file in files:
            added = False
            print(file)
            with open(os.path.join(root, file), "r") as input_file:
                results = yaml.load_all(input_file)
                # traverse through a single log
                for value in results:
                    try:
                        if 'log' in value:
                            logname = value['log']['trace']['cpee:name']
                            uuid = value['log']['trace']['cpee:uuid']
                            instance = value['log']['trace']['concept:name']
                        if 'event' in value and not value['event']['id:id'] == "external":
                            time = value['event']['time:timestamp']
                            activity = value['event']['cpee:lifecycle:transition']
                            name = value['event']['concept:name']
                            step_id = value['event']['id:id']
                            val = ""
                            if value['event']['lifecycle:transition'] == "start":
                                val = json.dumps(value['event']['list']['data_send'])
                            if 'data_receiver' in value['event']['list'].keys():
                                val = json.dumps(value['event']['list']['data_receiver'])
                            # print(val)
                            if not added:
                                cur.execute("Select called_by from instances where instance = '" + instance + "'")
                                parent_inst = cur.fetchone()
                                if parent_inst is None:
                                    datetime_object = DT.strptime(time.split('+')[0], '%Y-%m-%dT%H:%M:%S')
                                    time_upper = str(datetime_object + datetime.timedelta(0, 2)) + "+02:00"
                                    time_lower = str(datetime_object - datetime.timedelta(0, 2)) + "+02:00"
                                    time_lower = time_lower.split(" ")[0] + "T" + time_lower.split(" ")[1]
                                    time_upper = time_upper.split(" ")[0] + "T" + time_upper.split(" ")[1]
                                    query = "Select log.instance from LogEntries log join instances i on i.instance = log.instance where i.level='1' and log.activity ='activity/calling' and log.timestamp>='" + time_lower + "' and log.timestamp<='" + time_upper + "'"
                                    cur.execute(query)
                                    parent_inst = cur.fetchone()
                                    if parent_inst is None:
                                        cur.execute("Insert into Instances VALUES ('" + instance + "','" + uuid + "', '" + logname + "', '2','Null')")
                                        # conn.commit()
                                    else:
                                        cur.execute("Update Instances set uuid = '" + uuid + "', name = '" + logname + "' where called_by = '" + parent_inst[0] + "' and instance='" + instance + "'")
                                        # conn.commit()
                                added = True
                            cur.execute("Insert into LogEntries VALUES ('" + step_id + "','" + time + "', '" + name + "','" + activity + "', '" + val + "','" + instance + "' )")
                            conn.commit()
                            if 'data_receiver' in value['event']['list'].keys():
                                # if step_id == 'a1':
                                for attr in value['event']['list']['data_receiver']:
                                    if type(attr['data']) is dict:
                                        c_instance = attr['data']['CPEE-INSTANCE'].split('/')[-1]
                                    else:
                                        c_instance = attr['data']
                                    cur.execute("Insert into Instances VALUES ('" + c_instance + "','Null', 'Null', '3','" + instance + "')")
                                    break
                                conn.commit()
                            if step_id == 'a1':
                                for attr in value['event']['list']['data_receiver']:
                                    if type(attr['data']) is dict:
                                        c_instance = attr['data']['CPEE-INSTANCE'].split('/')[-1]
                                    else:
                                        c_instance = attr['data']
                                    cur.execute("Insert into Instances VALUES ('" + c_instance + "','Null', 'Null', '4','" + instance + "')")
                                    conn.commit()
                                    break
                    except KeyError as e:
                        print(e)
                        print(activity)
                        print(time)
                        continue
                # conn.commit()
                input_file.close()
            target_path = check_folder(os.path.join(root_path, 'task2/processed'), 'level2')
            # shutil.move(os.path.join(origin_path, file), target_path)
'''