|
@@ -0,0 +1,390 @@
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+from logger import *
|
|
|
+from ua_constants import *
|
|
|
+import tempfile
|
|
|
+import xml.dom.minidom as dom
|
|
|
+import os
|
|
|
+import string
|
|
|
+from collections import Counter
|
|
|
+
|
|
|
+from ua_namespace import opcua_node_id_t
|
|
|
+
|
|
|
class preProcessDocument:
    """ preProcessDocument

        Wraps a single NodeSet XML file that is to be preprocessed. The file
        is parsed with minidom on construction; analyze() then collects the
        nodes, references and namespace hints it contains, the reassign*()
        methods rewrite namespace indices in the parsed tree, and finalize()
        serialises the result into a temporary file.
    """
    # Class-level defaults. __init__ rebinds every one of them on the
    # instance, so the mutable lists below are not shared between documents.
    originXML = ''                 # path of the source XML file
    targetXML = ()                 # (fd, path) tuple returned by tempfile.mkstemp
    nodeset = ''                   # minidom document once parsing succeeded
    parseOK = False                # False if parsing failed or UANodeSet count != 1
    containedNodes = []            # (opcua_node_id_t, DOM element) per node with a NodeId
    referencedNodes = []           # (opcua_node_id_t, DOM element) per reference entry
    namespaceOrder = []            # (int index, uri) taken from xmlns:s<N> attributes
    namespaceQualifiers = []       # qualifier names taken from xmlns:<name> attributes
    referencedNamesSpaceUris = []  # text of the children of <NamespaceUris>, in order

    def __init__(self, originXML):
        """ __init__

            Reserves a temporary output file and parses originXML. Parse
            problems do not raise; they are logged and recorded in parseOK.

            originXML: path of the NodeSet XML file to load
        """
        self.originXML = originXML
        self.targetXML = tempfile.mkstemp(prefix=os.path.basename(originXML)+"_preProcessed-" ,suffix=".xml")
        self.parseOK = True
        self.containedNodes = []
        self.referencedNodes = []
        self.namespaceOrder = []
        self.referencedNamesSpaceUris = []
        self.namespaceQualifiers = []
        try:
            self.nodeset = dom.parse(originXML)
            nodesetCount = len(self.nodeset.getElementsByTagName("UANodeSet"))
            if nodesetCount != 1:
                log(self, "Document " + self.targetXML[1] + " contains no or more then 1 nodeset", LOG_LEVEL_ERROR)
                self.parseOK = False
        except Exception as err:
            # Keep the best-effort behaviour (no raise), but say why it failed.
            self.parseOK = False
            log(self, "Failed to parse " + str(originXML) + ": " + str(err), LOG_LEVEL_ERROR)
        log(self, "Adding new document to be preprocessed " + os.path.basename(originXML) + " as " + self.targetXML[1], LOG_LEVEL_DEBUG)

    def clean(self):
        """ clean

            Deletes the temporary output file of this document.

            returns: Nothing
        """
        os.remove(self.targetXML[1])

    def getTargetXMLName(self):
        """ getTargetXMLName

            returns: path of the temporary output file, or None if the
                     document did not parse correctly
        """
        if self.parseOK:
            return self.targetXML[1]
        return None

    def extractNamespaceURIs(self):
        """ extractNamespaceURIs

            Scans the raw source file for the <NamespaceUris>...</NamespaceUris>
            section, parses just that fragment and appends the text of each
            child element to referencedNamesSpaceUris. The tag search is
            case-insensitive; the fragment lookup afterwards expects the
            exact element name "NamespaceUris".

            The section may span several lines or sit on a single line.
            A missing or unterminated section, and empty child elements,
            are skipped instead of raising.

            returns: Nothing
        """
        import re  # local import; not provided by the module's import block

        captured = []
        inside = False
        # Binary mode keeps the scan independent of the locale encoding.
        with open(self.originXML, "rb") as infile:
            for line in infile:
                lowered = line.lower()
                if not inside and b"<namespaceuris>" in lowered:
                    inside = True
                if inside:
                    captured.append(line)
                    # Checked on the same line as the opening tag as well, so
                    # a one-line section does not swallow the rest of the file.
                    if b"</namespaceuris>" in lowered:
                        break

        if not captured:
            return

        # Cut away anything that shares a line with the opening/closing tag.
        section = re.search(br"<namespaceuris>.*?</namespaceuris>", b"".join(captured), re.IGNORECASE | re.DOTALL)
        if section is None:
            log(self, "Unterminated NamespaceUris section in " + os.path.basename(self.originXML), LOG_LEVEL_WARN)
            return

        ns = dom.parseString(section.group(0)).getElementsByTagName("NamespaceUris")
        if not ns:
            return
        for uri in ns[0].childNodes:
            if uri.nodeType != uri.ELEMENT_NODE:
                continue
            if uri.firstChild is None:
                # Empty <Uri/> element: nothing to record.
                continue
            self.referencedNamesSpaceUris.append(uri.firstChild.data)

    def analyze(self):
        """ analyze()

            Gathers information about the nodes and references contained in
            the XML file to facilitate later preprocessing stages that
            address XML dependency issues. Fills referencedNamesSpaceUris,
            namespaceQualifiers, namespaceOrder, containedNodes and
            referencedNodes. Does nothing for documents that failed to parse.

            returns: No return value
        """
        if not self.parseOK:
            # nodeset is not a usable DOM here; nothing to analyze.
            log(self, "Skipping analysis of unparsable document " + os.path.basename(self.originXML), LOG_LEVEL_WARN)
            return

        ns = self.nodeset.getElementsByTagName("UANodeSet")

        self.extractNamespaceURIs()

        for key in ns[0].attributes.keys():
            if "xmlns:" in key:
                # Every qualifier is remembered; finalize() strips them.
                self.namespaceQualifiers.append(key.replace("xmlns:",""))
            if "xmlns:s" in key:
                # Only xmlns:s<number> carries a namespace index; other
                # qualifiers that merely start with "s" are ignored.
                suffix = key.replace("xmlns:s","")
                if suffix.isdigit():
                    self.namespaceOrder.append((int(suffix), ns[0].getAttribute(key)))

        for nd in ns[0].childNodes:
            if nd.nodeType != nd.ELEMENT_NODE:
                continue
            if not nd.hasAttribute(u'NodeId'):
                continue
            self.containedNodes.append( (opcua_node_id_t(nd.getAttribute(u'NodeId')), nd) )
            refContainers = nd.getElementsByTagName(u'References')
            if not refContainers:
                # Node without a <References> block.
                continue
            for ref in refContainers[0].childNodes:
                if ref.nodeType != ref.ELEMENT_NODE or ref.firstChild is None:
                    continue
                self.referencedNodes.append( (opcua_node_id_t(ref.firstChild.data), ref) )

        log(self, "Nodes: " + str(len(self.containedNodes)) + " References: " + str(len(self.referencedNodes)), LOG_LEVEL_DEBUG)

    def getNamespaceId(self):
        """ namespaceId()

            Counts the namespace IDs in all nodes of this XML and picks the
            most used namespace as the numeric identifier of this data model.

            returns: Integer ID of the most probable/most used namespace in
                     this XML; 0 if the document contains no nodes
        """
        usage = Counter(ndid[0].ns for ndid in self.containedNodes)

        bestCount = 0
        namespaceIdGuessed = 0
        for nsIndex in usage:
            # Strictly greater: on a tie the first namespace seen wins.
            if usage[nsIndex] > bestCount:
                bestCount = usage[nsIndex]
                namespaceIdGuessed = nsIndex
        log(self, "XML Contents are propably in namespace " + str(namespaceIdGuessed) + " (used by " + str(bestCount) + " Nodes)", LOG_LEVEL_DEBUG)
        return namespaceIdGuessed

    def getReferencedNamespaceUri(self, nsId):
        """ getReferencedNamespaceUri

            Returns a URI that hopefully corresponds to the nsId that was
            used to reference this model. xmlns:s<N> declarations are
            consulted first; otherwise nsId is taken as a 1-based position
            in the <NamespaceUris> list.

            nsId: namespace index as used inside this document

            return: URI string corresponding to nsId, "" if none is known
        """
        for el in self.namespaceOrder:
            if el[0] == nsId:
                return el[1]

        # 1-based lookup; indices outside the list (including 0) are unknown.
        if 1 <= nsId <= len(self.referencedNamesSpaceUris):
            return self.referencedNamesSpaceUris[nsId-1]

        return ""

    def getNamespaceDependencies(self):
        """ getNamespaceDependencies

            returns: list of the distinct namespace indices used by the
                     references of this document, in order of first use
        """
        deps = []
        for ndid in self.referencedNodes:
            if ndid[0].ns not in deps:
                deps.append(ndid[0].ns)
        return deps

    def finalize(self):
        """ finalize

            Serialises the (possibly modified) DOM, removes every
            "<qualifier>:" occurrence for the qualifiers collected by
            analyze() and writes the result UTF-8 encoded to the temporary
            file. The file descriptor is closed in every case; a document
            that failed to parse leaves the temporary file empty.

            returns: Nothing
        """
        outfile = self.targetXML[0]
        try:
            if not self.parseOK:
                return
            outline = self.nodeset.toxml()
            for qualifier in self.namespaceQualifiers:
                rq = qualifier+":"
                if isinstance(rq, bytes):
                    # Python 2 byte string; Python 3 text has no decode().
                    rq = rq.decode('UTF-8')
                outline = outline.replace(rq, "")
            os.write(outfile, outline.encode('UTF-8'))
        finally:
            os.close(outfile)

    @staticmethod
    def _swapNsIndex(nodeIdText, currentNsId, newNsId):
        """ _swapNsIndex

            Rewrites the leading "ns=<currentNsId>;" of a textual node id
            to newNsId. Only the prefix is touched, so an identifier
            payload that happens to contain "ns=<currentNsId>" is left
            intact. Text without that prefix is returned unchanged.
        """
        oldPrefix = "ns=" + str(currentNsId) + ";"
        stripped = nodeIdText.lstrip()
        if not stripped.startswith(oldPrefix):
            return nodeIdText
        leading = nodeIdText[:len(nodeIdText) - len(stripped)]
        return leading + "ns=" + str(newNsId) + ";" + stripped[len(oldPrefix):]

    def reassignReferencedNamespaceId(self, currentNsId, newNsId):
        """ reassignReferencedNamespaceId

            Iterates over all references in this document, finds references
            to currentNsId and changes them to newNsId. NodeIds themselves
            are not altered.

            returns: nothing
        """
        for refNd in self.referencedNodes:
            if refNd[0].ns == currentNsId:
                refNd[1].firstChild.data = self._swapNsIndex(refNd[1].firstChild.data, currentNsId, newNsId)
                refNd[0].ns = newNsId
                # Return value unused; presumably refreshes the id's cached
                # string form - TODO confirm against opcua_node_id_t.
                refNd[0].toString()

    def reassignNamespaceId(self, currentNsId, newNsId):
        """ reassignNamespaceId

            Iterates over all nodes in this document, finds those in
            namespace currentNsId and changes them to newNsId. References
            inside this document that compare equal to such a node are
            updated along with it.

            returns: nothing
        """
        log(self, "Migrating nodes /w ns index " + str(currentNsId) + " to " + str(newNsId), LOG_LEVEL_DEBUG)
        for nd in self.containedNodes:
            if nd[0].ns != currentNsId:
                continue
            # References must be matched before the node id itself is
            # changed, otherwise the equality test below no longer holds.
            for refNd in self.referencedNodes:
                if refNd[0].ns == currentNsId and refNd[0] == nd[0]:
                    refNd[1].firstChild.data = self._swapNsIndex(refNd[1].firstChild.data, currentNsId, newNsId)
                    refNd[0].ns = newNsId
                    refNd[0].toString()
            nd[1].setAttribute(u'NodeId', self._swapNsIndex(nd[1].getAttribute(u'NodeId'), currentNsId, newNsId))
            nd[0].ns = newNsId
            nd[0].toString()
|
|
|
+
|
|
|
class open62541_XMLPreprocessor:
    """ open62541_XMLPreprocessor

        Holds a set of preProcessDocument instances and rewrites their
        namespace indices so that every document ends up with its own
        index and cross-document references point at the right one.
    """
    # Class-level default; rebound per instance in __init__.
    preProcDocuments = []

    def __init__(self):
        self.preProcDocuments = []

    def addDocument(self, documentPath):
        """ addDocument

            Wraps documentPath in a preProcessDocument and queues it.

            returns: Nothing
        """
        self.preProcDocuments.append(preProcessDocument(documentPath))

    def removePreprocessedFiles(self):
        """ removePreprocessedFiles

            Calls clean() on every queued document.

            returns: Nothing
        """
        for doc in self.preProcDocuments:
            doc.clean()

    def getPreProcessedFiles(self):
        """ getPreProcessedFiles

            returns: list of the output file names of all documents whose
                     parseOK flag is set
        """
        return [doc.getTargetXMLName() for doc in self.preProcDocuments if doc.parseOK]

    def testModelCongruencyAgainstReferences(self, doc, refs):
        """ testModelCongruencyAgainstReferences

            Counts how many of the nodes referenced in refs can be found in
            the model doc. Node ids are compared by their str() form.

            returns: float in [0, 1], the fraction of refs that were found
                     (0.0 for an empty refs list)
        """
        sspace = len(refs)
        if sspace == 0:
            return float(0)
        # One pass over the model instead of one scan per reference.
        knownIds = set(str(n[0]) for n in doc.containedNodes)
        found = sum(1 for ref in refs if str(ref) in knownIds)
        return float(found)/float(sspace)

    def preprocess_assignUniqueNsIds(self):
        """ preprocess_assignUniqueNsIds

            Gives every document whose guessed namespace is not 0 a fresh
            namespace index, counting up from 2 in queue order. Documents
            guessed to be namespace 0 keep their ids and are moved to the
            front of preProcDocuments.

            returns: Nothing
        """
        # Namespace-0 documents are set aside first; they are not renumbered.
        docLst = [doc for doc in self.preProcDocuments if doc.getNamespaceId() == 0]
        for doc in docLst:
            self.preProcDocuments.remove(doc)

        nsidx = 1  # incremented before use, so the first assigned index is 2
        for doc in self.preProcDocuments:
            nsidx = nsidx + 1
            nsid = doc.getNamespaceId()
            doc.reassignNamespaceId(nsid, nsidx)
            docLst.append(doc)
            log(self, "Document " + doc.originXML + " is now namespace " + str(nsidx), LOG_LEVEL_INFO)
        self.preProcDocuments = docLst

    def getUsedNamespaceArrayNames(self):
        """ getUsedNamespaceArrayNames

            Returns the XML xmlns:s1 or <URI>[0] of each XML document (if
            contained/possible). Documents for which no URI is known are
            listed with a placeholder URI.

            returns: dict of int:nsId -> string:url
        """
        nsName = {}
        for doc in self.preProcDocuments:
            uri = doc.getReferencedNamespaceUri(1)
            # The lookup signals "unknown" with "", so test for falsiness.
            if not uri:
                uri = "http://modeluri.not/retrievable/from/xml"
            nsName[doc.getNamespaceId()] = uri
        return nsName

    def preprocess_linkDependantModels(self):
        """ preprocess_linkDependantModels

            Redirects the references of every document to the namespace
            index of the document they actually target. Targets are first
            identified via namespace URIs; for references that cannot be
            resolved that way, the document containing the largest share
            of the referenced node ids is chosen instead.

            returns: Nothing
        """
        revertToStochastic = []  # (doc, nsId) pairs that URIs could not resolve

        for doc in self.preProcDocuments:
            nsid = doc.getNamespaceId()
            # NOTE(review): references are rewritten one index at a time; a
            # rewrite onto an index that is itself still pending in this
            # list would be picked up again - confirm this cannot happen.
            dependencies = doc.getNamespaceDependencies()
            for d in dependencies:
                if d == nsid or d == 0:
                    continue

                nsUri = doc.getReferencedNamespaceUri(d)
                log(self, "Need a namespace referenced as " + str(d) + ". Which hopefully is " + nsUri, LOG_LEVEL_INFO)
                targetDoc = None
                # An empty URI identifies nothing; do not let it "match"
                # another document that also lacks a URI.
                if nsUri:
                    for tgt in self.preProcDocuments:
                        # Each candidate is identified by the URI it lists
                        # for index 1.
                        if tgt.getReferencedNamespaceUri(1) == nsUri:
                            targetDoc = tgt
                            break
                if targetDoc is not None:
                    doc.reassignReferencedNamespaceId(d, targetDoc.getNamespaceId())
                else:
                    revertToStochastic.append((doc, d))
                    log(self, "Failed to reliably identify which XML/Model " + os.path.basename(doc.originXML) + " calls ns=" +str(d), LOG_LEVEL_WARN)

        for (doc, d) in revertToStochastic:
            log(self, "Attempting to find stochastic match for target namespace ns=" + str(d) + " of " + os.path.basename(doc.originXML), LOG_LEVEL_WARN)

            # Private copies of the unresolved references, so trying out
            # namespaces below does not touch the document itself.
            refs = [opcua_node_id_t(str(ref[0])) for ref in doc.referencedNodes if ref[0].ns == d]
            matches = []  # (hit ratio, candidate document)
            for tDoc in self.preProcDocuments:
                tDocId = tDoc.getNamespaceId()

                # Pretend the references pointed into this candidate ...
                for r in refs:
                    r.ns = tDocId
                    r.toString()

                # ... and measure how many of them it would contain.
                c = self.testModelCongruencyAgainstReferences(tDoc, refs)
                if c > 0:
                    matches.append((c, tDoc))

            best = (0, None)
            for m in matches:
                if m[0] > best[0]:
                    best = m
            if best[1] is not None:
                log(self, "Best match (" + str(best[0]*100) + "%) for what " + os.path.basename(doc.originXML) + " refers to as ns="+str(d)+" was " + os.path.basename(best[1].originXML), LOG_LEVEL_WARN)
                doc.reassignReferencedNamespaceId(d, best[1].getNamespaceId())
            else:
                log(self, "Failed to find a match for what " + os.path.basename(doc.originXML) + " refers to as ns=" + str(d) ,LOG_LEVEL_ERROR )

    def preprocessAll(self):
        """ preprocessAll

            Runs the complete pipeline: analyze every document, assign
            unique namespace indices, link dependent models and write the
            results to the temporary files.

            returns: True
        """
        for doc in self.preProcDocuments:
            doc.analyze()

        self.preprocess_assignUniqueNsIds()
        self.preprocess_linkDependantModels()

        for doc in self.preProcDocuments:
            doc.finalize()

        return True
|
|
|
+
|
|
|
+
|
|
|
+
|