|
@@ -15,7 +15,7 @@
|
|
|
### this program.
|
|
|
###
|
|
|
|
|
|
-from logger import *
|
|
|
+import logging
|
|
|
from ua_constants import *
|
|
|
import tempfile
|
|
|
import xml.dom.minidom as dom
|
|
@@ -25,6 +25,9 @@ from collections import Counter
|
|
|
|
|
|
from ua_namespace import opcua_node_id_t
|
|
|
|
|
|
+
|
|
|
+logger = logging.getLogger(__name__)
|
|
|
+
|
|
|
class preProcessDocument:
|
|
|
originXML = '' # Original XML passed to the preprocessor
|
|
|
targetXML = () # tuple of (fileHandle, fileName)
|
|
@@ -35,7 +38,7 @@ class preProcessDocument:
|
|
|
namespaceOrder = [] # contains xmlns:sX attributed as tuples (int ns, string name)
|
|
|
namespaceQualifiers = [] # contains all xmlns:XYZ qualifiers that might prefix value aliases (like "<uax:Int32>")
|
|
|
referencedNamesSpaceUris = [] # contains <NamespaceUris> URI elements
|
|
|
-
|
|
|
+
|
|
|
def __init__(self, originXML):
|
|
|
self.originXML = originXML
|
|
|
self.targetXML = tempfile.mkstemp(prefix=os.path.basename(originXML)+"_preProcessed-" ,suffix=".xml")
|
|
@@ -48,29 +51,29 @@ class preProcessDocument:
|
|
|
try:
|
|
|
self.nodeset = dom.parse(originXML)
|
|
|
if len(self.nodeset.getElementsByTagName("UANodeSet")) == 0 or len(self.nodeset.getElementsByTagName("UANodeSet")) > 1:
|
|
|
- log(self, "Document " + self.targetXML[1] + " contains no or more then 1 nodeset", LOG_LEVEL_ERROR)
|
|
|
+ logger.error(self, "Document " + self.targetXML[1] + " contains no or more then 1 nodeset", LOG_LEVEL_ERROR)
|
|
|
self.parseOK = False
|
|
|
except:
|
|
|
self.parseOK = False
|
|
|
- log(self, "Adding new document to be preprocessed " + os.path.basename(originXML) + " as " + self.targetXML[1], LOG_LEVEL_DEBUG)
|
|
|
-
|
|
|
+ logger.debug("Adding new document to be preprocessed " + os.path.basename(originXML) + " as " + self.targetXML[1])
|
|
|
+
|
|
|
def clean(self):
    """ clean

        Deletes the temporary preprocessed XML file from disk.

        The file descriptor stored in targetXML[0] is intentionally left
        alone here; finalize() closes it after writing the output.

        returns: nothing
    """
    target_path = self.targetXML[1]
    os.remove(target_path)
|
|
|
-
|
|
|
+
|
|
|
def getTargetXMLName(self):
    """ getTargetXMLName

        returns: file name of the temporary preprocessed XML (targetXML[1])
                 if the document was parsed successfully, None otherwise
    """
    return self.targetXML[1] if self.parseOK else None
|
|
|
-
|
|
|
+
|
|
|
def extractNamespaceURIs(self):
|
|
|
""" extractNamespaceURIs
|
|
|
-
|
|
|
+
|
|
|
minidom gobbles up <NamespaceUris></NamespaceUris> elements, without a decent
|
|
|
- way to reliably access this dom2 <uri></uri> elements (only attribute xmlns= are
|
|
|
- accessible using minidom). We need them for dereferencing though... This
|
|
|
+ way to reliably access this dom2 <uri></uri> elements (only attribute xmlns= are
|
|
|
+ accessible using minidom). We need them for dereferencing though... This
|
|
|
function attempts to do just that.
|
|
|
-
|
|
|
+
|
|
|
returns: Nothing
|
|
|
"""
|
|
|
infile = open(self.originXML)
|
|
@@ -86,82 +89,80 @@ class preProcessDocument:
|
|
|
break
|
|
|
if foundURIs:
|
|
|
nsline = nsline + line
|
|
|
-
|
|
|
+
|
|
|
if len(nsline) > 0:
|
|
|
ns = dom.parseString(nsline).getElementsByTagName("NamespaceUris")
|
|
|
for uri in ns[0].childNodes:
|
|
|
if uri.nodeType != uri.ELEMENT_NODE:
|
|
|
continue
|
|
|
self.referencedNamesSpaceUris.append(uri.firstChild.data)
|
|
|
-
|
|
|
+
|
|
|
infile.close()
|
|
|
-
|
|
|
+
|
|
|
def analyze(self):
    """ analyze()

        Gathers information about the nodes and references contained in the
        parsed XML document to facilitate later preprocessing stages that
        address XML dependency issues.

        Fills namespaceQualifiers, namespaceOrder, containedNodes and
        referencedNodes; extractNamespaceURIs() is called to collect the
        <NamespaceUris> entries.

        raises: IndexError if the document has no <UANodeSet> element

        returns: No return value
    """
    root = self.nodeset.getElementsByTagName("UANodeSet")[0]

    # We need to find out what the namespace calls itself and the others it
    # references, as numeric ids are pretty useless without linked nodes.
    # There are two information sources...
    self.extractNamespaceURIs()  # From <URI>...</URI> definitions

    for key in root.attributes.keys():  # from xmlns:sX attributes
        if "xmlns:" in key:
            # Any key: these qualifiers will be removed from Values later
            self.namespaceQualifiers.append(key.replace("xmlns:", ""))
        if "xmlns:s" in key:
            # Only numbered qualifiers (xmlns:s1, xmlns:s2, ...) carry a
            # namespace index; skip ordinary prefixes that merely start
            # with "s" instead of crashing in int().
            try:
                ns_index = int(key.replace("xmlns:s", ""))
            except ValueError:
                continue
            self.namespaceOrder.append((ns_index, root.getAttribute(key)))

    # Get all nodeIds contained in this XML
    for nd in root.childNodes:
        if nd.nodeType != nd.ELEMENT_NODE:
            continue
        if not nd.hasAttribute(u'NodeId'):
            continue
        self.containedNodes.append((opcua_node_id_t(nd.getAttribute(u'NodeId')), nd))

        # NOTE(review): references are collected only for elements carrying a
        # NodeId; the original nesting is not recoverable from the diff - confirm.
        refs = nd.getElementsByTagName(u'References')
        if len(refs) == 0:
            # A node without a <References> element is legal; indexing [0]
            # unconditionally would raise IndexError here.
            continue
        for ref in refs[0].childNodes:
            if ref.nodeType == ref.ELEMENT_NODE:
                self.referencedNodes.append((opcua_node_id_t(ref.firstChild.data), ref))

    logger.debug("Nodes: %s References: %s", len(self.containedNodes), len(self.referencedNodes))
|
|
|
+
|
|
|
def getNamespaceId(self):
    """ namespaceId()

        Counts the namespace IDs in all nodes of this XML and picks the most used
        namespace as the numeric identifier of this data model. On a tie the
        namespace encountered first while iterating the counts wins.

        returns: Integer ID of the most probable/most used namespace in this XML
                 (0 if the document contains no nodes)
    """
    # Tally how many contained nodes live in each namespace index.
    # dict.get() replaces dict.has_key(), which no longer exists in Python 3.
    usage = {}
    for entry in self.containedNodes:
        ns_index = entry[0].ns
        usage[ns_index] = usage.get(ns_index, 0) + 1

    best_count = 0
    namespaceIdGuessed = 0
    for ns_index in usage:
        # Strict comparison keeps the first namespace seen on a tie
        if usage[ns_index] > best_count:
            best_count = usage[ns_index]
            namespaceIdGuessed = ns_index

    if usage:
        # Report the namespace actually chosen, once, after the scan
        logger.debug("XML Contents are propably in namespace %s (used by %s Nodes)",
                     namespaceIdGuessed, best_count)
    return namespaceIdGuessed
|
|
|
-
|
|
|
+
|
|
|
def getReferencedNamespaceUri(self, nsId):
|
|
|
""" getReferencedNamespaceUri
|
|
|
-
|
|
|
+
|
|
|
returns an URL that hopefully corresponds to the nsId that was used to reference this model
|
|
|
-
|
|
|
+
|
|
|
return: URI string corresponding to nsId
|
|
|
"""
|
|
|
# Might be the more reliable method: Get the URI from the xmlns attributes (they have numbers)
|
|
@@ -169,22 +170,22 @@ class preProcessDocument:
|
|
|
for el in self.namespaceOrder:
|
|
|
if el[0] == nsId:
|
|
|
return el[1]
|
|
|
-
|
|
|
- # Fallback:
|
|
|
+
|
|
|
+ # Fallback:
|
|
|
# Some models do not have xmlns:sX attributes, but still <URI>s (usually when they only reference NS0)
|
|
|
if len(self.referencedNamesSpaceUris) > 0 and len(self.referencedNamesSpaceUris) >= nsId-1:
|
|
|
return self.referencedNamesSpaceUris[nsId-1]
|
|
|
-
|
|
|
+
|
|
|
#Nope, not found.
|
|
|
return ""
|
|
|
-
|
|
|
+
|
|
|
def getNamespaceDependencies(self):
    """ getNamespaceDependencies

        Collects the distinct namespace indices used by the entries of
        referencedNodes, in order of first appearance.

        returns: list of namespace indices
    """
    unique_ns = []
    for entry in self.referencedNodes:
        if entry[0].ns in unique_ns:
            continue
        unique_ns.append(entry[0].ns)
    return unique_ns
|
|
|
-
|
|
|
+
|
|
|
def finalize(self):
|
|
|
outfile = self.targetXML[0]
|
|
|
outline = self.nodeset.toxml()
|
|
@@ -193,29 +194,29 @@ class preProcessDocument:
|
|
|
outline = outline.replace(rq.decode('UTF-8'), "")
|
|
|
os.write(outfile, outline.encode('UTF-8'))
|
|
|
os.close(outfile)
|
|
|
-
|
|
|
+
|
|
|
def reassignReferencedNamespaceId(self, currentNsId, newNsId):
    """ reassignReferencedNamespaceId

        Walks all references recorded in referencedNodes; for every reference
        whose node id is in namespace currentNsId, the "ns=<currentNsId>" text
        of the XML reference is rewritten to "ns=<newNsId>" and the node id
        object is updated accordingly.
        NodeIds of the contained nodes themselves are not altered.

        returns: nothing
    """
    old_prefix = "ns=" + str(currentNsId)
    new_prefix = "ns=" + str(newNsId)
    for entry in self.referencedNodes:
        node_id = entry[0]
        if node_id.ns != currentNsId:
            continue
        # Patch the text content of the reference element
        text_node = entry[1].firstChild
        text_node.data = text_node.data.replace(old_prefix, new_prefix)
        # Keep the parsed node id in sync with the XML text
        node_id.ns = newNsId
        node_id.toString()  # presumably refreshes the id's cached string form - verify
|
|
|
-
|
|
|
+
|
|
|
def reassignNamespaceId(self, currentNsId, newNsId):
|
|
|
""" reassignNamespaceId
|
|
|
-
|
|
|
+
|
|
|
Iterates over all nodes in this document, find those in namespace currentNsId and changes them to newNsId.
|
|
|
-
|
|
|
+
|
|
|
returns: nothing
|
|
|
- """
|
|
|
- log(self, "Migrating nodes /w ns index " + str(currentNsId) + " to " + str(newNsId), LOG_LEVEL_DEBUG)
|
|
|
+ """
|
|
|
+ logger.debug("Migrating nodes /w ns index " + str(currentNsId) + " to " + str(newNsId))
|
|
|
for nd in self.containedNodes:
|
|
|
if nd[0].ns == currentNsId:
|
|
|
# In our own document, update any references to this node
|
|
@@ -227,33 +228,33 @@ class preProcessDocument:
|
|
|
nd[1].setAttribute(u'NodeId', nd[1].getAttribute(u'NodeId').replace("ns="+str(currentNsId), "ns="+str(newNsId)))
|
|
|
nd[0].ns = newNsId
|
|
|
nd[0].toString()
|
|
|
-
|
|
|
+
|
|
|
class open62541_XMLPreprocessor:
|
|
|
preProcDocuments = []
|
|
|
-
|
|
|
+
|
|
|
def __init__(self):
|
|
|
self.preProcDocuments = []
|
|
|
-
|
|
|
+
|
|
|
def addDocument(self, documentPath):
    """ addDocument

        Wraps the XML file at documentPath in a preProcessDocument and queues
        it in preProcDocuments.

        returns: nothing
    """
    queue = self.preProcDocuments
    queue.append(preProcessDocument(documentPath))
|
|
|
-
|
|
|
+
|
|
|
def removePreprocessedFiles(self):
    """ removePreprocessedFiles

        Calls clean() on every queued document, in queue order.

        returns: nothing
    """
    for document in self.preProcDocuments:
        document.clean()
|
|
|
-
|
|
|
+
|
|
|
def getPreProcessedFiles(self):
    """ getPreProcessedFiles

        returns: list with the target XML name of every queued document
                 whose parseOK flag is set, in queue order
    """
    return [document.getTargetXMLName()
            for document in self.preProcDocuments
            if document.parseOK]
|
|
|
-
|
|
|
+
|
|
|
def testModelCongruencyAgainstReferences(self, doc, refs):
|
|
|
""" testModelCongruencyAgainstReferences
|
|
|
-
|
|
|
+
|
|
|
Counts how many of the nodes referencef in refs can be found in the model
|
|
|
doc.
|
|
|
-
|
|
|
+
|
|
|
returns: double corresponding to the percentage of hits
|
|
|
"""
|
|
|
sspace = len(refs)
|
|
@@ -267,7 +268,7 @@ class open62541_XMLPreprocessor:
|
|
|
found = found + 1
|
|
|
break
|
|
|
return float(found)/float(sspace)
|
|
|
-
|
|
|
+
|
|
|
def preprocess_assignUniqueNsIds(self):
|
|
|
nsdep = []
|
|
|
docLst = []
|
|
@@ -278,7 +279,7 @@ class open62541_XMLPreprocessor:
|
|
|
docLst.append(doc)
|
|
|
for doc in docLst:
|
|
|
self.preProcDocuments.remove(doc)
|
|
|
-
|
|
|
+
|
|
|
# Reassign namespace id's to be in ascending order
|
|
|
nsidx = 1 # next namespace id to assign on collision (first one will be "2")
|
|
|
for doc in self.preProcDocuments:
|
|
@@ -286,14 +287,14 @@ class open62541_XMLPreprocessor:
|
|
|
nsid = doc.getNamespaceId()
|
|
|
doc.reassignNamespaceId(nsid, nsidx)
|
|
|
docLst.append(doc)
|
|
|
- log(self, "Document " + doc.originXML + " is now namespace " + str(nsidx), LOG_LEVEL_INFO)
|
|
|
+ logger.info("Document " + doc.originXML + " is now namespace " + str(nsidx))
|
|
|
self.preProcDocuments = docLst
|
|
|
-
|
|
|
+
|
|
|
def getUsedNamespaceArrayNames(self):
|
|
|
""" getUsedNamespaceArrayNames
|
|
|
-
|
|
|
+
|
|
|
Returns the XML xmlns:s1 or <URI>[0] of each XML document (if contained/possible)
|
|
|
-
|
|
|
+
|
|
|
returns: dict of int:nsId -> string:url
|
|
|
"""
|
|
|
nsName = {}
|
|
@@ -303,10 +304,10 @@ class open62541_XMLPreprocessor:
|
|
|
uri = "http://modeluri.not/retrievable/from/xml"
|
|
|
nsName[doc.getNamespaceId()] = doc.getReferencedNamespaceUri(1)
|
|
|
return nsName
|
|
|
-
|
|
|
- def preprocess_linkDependantModels(self):
|
|
|
+
|
|
|
+ def preprocess_linkDependantModels(self):
|
|
|
revertToStochastic = [] # (doc, int id), where id was not resolvable using model URIs
|
|
|
-
|
|
|
+
|
|
|
# Attempt to identify the model relations by using model URIs in xmlns:sX or <URI> contents
|
|
|
for doc in self.preProcDocuments:
|
|
|
nsid = doc.getNamespaceId()
|
|
@@ -315,10 +316,10 @@ class open62541_XMLPreprocessor:
|
|
|
if d != nsid and d != 0:
|
|
|
# Attempt to identify the namespace URI this d referes to...
|
|
|
nsUri = doc.getReferencedNamespaceUri(d) # FIXME: This could actually fail and return ""!
|
|
|
- log(self, "Need a namespace referenced as " + str(d) + ". Which hopefully is " + nsUri, LOG_LEVEL_INFO)
|
|
|
+ logger.info("Need a namespace referenced as " + str(d) + ". Which hopefully is " + nsUri)
|
|
|
targetDoc = None
|
|
|
for tgt in self.preProcDocuments:
|
|
|
- # That model, whose URI is known but its current id is not, will
|
|
|
+ # That model, whose URI is known but its current id is not, will
|
|
|
# have referred to itself as "1"
|
|
|
if tgt.getReferencedNamespaceUri(1) == nsUri:
|
|
|
targetDoc = tgt
|
|
@@ -328,11 +329,11 @@ class open62541_XMLPreprocessor:
|
|
|
doc.reassignReferencedNamespaceId(d, targetDoc.getNamespaceId())
|
|
|
continue
|
|
|
else:
|
|
|
- revertToStochastic.append((doc, d))
|
|
|
- log(self, "Failed to reliably identify which XML/Model " + os.path.basename(doc.originXML) + " calls ns=" +str(d), LOG_LEVEL_WARN)
|
|
|
-
|
|
|
+ revertToStochastic.append((doc, d))
|
|
|
+ logger.warn("Failed to reliably identify which XML/Model " + os.path.basename(doc.originXML) + " calls ns=" +str(d))
|
|
|
+
|
|
|
for (doc, d) in revertToStochastic:
|
|
|
- log(self, "Attempting to find stochastic match for target namespace ns=" + str(d) + " of " + os.path.basename(doc.originXML), LOG_LEVEL_WARN)
|
|
|
+ logger.warn("Attempting to find stochastic match for target namespace ns=" + str(d) + " of " + os.path.basename(doc.originXML))
|
|
|
# Copy all references to the given namespace
|
|
|
refs = []
|
|
|
matches = [] # list of (match%, targetDoc) to pick from later
|
|
@@ -356,37 +357,34 @@ class open62541_XMLPreprocessor:
|
|
|
if m[0] > best[0]:
|
|
|
best = m
|
|
|
if best[1] != None:
|
|
|
- log(self, "Best match (" + str(best[1]*100) + "%) for what " + os.path.basename(doc.originXML) + " refers to as ns="+str(d)+" was " + os.path.basename(best[1].originXML), LOG_LEVEL_WARN)
|
|
|
+ logger.warn("Best match (" + str(best[1]*100) + "%) for what " + os.path.basename(doc.originXML) + " refers to as ns="+str(d)+" was " + os.path.basename(best[1].originXML))
|
|
|
doc.reassignReferencedNamespaceId(d, best[1].getNamespaceId())
|
|
|
- else:
|
|
|
- log(self, "Failed to find a match for what " + os.path.basename(doc.originXML) + " refers to as ns=" + str(d) ,LOG_LEVEL_ERROR )
|
|
|
-
|
|
|
+ else:
|
|
|
+ logger.error("Failed to find a match for what " + os.path.basename(doc.originXML) + " refers to as ns=" + str(d))
|
|
|
+
|
|
|
def preprocessAll(self):
    """ preprocessAll

        Runs the complete preprocessing pipeline over all queued documents:
        analyze every document, assign unique namespace ids, link dependent
        models, then write the modified XML to the temporary files.

        returns: True
    """
    ##
    ## Step 1: collect namespace statistics for every document
    for document in self.preProcDocuments:
        document.analyze()

    # Step 2 (removal of the XML specific naming scheme, "uax:")
    # FIXME: Not implemented

    ##
    ## Step 3: resolve namespace id collisions, then re-target references
    ##
    self.preprocess_assignUniqueNsIds()
    self.preprocess_linkDependantModels()

    ##
    ## Step 4 (keep every XML out of namespace 1, reserved for instances)
    ## FIXME: Not implemented

    ##
    ## Final: write the modified XML tmp files. The attribute is read again
    ## here on purpose rather than reusing the list from step 1.
    for document in self.preProcDocuments:
        document.finalize()

    return True
|