9 years ago · ec853c3b4c
--- a/tools/pyUANamespace/open62541_XMLPreprocessor.py
+++ b/tools/pyUANamespace/open62541_XMLPreprocessor.py
@@ -15,7 +15,7 @@
 
				 ### this program.
			
 
				 ###
			
 
				 
			
 
				-from logger import *
			
 
				+import logging
			
 
				 from ua_constants import *
			
 
				 import tempfile
			
 
				 import xml.dom.minidom as dom
			
@@ -25,6 +25,9 @@ from collections import Counter
 
				 
			
 
				 from ua_namespace import opcua_node_id_t
			
 
				 
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				 class preProcessDocument:
			
 
				   originXML = '' # Original XML passed to the preprocessor
			
 
				   targetXML = () # tuple of (fileHandle, fileName)
			
@@ -35,7 +38,7 @@ class preProcessDocument:
 
				   namespaceOrder  = [] # contains xmlns:sX attributed as tuples (int ns, string name)
			
 
				   namespaceQualifiers = []      # contains all xmlns:XYZ qualifiers that might prefix value aliases (like "<uax:Int32>")
			
 
				   referencedNamesSpaceUris = [] # contains <NamespaceUris> URI elements
			
 
				-  
			
 
				+
			
 
				   def __init__(self, originXML):
			
 
				     self.originXML = originXML
			
 
				     self.targetXML = tempfile.mkstemp(prefix=os.path.basename(originXML)+"_preProcessed-" ,suffix=".xml")
			
@@ -48,29 +51,29 @@ class preProcessDocument:
 
				     try:
			
 
				       self.nodeset = dom.parse(originXML)
			
 
				       if len(self.nodeset.getElementsByTagName("UANodeSet")) == 0 or len(self.nodeset.getElementsByTagName("UANodeSet")) > 1:
			
 
				-        log(self, "Document " + self.targetXML[1] + " contains no or more then 1 nodeset", LOG_LEVEL_ERROR)
			
 
				+        logger.error("Document " + self.targetXML[1] + " contains no or more then 1 nodeset") # message is the only argument; logger.error() already implies the level
			
 
				         self.parseOK   = False
			
 
				     except:
			
 
				       self.parseOK   = False
			
 
				-    log(self, "Adding new document to be preprocessed " + os.path.basename(originXML) + " as " + self.targetXML[1], LOG_LEVEL_DEBUG)
			
 
				-  
			
 
				+    logger.debug("Adding new document to be preprocessed " + os.path.basename(originXML) + " as " + self.targetXML[1])
			
 
				+
			
 
				   def clean(self):
			
 
				     #os.close(self.targetXML[0]) Don't -> done to flush() after finalize()
			
 
				     os.remove(self.targetXML[1])
			
 
				-  
			
 
				+
			
 
				   def getTargetXMLName(self):
			
 
				     if (self.parseOK):
			
 
				       return self.targetXML[1]
			
 
				     return None
			
 
				-  
			
 
				+
			
 
				   def extractNamespaceURIs(self):
			
 
				     """ extractNamespaceURIs
			
 
				-        
			
 
				+
			
 
				         minidom gobbles up <NamespaceUris></NamespaceUris> elements, without a decent
			
 
				-        way to reliably access this dom2 <uri></uri> elements (only attribute xmlns= are 
			
 
				-        accessible using minidom).  We need them for dereferencing though... This 
			
 
				+        way to reliably access this dom2 <uri></uri> elements (only attribute xmlns= are
			
 
				+        accessible using minidom).  We need them for dereferencing though... This
			
 
				         function attempts to do just that.
			
 
				-        
			
 
				+
			
 
				         returns: Nothing
			
 
				     """
			
 
				     infile = open(self.originXML)
			
@@ -86,82 +89,80 @@ class preProcessDocument:
 
				         break
			
 
				       if foundURIs:
			
 
				         nsline = nsline + line
			
 
				-    
			
 
				+
			
 
				     if len(nsline) > 0:
			
 
				       ns = dom.parseString(nsline).getElementsByTagName("NamespaceUris")
			
 
				       for uri in ns[0].childNodes:
			
 
				         if uri.nodeType != uri.ELEMENT_NODE:
			
 
				           continue
			
 
				         self.referencedNamesSpaceUris.append(uri.firstChild.data)
			
 
				-      
			
 
				+
			
 
				     infile.close()
			
 
				-    
			
 
				+
			
 
				   def analyze(self):
			
 
				     """ analyze()
			
 
				-    
			
 
				+
			
 
				         analyze will gather information about the nodes and references contained in a XML File
			
 
				         to facilitate later preprocessing stages that adresss XML dependency issues
			
 
				-        
			
 
				+
			
 
				         returns: No return value
			
 
				-    """ 
			
 
				+    """
			
 
				     nodeIds = []
			
 
				     ns = self.nodeset.getElementsByTagName("UANodeSet")
			
 
				-    
			
 
				+
			
 
				     # We need to find out what the namespace calls itself and other referenced, as numeric id's are pretty
			
 
				     # useless sans linked nodes. There is two information sources...
			
 
				     self.extractNamespaceURIs() # From <URI>...</URI> definitions
			
 
				-    
			
 
				+
			
 
				     for key in ns[0].attributes.keys(): # from xmlns:sX attributes
			
 
				       if "xmlns:" in key:  # Any key: we will be removing these qualifiers from Values later
			
 
				         self.namespaceQualifiers.append(key.replace("xmlns:",""))
			
 
				       if "xmlns:s" in key: # get a numeric nsId and modelname/uri
			
 
				         self.namespaceOrder.append((int(key.replace("xmlns:s","")), ns[0].getAttribute(key)))
			
 
				-    
			
 
				+
			
 
				     # Get all nodeIds contained in this XML
			
 
				     for nd in ns[0].childNodes:
			
 
				       if nd.nodeType != nd.ELEMENT_NODE:
			
 
				         continue
			
 
				       if nd.hasAttribute(u'NodeId'):
			
 
				         self.containedNodes.append( (opcua_node_id_t(nd.getAttribute(u'NodeId')), nd) )
			
 
				-        refs = nd.getElementsByTagName(u'References')
			
 
				-        if len(refs) > 0:
			
 
				-          refs = refs[0]
			
 
				-          for ref in refs.childNodes:
			
 
				-            if ref.nodeType == ref.ELEMENT_NODE:
			
 
				-              self.referencedNodes.append( (opcua_node_id_t(ref.firstChild.data), ref) )
			
 
				-    
			
 
				-    log(self, "Nodes: " + str(len(self.containedNodes)) + " References: " + str(len(self.referencedNodes)), LOG_LEVEL_DEBUG)
			
 
				-  
			
 
				+        refs = nd.getElementsByTagName(u'References')
			
 
				+        for ref in (refs[0].childNodes if len(refs) > 0 else []): # a node may carry no <References>; then there is nothing to collect
			
 
				+          if ref.nodeType == ref.ELEMENT_NODE:
			
 
				+            self.referencedNodes.append( (opcua_node_id_t(ref.firstChild.data), ref) )
			
 
				+
			
 
				+    logger.debug("Nodes: " + str(len(self.containedNodes)) + " References: " + str(len(self.referencedNodes)))
			
 
				+
			
 
				   def getNamespaceId(self):
			
 
				     """ namespaceId()
			
 
				-        
			
 
				+
			
 
				         Counts the namespace IDs in all nodes of this XML and picks the most used
			
 
				         namespace as the numeric identifier of this data model.
			
 
				-        
			
 
				+
			
 
				         returns: Integer ID of the most propable/most used namespace in this XML
			
 
				     """
			
 
				     max = 0;
			
 
				     namespaceIdGuessed = 0;
			
 
				     idDict = {}
			
 
				-    
			
 
				+
			
 
				     for ndid in self.containedNodes:
			
 
				       if not idDict.has_key(ndid[0].ns):
			
 
				         idDict[ndid[0].ns] = 1
			
 
				       else:
			
 
				         idDict[ndid[0].ns] = idDict[ndid[0].ns] + 1
			
 
				-    
			
 
				+
			
 
				     for entry in idDict:
			
 
				       if idDict[entry] > max:
			
 
				         max = idDict[entry]
			
 
				         namespaceIdGuessed = entry
			
 
				-    log(self, "XML Contents are propably in namespace " + str(entry) + " (used by " + str(idDict[entry]) + " Nodes)", LOG_LEVEL_DEBUG)
			
 
				+    logger.debug("XML Contents are propably in namespace " + str(namespaceIdGuessed) + " (used by " + str(max) + " Nodes)") # report the winner, not the last key iterated (which is undefined when there are no nodes)
			
 
				     return namespaceIdGuessed
			
 
				-  
			
 
				+
			
 
				   def getReferencedNamespaceUri(self, nsId):
			
 
				     """ getReferencedNamespaceUri
			
 
				-    
			
 
				+
			
 
				         returns an URL that hopefully corresponds to the nsId that was used to reference this model
			
 
				-        
			
 
				+
			
 
				         return: URI string corresponding to nsId
			
 
				     """
			
 
				     # Might be the more reliable method: Get the URI from the xmlns attributes (they have numers)
			
@@ -169,22 +170,22 @@ class preProcessDocument:
 
				       for el in self.namespaceOrder:
			
 
				         if el[0] == nsId:
			
 
				           return el[1]
			
 
				-    
			
 
				-    # Fallback: 
			
 
				+
			
 
				+    # Fallback:
			
 
				     #  Some models do not have xmlns:sX attributes, but still <URI>s (usually when they only reference NS0)
			
 
				     if len(self.referencedNamesSpaceUris) > 0  and len(self.referencedNamesSpaceUris) >= nsId-1:
			
 
				       return self.referencedNamesSpaceUris[nsId-1]
			
 
				-    
			
 
				+
			
 
				     #Nope, not found.
			
 
				     return ""
			
 
				-  
			
 
				+
			
 
				   def getNamespaceDependencies(self):
			
 
				     deps = []
			
 
				     for ndid in self.referencedNodes:
			
 
				       if not ndid[0].ns in deps:
			
 
				         deps.append(ndid[0].ns)
			
 
				     return deps
			
 
				-    
			
 
				+
			
 
				   def finalize(self):
			
 
				     outfile = self.targetXML[0]
			
 
				     outline = self.nodeset.toxml()
			
@@ -193,29 +194,29 @@ class preProcessDocument:
 
				       outline = outline.replace(rq.decode('UTF-8'), "")
			
 
				     os.write(outfile, outline.encode('UTF-8'))
			
 
				     os.close(outfile)
			
 
				-    
			
 
				+
			
 
				   def reassignReferencedNamespaceId(self, currentNsId, newNsId):
			
 
				     """ reassignReferencedNamespaceId
			
 
				-        
			
 
				+
			
 
				         Iterates over all references in this document, find references to currentNsId and changes them to newNsId.
			
 
				         NodeIds themselves are not altered.
			
 
				-        
			
 
				+
			
 
				         returns: nothing
			
 
				-    """ 
			
 
				+    """
			
 
				     for refNd in self.referencedNodes:
			
 
				       if refNd[0].ns == currentNsId:
			
 
				         refNd[1].firstChild.data = refNd[1].firstChild.data.replace("ns="+str(currentNsId), "ns="+str(newNsId))
			
 
				         refNd[0].ns = newNsId
			
 
				         refNd[0].toString()
			
 
				-  
			
 
				+
			
 
				   def reassignNamespaceId(self, currentNsId, newNsId):
			
 
				     """ reassignNamespaceId
			
 
				-        
			
 
				+
			
 
				         Iterates over all nodes in this document, find those in namespace currentNsId and changes them to newNsId.
			
 
				-        
			
 
				+
			
 
				         returns: nothing
			
 
				-    """ 
			
 
				-    log(self, "Migrating nodes /w ns index " + str(currentNsId) + " to " + str(newNsId), LOG_LEVEL_DEBUG)
			
 
				+    """
			
 
				+    logger.debug("Migrating nodes /w ns index " + str(currentNsId) + " to " + str(newNsId))
			
 
				     for nd in self.containedNodes:
			
 
				       if nd[0].ns == currentNsId:
			
 
				         # In our own document, update any references to this node
			
@@ -227,33 +228,33 @@ class preProcessDocument:
 
				         nd[1].setAttribute(u'NodeId', nd[1].getAttribute(u'NodeId').replace("ns="+str(currentNsId), "ns="+str(newNsId)))
			
 
				         nd[0].ns = newNsId
			
 
				         nd[0].toString()
			
 
				-  
			
 
				+
			
 
				 class open62541_XMLPreprocessor:
			
 
				   preProcDocuments = []
			
 
				-  
			
 
				+
			
 
				   def __init__(self):
			
 
				       self.preProcDocuments = []
			
 
				-      
			
 
				+
			
 
				   def addDocument(self, documentPath):
			
 
				     self.preProcDocuments.append(preProcessDocument(documentPath))
			
 
				-    
			
 
				+
			
 
				   def removePreprocessedFiles(self):
			
 
				     for doc in self.preProcDocuments:
			
 
				       doc.clean()
			
 
				-  
			
 
				+
			
 
				   def getPreProcessedFiles(self):
			
 
				     files = []
			
 
				     for doc in self.preProcDocuments:
			
 
				       if (doc.parseOK):
			
 
				         files.append(doc.getTargetXMLName())
			
 
				     return files
			
 
				-  
			
 
				+
			
 
				   def testModelCongruencyAgainstReferences(self, doc, refs):
			
 
				     """ testModelCongruencyAgainstReferences
			
 
				-    
			
 
				+
			
 
				         Counts how many of the nodes referencef in refs can be found in the model
			
 
				         doc.
			
 
				-        
			
 
				+
			
 
				         returns: double corresponding to the percentage of hits
			
 
				     """
			
 
				     sspace = len(refs)
			
@@ -267,7 +268,7 @@ class open62541_XMLPreprocessor:
 
				           found = found + 1
			
 
				           break
			
 
				     return float(found)/float(sspace)
			
 
				-    
			
 
				+
			
 
				   def preprocess_assignUniqueNsIds(self):
			
 
				     nsdep  = []
			
 
				     docLst = []
			
@@ -278,7 +279,7 @@ class open62541_XMLPreprocessor:
 
				         docLst.append(doc)
			
 
				     for doc in docLst:
			
 
				       self.preProcDocuments.remove(doc)
			
 
				-    
			
 
				+
			
 
				     # Reassign namespace id's to be in ascending order
			
 
				     nsidx = 1 # next namespace id to assign on collision (first one will be "2")
			
 
				     for doc in self.preProcDocuments:
			
@@ -286,14 +287,14 @@ class open62541_XMLPreprocessor:
 
				       nsid = doc.getNamespaceId()
			
 
				       doc.reassignNamespaceId(nsid, nsidx)
			
 
				       docLst.append(doc)
			
 
				-      log(self, "Document " + doc.originXML + " is now namespace " + str(nsidx), LOG_LEVEL_INFO)
			
 
				+      logger.info("Document " + doc.originXML + " is now namespace " + str(nsidx))
			
 
				     self.preProcDocuments = docLst
			
 
				-  
			
 
				+
			
 
				   def getUsedNamespaceArrayNames(self):
			
 
				     """ getUsedNamespaceArrayNames
			
 
				-    
			
 
				+
			
 
				         Returns the XML xmlns:s1 or <URI>[0] of each XML document (if contained/possible)
			
 
				-        
			
 
				+
			
 
				         returns: dict of int:nsId -> string:url
			
 
				     """
			
 
				     nsName = {}
			
@@ -303,10 +304,10 @@ class open62541_XMLPreprocessor:
 
				         uri = "http://modeluri.not/retrievable/from/xml"
			
 
				       nsName[doc.getNamespaceId()] = doc.getReferencedNamespaceUri(1)
			
 
				     return nsName
			
 
				-      
			
 
				-  def preprocess_linkDependantModels(self):    
			
 
				+
			
 
				+  def preprocess_linkDependantModels(self):
			
 
				     revertToStochastic = [] # (doc, int id), where id was not resolvable using model URIs
			
 
				-    
			
 
				+
			
 
				     # Attemp to identify the model relations by using model URIs in xmlns:sX or <URI> contents
			
 
				     for doc in self.preProcDocuments:
			
 
				       nsid = doc.getNamespaceId()
			
@@ -315,10 +316,10 @@ class open62541_XMLPreprocessor:
 
				         if d != nsid and d != 0:
			
 
				           # Attempt to identify the namespace URI this d referes to...
			
 
				           nsUri = doc.getReferencedNamespaceUri(d) # FIXME: This could actually fail and return ""!
			
 
				-          log(self, "Need a namespace referenced as " + str(d) + ". Which hopefully is " + nsUri, LOG_LEVEL_INFO)
			
 
				+          logger.info("Need a namespace referenced as " + str(d) + ". Which hopefully is " + nsUri)
			
 
				           targetDoc = None
			
 
				           for tgt in self.preProcDocuments:
			
 
				-            # That model, whose URI is known but its current id is not, will 
			
 
				+            # That model, whose URI is known but its current id is not, will
			
 
				             #   refer have referred to itself as "1"
			
 
				             if tgt.getReferencedNamespaceUri(1) == nsUri:
			
 
				               targetDoc = tgt
			
@@ -328,11 +329,11 @@ class open62541_XMLPreprocessor:
 
				             doc.reassignReferencedNamespaceId(d, targetDoc.getNamespaceId())
			
 
				             continue
			
 
				           else:
			
 
				-            revertToStochastic.append((doc, d)) 
			
 
				-            log(self, "Failed to reliably identify which XML/Model " + os.path.basename(doc.originXML) + " calls ns=" +str(d), LOG_LEVEL_WARN)
			
 
				-    
			
 
				+            revertToStochastic.append((doc, d))
			
 
				+            logger.warning("Failed to reliably identify which XML/Model " + os.path.basename(doc.originXML) + " calls ns=" +str(d))
			
 
				+
			
 
				     for (doc, d) in revertToStochastic:
			
 
				-      log(self, "Attempting to find stochastic match for target namespace ns=" + str(d) + " of " + os.path.basename(doc.originXML), LOG_LEVEL_WARN)
			
 
				+      logger.warning("Attempting to find stochastic match for target namespace ns=" + str(d) + " of " + os.path.basename(doc.originXML))
			
 
				       # Copy all references to the given namespace
			
 
				       refs = []
			
 
				       matches = [] # list of (match%, targetDoc) to pick from later
			
@@ -356,37 +357,34 @@ class open62541_XMLPreprocessor:
 
				         if m[0] > best[0]:
			
 
				           best = m
			
 
				       if best[1] != None:
			
 
				-        log(self, "Best match (" + str(best[1]*100) + "%) for what " + os.path.basename(doc.originXML) + " refers to as ns="+str(d)+" was " + os.path.basename(best[1].originXML), LOG_LEVEL_WARN)
			
 
				+        logger.warning("Best match (" + str(best[0]*100) + "%) for what " + os.path.basename(doc.originXML) + " refers to as ns="+str(d)+" was " + os.path.basename(best[1].originXML)) # best is (match ratio, document): the percentage comes from best[0]
			
 
				         doc.reassignReferencedNamespaceId(d, best[1].getNamespaceId())
			
 
				-      else: 
			
 
				-        log(self, "Failed to find a match for what " +  os.path.basename(doc.originXML) + " refers to as ns=" + str(d) ,LOG_LEVEL_ERROR )
			
 
				-      
			
 
				+      else:
			
 
				+        logger.error("Failed to find a match for what " +  os.path.basename(doc.originXML) + " refers to as ns=" + str(d))
			
 
				+
			
 
				   def preprocessAll(self):
			
 
				     ##
			
 
				     ## First: Gather statistics about the namespaces:
			
 
				     for doc in self.preProcDocuments:
			
 
				       doc.analyze()
			
 
				-    
			
 
				+
			
 
				     # Preprocess step: Remove XML specific Naming scheme ("uax:")
			
 
				     # FIXME: Not implemented
			
 
				-    
			
 
				+
			
 
				     ##
			
 
				     ## Preprocess step: Check namespace ID multiplicity and reassign IDs if necessary
			
 
				     ##
			
 
				     self.preprocess_assignUniqueNsIds()
			
 
				     self.preprocess_linkDependantModels()
			
 
				-    
			
 
				-    
			
 
				-    ##  
			
 
				+
			
 
				+
			
 
				+    ##
			
 
				     ## Prep step: prevent any XML from using namespace 1 (reserved for instances)
			
 
				     ## FIXME: Not implemented
			
 
				-    
			
 
				+
			
 
				     ##
			
 
				     ## Final: Write modified XML tmp files
			
 
				     for doc in self.preProcDocuments:
			
 
				       doc.finalize()
			
 
				-    
			
 
				-    return True
			
 
				-      
			
 
				-  
			
 
				 
			
 
				+    return True