Explorar o código

Added XML Preprocessing stage to namespace compiler. (requested by #584)

ichrispa hai 9 anos
pai
achega
f689ad992d

+ 18 - 2
tools/pyUANamespace/generate_open62541CCode.py

@@ -21,6 +21,7 @@ from sys import argv, exit
 from os import path
 from ua_namespace import *
 from logger import *
+from open62541_XMLPreprocessor import open62541_XMLPreprocessor
 
 def usage():
   print("Script usage:")
@@ -116,11 +117,26 @@ if __name__ == '__main__':
   # to any number of different OPC-UA namespaces.
   ns = opcua_namespace("open62541")
 
-  # Parse the XML files
+  # Clean up the XML files by removing duplicate namespaces and unwanted prefixes
+  preProc = open62541_XMLPreprocessor()
   for xmlfile in infiles:
+    log(None, "Preprocessing " + str(xmlfile), LOG_LEVEL_INFO)
+    preProc.addDocument(xmlfile)
+  preProc.preprocessAll()
+  
+  for xmlfile in preProc.getPreProcessedFiles():
     log(None, "Parsing " + str(xmlfile), LOG_LEVEL_INFO)
     ns.parseXML(xmlfile)
-
+  
+  # We need to notify the open62541 server of the namespaces used to be able to use i.e. ns=3
+  namespaceArrayNames = preProc.getUsedNamespaceArrayNames()
+  for key in namespaceArrayNames:
+    ns.addNamespace(key, namespaceArrayNames[key])
+    
+  # Remove any temp files - they are not needed after the AST is created.
+  # (Comment out the following call to keep the preprocessed files for debugging.)
+  preProc.removePreprocessedFiles()
+  
   # Remove blacklisted nodes from the namespace
   # Doing this now ensures that unlinkable pointers will be cleanly removed
   # during sanitation.

+ 21 - 19
tools/pyUANamespace/open62541_MacroHelper.py

@@ -24,24 +24,6 @@ __unique_item_id = 0
 
 defined_typealiases = []
 
-def substitutePunctuationCharacters(input):
-  '''
-  replace punctuation characters in input
-  '''
-  # No punctuation characters <>!$
-  illegal_chars = list(string.punctuation)
-  # underscore is allowed
-  illegal_chars.remove('_')
-
-  illegal = "".join(illegal_chars)
-  substitution = ""
-  # Map all punctuation characters to underscore
-  for illegal_char in illegal_chars:
-      substitution = substitution + '_'
-
-  return input.translate(string.maketrans(illegal, substitution), illegal)
-
-
 class open62541_MacroHelper():
   def __init__(self, supressGenerationOfAttribute=[]):
     self.supressGenerationOfAttribute = supressGenerationOfAttribute
@@ -60,7 +42,27 @@ class open62541_MacroHelper():
     else:
       return ""
 
+  def substitutePunctuationCharacters(self, input):
+    ''' substitutePunctuationCharacters
+
+        Build a C-printable copy of input. Lives on this class because
+        ua_namespace occasionally needs it as well.
+
+        Every character of string.punctuation except the underscore is passed
+        to translate() twice: once as a key of a punctuation -> '_' table and
+        once as the list of characters to delete.
+        NOTE(review): Python 2's str.translate(table, deletechars) removes
+        deletechars before the table is applied, so these characters end up
+        dropped rather than replaced by '_' - confirm which one is intended.
+
+        returns: C-printable string representation of input
+    '''
+    # All punctuation is illegal, with the single exception of the underscore
+    illegal = string.punctuation.replace('_', '')
+    # One underscore per illegal character keeps both maketrans arguments equally long
+    underscores = '_' * len(illegal)
+    return input.translate(string.maketrans(illegal, underscores), illegal)
+  
   def getNodeIdDefineString(self, node):
     code = []
     extrNs = node.browseName().split(":")
@@ -71,7 +73,7 @@ class open62541_MacroHelper():
     else:
         nodename = extrNs[0]
 
-    symbolic_name = substitutePunctuationCharacters(nodename)
+    symbolic_name = self.substitutePunctuationCharacters(nodename)
     if symbolic_name != nodename :
         log(self, "Subsituted characters in browsename for nodeid " + str(node.id().i) + " while generating C-Code ", LOG_LEVEL_WARN)
     

+ 390 - 0
tools/pyUANamespace/open62541_XMLPreprocessor.py

@@ -0,0 +1,390 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+###
+### Author:  Chris Iatrou (ichrispa@core-vector.net)
+###
+### This program was created for educational purposes and has been
+### contributed to the open62541 project by the author. All licensing
+### terms for this source is inherited by the terms and conditions
+### specified for by the open62541 project (see the projects readme
+### file for more information on the LGPL terms and restrictions).
+###
+### This program is not meant to be used in a production environment. The
+### author is not liable for any complications arising due to the use of
+### this program.
+###
+
+from logger import *
+from ua_constants import *
+import tempfile
+import xml.dom.minidom as dom
+import os
+import string
+from collections import Counter
+
+from ua_namespace import opcua_node_id_t
+
+class preProcessDocument:
+  """ preProcessDocument
+
+      Couples one input NodeSet XML file with the temporary file that receives
+      its preprocessed copy, and keeps the bookkeeping collected by analyze()
+      (contained nodes, referenced nodes, namespace attributes and URIs).
+  """
+  # Class-level defaults only; every mutable value is rebound per instance in __init__
+  originXML = '' # Original XML passed to the preprocessor
+  targetXML = () # tuple of (fileHandle, fileName)
+  targetClosed = False # True once the descriptor in targetXML[0] has been closed
+  nodeset   = '' # Parsed DOM XML object
+  parseOK   = False
+  containedNodes  = [] # contains tuples of (opcua_node_id_t, xmlelement)
+  referencedNodes = [] # contains tuples of (opcua_node_id_t, xmlelement)
+  namespaceOrder  = [] # contains xmlns:sX attributed as tuples (int ns, string name)
+  namespaceQualifiers = []      # contains all xmlns:XYZ qualifiers that might prefix value aliases (like "<uax:Int32>")
+  referencedNamesSpaceUris = [] # contains <NamespaceUris> URI elements
+
+  def __init__(self, originXML):
+    """ Creates the temporary target file and parses originXML.
+
+        parseOK is False afterwards if the file could not be parsed or does not
+        contain exactly one UANodeSet element.
+    """
+    self.originXML = originXML
+    self.targetXML = tempfile.mkstemp(prefix=os.path.basename(originXML)+"_preProcessed-" ,suffix=".xml")
+    self.targetClosed = False
+    self.parseOK   = True
+    self.containedNodes  = []
+    self.referencedNodes = []
+    self.namespaceOrder  = []
+    self.referencedNamesSpaceUris = []
+    self.namespaceQualifiers = []
+    try:
+      self.nodeset = dom.parse(originXML)
+    except Exception as err:
+      # Report the reason instead of silently swallowing it
+      log(self, "Failed to parse " + str(originXML) + ": " + str(err), LOG_LEVEL_ERROR)
+      self.parseOK = False
+    else:
+      if len(self.nodeset.getElementsByTagName("UANodeSet")) != 1:
+        log(self, "Document " + self.targetXML[1] + " contains no or more then 1 nodeset", LOG_LEVEL_ERROR)
+        self.parseOK = False
+    log(self, "Adding new document to be preprocessed " + os.path.basename(originXML) + " as " + self.targetXML[1], LOG_LEVEL_DEBUG)
+
+  def clean(self):
+    """ Removes the temporary target file.
+
+        finalize() normally closes the descriptor; if it never ran (e.g. the
+        document failed to parse) the descriptor is closed here so it does not leak.
+    """
+    if not self.targetClosed:
+      os.close(self.targetXML[0])
+      self.targetClosed = True
+    if os.path.exists(self.targetXML[1]):
+      os.remove(self.targetXML[1])
+
+  def getTargetXMLName(self):
+    """ returns: file name of the preprocessed XML, or None if parsing failed """
+    if (self.parseOK):
+      return self.targetXML[1]
+    return None
+
+  def extractNamespaceURIs(self):
+    """ extractNamespaceURIs
+
+        minidom gobbles up <NamespaceUris></NamespaceUris> elements, without a decent
+        way to reliably access these dom2 <uri></uri> elements (only attribute xmlns= are
+        accessible using minidom).  We need them for dereferencing though... This
+        function cuts the <NamespaceUris>...</NamespaceUris> fragment out of the raw
+        file text, parses it on its own and appends the text of every child element
+        to referencedNamesSpaceUris.
+
+        returns: Nothing
+    """
+    openTag  = "<NamespaceUris>"
+    closeTag = "</NamespaceUris>"
+    # The with-block guarantees the handle is released even if reading fails
+    with open(self.originXML) as infile:
+      content = infile.read()
+
+    # Work on the whole text so the result does not depend on how the XML is broken into lines
+    start = content.find(openTag)
+    if start < 0:
+      return
+    end = content.find(closeTag, start)
+    if end < 0:
+      return
+
+    fragment = content[start:end + len(closeTag)]
+    ns = dom.parseString(fragment).getElementsByTagName("NamespaceUris")
+    for uri in ns[0].childNodes:
+      if uri.nodeType != uri.ELEMENT_NODE:
+        continue
+      if uri.firstChild is None: # empty <Uri/> element, nothing to record
+        continue
+      self.referencedNamesSpaceUris.append(uri.firstChild.data)
+
+  def analyze(self):
+    """ analyze()
+
+        analyze will gather information about the nodes and references contained in a XML File
+        to facilitate later preprocessing stages that address XML dependency issues.
+        Documents that failed to parse are left untouched.
+
+        returns: No return value
+    """
+    if not self.parseOK:
+      return
+    root = self.nodeset.getElementsByTagName("UANodeSet")[0]
+
+    # We need to find out what the namespace calls itself and other referenced, as numeric id's are pretty
+    # useless sans linked nodes. There are two information sources...
+    self.extractNamespaceURIs() # From <URI>...</URI> definitions
+
+    for key in root.attributes.keys(): # from xmlns:sX attributes
+      if not key.startswith("xmlns:"):
+        continue
+      # Any qualifier: we will be removing these from Values later
+      qualifier = key[len("xmlns:"):]
+      self.namespaceQualifiers.append(qualifier)
+      # Only sX with a purely numeric X carries a namespace index (skips e.g. xmlns:soap)
+      if qualifier.startswith("s") and qualifier[1:].isdigit():
+        self.namespaceOrder.append((int(qualifier[1:]), root.getAttribute(key)))
+
+    # Get all nodeIds contained in this XML
+    for nd in root.childNodes:
+      if nd.nodeType != nd.ELEMENT_NODE or not nd.hasAttribute(u'NodeId'):
+        continue
+      self.containedNodes.append( (opcua_node_id_t(nd.getAttribute(u'NodeId')), nd) )
+      refLists = nd.getElementsByTagName(u'References')
+      if len(refLists) == 0: # node without any references
+        continue
+      for ref in refLists[0].childNodes:
+        if ref.nodeType == ref.ELEMENT_NODE and ref.firstChild is not None:
+          self.referencedNodes.append( (opcua_node_id_t(ref.firstChild.data), ref) )
+
+    log(self, "Nodes: " + str(len(self.containedNodes)) + " References: " + str(len(self.referencedNodes)), LOG_LEVEL_DEBUG)
+
+  def getNamespaceId(self):
+    """ namespaceId()
+
+        Counts the namespace IDs in all nodes of this XML and picks the most used
+        namespace as the numeric identifier of this data model.
+
+        returns: Integer ID of the most probable/most used namespace in this XML;
+                 0 if the document contains no nodes
+    """
+    idDict = {}
+    for ndid in self.containedNodes:
+      idDict[ndid[0].ns] = idDict.get(ndid[0].ns, 0) + 1
+
+    bestCount = 0
+    namespaceIdGuessed = 0
+    for entry in idDict:
+      if idDict[entry] > bestCount:
+        bestCount = idDict[entry]
+        namespaceIdGuessed = entry
+    # Report the winner itself, not whatever key the loop happened to visit last
+    log(self, "XML Contents are propably in namespace " + str(namespaceIdGuessed) + " (used by " + str(bestCount) + " Nodes)", LOG_LEVEL_DEBUG)
+    return namespaceIdGuessed
+
+  def getReferencedNamespaceUri(self, nsId):
+    """ getReferencedNamespaceUri
+
+        returns an URL that hopefully corresponds to the nsId that was used to reference this model
+
+        return: URI string corresponding to nsId, "" if none is known
+    """
+    # Might be the more reliable method: Get the URI from the xmlns attributes (they have numbers)
+    for el in self.namespaceOrder:
+      if el[0] == nsId:
+        return el[1]
+
+    # Fallback:
+    #  Some models do not have xmlns:sX attributes, but still <URI>s (usually when they only reference NS0)
+    #  Entry nsId-1 only exists for 1 <= nsId <= number of URIs.
+    if 1 <= nsId <= len(self.referencedNamesSpaceUris):
+      return self.referencedNamesSpaceUris[nsId-1]
+
+    #Nope, not found.
+    return ""
+
+  def getNamespaceDependencies(self):
+    """ returns: list of the distinct namespace ids used by the references of this document """
+    deps = []
+    for ndid in self.referencedNodes:
+      if not ndid[0].ns in deps:
+        deps.append(ndid[0].ns)
+    return deps
+
+  def finalize(self):
+    """ finalize
+
+        Serializes the (modified) DOM, strips every "<qualifier>:" prefix collected
+        by analyze() from the text and writes the result to the temporary target
+        file. The target descriptor is closed afterwards. Documents that failed to
+        parse are skipped.
+
+        returns: nothing
+    """
+    if not self.parseOK:
+      return
+    outline = self.nodeset.toxml()
+    for qualifier in self.namespaceQualifiers:
+      rq = qualifier+":"
+      if isinstance(rq, bytes): # only byte strings need decoding
+        rq = rq.decode('UTF-8')
+      outline = outline.replace(rq, "")
+    outfile = self.targetXML[0]
+    try:
+      os.write(outfile, outline.encode('UTF-8'))
+    finally:
+      os.close(outfile)
+      self.targetClosed = True
+
+  def _swapNsIndex(self, text, currentNsId, newNsId):
+    # Rewrites only the first "ns=<currentNsId>" so identifier text after it stays untouched
+    return text.replace("ns="+str(currentNsId), "ns="+str(newNsId), 1)
+
+  def reassignReferencedNamespaceId(self, currentNsId, newNsId):
+    """ reassignReferencedNamespaceId
+
+        Iterates over all references in this document, find references to currentNsId and changes them to newNsId.
+        NodeIds themselves are not altered.
+        NOTE(review): calls are applied one after another, so mapping a->b followed by
+        b->c also moves the references that were just mapped to b - confirm callers
+        never chain ids this way.
+
+        returns: nothing
+    """
+    for refNd in self.referencedNodes:
+      if refNd[0].ns == currentNsId:
+        refNd[1].firstChild.data = self._swapNsIndex(refNd[1].firstChild.data, currentNsId, newNsId)
+        refNd[0].ns = newNsId
+        refNd[0].toString() # presumably refreshes the cached string form - confirm in ua_node_types
+
+  def reassignNamespaceId(self, currentNsId, newNsId):
+    """ reassignNamespaceId
+
+        Iterates over all nodes in this document, find those in namespace currentNsId and changes them to newNsId.
+        References inside this document that point at such a node are updated along with it.
+
+        returns: nothing
+    """
+    log(self, "Migrating nodes /w ns index " + str(currentNsId) + " to " + str(newNsId), LOG_LEVEL_DEBUG)
+    for nd in self.containedNodes:
+      if nd[0].ns == currentNsId:
+        # In our own document, update any references to this node
+        # (must happen before nd[0] itself is changed, the comparison uses the old id)
+        for refNd in self.referencedNodes:
+          if refNd[0].ns == currentNsId and refNd[0] == nd[0]:
+            refNd[1].firstChild.data = self._swapNsIndex(refNd[1].firstChild.data, currentNsId, newNsId)
+            refNd[0].ns = newNsId
+            refNd[0].toString()
+        nd[1].setAttribute(u'NodeId', self._swapNsIndex(nd[1].getAttribute(u'NodeId'), currentNsId, newNsId))
+        nd[0].ns = newNsId
+        nd[0].toString()
+  
+class open62541_XMLPreprocessor:
+  """ open62541_XMLPreprocessor
+
+      Collects preProcessDocument instances, gives every non-NS0 document its own
+      namespace index, relinks cross-document references accordingly and writes
+      the adjusted XML into temporary files.
+  """
+  preProcDocuments = []
+
+  def __init__(self):
+    self.preProcDocuments = []
+
+  def addDocument(self, documentPath):
+    """ Registers the XML file documentPath for preprocessing """
+    self.preProcDocuments.append(preProcessDocument(documentPath))
+
+  def removePreprocessedFiles(self):
+    """ Deletes the temporary files of all registered documents """
+    for doc in self.preProcDocuments:
+      doc.clean()
+
+  def getPreProcessedFiles(self):
+    """ returns: list of temp file names of all documents that were parsed successfully """
+    files = []
+    for doc in self.preProcDocuments:
+      if (doc.parseOK):
+        files.append(doc.getTargetXMLName())
+    return files
+
+  def _usableDocuments(self):
+    # Documents that failed to parse carry no DOM and must not be analyzed or relinked
+    return [doc for doc in self.preProcDocuments if doc.parseOK]
+
+  def testModelCongruencyAgainstReferences(self, doc, refs):
+    """ testModelCongruencyAgainstReferences
+
+        Counts how many of the nodes referenced in refs can be found in the model
+        doc. Node ids are compared by their string representation.
+
+        returns: float in [0, 1], the fraction of refs found in doc
+    """
+    sspace = len(refs)
+    if sspace == 0:
+      return float(0)
+    # One set lookup per reference instead of scanning all nodes for each of them
+    known = set(str(n[0]) for n in doc.containedNodes)
+    found = 0
+    for ref in refs:
+      if str(ref) in known:
+        found = found + 1
+    return float(found)/float(sspace)
+
+  def preprocess_assignUniqueNsIds(self):
+    """ preprocess_assignUniqueNsIds
+
+        Moves all namespace 0 documents to the front of the document list and
+        renumbers the namespace of every other document in ascending order,
+        starting at 2. Documents that failed to parse are kept (so their temp
+        files still get removed) but are not renumbered.
+
+        returns: nothing
+    """
+    nsZeroDocs = []
+    otherDocs  = []
+    brokenDocs = []
+    # Search for namespace 0('s) - plural possible if user is overwriting NS0 defaults
+    # They are set aside, zero does not get demangled
+    for doc in self.preProcDocuments:
+      if not doc.parseOK:
+        brokenDocs.append(doc)
+      elif doc.getNamespaceId() == 0:
+        nsZeroDocs.append(doc)
+      else:
+        otherDocs.append(doc)
+
+    # Reassign namespace id's to be in ascending order
+    nsidx = 1 # incremented before use, so the first id handed out is "2"
+    for doc in otherDocs:
+      nsidx = nsidx + 1
+      nsid = doc.getNamespaceId()
+      doc.reassignNamespaceId(nsid, nsidx)
+      log(self, "Document " + doc.originXML + " is now namespace " + str(nsidx), LOG_LEVEL_INFO)
+    self.preProcDocuments = nsZeroDocs + otherDocs + brokenDocs
+
+  def getUsedNamespaceArrayNames(self):
+    """ getUsedNamespaceArrayNames
+
+        Returns the XML xmlns:s1 or <URI>[0] of each XML document (if contained/possible).
+        Documents without a retrievable URI are reported with a placeholder URI.
+
+        returns: dict of int:nsId -> string:url
+    """
+    nsName = {}
+    for doc in self._usableDocuments():
+      uri = doc.getReferencedNamespaceUri(1)
+      # "Not found" is reported as an empty string; None is checked for good measure
+      if uri is None or uri == "":
+        uri = "http://modeluri.not/retrievable/from/xml"
+      nsName[doc.getNamespaceId()] = uri
+    return nsName
+
+  def preprocess_linkDependantModels(self):
+    """ preprocess_linkDependantModels
+
+        Rewrites the references of every document that point into a foreign
+        namespace so they use the namespace id that foreign document has now.
+        The foreign document is identified by URI first; if that fails, the
+        document containing the largest share of the referenced nodes is chosen.
+        NOTE(review): remappings of one document are applied sequentially, so a
+        new id that equals a not yet processed old id gets remapped again - confirm.
+
+        returns: nothing
+    """
+    revertToStochastic = [] # (doc, int id), where id was not resolvable using model URIs
+    docs = self._usableDocuments()
+
+    # Attempt to identify the model relations by using model URIs in xmlns:sX or <URI> contents
+    for doc in docs:
+      nsid = doc.getNamespaceId()
+      dependencies = doc.getNamespaceDependencies()
+      for d in dependencies:
+        if d == nsid or d == 0:
+          continue
+        # Attempt to identify the namespace URI this d refers to...
+        nsUri = doc.getReferencedNamespaceUri(d)
+        log(self, "Need a namespace referenced as " + str(d) + ". Which hopefully is " + nsUri, LOG_LEVEL_INFO)
+        targetDoc = None
+        # An empty URI means "unknown" and must not match other documents lacking a URI
+        if nsUri != "":
+          for tgt in docs:
+            # That model, whose URI is known but its current id is not, will
+            #   have referred to itself as "1"
+            if tgt.getReferencedNamespaceUri(1) == nsUri:
+              targetDoc = tgt
+              break
+        if targetDoc is not None:
+          # Found the model... relink the references
+          doc.reassignReferencedNamespaceId(d, targetDoc.getNamespaceId())
+        else:
+          revertToStochastic.append((doc, d))
+          log(self, "Failed to reliably identify which XML/Model " + os.path.basename(doc.originXML) + " calls ns=" +str(d), LOG_LEVEL_WARN)
+
+    for (doc, d) in revertToStochastic:
+      log(self, "Attempting to find stochastic match for target namespace ns=" + str(d) + " of " + os.path.basename(doc.originXML), LOG_LEVEL_WARN)
+      # Copy all references to the given namespace
+      refs = []
+      for ref in doc.referencedNodes:
+        if ref[0].ns == d:
+          refs.append(opcua_node_id_t(str(ref[0])))
+      best = (0, None) # (match ratio, targetDoc) of the best candidate so far
+      for tDoc in docs:
+        tDocId = tDoc.getNamespaceId()
+        # Scenario: If these references did target this documents namespace...
+        for r in refs:
+          r.ns = tDocId
+          r.toString()
+        # ... how many of them would be found!?
+        c = self.testModelCongruencyAgainstReferences(tDoc, refs)
+        if c > best[0]:
+          best = (c, tDoc)
+      if best[1] is not None:
+        log(self, "Best match (" + str(best[0]*100) + "%) for what " + os.path.basename(doc.originXML) + " refers to as ns="+str(d)+" was " + os.path.basename(best[1].originXML), LOG_LEVEL_WARN)
+        doc.reassignReferencedNamespaceId(d, best[1].getNamespaceId())
+      else:
+        log(self, "Failed to find a match for what " +  os.path.basename(doc.originXML) + " refers to as ns=" + str(d) ,LOG_LEVEL_ERROR )
+
+  def preprocessAll(self):
+    """ preprocessAll
+
+        Runs all preprocessing stages on the registered documents: analysis,
+        namespace id reassignment, relinking of dependent models and writing of
+        the temporary output files. Documents that failed to parse are skipped.
+
+        returns: True
+    """
+    ##
+    ## First: Gather statistics about the namespaces:
+    for doc in self._usableDocuments():
+      doc.analyze()
+
+    # Preprocess step: Remove XML specific Naming scheme ("uax:")
+    # FIXME: Not implemented
+
+    ##
+    ## Preprocess step: Check namespace ID multiplicity and reassign IDs if necessary
+    ##
+    self.preprocess_assignUniqueNsIds()
+    self.preprocess_linkDependantModels()
+
+    ##
+    ## Prep step: prevent any XML from using namespace 1 (reserved for instances)
+    ## FIXME: Not implemented
+
+    ##
+    ## Final: Write modified XML tmp files
+    for doc in self._usableDocuments():
+      doc.finalize()
+
+    return True
+      
+  
+

+ 16 - 2
tools/pyUANamespace/ua_namespace.py

@@ -58,7 +58,8 @@ class opcua_namespace():
   __binaryIndirectPointers__ = []
   name = ""
   knownNodeTypes = ""
-
+  namespaceIdentifiers = {} # list of 'int':'string' giving different namespace an array-mapable name
+  
   def __init__(self, name):
     self.nodes = []
     self.knownNodeTypes = ['variable', 'object', 'method', 'referencetype', \
@@ -67,8 +68,12 @@ class opcua_namespace():
     self.name = name
     self.nodeids = {}
     self.aliases = {}
+    self.namespaceIdentifiers = {}
     self.__binaryIndirectPointers__ = []
 
+  def addNamespace(self, numericId, stringURL):
+    """ Registers stringURL as the namespace array name of namespace index
+        numericId; an existing entry for numericId is overwritten.
+    """
+    self.namespaceIdentifiers.update({numericId: stringURL})
+    
   def linkLater(self, pointer):
     """ Called by nodes or references who have parsed an XML reference to a
         node represented by a string.
@@ -650,7 +655,16 @@ class opcua_namespace():
       
     code.append('#include "'+outfilename+'.h"')
     code.append("UA_INLINE void "+outfilename+"(UA_Server *server) {")
-
+    
+    # Before printing nodes, we need to request additional namespace arrays from the server
+    for nsid in self.namespaceIdentifiers:
+      if nsid == 0 or nsid==1:
+        continue
+      else:
+        name =  self.namespaceIdentifiers[nsid]
+        name = name.replace("\"","\\\"")
+        code.append("UA_Server_addNamespace(server, \"" + name.encode('UTF-8') + "\");")
+    
     # Find all references necessary to create the namespace and
     # "Bootstrap" them so all other nodes can safely use these referencetypes whenever
     # they can locate both source and target of the reference.

+ 4 - 1
tools/pyUANamespace/ua_node_types.py

@@ -215,7 +215,10 @@ class opcua_node_id_t():
 
   def __str__(self):
     # Hands out the string stored in __mystrname__ as-is
     return self.__mystrname__
-
+  
+  def __eq__(self, nodeId2):
+    """ Two node ids are equal if their toString() representations match.
+
+        Objects that offer no toString() are not comparable; NotImplemented is
+        returned for them so Python falls back to its default comparison
+        instead of raising an AttributeError.
+    """
+    if not hasattr(nodeId2, "toString"):
+      return NotImplemented
+    return (self.toString() == nodeId2.toString())
+
+  def __ne__(self, nodeId2):
+    """ Inverse of __eq__; Python 2 does not derive != from == on its own. """
+    result = self.__eq__(nodeId2)
+    if result is NotImplemented:
+      return result
+    return not result
+  
   def __repr__(self):
     # Same text as __str__: the string stored in __mystrname__
     return self.__mystrname__