bscheibel
/
technical_drawings_extraction


			
.. Copyright (C) 2001-2019 NLTK Project
.. For license information, see LICENSE.TXT

==============================
Combinatory Categorial Grammar
==============================

Relative Clauses
----------------

    >>> from nltk.ccg import chart, lexicon

Construct a lexicon:

    >>> lex = lexicon.parseLexicon('''
    ...     :- S, NP, N, VP
    ...
    ...     Det :: NP/N
    ...     Pro :: NP
    ...     Modal :: S\\NP/VP
    ...
    ...     TV :: VP/NP
    ...     DTV :: TV/NP
    ...
    ...     the => Det
    ...
    ...     that => Det
    ...     that => NP
    ...
    ...     I => Pro
    ...     you => Pro
    ...     we => Pro
    ...
    ...     chef => N
    ...     cake => N
    ...     children => N
    ...     dough => N
    ...
    ...     will => Modal
    ...     should => Modal
    ...     might => Modal
    ...     must => Modal
    ...
    ...     and => var\\.,var/.,var
    ...
    ...     to => VP[to]/VP
    ...
    ...     without => (VP\\VP)/VP[ing]
    ...
    ...     be => TV
    ...     cook => TV
    ...     eat => TV
    ...
    ...     cooking => VP[ing]/NP
    ...
    ...     give => DTV
    ...
    ...     is => (S\\NP)/NP
    ...     prefer => (S\\NP)/NP
    ...
    ...     which => (N\\N)/(S/NP)
    ...
    ...     persuade => (VP/VP[to])/NP
    ...     ''')

    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
    >>> for parse in parser.parse("you prefer that cake".split()):
    ...     chart.printCCGDerivation(parse)
    ...     break
    ...
     you    prefer      that   cake
     NP   ((S\NP)/NP)  (NP/N)   N
                      -------------->
                            NP
         --------------------------->
                   (S\NP)
    --------------------------------<
                   S

    >>> for parse in parser.parse("that is the cake which you prefer".split()):
    ...     chart.printCCGDerivation(parse)
    ...     break
    ...
     that      is        the    cake      which       you    prefer
      NP   ((S\NP)/NP)  (NP/N)   N    ((N\N)/(S/NP))  NP   ((S\NP)/NP)
                                                     ----->T
                                                  (S/(S\NP))
                                                     ------------------>B
                                                           (S/NP)
                                     ---------------------------------->
                                                   (N\N)
                               ----------------------------------------<
                                                  N
                       ------------------------------------------------>
                                              NP
          ------------------------------------------------------------->
                                     (S\NP)
    -------------------------------------------------------------------<
                                     S


Some other sentences to try:

- "that is the cake which we will persuade the chef to cook"
- "that is the cake which we will persuade the chef to give the children"

    >>> sent = "that is the dough which you will eat without cooking".split()
    >>> nosub_parser = chart.CCGChartParser(lex, chart.ApplicationRuleSet +
    ...                       chart.CompositionRuleSet + chart.TypeRaiseRuleSet)

Without Substitution (no output):

    >>> for parse in nosub_parser.parse(sent):
    ...     chart.printCCGDerivation(parse)

With Substitution:

    >>> for parse in parser.parse(sent):
    ...     chart.printCCGDerivation(parse)
    ...     break
    ...
     that      is        the    dough      which       you     will        eat          without           cooking
      NP   ((S\NP)/NP)  (NP/N)    N    ((N\N)/(S/NP))  NP   ((S\NP)/VP)  (VP/NP)  ((VP\VP)/VP['ing'])  (VP['ing']/NP)
                                                      ----->T
                                                   (S/(S\NP))
                                                                                 ------------------------------------->B
                                                                                             ((VP\VP)/NP)
                                                                        ----------------------------------------------<Sx
                                                                                           (VP/NP)
                                                           ----------------------------------------------------------->B
                                                                                   ((S\NP)/NP)
                                                      ---------------------------------------------------------------->B
                                                                                   (S/NP)
                                      -------------------------------------------------------------------------------->
                                                                           (N\N)
                               ---------------------------------------------------------------------------------------<
                                                                          N
                       ----------------------------------------------------------------------------------------------->
                                                                     NP
          ------------------------------------------------------------------------------------------------------------>
                                                             (S\NP)
    ------------------------------------------------------------------------------------------------------------------<
                                                            S


Conjunction
-----------

    >>> from nltk.ccg.chart import CCGChartParser, ApplicationRuleSet, CompositionRuleSet
    >>> from nltk.ccg.chart import SubstitutionRuleSet, TypeRaiseRuleSet, printCCGDerivation
    >>> from nltk.ccg import lexicon

Lexicons for the tests:

    >>> test1_lex = '''
    ...        :- S,N,NP,VP
    ...        I => NP
    ...        you => NP
    ...        will => S\\NP/VP
    ...        cook => VP/NP
    ...        which => (N\\N)/(S/NP)
    ...        and => var\\.,var/.,var
    ...        might => S\\NP/VP
    ...        eat => VP/NP
    ...        the => NP/N
    ...        mushrooms => N
    ...        parsnips => N'''
    >>> test2_lex = '''
    ...         :- N, S, NP, VP
    ...         articles => N
    ...         the => NP/N
    ...         and => var\\.,var/.,var
    ...         which => (N\\N)/(S/NP)
    ...         I => NP
    ...         anyone => NP
    ...         will => (S/VP)\\NP
    ...         file => VP/NP
    ...         without => (VP\\VP)/VP[ing]
    ...         forget => VP/NP
    ...         reading => VP[ing]/NP
    ...         '''

Tests handling of conjunctions.
Note that while the two derivations are different, they are semantically equivalent.

    >>> lex = lexicon.parseLexicon(test1_lex)
    >>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet)
    >>> for parse in parser.parse("I will cook and might eat the mushrooms and parsnips".split()):
    ...     printCCGDerivation(parse)
     I      will       cook               and                might       eat     the    mushrooms             and             parsnips
     NP  ((S\NP)/VP)  (VP/NP)  ((_var0\.,_var0)/.,_var0)  ((S\NP)/VP)  (VP/NP)  (NP/N)      N      ((_var0\.,_var0)/.,_var0)     N
        ---------------------->B
             ((S\NP)/NP)
                                                         ---------------------->B
                                                              ((S\NP)/NP)
                              ------------------------------------------------->
                                         (((S\NP)/NP)\.,((S\NP)/NP))
        -----------------------------------------------------------------------<
                                      ((S\NP)/NP)
                                                                                                  ------------------------------------->
                                                                                                                 (N\.,N)
                                                                                       ------------------------------------------------<
                                                                                                              N
                                                                               -------------------------------------------------------->
                                                                                                          NP
        ------------------------------------------------------------------------------------------------------------------------------->
                                                                    (S\NP)
    -----------------------------------------------------------------------------------------------------------------------------------<
                                                                     S
     I      will       cook               and                might       eat     the    mushrooms             and             parsnips
     NP  ((S\NP)/VP)  (VP/NP)  ((_var0\.,_var0)/.,_var0)  ((S\NP)/VP)  (VP/NP)  (NP/N)      N      ((_var0\.,_var0)/.,_var0)     N
        ---------------------->B
             ((S\NP)/NP)
                                                         ---------------------->B
                                                              ((S\NP)/NP)
                              ------------------------------------------------->
                                         (((S\NP)/NP)\.,((S\NP)/NP))
        -----------------------------------------------------------------------<
                                      ((S\NP)/NP)
        ------------------------------------------------------------------------------->B
                                          ((S\NP)/N)
                                                                                                  ------------------------------------->
                                                                                                                 (N\.,N)
                                                                                       ------------------------------------------------<
                                                                                                              N
        ------------------------------------------------------------------------------------------------------------------------------->
                                                                    (S\NP)
    -----------------------------------------------------------------------------------------------------------------------------------<
                                                                     S


Tests handling of subject extraction.
It is interesting to point out that the two parses are clearly semantically different.

    >>> lex = lexicon.parseLexicon(test2_lex)
    >>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet)
    >>> for parse in parser.parse("articles which I will file and forget without reading".split()):
    ...     printCCGDerivation(parse)
     articles      which       I      will       file               and             forget         without           reading
        N      ((N\N)/(S/NP))  NP  ((S/VP)\NP)  (VP/NP)  ((_var0\.,_var0)/.,_var0)  (VP/NP)  ((VP\VP)/VP['ing'])  (VP['ing']/NP)
                              -----------------<
                                   (S/VP)
                                                                                            ------------------------------------->B
                                                                                                        ((VP\VP)/NP)
                                                                                   ----------------------------------------------<Sx
                                                                                                      (VP/NP)
                                                        ------------------------------------------------------------------------->
                                                                                   ((VP/NP)\.,(VP/NP))
                                               ----------------------------------------------------------------------------------<
                                                                                    (VP/NP)
                              --------------------------------------------------------------------------------------------------->B
                                                                            (S/NP)
              ------------------------------------------------------------------------------------------------------------------->
                                                                     (N\N)
    -----------------------------------------------------------------------------------------------------------------------------<
                                                                  N
     articles      which       I      will       file               and             forget         without           reading
        N      ((N\N)/(S/NP))  NP  ((S/VP)\NP)  (VP/NP)  ((_var0\.,_var0)/.,_var0)  (VP/NP)  ((VP\VP)/VP['ing'])  (VP['ing']/NP)
                              -----------------<
                                   (S/VP)
                                                        ------------------------------------>
                                                                ((VP/NP)\.,(VP/NP))
                                               ---------------------------------------------<
                                                                  (VP/NP)
                                                                                            ------------------------------------->B
                                                                                                        ((VP\VP)/NP)
                                               ----------------------------------------------------------------------------------<Sx
                                                                                    (VP/NP)
                              --------------------------------------------------------------------------------------------------->B
                                                                            (S/NP)
              ------------------------------------------------------------------------------------------------------------------->
                                                                     (N\N)
    -----------------------------------------------------------------------------------------------------------------------------<
                                                                  N


Unicode support
---------------

Unicode words are supported.

    >>> from nltk.ccg import chart, lexicon

Lexicon for the test:

    >>> lex = lexicon.parseLexicon(u'''
    ...        :- S, N, NP, PP
    ...
    ...        AdjI :: N\\N
    ...        AdjD :: N/N
    ...        AdvD :: S/S
    ...        AdvI :: S\\S
    ...        Det :: NP/N
    ...        PrepNPCompl :: PP/NP
    ...        PrepNAdjN :: S\\S/N
    ...        PrepNAdjNP :: S\\S/NP
    ...        VPNP :: S\\NP/NP
    ...        VPPP :: S\\NP/PP
    ...        VPser :: S\\NP/AdjI
    ...
    ...        auto => N
    ...        bebidas => N
    ...        cine => N
    ...        ley => N
    ...        libro => N
    ...        ministro => N
    ...        panadería => N
    ...        presidente => N
    ...        super => N
    ...
    ...        el => Det
    ...        la => Det
    ...        las => Det
    ...        un => Det
    ...
    ...        Ana => NP
    ...        Pablo => NP
    ...
    ...        y => var\\.,var/.,var
    ...
    ...        pero => (S/NP)\\(S/NP)/(S/NP)
    ...
    ...        anunció => VPNP
    ...        compró => VPNP
    ...        cree => S\\NP/S[dep]
    ...        desmintió => VPNP
    ...        lee => VPNP
    ...        fueron => VPPP
    ...
    ...        es => VPser
    ...
    ...        interesante => AdjD
    ...        interesante => AdjI
    ...        nueva => AdjD
    ...        nueva => AdjI
    ...
    ...        a => PrepNPCompl
    ...        en => PrepNAdjN
    ...        en => PrepNAdjNP
    ...
    ...        ayer => AdvI
    ...
    ...        que => (NP\\NP)/(S/NP)
    ...        que => S[dep]/S
    ...     ''')

    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
    >>> for parse in parser.parse(u"el ministro anunció pero el presidente desmintió la nueva ley".split()):
    ...     printCCGDerivation(parse) # doctest: +SKIP 
    ...     # it fails on python2.7 because of the unicode problem explained in https://github.com/nltk/nltk/pull/1354
    ...     break
       el    ministro    anunció              pero              el    presidente   desmintió     la    nueva  ley
     (NP/N)     N      ((S\NP)/NP)  (((S/NP)\(S/NP))/(S/NP))  (NP/N)      N       ((S\NP)/NP)  (NP/N)  (N/N)   N
    ------------------>
            NP
    ------------------>T
        (S/(S\NP))
                                                             -------------------->
                                                                      NP
                                                             -------------------->T
                                                                  (S/(S\NP))
                                                             --------------------------------->B
                                                                          (S/NP)
                                   ----------------------------------------------------------->
                                                         ((S/NP)\(S/NP))
                                                                                                      ------------>
                                                                                                           N
                                                                                              -------------------->
                                                                                                       NP
                                                                                              --------------------<T
                                                                                                   (S\(S/NP))
                                   -------------------------------------------------------------------------------<B
                                                                     (S\(S/NP))
                      --------------------------------------------------------------------------------------------<B
                                                                 (S/NP)
    -------------------------------------------------------------------------------------------------------------->
                                                          S