12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614 |
- # Natural Language Toolkit: Interface to Boxer
- # <http://svn.ask.it.usyd.edu.au/trac/candc/wiki/boxer>
- #
- # Author: Dan Garrette <dhgarrette@gmail.com>
- #
- # Copyright (C) 2001-2019 NLTK Project
- # URL: <http://nltk.org/>
- # For license information, see LICENSE.TXT
- """
- An interface to Boxer.
- This interface relies on the latest development (Subversion) version of
- C&C and Boxer.
- Usage:
- Set the environment variable CANDC to the bin directory of your CandC installation.
- The models directory should be in the CandC root directory.
- For example:
-     /path/to/candc/
-         bin/
-             candc
-             boxer
-         models/
-             boxer/
- """
- from __future__ import print_function, unicode_literals
- import os
- import re
- import operator
- import subprocess
- from optparse import OptionParser
- import tempfile
- from functools import reduce
- from nltk.internals import find_binary
- from nltk.sem.logic import (
- ExpectedMoreTokensException,
- LogicalExpressionException,
- UnexpectedTokenException,
- Variable,
- )
- from nltk.sem.drt import (
- DRS,
- DrtApplicationExpression,
- DrtEqualityExpression,
- DrtNegatedExpression,
- DrtOrExpression,
- DrtParser,
- DrtProposition,
- DrtTokens,
- DrtVariableExpression,
- )
- from nltk.compat import python_2_unicode_compatible
class Boxer(object):
    """
    This class is an interface to Johan Bos's program Boxer, a wide-coverage
    semantic parser that produces Discourse Representation Structures (DRSs).
    """

    def __init__(
        self,
        boxer_drs_interpreter=None,
        elimeq=False,
        bin_dir=None,
        verbose=False,
        resolve=True,
    ):
        """
        :param boxer_drs_interpreter: A class that converts from the
            ``AbstractBoxerDrs`` object hierarchy to a different object.  The
            default is ``NltkDrtBoxerDrsInterpreter``, which converts to the NLTK
            DRT hierarchy.
        :param elimeq: When set to true, Boxer removes all equalities from the
            DRSs and discourse referents standing in the equality relation are
            unified, but only if this can be done in a meaning-preserving manner.
        :param bin_dir: Directory in which to look for the ``candc`` and
            ``boxer`` binaries; passed on to ``set_bin_dir``.
        :param verbose: Passed on to the binary lookup.
        :param resolve: When set to true, Boxer will resolve all anaphoric DRSs
            and perform merge-reduction.  Resolution follows Van der Sandt's
            theory of binding and accommodation.
        """
        if boxer_drs_interpreter is None:
            boxer_drs_interpreter = NltkDrtBoxerDrsInterpreter()
        self._boxer_drs_interpreter = boxer_drs_interpreter

        self._resolve = resolve
        self._elimeq = elimeq

        self.set_bin_dir(bin_dir, verbose)

    def set_bin_dir(self, bin_dir, verbose=False):
        """
        Locate the ``candc`` and ``boxer`` binaries and derive the models path.

        The models directory is taken to be the ``models`` sibling of the
        directory that contains the ``candc`` binary.

        :param bin_dir: Directory to search for the binaries (may be None).
        :param verbose: Passed on to the binary lookup.
        """
        self._candc_bin = self._find_binary('candc', bin_dir, verbose)
        # Use dirname() rather than stripping a fixed number of characters, so
        # the path is also right when the binary found is ``candc.exe``.
        self._candc_models_path = os.path.normpath(
            os.path.join(os.path.dirname(self._candc_bin), '..', 'models')
        )
        self._boxer_bin = self._find_binary('boxer', bin_dir, verbose)

    def interpret(self, input, discourse_id=None, question=False, verbose=False):
        """
        Use Boxer to give a first order representation.

        :param input: str Input sentence to parse
        :param discourse_id: str An identifier to be inserted to each
            occurrence-indexed predicate.
        :param question: bool When true, the ``questions`` C&C model directory
            is used instead of the ``boxer`` one.
        :param verbose: bool Print details of the external calls.
        :return: ``drt.DrtExpression``
        :raise Exception: if no representation was produced for the input
        """
        discourse_ids = [discourse_id] if discourse_id is not None else None
        (d,) = self.interpret_multi_sents([[input]], discourse_ids, question, verbose)
        if not d:
            raise Exception('Unable to interpret: "{0}"'.format(input))
        return d

    def interpret_multi(self, input, discourse_id=None, question=False, verbose=False):
        """
        Use Boxer to give a first order representation.

        :param input: list of str Input sentences to parse as a single discourse
        :param discourse_id: str An identifier to be inserted to each
            occurrence-indexed predicate.
        :param question: bool When true, the ``questions`` C&C model directory
            is used instead of the ``boxer`` one.
        :param verbose: bool Print details of the external calls.
        :return: ``drt.DrtExpression``
        :raise Exception: if no representation was produced for the input
        """
        discourse_ids = [discourse_id] if discourse_id is not None else None
        (d,) = self.interpret_multi_sents([input], discourse_ids, question, verbose)
        if not d:
            raise Exception('Unable to interpret: "{0}"'.format(input))
        return d

    def interpret_sents(
        self, inputs, discourse_ids=None, question=False, verbose=False
    ):
        """
        Use Boxer to give a first order representation.

        :param inputs: list of str Input sentences to parse as individual discourses
        :param discourse_ids: list of str Identifiers to be inserted to each
            occurrence-indexed predicate.
        :param question: bool When true, the ``questions`` C&C model directory
            is used instead of the ``boxer`` one.
        :param verbose: bool Print details of the external calls.
        :return: list of ``drt.DrtExpression``
        """
        return self.interpret_multi_sents(
            [[sentence] for sentence in inputs], discourse_ids, question, verbose
        )

    def interpret_multi_sents(
        self, inputs, discourse_ids=None, question=False, verbose=False
    ):
        """
        Use Boxer to give a first order representation.

        :param inputs: list of list of str Input discourses to parse
        :param discourse_ids: list of str Identifiers to be inserted to each
            occurrence-indexed predicate.  Must have one non-None entry per
            discourse when given.
        :param question: bool When true, the ``questions`` C&C model directory
            is used instead of the ``boxer`` one.
        :param verbose: bool Print details of the external calls.
        :return: list with one entry per discourse; an entry is None when the
            Boxer output contains no DRS for that discourse
        """
        if discourse_ids is not None:
            assert len(inputs) == len(discourse_ids)
            # all() also copes with an empty list, unlike reduce() without an
            # initial value.
            assert all(disc_id is not None for disc_id in discourse_ids)
            use_disc_id = True
        else:
            # No ids supplied: number the discourses so that the output can
            # still be matched back to the inputs.
            discourse_ids = [str(i) for i in range(len(inputs))]
            use_disc_id = False

        candc_out = self._call_candc(inputs, discourse_ids, question, verbose=verbose)
        boxer_out = self._call_boxer(candc_out, verbose=verbose)

        drs_dict = self._parse_to_drs_dict(boxer_out, use_disc_id)
        return [drs_dict.get(disc_id, None) for disc_id in discourse_ids]

    def _call_candc(self, inputs, discourse_ids, question, verbose=False):
        """
        Call the ``candc`` binary with the given input.

        :param inputs: list of list of str Input discourses to parse
        :param discourse_ids: list of str Identifiers to be inserted to each
            occurrence-indexed predicate.
        :param question: bool Select the ``questions`` models instead of the
            ``boxer`` models.
        :param verbose: bool Print details of the external call.
        :return: stdout
        """
        model_name = 'questions' if question else 'boxer'
        args = [
            '--models',
            os.path.join(self._candc_models_path, model_name),
            '--candc-printer',
            'boxer',
        ]
        # Each discourse is announced by a <META> line carrying its id,
        # followed by its sentences, one per line.
        lines = []
        for discourse, disc_id in zip(inputs, discourse_ids):
            lines.append("<META>'{0}'".format(disc_id))
            lines.extend(discourse)
        return self._call('\n'.join(lines), self._candc_bin, args, verbose)

    def _call_boxer(self, candc_out, verbose=False):
        """
        Call the ``boxer`` binary with the given input.

        The C&C output is handed to Boxer through a temporary file, which is
        removed again even when the call fails.

        :param candc_out: output from the C&C parser (bytes or text)
        :param verbose: bool Print details of the external call.
        :return: stdout
        """
        # Write the raw bytes; this works for the bytes returned by the
        # subprocess call on Python 3 as well as for text input.
        if not isinstance(candc_out, bytes):
            candc_out = candc_out.encode('utf-8')

        fd, temp_filename = tempfile.mkstemp(prefix='boxer-', suffix='.in')
        try:
            with os.fdopen(fd, 'wb') as f:
                f.write(candc_out)

            args = [
                '--box',
                'false',
                '--semantics',
                'drs',
                # '--flat', 'false', # removed from boxer
                '--resolve',
                'true' if self._resolve else 'false',
                '--elimeq',
                'true' if self._elimeq else 'false',
                '--format',
                'prolog',
                '--instantiate',
                'true',
                '--input',
                temp_filename,
            ]
            return self._call(None, self._boxer_bin, args, verbose)
        finally:
            os.remove(temp_filename)

    def _find_binary(self, name, bin_dir, verbose=False):
        """
        Look up the binary ``name`` (or ``name.exe``) via ``find_binary``,
        searching ``bin_dir`` and the ``CANDC`` environment variable.

        :return: whatever ``find_binary`` returns for the binary
        """
        return find_binary(
            name,
            path_to_bin=bin_dir,
            env_vars=['CANDC'],
            url='http://svn.ask.it.usyd.edu.au/trac/candc/',
            binary_names=[name, name + '.exe'],
            verbose=verbose,
        )

    def _call(self, input_str, binary, args=None, verbose=False):
        """
        Call the binary with the given input.

        :param input_str: A string whose contents are used as stdin, or None
            to leave stdin untouched.
        :param binary: The location of the binary to call
        :param args: A list of command-line arguments.
        :param verbose: bool Print the command, return code and output.
        :return: stdout
        :raise Exception: if the binary exits with a non-zero return code
        """
        args = [] if args is None else list(args)
        if verbose:
            print('Calling:', binary)
            print('Args:', args)
            print('Input:', input_str)
            print('Command:', binary + ' ' + ' '.join(args))

        # Call via a subprocess.  The input is fed through stdin instead of an
        # ``echo "..." |`` shell pipeline, so quotes, ``$`` or backticks in the
        # sentences cannot break or inject into a shell command.
        cmd = [binary] + args
        if input_str is None:
            p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout, stderr = p.communicate()
        else:
            # Keep the trailing newline that ``echo`` used to append.
            if isinstance(input_str, bytes):
                stdin_data = input_str + b'\n'
            else:
                stdin_data = (input_str + '\n').encode('utf-8')
            p = subprocess.Popen(
                cmd,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            stdout, stderr = p.communicate(stdin_data)

        if verbose:
            print('Return code:', p.returncode)
            if stdout:
                print('stdout:\n', stdout, '\n')
            if stderr:
                print('stderr:\n', stderr, '\n')
        if p.returncode != 0:
            if isinstance(stderr, bytes):
                # Show readable text rather than a bytes repr in the message.
                stderr = stderr.decode('utf-8', 'replace')
            raise Exception(
                'ERROR CALLING: {0} {1}\nReturncode: {2}\n{3}'.format(
                    binary, ' '.join(args), p.returncode, stderr
                )
            )

        return stdout

    def _parse_to_drs_dict(self, boxer_out, use_disc_id):
        """
        Turn Boxer's Prolog output into a dict of interpreted DRSs.

        Every ``id(<discourse id>,<drs id>)`` line must be followed directly by
        the matching ``sem(<drs id>,[...],<drs>).`` line.

        :param boxer_out: stdout of the ``boxer`` binary (bytes or text)
        :param use_disc_id: bool Whether the discourse id is passed on to the
            DRS parser.
        :return: dict mapping discourse id to the interpreted DRS
        """
        if isinstance(boxer_out, bytes) and bytes is not str:
            # Python 3: subprocess output is bytes, the parsing below is textual.
            boxer_out = boxer_out.decode('utf-8')

        lines = boxer_out.split('\n')
        drs_dict = {}
        i = 0
        while i < len(lines):
            line = lines[i]
            if line.startswith('id('):
                comma_idx = line.index(',')
                discourse_id = line[3:comma_idx]
                if discourse_id.startswith("'") and discourse_id.endswith("'"):
                    discourse_id = discourse_id[1:-1]
                drs_id = line[comma_idx + 1 : line.index(')')]

                i += 1
                assert i < len(lines), 'missing sem/2 line for DRS {0}'.format(drs_id)
                line = lines[i]
                assert line.startswith('sem({0},'.format(drs_id))
                if line[-4:] == "').'":
                    line = line[:-4] + ")."
                assert line.endswith(').'), "can't parse line: {0}".format(line)

                # Skip the bracketed word list: find the ']' that closes the
                # '[' right after "sem(<id>,"; the DRS starts after it.
                search_start = len('sem({0},['.format(drs_id))
                brace_count = 1
                drs_start = -1
                for j, c in enumerate(line[search_start:]):
                    if c == '[':
                        brace_count += 1
                    if c == ']':
                        brace_count -= 1
                        if brace_count == 0:
                            drs_start = search_start + j + 1
                            if line[drs_start : drs_start + 3] == "','":
                                drs_start = drs_start + 3
                            else:
                                drs_start = drs_start + 1
                            break
                assert drs_start > -1

                # Drop the trailing ")." that closes the sem/2 term.
                drs_input = line[drs_start:-2].strip()
                parsed = self._parse_drs(drs_input, discourse_id, use_disc_id)
                drs_dict[discourse_id] = self._boxer_drs_interpreter.interpret(parsed)
            i += 1
        return drs_dict

    def _parse_drs(self, drs_string, discourse_id, use_disc_id):
        """
        Parse one Prolog DRS string with ``BoxerOutputDrsParser``.

        The discourse id is only handed to the parser when ``use_disc_id`` is
        true; otherwise the parser is created with None.
        """
        parser_disc_id = discourse_id if use_disc_id else None
        return BoxerOutputDrsParser(parser_disc_id).parse(drs_string)
class BoxerOutputDrsParser(DrtParser):
    """
    Parser for the Prolog DRS output of Boxer; builds ``Boxer*`` objects
    (``BoxerDrs``, ``BoxerPred``, ``BoxerRel``, ...).
    """

    def __init__(self, discourse_id=None):
        """
        This class is used to parse the Prolog DRS output from Boxer into a
        hierarchy of python objects.

        :param discourse_id: identifier stored on every occurrence-indexed
            object that this parser creates
        """
        DrtParser.__init__(self)
        self.discourse_id = discourse_id
        self.sentence_id_offset = None
        self.quote_chars = [("'", "'", "\\", False)]

    def parse(self, data, signature=None):
        """Parse ``data`` by delegating to ``DrtParser.parse``."""
        return DrtParser.parse(self, data, signature)

    def get_all_symbols(self):
        """Return the symbols used by the Prolog output format."""
        return ['(', ')', ',', '[', ']', ':']

    def handle(self, tok, context):
        """Every top-level expression is a DRS; delegate to ``handle_drs``."""
        return self.handle_drs(tok)

    def attempt_adjuncts(self, expression, context):
        """Return ``expression`` unchanged."""
        return expression

    def parse_condition(self, indices):
        """
        Parse a DRS condition

        :param indices: list of int indices parsed in front of the condition
        :return: list of ``DrtExpression``
        :raise UnexpectedTokenException: if ``handle_condition`` returns None
        """
        tok = self.token()
        accum = self.handle_condition(tok, indices)
        if accum is None:
            # The first argument of the exception is the position, the second
            # the offending token.
            raise UnexpectedTokenException(self._currentIndex, tok)
        return accum

    def handle_drs(self, tok):
        """
        Parse the DRS-valued term introduced by ``tok``.

        :param tok: ``drs``, ``merge``, ``smerge`` or ``alfa``
        :return: the parsed ``BoxerDrs``; None for any other token
        """
        if tok == 'drs':
            return self.parse_drs()
        elif tok in ['merge', 'smerge']:
            return self._handle_binary_expression(self._make_merge_expression)(None, [])
        elif tok in ['alfa']:
            return self._handle_alfa(self._make_merge_expression)(None, [])

    def handle_condition(self, tok, indices):
        """
        Handle a DRS condition

        :param tok: name of the condition (``pred``, ``rel``, ``not``, ...)
        :param indices: list of int
        :return: list of ``DrtExpression``; empty when ``tok`` is not one of
            the handled condition names
        """
        if tok == 'not':
            # Negation carries no occurrence information of its own.
            return [self._handle_not()]

        if tok == 'or':
            conds = [self._handle_binary_expression(self._make_or_expression)]
        elif tok == 'imp':
            conds = [self._handle_binary_expression(self._make_imp_expression)]
        elif tok == 'eq':
            conds = [self._handle_eq()]
        elif tok == 'prop':
            conds = [self._handle_prop()]
        elif tok == 'pred':
            conds = [self._handle_pred()]
        elif tok == 'named':
            conds = [self._handle_named()]
        elif tok == 'rel':
            conds = [self._handle_rel()]
        elif tok == 'timex':
            conds = self._handle_timex()
        elif tok == 'card':
            conds = [self._handle_card()]
        elif tok == 'whq':
            conds = [self._handle_whq()]
        elif tok == 'duplex':
            conds = [self._handle_duplex()]
        else:
            conds = []

        # Each handler returned a callback; instantiate it once for every
        # (sentence index, word indices) pair derived from ``indices``.
        return [
            cond(sent_index, word_indices)
            for sent_index, word_indices in self._sent_and_word_indices(indices)
            for cond in conds
        ]

    def _handle_not(self):
        """Parse ``(<drs>)`` after ``not`` and return a ``BoxerNot``."""
        self.assertToken(self.token(), '(')
        drs = self.process_next_expression(None)
        self.assertToken(self.token(), ')')
        return BoxerNot(drs)

    def _handle_pred(self):
        """Parse ``(var, name, pos, sense)``; return a ``BoxerPred`` factory."""
        # pred(_G3943, dog, n, 0)
        self.assertToken(self.token(), '(')
        variable = self.parse_variable()
        self.assertToken(self.token(), ',')
        name = self.token()
        self.assertToken(self.token(), ',')
        pos = self.token()
        self.assertToken(self.token(), ',')
        sense = int(self.token())
        self.assertToken(self.token(), ')')

        def _handle_pred_f(sent_index, word_indices):
            return BoxerPred(
                self.discourse_id, sent_index, word_indices, variable, name, pos, sense
            )

        return _handle_pred_f

    def _handle_duplex(self):
        """
        Parse ``(whq, <drs>, var, <drs>)``; return a ``BoxerWhq`` factory.

        The answer-type list is always empty for this form.
        """
        # duplex(whq, drs(...), var, drs(...))
        self.assertToken(self.token(), '(')
        ans_types = []
        self.assertToken(self.token(), 'whq')
        self.assertToken(self.token(), ',')
        d1 = self.process_next_expression(None)
        self.assertToken(self.token(), ',')
        ref = self.parse_variable()
        self.assertToken(self.token(), ',')
        d2 = self.process_next_expression(None)
        self.assertToken(self.token(), ')')
        return lambda sent_index, word_indices: BoxerWhq(
            self.discourse_id, sent_index, word_indices, ans_types, d1, ref, d2
        )

    def _handle_named(self):
        """Parse ``(var, name, type, sense)``; return a ``BoxerNamed`` factory."""
        # named(x0, john, per, 0)
        self.assertToken(self.token(), '(')
        variable = self.parse_variable()
        self.assertToken(self.token(), ',')
        name = self.token()
        self.assertToken(self.token(), ',')
        ne_type = self.token()
        self.assertToken(self.token(), ',')
        sense = self.token()  # as per boxer rev 2554
        self.assertToken(self.token(), ')')
        return lambda sent_index, word_indices: BoxerNamed(
            self.discourse_id, sent_index, word_indices, variable, name, ne_type, sense
        )

    def _handle_rel(self):
        """Parse ``(var1, var2, rel, sense)``; return a ``BoxerRel`` factory."""
        # rel(_G3993, _G3943, agent, 0)
        self.assertToken(self.token(), '(')
        var1 = self.parse_variable()
        self.assertToken(self.token(), ',')
        var2 = self.parse_variable()
        self.assertToken(self.token(), ',')
        rel = self.token()
        self.assertToken(self.token(), ',')
        sense = int(self.token())
        self.assertToken(self.token(), ')')
        return lambda sent_index, word_indices: BoxerRel(
            self.discourse_id, sent_index, word_indices, var1, var2, rel, sense
        )

    def _handle_timex(self):
        """Parse ``(var, <time expression>)``; return a list of factories."""
        # timex(_G18322, date([]: (+), []:'XXXX', [1004]:'04', []:'XX'))
        self.assertToken(self.token(), '(')
        arg = self.parse_variable()
        self.assertToken(self.token(), ',')
        new_conds = self._handle_time_expression(arg)
        self.assertToken(self.token(), ')')
        return new_conds

    def _handle_time_expression(self, arg):
        """
        Parse a ``date(...)`` or ``time(...)`` term.

        :param arg: the variable the time expression is about
        :return: list of callbacks taking ``(sent_index, word_indices)``: one
            building a ``BoxerPred`` named after the expression type, followed
            by one per condition produced for the components, each returning
            that already-built condition
        :raise UnexpectedTokenException: if the term is neither ``date`` nor
            ``time``
        """
        # date([]: (+), []:'XXXX', [1004]:'04', []:'XX')
        tok = self.token()
        self.assertToken(self.token(), '(')
        if tok == 'date':
            conds = self._handle_date(arg)
        elif tok == 'time':
            conds = self._handle_time(arg)
        else:
            # Returning None here would only fail later when the caller
            # iterates over the result; report the bad token directly.
            raise UnexpectedTokenException(self._currentIndex, tok)
        self.assertToken(self.token(), ')')

        def _type_pred(sent_index, word_indices):
            return BoxerPred(
                self.discourse_id, sent_index, word_indices, arg, tok, 'n', 0
            )

        def _constant(cond):
            # Bind ``cond`` per callback; a bare lambda in the comprehension
            # would make every callback return the last condition.
            return lambda sent_index, word_indices: cond

        return [_type_pred] + [_constant(cond) for cond in conds]

    def _handle_date(self, arg):
        """
        Parse the four components of a date (polarity, year, month, day).

        :param arg: the variable the date is about
        :return: list of ``BoxerPred``; a component whose value is the
            placeholder (``XXXX`` for the year, ``XX`` for month and day) adds
            no predicate
        """
        # []: (+), []:'XXXX', [1004]:'04', []:'XX'

        def next_position():
            # Every component is preceded by its own index list, which must
            # map to exactly one (sent_index, word_indices) pair.
            ((sent_index, word_indices),) = self._sent_and_word_indices(
                self._parse_index_list()
            )
            return sent_index, word_indices

        def date_pred(position, name):
            sent_index, word_indices = position
            return BoxerPred(
                self.discourse_id, sent_index, word_indices, arg, name, 'a', 0
            )

        conds = []

        position = next_position()
        self.assertToken(self.token(), '(')
        pol = self.token()
        self.assertToken(self.token(), ')')
        conds.append(date_pred(position, 'date_pol_{0}'.format(pol)))
        self.assertToken(self.token(), ',')

        position = next_position()
        year = self.token()
        if year != 'XXXX':
            year = year.replace(':', '_')
            conds.append(date_pred(position, 'date_year_{0}'.format(year)))
        self.assertToken(self.token(), ',')

        position = next_position()
        month = self.token()
        if month != 'XX':
            conds.append(date_pred(position, 'date_month_{0}'.format(month)))
        self.assertToken(self.token(), ',')

        position = next_position()
        day = self.token()
        if day != 'XX':
            conds.append(date_pred(position, 'date_day_{0}'.format(day)))

        return conds

    def _handle_time(self, arg):
        """
        Parse the three components of a time (hour, minute, second).

        :param arg: the variable the time is about
        :return: list with one atom per component that is not ``XX``
        """
        # time([1018]:'18', []:'XX', []:'XX')
        # NOTE(review): ``_make_atom`` is not defined in this class; confirm
        # that a base class provides it, otherwise this fails for any
        # component that is not 'XX'.
        conds = []
        self._parse_index_list()
        hour = self.token()
        if hour != 'XX':
            conds.append(self._make_atom('r_hour_2', arg, hour))
        self.assertToken(self.token(), ',')

        self._parse_index_list()
        minute = self.token()
        if minute != 'XX':
            conds.append(self._make_atom('r_min_2', arg, minute))
        self.assertToken(self.token(), ',')

        self._parse_index_list()
        sec = self.token()
        if sec != 'XX':
            conds.append(self._make_atom('r_sec_2', arg, sec))

        return conds

    def _handle_card(self):
        """Parse ``(var, value, type)``; return a ``BoxerCard`` factory."""
        # card(_G18535, 28, ge)
        self.assertToken(self.token(), '(')
        variable = self.parse_variable()
        self.assertToken(self.token(), ',')
        value = self.token()
        self.assertToken(self.token(), ',')
        card_type = self.token()
        self.assertToken(self.token(), ')')
        return lambda sent_index, word_indices: BoxerCard(
            self.discourse_id, sent_index, word_indices, variable, value, card_type
        )

    def _handle_prop(self):
        """Parse ``(var, <drs>)``; return a ``BoxerProp`` factory."""
        # prop(_G15949, drs(...))
        self.assertToken(self.token(), '(')
        variable = self.parse_variable()
        self.assertToken(self.token(), ',')
        drs = self.process_next_expression(None)
        self.assertToken(self.token(), ')')
        return lambda sent_index, word_indices: BoxerProp(
            self.discourse_id, sent_index, word_indices, variable, drs
        )

    def _parse_index_list(self):
        """
        Parse an index list followed by a colon, e.g. ``[1001,1002]:``.

        :return: list of int
        """
        # [1001,1002]:
        indices = []
        self.assertToken(self.token(), '[')
        while self.token(0) != ']':
            indices.append(self.parse_index())
            if self.token(0) == ',':
                self.token()  # swallow ','
        self.token()  # swallow ']'
        self.assertToken(self.token(), ':')
        return indices

    def parse_drs(self):
        """
        Parse the argument list of a ``drs(...)`` term: a list of indexed
        referents followed by a list of indexed conditions.

        :return: ``BoxerDrs`` with the distinct referents (in order of first
            appearance) and the parsed conditions
        """
        # drs([[1001]:_G3943],
        #     [[1002]:pred(_G3943, dog, n, 0)]
        #    )
        self.assertToken(self.token(), '(')
        self.assertToken(self.token(), '[')
        refs = []
        seen_refs = set()
        while self.token(0) != ']':
            self._parse_index_list()  # the referent's indices are not used
            ref = self.parse_variable()
            if ref not in seen_refs:
                # De-duplicate but keep a deterministic order.
                seen_refs.add(ref)
                refs.append(ref)
            if self.token(0) == ',':
                self.token()  # swallow ','
        self.token()  # swallow ']'
        self.assertToken(self.token(), ',')
        self.assertToken(self.token(), '[')
        conds = []
        while self.token(0) != ']':
            indices = self._parse_index_list()
            conds.extend(self.parse_condition(indices))
            if self.token(0) == ',':
                self.token()  # swallow ','
        self.token()  # swallow ']'
        self.assertToken(self.token(), ')')
        return BoxerDrs(refs, conds)

    def _handle_binary_expression(self, make_callback):
        """
        Parse ``(<drs1>, <drs2>)`` and return a callback that combines the two
        DRSs with ``make_callback(sent_index, word_indices, drs1, drs2)``.
        """
        self.assertToken(self.token(), '(')
        drs1 = self.process_next_expression(None)
        self.assertToken(self.token(), ',')
        drs2 = self.process_next_expression(None)
        self.assertToken(self.token(), ')')
        return lambda sent_index, word_indices: make_callback(
            sent_index, word_indices, drs1, drs2
        )

    def _handle_alfa(self, make_callback):
        """
        Parse ``(<type>, <drs1>, <drs2>)``; the type token is read and
        discarded, the rest is handled like a binary expression.
        """
        self.assertToken(self.token(), '(')
        self.token()  # the alfa type is not used
        self.assertToken(self.token(), ',')
        drs1 = self.process_next_expression(None)
        self.assertToken(self.token(), ',')
        drs2 = self.process_next_expression(None)
        self.assertToken(self.token(), ')')
        return lambda sent_index, word_indices: make_callback(
            sent_index, word_indices, drs1, drs2
        )

    def _handle_eq(self):
        """Parse ``(var1, var2)``; return a ``BoxerEq`` factory."""
        self.assertToken(self.token(), '(')
        var1 = self.parse_variable()
        self.assertToken(self.token(), ',')
        var2 = self.parse_variable()
        self.assertToken(self.token(), ')')
        return lambda sent_index, word_indices: BoxerEq(
            self.discourse_id, sent_index, word_indices, var1, var2
        )

    def _handle_whq(self):
        """
        Parse ``([<answer types>], <drs>, var, <drs>)``; return a ``BoxerWhq``
        factory.

        Answer types are given as ``category:value`` pairs.  For the ``num``
        category ``number`` is recorded, followed by ``count`` when the value
        is ``cou`` and by the value itself otherwise; for every other category
        the value is recorded.
        """
        self.assertToken(self.token(), '(')
        self.assertToken(self.token(), '[')
        ans_types = []
        while self.token(0) != ']':
            cat = self.token()
            self.assertToken(self.token(), ':')
            if cat == 'num':
                ans_types.append('number')
                typ = self.token()
                ans_types.append('count' if typ == 'cou' else typ)
            else:
                # 'des' and every other category just contribute their value.
                ans_types.append(self.token())
        self.token()  # swallow the ']'

        self.assertToken(self.token(), ',')
        d1 = self.process_next_expression(None)
        self.assertToken(self.token(), ',')
        ref = self.parse_variable()
        self.assertToken(self.token(), ',')
        d2 = self.process_next_expression(None)
        self.assertToken(self.token(), ')')
        return lambda sent_index, word_indices: BoxerWhq(
            self.discourse_id, sent_index, word_indices, ans_types, d1, ref, d2
        )

    def _make_merge_expression(self, sent_index, word_indices, drs1, drs2):
        """Merge two DRSs by concatenating their referents and conditions."""
        return BoxerDrs(drs1.refs + drs2.refs, drs1.conds + drs2.conds)

    def _make_or_expression(self, sent_index, word_indices, drs1, drs2):
        """Build a ``BoxerOr`` of the two DRSs."""
        return BoxerOr(self.discourse_id, sent_index, word_indices, drs1, drs2)

    def _make_imp_expression(self, sent_index, word_indices, drs1, drs2):
        """Build a ``BoxerDrs`` from ``drs1`` with ``drs2`` as third argument."""
        return BoxerDrs(drs1.refs, drs1.conds, drs2)

    def parse_variable(self):
        """
        Read the next token and check that it is a variable name: one of the
        letters ``e``, ``x``, ``p``, ``s`` followed by digits.

        :return: the variable name
        """
        var = self.token()
        assert re.match(r'^[exps]\d+$', var), var
        return var

    def parse_index(self):
        """Read the next token as an int index."""
        return int(self.token())

    def _sent_and_word_indices(self, indices):
        """
        Split combined indices into sentence and word indices.

        An index ``i`` encodes the sentence as ``i // 1000`` and the word as
        ``i % 1000``, both one-based; the returned indices are zero-based.

        :param indices: list of int
        :return: list of (sent_index, word_indices) tuples; a single
            ``(None, word_indices)`` tuple when there is no non-negative index
        """
        # Floor division keeps the sentence index an int on Python 3 as well.
        sent_indices = set((i // 1000) - 1 for i in indices if i >= 0)
        if sent_indices:
            pairs = []
            for sent_index in sorted(sent_indices):
                word_indices = [
                    (i % 1000) - 1 for i in indices if sent_index == (i // 1000) - 1
                ]
                pairs.append((sent_index, word_indices))
            return pairs
        else:
            word_indices = [(i % 1000) - 1 for i in indices]
            return [(None, word_indices)]
- class BoxerDrsParser(DrtParser):
- """
- Reparse the str form of subclasses of ``AbstractBoxerDrs``
- """
def __init__(self, discourse_id=None):
    """
    Set up the underlying ``DrtParser`` and remember the discourse id.

    :param discourse_id: stored as ``self.discourse_id``
    """
    DrtParser.__init__(self)
    self.discourse_id = discourse_id
def get_all_symbols(self):
    """
    Return the ``DrtTokens`` constants OPEN, CLOSE, COMMA, OPEN_BRACKET and
    CLOSE_BRACKET, in that order, as a new list.
    """
    symbols = (
        DrtTokens.OPEN,
        DrtTokens.CLOSE,
        DrtTokens.COMMA,
        DrtTokens.OPEN_BRACKET,
        DrtTokens.CLOSE_BRACKET,
    )
    return list(symbols)
def attempt_adjuncts(self, expression, context):
    """Return ``expression`` unchanged; ``context`` is not used."""
    return expression
def handle(self, tok, context):
    """
    Parse the term introduced by ``tok`` into the matching ``Boxer*`` object.

    Handled tokens are ``pred``, ``named``, ``rel``, ``prop``, ``not``,
    ``imp``, ``or``, ``eq``, ``card`` and ``whq``.

    :param tok: the token that names the term
    :param context: not used
    :return: ``BoxerPred``, ``BoxerNamed``, ``BoxerRel``, ``BoxerProp``,
        ``BoxerNot``, ``BoxerDrs`` (for ``imp``), ``BoxerOr``, ``BoxerEq``,
        ``BoxerCard`` or ``BoxerWhq``
    :raise LogicalExpressionException: wrapping any exception raised while
        parsing a handled term
    :raise AssertionError: if ``tok`` is not one of the handled tokens
    """
    try:
        if tok == 'pred':
            disc_id, sent_id, word_ids = self._read_occurrence_header()
            variable = int(self.token())
            self.assertNextToken(DrtTokens.COMMA)
            name = self.token()
            self.assertNextToken(DrtTokens.COMMA)
            pos = self.token()
            self.assertNextToken(DrtTokens.COMMA)
            sense = int(self.token())
            self.assertNextToken(DrtTokens.CLOSE)
            return BoxerPred(disc_id, sent_id, word_ids, variable, name, pos, sense)
        elif tok == 'named':
            # NOTE(review): unlike most other terms, the sentence id is read
            # with int() here, so a non-integer sentence id is rejected;
            # confirm that this is intended.
            disc_id, sent_id, word_ids = self._read_occurrence_header(
                nullable_sent_id=False
            )
            variable = int(self.token())
            self.assertNextToken(DrtTokens.COMMA)
            name = self.token()
            self.assertNextToken(DrtTokens.COMMA)
            ne_type = self.token()
            self.assertNextToken(DrtTokens.COMMA)
            sense = int(self.token())
            self.assertNextToken(DrtTokens.CLOSE)
            return BoxerNamed(
                disc_id, sent_id, word_ids, variable, name, ne_type, sense
            )
        elif tok == 'rel':
            disc_id, sent_id, word_ids = self._read_occurrence_header()
            var1 = int(self.token())
            self.assertNextToken(DrtTokens.COMMA)
            var2 = int(self.token())
            self.assertNextToken(DrtTokens.COMMA)
            rel = self.token()
            self.assertNextToken(DrtTokens.COMMA)
            sense = int(self.token())
            self.assertNextToken(DrtTokens.CLOSE)
            return BoxerRel(disc_id, sent_id, word_ids, var1, var2, rel, sense)
        elif tok == 'prop':
            # NOTE(review): sentence id read with int(), as for 'named'.
            disc_id, sent_id, word_ids = self._read_occurrence_header(
                nullable_sent_id=False
            )
            variable = int(self.token())
            self.assertNextToken(DrtTokens.COMMA)
            drs = self.process_next_expression(None)
            self.assertNextToken(DrtTokens.CLOSE)
            return BoxerProp(disc_id, sent_id, word_ids, variable, drs)
        elif tok == 'not':
            self.assertNextToken(DrtTokens.OPEN)
            drs = self.process_next_expression(None)
            self.assertNextToken(DrtTokens.CLOSE)
            return BoxerNot(drs)
        elif tok == 'imp':
            self.assertNextToken(DrtTokens.OPEN)
            drs1 = self.process_next_expression(None)
            self.assertNextToken(DrtTokens.COMMA)
            drs2 = self.process_next_expression(None)
            self.assertNextToken(DrtTokens.CLOSE)
            return BoxerDrs(drs1.refs, drs1.conds, drs2)
        elif tok == 'or':
            disc_id, sent_id, word_ids = self._read_occurrence_header()
            drs1 = self.process_next_expression(None)
            self.assertNextToken(DrtTokens.COMMA)
            drs2 = self.process_next_expression(None)
            self.assertNextToken(DrtTokens.CLOSE)
            return BoxerOr(disc_id, sent_id, word_ids, drs1, drs2)
        elif tok == 'eq':
            disc_id, sent_id, word_ids = self._read_occurrence_header()
            var1 = int(self.token())
            self.assertNextToken(DrtTokens.COMMA)
            var2 = int(self.token())
            self.assertNextToken(DrtTokens.CLOSE)
            return BoxerEq(disc_id, sent_id, word_ids, var1, var2)
        elif tok == 'card':
            disc_id, sent_id, word_ids = self._read_occurrence_header()
            var = int(self.token())
            self.assertNextToken(DrtTokens.COMMA)
            value = self.token()
            self.assertNextToken(DrtTokens.COMMA)
            card_type = self.token()
            self.assertNextToken(DrtTokens.CLOSE)
            return BoxerCard(disc_id, sent_id, word_ids, var, value, card_type)
        elif tok == 'whq':
            disc_id, sent_id, word_ids = self._read_occurrence_header()
            ans_types = self.handle_refs()
            self.assertNextToken(DrtTokens.COMMA)
            drs1 = self.process_next_expression(None)
            self.assertNextToken(DrtTokens.COMMA)
            var = int(self.token())
            self.assertNextToken(DrtTokens.COMMA)
            drs2 = self.process_next_expression(None)
            self.assertNextToken(DrtTokens.CLOSE)
            return BoxerWhq(disc_id, sent_id, word_ids, ans_types, drs1, var, drs2)
    except Exception as e:
        raise LogicalExpressionException(self._currentIndex, str(e))
    assert False, repr(tok)

def _read_occurrence_header(self, nullable_sent_id=True):
    """
    Read the leading ``(disc_id, sent_id, word_ids,`` part shared by the
    occurrence-indexed terms, including the comma after the word ids.

    No token is read for the discourse id when ``self.discourse_id`` is set.

    :param nullable_sent_id: read the sentence id with ``nullableIntToken``
        when true, with ``int`` otherwise
    :return: tuple ``(disc_id, sent_id, word_ids)``; ``word_ids`` is always a
        list, never a lazy ``map`` object
    """
    self.assertNextToken(DrtTokens.OPEN)
    disc_id = self.discourse_id if self.discourse_id is not None else self.token()
    self.assertNextToken(DrtTokens.COMMA)
    sent_id = self.nullableIntToken() if nullable_sent_id else int(self.token())
    self.assertNextToken(DrtTokens.COMMA)
    # Convert eagerly so that a bad index fails here, inside the caller's try
    # block, and the result can be indexed and iterated more than once.
    word_ids = list(map(int, self.handle_refs()))
    self.assertNextToken(DrtTokens.COMMA)
    return disc_id, sent_id, word_ids
def nullableIntToken(self):
    """
    Consume the next token and convert it to an integer.

    :return: ``None`` when the token is the literal string ``'None'``,
        otherwise ``int()`` applied to the token
    """
    raw = self.token()
    if raw == 'None':
        return None
    return int(raw)
def get_next_token_variable(self, description):
    """
    Return the next token, for use as a variable.

    :param description: accepted for interface compatibility; not used here
    :return: whatever ``self.token()`` returns
    :raise ExpectedMoreTokensException: when ``self.token()`` raises it; the
        error is re-raised with the same index and the message
        'Variable expected.'
    """
    try:
        variable_token = self.token()
    except ExpectedMoreTokensException as err:
        raise ExpectedMoreTokensException(err.index, 'Variable expected.')
    else:
        return variable_token
class AbstractBoxerDrs(object):
    """
    Base class for Boxer DRS nodes.

    Every hook here has a neutral default (empty sets, or ``self``), so
    subclasses only override what they need.
    """

    def variables(self):
        """
        :return: (set<variables>, set<events>, set<propositions>)
        """
        all_vars, event_vars, prop_vars = self._variables()
        # Make the three groups disjoint: events take precedence over
        # propositions, and both take precedence over plain variables.
        plain_vars = all_vars - (event_vars | prop_vars)
        return (plain_vars, event_vars, prop_vars - event_vars)

    def variable_types(self):
        """
        :return: dict mapping each variable to 'z' (plain variable),
            'e' (event) or 'p' (proposition)
        """
        tagged_groups = zip(('z', 'e', 'p'), self.variables())
        return {name: tag for tag, group in tagged_groups for name in group}

    def _variables(self):
        """
        :return: (set<variables>, set<events>, set<propositions>)
        """
        return (set(), set(), set())

    def atoms(self):
        """Return the set of atoms of this node; empty by default."""
        return set()

    def clean(self):
        """Return a cleaned version of this node; ``self`` by default."""
        return self

    def _clean_name(self, name):
        """Return ``name`` with every '-' and "'" replaced by '_'."""
        for unwanted in ('-', "'"):
            name = name.replace(unwanted, '_')
        return name

    def renumber_sentences(self, f):
        """Return this node with sentence indices mapped by ``f``; ``self`` by default."""
        return self

    def __hash__(self):
        # Hash on the formatted (string) form of the node.
        rendered = "{0}".format(self)
        return hash(rendered)
@python_2_unicode_compatible
class BoxerDrs(AbstractBoxerDrs):
    """
    A DRS box: a list of referents, a list of conditions and an optional
    consequent.

    When ``consequent`` is set, the box is rendered as
    ``imp(drs(...), consequent)``.
    """

    def __init__(self, refs, conds, consequent=None):
        """
        :param refs: referents of the box
        :param conds: list of condition nodes
        :param consequent: optional consequent node, or ``None``
        """
        AbstractBoxerDrs.__init__(self)
        self.refs = refs
        self.conds = conds
        self.consequent = consequent

    def _variables(self):
        """
        :return: (set<variables>, set<events>, set<propositions>) merged over
            all conditions and the consequent (if any)
        """
        variables = (set(), set(), set())
        for cond in self.conds:
            for s, v in zip(variables, cond._variables()):
                s.update(v)
        if self.consequent is not None:
            for s, v in zip(variables, self.consequent._variables()):
                s.update(v)
        return variables

    def atoms(self):
        """Return the union of the atoms of all conditions and the consequent."""
        # The explicit ``set()`` initializer keeps this safe for empty ``conds``.
        atoms = reduce(operator.or_, (cond.atoms() for cond in self.conds), set())
        if self.consequent is not None:
            atoms.update(self.consequent.atoms())
        return atoms

    def clean(self):
        """Return a new box whose conditions and consequent are cleaned."""
        consequent = self.consequent.clean() if self.consequent is not None else None
        return BoxerDrs(self.refs, [c.clean() for c in self.conds], consequent)

    def renumber_sentences(self, f):
        """
        Return a new box with ``renumber_sentences(f)`` applied to every
        condition and to the consequent.
        """
        consequent = (
            self.consequent.renumber_sentences(f)
            if self.consequent is not None
            else None
        )
        return BoxerDrs(
            self.refs, [c.renumber_sentences(f) for c in self.conds], consequent
        )

    def __repr__(self):
        s = 'drs([%s], [%s])' % (
            ', '.join("%s" % r for r in self.refs),
            ', '.join("%s" % c for c in self.conds),
        )
        if self.consequent is not None:
            s = 'imp(%s, %s)' % (s, self.consequent)
        return s

    def __eq__(self, other):
        # ``all`` (rather than ``reduce`` without an initializer) so that two
        # boxes with no conditions compare equal instead of raising TypeError.
        return (
            self.__class__ == other.__class__
            and self.refs == other.refs
            and len(self.conds) == len(other.conds)
            and all(c1 == c2 for c1, c2 in zip(self.conds, other.conds))
            and self.consequent == other.consequent
        )

    def __ne__(self, other):
        return not self == other

    # Defining __eq__ drops the inherited hash on Python 3; restore it.
    __hash__ = AbstractBoxerDrs.__hash__
@python_2_unicode_compatible
class BoxerNot(AbstractBoxerDrs):
    """Negation of a single DRS node, rendered as ``not(<drs>)``."""

    def __init__(self, drs):
        """
        :param drs: the negated node
        """
        AbstractBoxerDrs.__init__(self)
        self.drs = drs

    def _variables(self):
        """Delegate to the negated node."""
        inner = self.drs
        return inner._variables()

    def atoms(self):
        """Delegate to the negated node."""
        inner = self.drs
        return inner.atoms()

    def clean(self):
        """Return a negation of the cleaned inner node."""
        cleaned = self.drs.clean()
        return BoxerNot(cleaned)

    def renumber_sentences(self, f):
        """Return a negation of the inner node renumbered with ``f``."""
        renumbered = self.drs.renumber_sentences(f)
        return BoxerNot(renumbered)

    def __repr__(self):
        return 'not(%s)' % self.drs

    def __eq__(self, other):
        if not (self.__class__ == other.__class__):
            return False
        return self.drs == other.drs

    def __ne__(self, other):
        return not self == other

    # Defining __eq__ drops the inherited hash on Python 3; restore it.
    __hash__ = AbstractBoxerDrs.__hash__
@python_2_unicode_compatible
class BoxerIndexed(AbstractBoxerDrs):
    """
    Base class for conditions that carry a discourse id, a sentence index
    and a list of word indices.

    ``__eq__`` and ``__repr__`` rely on the instance being iterable and on a
    ``_pred()`` method; neither is defined here, so subclasses must provide
    ``__iter__`` (yielding their own fields) and ``_pred``.
    """

    def __init__(self, discourse_id, sent_index, word_indices):
        """
        :param discourse_id: identifier of the discourse
        :param sent_index: sentence index
        :param word_indices: iterable of word indices
        """
        AbstractBoxerDrs.__init__(self)
        self.discourse_id = discourse_id
        self.sent_index = sent_index
        self.word_indices = word_indices

    def atoms(self):
        """An indexed condition is itself its only atom."""
        return set([self])

    def __eq__(self, other):
        # ``all`` instead of ``reduce`` without an initializer: the latter
        # raises TypeError if a subclass iterates over no fields at all.
        return (
            self.__class__ == other.__class__
            and self.discourse_id == other.discourse_id
            and self.sent_index == other.sent_index
            and self.word_indices == other.word_indices
            and all(s == o for s, o in zip(self, other))
        )

    def __ne__(self, other):
        return not self == other

    # Defining __eq__ drops the inherited hash on Python 3; restore it.
    __hash__ = AbstractBoxerDrs.__hash__

    def __repr__(self):
        head = '%s(%s, %s, [%s]' % (
            self._pred(),
            self.discourse_id,
            self.sent_index,
            ', '.join("%s" % wi for wi in self.word_indices),
        )
        # Subclass-specific fields follow the common prefix.
        tail = ''.join(', %s' % v for v in self)
        return head + tail + ')'
class BoxerPred(BoxerIndexed):
    """
    Indexed condition rendered with the ``pred`` functor; carries a
    variable, a name, a ``pos`` value and a sense.
    """

    def __init__(self, discourse_id, sent_index, word_indices, var, name, pos, sense):
        BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices)
        self.var = var
        self.name = name
        self.pos = pos
        self.sense = sense

    def _variables(self):
        """
        :return: (set<variables>, set<events>, set<propositions>)
        """
        return ({self.var}, set(), set())

    def change_var(self, var):
        """Return a copy of this condition whose variable is ``var``."""
        return BoxerPred(
            self.discourse_id, self.sent_index, self.word_indices,
            var, self.name, self.pos, self.sense,
        )

    def clean(self):
        """Return a copy whose name has been passed through ``_clean_name``."""
        cleaned_name = self._clean_name(self.name)
        return BoxerPred(
            self.discourse_id, self.sent_index, self.word_indices,
            self.var, cleaned_name, self.pos, self.sense,
        )

    def renumber_sentences(self, f):
        """Return a copy whose sentence index is ``f(self.sent_index)``."""
        return BoxerPred(
            self.discourse_id, f(self.sent_index), self.word_indices,
            self.var, self.name, self.pos, self.sense,
        )

    def __iter__(self):
        # Fields compared and printed after the common indexed prefix.
        fields = (self.var, self.name, self.pos, self.sense)
        return iter(fields)

    def _pred(self):
        return 'pred'
class BoxerNamed(BoxerIndexed):
    """
    Indexed condition rendered with the ``named`` functor; carries a
    variable, a name, a type and a sense.
    """

    def __init__(self, discourse_id, sent_index, word_indices, var, name, type, sense):
        BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices)
        self.var = var
        self.name = name
        self.type = type
        self.sense = sense

    def _variables(self):
        """
        :return: (set<variables>, set<events>, set<propositions>)
        """
        return ({self.var}, set(), set())

    def change_var(self, var):
        """Return a copy of this condition whose variable is ``var``."""
        return BoxerNamed(
            self.discourse_id, self.sent_index, self.word_indices,
            var, self.name, self.type, self.sense,
        )

    def clean(self):
        """Return a copy whose name has been passed through ``_clean_name``."""
        cleaned_name = self._clean_name(self.name)
        return BoxerNamed(
            self.discourse_id, self.sent_index, self.word_indices,
            self.var, cleaned_name, self.type, self.sense,
        )

    def renumber_sentences(self, f):
        """Return a copy whose sentence index is ``f(self.sent_index)``."""
        new_sent_index = f(self.sent_index)
        return BoxerNamed(
            self.discourse_id, new_sent_index, self.word_indices,
            self.var, self.name, self.type, self.sense,
        )

    def __iter__(self):
        # Fields compared and printed after the common indexed prefix.
        fields = (self.var, self.name, self.type, self.sense)
        return iter(fields)

    def _pred(self):
        return 'named'
class BoxerRel(BoxerIndexed):
    """
    Indexed condition rendered with the ``rel`` functor; relates two
    variables through a relation name and a sense.
    """

    def __init__(self, discourse_id, sent_index, word_indices, var1, var2, rel, sense):
        BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices)
        self.var1 = var1
        self.var2 = var2
        self.rel = rel
        self.sense = sense

    def _variables(self):
        """
        :return: (set<variables>, set<events>, set<propositions>)
        """
        return ({self.var1, self.var2}, set(), set())

    def clean(self):
        """Return a copy whose relation name has been passed through ``_clean_name``."""
        cleaned_rel = self._clean_name(self.rel)
        return BoxerRel(
            self.discourse_id, self.sent_index, self.word_indices,
            self.var1, self.var2, cleaned_rel, self.sense,
        )

    def renumber_sentences(self, f):
        """Return a copy whose sentence index is ``f(self.sent_index)``."""
        new_sent_index = f(self.sent_index)
        return BoxerRel(
            self.discourse_id, new_sent_index, self.word_indices,
            self.var1, self.var2, self.rel, self.sense,
        )

    def __iter__(self):
        # Fields compared and printed after the common indexed prefix.
        fields = (self.var1, self.var2, self.rel, self.sense)
        return iter(fields)

    def _pred(self):
        return 'rel'
class BoxerProp(BoxerIndexed):
    """
    Indexed condition rendered with the ``prop`` functor; pairs a
    proposition variable with a nested DRS node.
    """

    def __init__(self, discourse_id, sent_index, word_indices, var, drs):
        BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices)
        self.var = var
        self.drs = drs

    def _variables(self):
        """
        :return: (set<variables>, set<events>, set<propositions>); the nested
            node's groups, with this condition's variable added to the
            propositions
        """
        own_groups = (set(), set(), {self.var})
        nested_groups = self.drs._variables()
        return tuple(own | nested for own, nested in zip(own_groups, nested_groups))

    def referenced_labels(self):
        """Return a set containing only the nested node."""
        return {self.drs}

    def atoms(self):
        """Delegate to the nested node."""
        return self.drs.atoms()

    def clean(self):
        """Return a copy whose nested node has been cleaned."""
        cleaned_drs = self.drs.clean()
        return BoxerProp(
            self.discourse_id, self.sent_index, self.word_indices,
            self.var, cleaned_drs,
        )

    def renumber_sentences(self, f):
        """
        Return a copy with sentence index ``f(self.sent_index)`` and with the
        nested node renumbered by ``f`` as well.
        """
        return BoxerProp(
            self.discourse_id, f(self.sent_index), self.word_indices,
            self.var, self.drs.renumber_sentences(f),
        )

    def __iter__(self):
        # Fields compared and printed after the common indexed prefix.
        fields = (self.var, self.drs)
        return iter(fields)

    def _pred(self):
        return 'prop'
class BoxerEq(BoxerIndexed):
    """
    Indexed condition rendered with the ``eq`` functor; equates two
    variables.
    """

    def __init__(self, discourse_id, sent_index, word_indices, var1, var2):
        BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices)
        self.var1 = var1
        self.var2 = var2

    def _variables(self):
        """
        :return: (set<variables>, set<events>, set<propositions>)
        """
        return ({self.var1, self.var2}, set(), set())

    def atoms(self):
        """An equality contributes no atoms."""
        return set()

    def renumber_sentences(self, f):
        """Return a copy whose sentence index is ``f(self.sent_index)``."""
        new_sent_index = f(self.sent_index)
        return BoxerEq(
            self.discourse_id, new_sent_index, self.word_indices,
            self.var1, self.var2,
        )

    def __iter__(self):
        # Fields compared and printed after the common indexed prefix.
        fields = (self.var1, self.var2)
        return iter(fields)

    def _pred(self):
        return 'eq'
class BoxerCard(BoxerIndexed):
    """
    Indexed condition rendered with the ``card`` functor; attaches a value
    and a type to a variable.
    """

    def __init__(self, discourse_id, sent_index, word_indices, var, value, type):
        BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices)
        self.var = var
        self.value = value
        self.type = type

    def _variables(self):
        """
        :return: (set<variables>, set<events>, set<propositions>)
        """
        return ({self.var}, set(), set())

    def renumber_sentences(self, f):
        """Return a copy whose sentence index is ``f(self.sent_index)``."""
        new_sent_index = f(self.sent_index)
        return BoxerCard(
            self.discourse_id, new_sent_index, self.word_indices,
            self.var, self.value, self.type,
        )

    def __iter__(self):
        # Fields compared and printed after the common indexed prefix.
        fields = (self.var, self.value, self.type)
        return iter(fields)

    def _pred(self):
        return 'card'
class BoxerOr(BoxerIndexed):
    """
    Indexed condition rendered with the ``or`` functor; a disjunction of two
    nested DRS nodes.
    """

    def __init__(self, discourse_id, sent_index, word_indices, drs1, drs2):
        """
        :param discourse_id: identifier of the discourse
        :param sent_index: sentence index
        :param word_indices: iterable of word indices
        :param drs1: first disjunct
        :param drs2: second disjunct
        """
        BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices)
        self.drs1 = drs1
        self.drs2 = drs2

    def _variables(self):
        """
        :return: (set<variables>, set<events>, set<propositions>), the
            group-wise union over both disjuncts
        """
        return tuple(map(operator.or_, self.drs1._variables(), self.drs2._variables()))

    def atoms(self):
        """Return the union of the atoms of both disjuncts."""
        return self.drs1.atoms() | self.drs2.atoms()

    def clean(self):
        """Return a copy whose disjuncts have both been cleaned."""
        return BoxerOr(
            self.discourse_id,
            self.sent_index,
            self.word_indices,
            self.drs1.clean(),
            self.drs2.clean(),
        )

    def renumber_sentences(self, f):
        """
        Return a copy with sentence index ``f(self.sent_index)``.

        The disjuncts are renumbered too, matching ``BoxerDrs``, ``BoxerNot``
        and ``BoxerProp``; otherwise conditions nested inside the disjunction
        would keep their old sentence indices.
        """
        return BoxerOr(
            self.discourse_id,
            f(self.sent_index),
            self.word_indices,
            self.drs1.renumber_sentences(f),
            self.drs2.renumber_sentences(f),
        )

    def __iter__(self):
        # Fields compared and printed after the common indexed prefix.
        return iter((self.drs1, self.drs2))

    def _pred(self):
        return 'or'
class BoxerWhq(BoxerIndexed):
    """
    Indexed condition rendered with the ``whq`` functor; holds a list of
    answer types, two nested DRS nodes and a variable.
    """

    def __init__(
        self, discourse_id, sent_index, word_indices, ans_types, drs1, variable, drs2
    ):
        """
        :param discourse_id: identifier of the discourse
        :param sent_index: sentence index
        :param word_indices: iterable of word indices
        :param ans_types: iterable of strings (joined with ',' when printed)
        :param drs1: first nested node
        :param variable: the variable of this condition
        :param drs2: second nested node
        """
        BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices)
        self.ans_types = ans_types
        self.drs1 = drs1
        self.variable = variable
        self.drs2 = drs2

    def _variables(self):
        """
        :return: (set<variables>, set<events>, set<propositions>), the
            group-wise union over both nested nodes, with this condition's
            variable added to the plain variables
        """
        own_groups = (set([self.variable]), set(), set())
        # ``operator.or_`` takes exactly two operands, so mapping it over
        # three iterables raises TypeError; union the three groups explicitly.
        return tuple(
            own | first | second
            for own, first, second in zip(
                own_groups, self.drs1._variables(), self.drs2._variables()
            )
        )

    def atoms(self):
        """Return the union of the atoms of both nested nodes."""
        return self.drs1.atoms() | self.drs2.atoms()

    def clean(self):
        """Return a copy whose nested nodes have both been cleaned."""
        return BoxerWhq(
            self.discourse_id,
            self.sent_index,
            self.word_indices,
            self.ans_types,
            self.drs1.clean(),
            self.variable,
            self.drs2.clean(),
        )

    def renumber_sentences(self, f):
        """
        Return a copy with sentence index ``f(self.sent_index)``.

        The nested nodes are renumbered too, matching ``BoxerDrs``,
        ``BoxerNot`` and ``BoxerProp``; otherwise conditions nested inside
        the question would keep their old sentence indices.
        """
        return BoxerWhq(
            self.discourse_id,
            f(self.sent_index),
            self.word_indices,
            self.ans_types,
            self.drs1.renumber_sentences(f),
            self.variable,
            self.drs2.renumber_sentences(f),
        )

    def __iter__(self):
        # Fields compared and printed after the common indexed prefix; the
        # answer types are pre-rendered as a bracketed, comma-joined string.
        return iter(
            ('[' + ','.join(self.ans_types) + ']', self.drs1, self.variable, self.drs2)
        )

    def _pred(self):
        return 'whq'
class PassthroughBoxerDrsInterpreter(object):
    """Interpreter that performs no conversion at all."""

    def interpret(self, ex):
        """
        :param ex: the expression to interpret
        :return: ``ex`` itself, unchanged
        """
        return ex
class NltkDrtBoxerDrsInterpreter(object):
    """
    Interpreter that converts Boxer DRS nodes into DRT expressions.

    When ``occur_index`` is true, predicate names get a suffix built from
    the discourse id, sentence index and smallest word index of the
    condition they came from.
    """

    def __init__(self, occur_index=False):
        self._occur_index = occur_index

    def interpret(self, ex):
        """
        :param ex: ``AbstractBoxerDrs``
        :return: ``DrtExpression``
        """
        if isinstance(ex, BoxerDrs):
            referents = [Variable(r) for r in ex.refs]
            conditions = [self.interpret(cond) for cond in ex.conds]
            box = DRS(referents, conditions)
            if ex.consequent is not None:
                box.consequent = self.interpret(ex.consequent)
            return box

        if isinstance(ex, BoxerNot):
            return DrtNegatedExpression(self.interpret(ex.drs))

        if isinstance(ex, BoxerPred):
            name = self._add_occur_indexing('%s_%s' % (ex.pos, ex.name), ex)
            return self._make_atom(name, ex.var)

        if isinstance(ex, BoxerNamed):
            name = self._add_occur_indexing('ne_%s_%s' % (ex.type, ex.name), ex)
            return self._make_atom(name, ex.var)

        if isinstance(ex, BoxerRel):
            name = self._add_occur_indexing('%s' % ex.rel, ex)
            return self._make_atom(name, ex.var1, ex.var2)

        if isinstance(ex, BoxerProp):
            return DrtProposition(Variable(ex.var), self.interpret(ex.drs))

        if isinstance(ex, BoxerEq):
            left = DrtVariableExpression(Variable(ex.var1))
            right = DrtVariableExpression(Variable(ex.var2))
            return DrtEqualityExpression(left, right)

        if isinstance(ex, BoxerCard):
            name = self._add_occur_indexing('card_%s_%s' % (ex.type, ex.value), ex)
            return self._make_atom(name, ex.var)

        if isinstance(ex, BoxerOr):
            return DrtOrExpression(self.interpret(ex.drs1), self.interpret(ex.drs2))

        if isinstance(ex, BoxerWhq):
            # A wh-question is flattened into one DRS holding the referents
            # and conditions of both nested boxes.
            first = self.interpret(ex.drs1)
            second = self.interpret(ex.drs2)
            return DRS(first.refs + second.refs, first.conds + second.conds)

        assert False, '%s: %s' % (ex.__class__.__name__, ex)

    def _make_atom(self, pred, *args):
        """
        Build the application of predicate ``pred`` to ``args``, applying one
        argument at a time from left to right.
        """
        applied = DrtVariableExpression(Variable(pred))
        for argument in args:
            argument_expr = DrtVariableExpression(Variable(argument))
            applied = DrtApplicationExpression(applied, argument_expr)
        return applied

    def _add_occur_indexing(self, base, ex):
        """
        Return ``base`` with an occurrence suffix appended when occurrence
        indexing is enabled and ``ex`` has a sentence index; otherwise return
        ``base`` unchanged.
        """
        if not self._occur_index or ex.sent_index is None:
            return base
        pieces = [base]
        if ex.discourse_id:
            pieces.append('_%s' % ex.discourse_id)
        pieces.append('_s%s' % ex.sent_index)
        # Only the smallest word index is used.
        pieces.append('_w%s' % sorted(ex.word_indices)[0])
        return ''.join(pieces)
class UnparseableInputException(Exception):
    """Error type for unparseable input (raise sites are outside this section)."""
if __name__ == '__main__':
    # Command-line entry point: interpret TEXT with Boxer and print the
    # resulting DRS, or its first-order-logic form when --fol is given.
    opts = OptionParser("usage: %prog TEXT [options]")

    # (long flag, short flag, help text, destination attribute); every option
    # is a boolean switch that defaults to False.
    for long_flag, short_flag, help_text, dest_name in (
        ("--verbose", "-v", "display verbose logs", "verbose"),
        ("--fol", "-f", "output FOL", "fol"),
        ("--question", "-q", "input is a question", "question"),
        ("--occur", "-o", "occurrence index", "occur_index"),
    ):
        opts.add_option(
            long_flag,
            short_flag,
            help=help_text,
            action="store_true",
            default=False,
            dest=dest_name,
        )

    options, args = opts.parse_args()
    if len(args) != 1:
        opts.error("incorrect number of arguments")

    interpreter = NltkDrtBoxerDrsInterpreter(occur_index=options.occur_index)
    # TEXT is split on the two-character sequence backslash + 'n' (raw
    # string), not on real newline characters.
    sentences = args[0].split(r'\n')
    drs = Boxer(interpreter).interpret_multi(
        sentences, question=options.question, verbose=options.verbose
    )

    if drs is not None:
        drs = drs.simplify().eliminate_equality()
        if options.fol:
            print(drs.fol().normalize())
        else:
            drs.pretty_print()
    else:
        print(None)
|