chat80.py 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864
  1. # Natural Language Toolkit: Chat-80 KB Reader
  2. # See http://www.w3.org/TR/swbp-skos-core-guide/
  3. #
  4. # Copyright (C) 2001-2019 NLTK Project
  5. # Author: Ewan Klein <ewan@inf.ed.ac.uk>,
  6. # URL: <http://nltk.sourceforge.net>
  7. # For license information, see LICENSE.TXT
  8. """
  9. Overview
  10. ========
  11. Chat-80 was a natural language system which allowed the user to
  12. interrogate a Prolog knowledge base in the domain of world
  13. geography. It was developed in the early '80s by Warren and Pereira; see
  14. ``http://www.aclweb.org/anthology/J82-3002.pdf`` for a description and
  15. ``http://www.cis.upenn.edu/~pereira/oldies.html`` for the source
  16. files.
  17. This module contains functions to extract data from the Chat-80
  18. relation files ('the world database'), and convert them into a format
  19. that can be incorporated in the FOL models of
  20. ``nltk.sem.evaluate``. The code assumes that the Prolog
  21. input files are available in the NLTK corpora directory.
  22. The Chat-80 World Database consists of the following files::
  23. world0.pl
  24. rivers.pl
  25. cities.pl
  26. countries.pl
  27. contain.pl
  28. borders.pl
  29. This module uses a slightly modified version of ``world0.pl``, in which
  30. a set of Prolog rules have been omitted. The modified file is named
  31. ``world1.pl``. Currently, the file ``rivers.pl`` is not read in, since
  32. it uses a list rather than a string in the second field.
  33. Reading Chat-80 Files
  34. =====================
  35. Chat-80 relations are like tables in a relational database. The
  36. relation acts as the name of the table; the first argument acts as the
  37. 'primary key'; and subsequent arguments are further fields in the
  38. table. In general, the name of the table provides a label for a unary
  39. predicate whose extension is all the primary keys. For example,
  40. relations in ``cities.pl`` are of the following form::
  41. 'city(athens,greece,1368).'
  42. Here, ``'athens'`` is the key, and will be mapped to a member of the
  43. unary predicate *city*.
  44. The fields in the table are mapped to binary predicates. The first
  45. argument of the predicate is the primary key, while the second
  46. argument is the data in the relevant field. Thus, in the above
  47. example, the third field is mapped to the binary predicate
  48. *population_of*, whose extension is a set of pairs such as
  49. ``'(athens, 1368)'``.
  50. An exception to this general framework is required by the relations in
  51. the files ``borders.pl`` and ``contain.pl``. These contain facts of the
  52. following form::
  53. 'borders(albania,greece).'
  54. 'contains0(africa,central_africa).'
  55. We do not want to form a unary concept out of the element in
  56. the first field of these records, and we want the label of the binary
  57. relation just to be ``'border'``/``'contain'`` respectively.
  58. In order to drive the extraction process, we use 'relation metadata bundles'
  59. which are Python dictionaries such as the following::
  60. city = {'rel_name': 'city',
  61. 'closures': [],
  62. 'schema': ['city', 'country', 'population'],
  63. 'filename': 'cities.pl'}
  64. According to this, the file ``city['filename']`` contains a list of
  65. relational tuples (or more accurately, the corresponding strings in
  66. Prolog form) whose predicate symbol is ``city['rel_name']`` and whose
  67. relational schema is ``city['schema']``. The notion of a ``closure`` is
  68. discussed in the next section.
  69. Concepts
  70. ========
  71. In order to encapsulate the results of the extraction, a class of
  72. ``Concept`` objects is introduced. A ``Concept`` object has a number of
  73. attributes, in particular a ``prefLabel`` and ``extension``, which make
  74. it easier to inspect the output of the extraction. In addition, the
  75. ``extension`` can be further processed: in the case of the ``'border'``
  76. relation, we check that the relation is symmetric, and in the case
  77. of the ``'contain'`` relation, we carry out the transitive
  78. closure. The closure properties associated with a concept are
  79. indicated in the relation metadata, as indicated earlier.
  80. The ``extension`` of a ``Concept`` object is then incorporated into a
  81. ``Valuation`` object.
  82. Persistence
  83. ===========
  84. The functions ``val_dump`` and ``val_load`` are provided to allow a
  85. valuation to be stored in a persistent database and re-loaded, rather
  86. than having to be re-computed each time.
  87. Individuals and Lexical Items
  88. =============================
  89. As well as deriving relations from the Chat-80 data, we also create a
  90. set of individual constants, one for each entity in the domain. The
  91. individual constants are string-identical to the entities. For
  92. example, given a data item such as ``'zloty'``, we add to the valuation
  93. a pair ``('zloty', 'zloty')``. In order to parse English sentences that
  94. refer to these entities, we also create a lexical item such as the
  95. following for each individual constant::
  96. PropN[num=sg, sem=<\P.(P zloty)>] -> 'Zloty'
  97. The set of rules is written to the file ``chat_pnames.cfg`` in the
  98. current directory.
  99. """
  100. from __future__ import print_function, unicode_literals
  101. import re
  102. import shelve
  103. import os
  104. import sys
  105. from six import string_types
  106. import nltk.data
  107. from nltk.compat import python_2_unicode_compatible
  108. ###########################################################################
  109. # Chat-80 relation metadata bundles needed to build the valuation
  110. ###########################################################################
  111. borders = {
  112. 'rel_name': 'borders',
  113. 'closures': ['symmetric'],
  114. 'schema': ['region', 'border'],
  115. 'filename': 'borders.pl',
  116. }
  117. contains = {
  118. 'rel_name': 'contains0',
  119. 'closures': ['transitive'],
  120. 'schema': ['region', 'contain'],
  121. 'filename': 'contain.pl',
  122. }
  123. city = {
  124. 'rel_name': 'city',
  125. 'closures': [],
  126. 'schema': ['city', 'country', 'population'],
  127. 'filename': 'cities.pl',
  128. }
  129. country = {
  130. 'rel_name': 'country',
  131. 'closures': [],
  132. 'schema': [
  133. 'country',
  134. 'region',
  135. 'latitude',
  136. 'longitude',
  137. 'area',
  138. 'population',
  139. 'capital',
  140. 'currency',
  141. ],
  142. 'filename': 'countries.pl',
  143. }
  144. circle_of_lat = {
  145. 'rel_name': 'circle_of_latitude',
  146. 'closures': [],
  147. 'schema': ['circle_of_latitude', 'degrees'],
  148. 'filename': 'world1.pl',
  149. }
  150. circle_of_long = {
  151. 'rel_name': 'circle_of_longitude',
  152. 'closures': [],
  153. 'schema': ['circle_of_longitude', 'degrees'],
  154. 'filename': 'world1.pl',
  155. }
  156. continent = {
  157. 'rel_name': 'continent',
  158. 'closures': [],
  159. 'schema': ['continent'],
  160. 'filename': 'world1.pl',
  161. }
  162. region = {
  163. 'rel_name': 'in_continent',
  164. 'closures': [],
  165. 'schema': ['region', 'continent'],
  166. 'filename': 'world1.pl',
  167. }
  168. ocean = {
  169. 'rel_name': 'ocean',
  170. 'closures': [],
  171. 'schema': ['ocean'],
  172. 'filename': 'world1.pl',
  173. }
  174. sea = {'rel_name': 'sea', 'closures': [], 'schema': ['sea'], 'filename': 'world1.pl'}
  175. items = [
  176. 'borders',
  177. 'contains',
  178. 'city',
  179. 'country',
  180. 'circle_of_lat',
  181. 'circle_of_long',
  182. 'continent',
  183. 'region',
  184. 'ocean',
  185. 'sea',
  186. ]
  187. items = tuple(sorted(items))
  188. item_metadata = {
  189. 'borders': borders,
  190. 'contains': contains,
  191. 'city': city,
  192. 'country': country,
  193. 'circle_of_lat': circle_of_lat,
  194. 'circle_of_long': circle_of_long,
  195. 'continent': continent,
  196. 'region': region,
  197. 'ocean': ocean,
  198. 'sea': sea,
  199. }
  200. rels = item_metadata.values()
  201. not_unary = ['borders.pl', 'contain.pl']
  202. ###########################################################################
  203. @python_2_unicode_compatible
  204. class Concept(object):
  205. """
  206. A Concept class, loosely based on SKOS
  207. (http://www.w3.org/TR/swbp-skos-core-guide/).
  208. """
  209. def __init__(self, prefLabel, arity, altLabels=[], closures=[], extension=set()):
  210. """
  211. :param prefLabel: the preferred label for the concept
  212. :type prefLabel: str
  213. :param arity: the arity of the concept
  214. :type arity: int
  215. @keyword altLabels: other (related) labels
  216. :type altLabels: list
  217. @keyword closures: closure properties of the extension \
  218. (list items can be ``symmetric``, ``reflexive``, ``transitive``)
  219. :type closures: list
  220. @keyword extension: the extensional value of the concept
  221. :type extension: set
  222. """
  223. self.prefLabel = prefLabel
  224. self.arity = arity
  225. self.altLabels = altLabels
  226. self.closures = closures
  227. # keep _extension internally as a set
  228. self._extension = extension
  229. # public access is via a list (for slicing)
  230. self.extension = sorted(list(extension))
  231. def __str__(self):
  232. # _extension = ''
  233. # for element in sorted(self.extension):
  234. # if isinstance(element, tuple):
  235. # element = '(%s, %s)' % (element)
  236. # _extension += element + ', '
  237. # _extension = _extension[:-1]
  238. return "Label = '%s'\nArity = %s\nExtension = %s" % (
  239. self.prefLabel,
  240. self.arity,
  241. self.extension,
  242. )
  243. def __repr__(self):
  244. return "Concept('%s')" % self.prefLabel
  245. def augment(self, data):
  246. """
  247. Add more data to the ``Concept``'s extension set.
  248. :param data: a new semantic value
  249. :type data: string or pair of strings
  250. :rtype: set
  251. """
  252. self._extension.add(data)
  253. self.extension = sorted(list(self._extension))
  254. return self._extension
  255. def _make_graph(self, s):
  256. """
  257. Convert a set of pairs into an adjacency linked list encoding of a graph.
  258. """
  259. g = {}
  260. for (x, y) in s:
  261. if x in g:
  262. g[x].append(y)
  263. else:
  264. g[x] = [y]
  265. return g
  266. def _transclose(self, g):
  267. """
  268. Compute the transitive closure of a graph represented as a linked list.
  269. """
  270. for x in g:
  271. for adjacent in g[x]:
  272. # check that adjacent is a key
  273. if adjacent in g:
  274. for y in g[adjacent]:
  275. if y not in g[x]:
  276. g[x].append(y)
  277. return g
  278. def _make_pairs(self, g):
  279. """
  280. Convert an adjacency linked list back into a set of pairs.
  281. """
  282. pairs = []
  283. for node in g:
  284. for adjacent in g[node]:
  285. pairs.append((node, adjacent))
  286. return set(pairs)
  287. def close(self):
  288. """
  289. Close a binary relation in the ``Concept``'s extension set.
  290. :return: a new extension for the ``Concept`` in which the
  291. relation is closed under a given property
  292. """
  293. from nltk.sem import is_rel
  294. assert is_rel(self._extension)
  295. if 'symmetric' in self.closures:
  296. pairs = []
  297. for (x, y) in self._extension:
  298. pairs.append((y, x))
  299. sym = set(pairs)
  300. self._extension = self._extension.union(sym)
  301. if 'transitive' in self.closures:
  302. all = self._make_graph(self._extension)
  303. closed = self._transclose(all)
  304. trans = self._make_pairs(closed)
  305. # print sorted(trans)
  306. self._extension = self._extension.union(trans)
  307. self.extension = sorted(list(self._extension))
  308. def clause2concepts(filename, rel_name, schema, closures=[]):
  309. """
  310. Convert a file of Prolog clauses into a list of ``Concept`` objects.
  311. :param filename: filename containing the relations
  312. :type filename: str
  313. :param rel_name: name of the relation
  314. :type rel_name: str
  315. :param schema: the schema used in a set of relational tuples
  316. :type schema: list
  317. :param closures: closure properties for the extension of the concept
  318. :type closures: list
  319. :return: a list of ``Concept`` objects
  320. :rtype: list
  321. """
  322. concepts = []
  323. # position of the subject of a binary relation
  324. subj = 0
  325. # label of the 'primary key'
  326. pkey = schema[0]
  327. # fields other than the primary key
  328. fields = schema[1:]
  329. # convert a file into a list of lists
  330. records = _str2records(filename, rel_name)
  331. # add a unary concept corresponding to the set of entities
  332. # in the primary key position
  333. # relations in 'not_unary' are more like ordinary binary relations
  334. if not filename in not_unary:
  335. concepts.append(unary_concept(pkey, subj, records))
  336. # add a binary concept for each non-key field
  337. for field in fields:
  338. obj = schema.index(field)
  339. concepts.append(binary_concept(field, closures, subj, obj, records))
  340. return concepts
  341. def cities2table(filename, rel_name, dbname, verbose=False, setup=False):
  342. """
  343. Convert a file of Prolog clauses into a database table.
  344. This is not generic, since it doesn't allow arbitrary
  345. schemas to be set as a parameter.
  346. Intended usage::
  347. cities2table('cities.pl', 'city', 'city.db', verbose=True, setup=True)
  348. :param filename: filename containing the relations
  349. :type filename: str
  350. :param rel_name: name of the relation
  351. :type rel_name: str
  352. :param dbname: filename of persistent store
  353. :type schema: str
  354. """
  355. import sqlite3
  356. records = _str2records(filename, rel_name)
  357. connection = sqlite3.connect(dbname)
  358. cur = connection.cursor()
  359. if setup:
  360. cur.execute(
  361. '''CREATE TABLE city_table
  362. (City text, Country text, Population int)'''
  363. )
  364. table_name = "city_table"
  365. for t in records:
  366. cur.execute('insert into %s values (?,?,?)' % table_name, t)
  367. if verbose:
  368. print("inserting values into %s: " % table_name, t)
  369. connection.commit()
  370. if verbose:
  371. print("Committing update to %s" % dbname)
  372. cur.close()
  373. def sql_query(dbname, query):
  374. """
  375. Execute an SQL query over a database.
  376. :param dbname: filename of persistent store
  377. :type schema: str
  378. :param query: SQL query
  379. :type rel_name: str
  380. """
  381. import sqlite3
  382. try:
  383. path = nltk.data.find(dbname)
  384. connection = sqlite3.connect(str(path))
  385. cur = connection.cursor()
  386. return cur.execute(query)
  387. except (ValueError, sqlite3.OperationalError):
  388. import warnings
  389. warnings.warn(
  390. "Make sure the database file %s is installed and uncompressed." % dbname
  391. )
  392. raise
  393. def _str2records(filename, rel):
  394. """
  395. Read a file into memory and convert each relation clause into a list.
  396. """
  397. recs = []
  398. contents = nltk.data.load("corpora/chat80/%s" % filename, format="text")
  399. for line in contents.splitlines():
  400. if line.startswith(rel):
  401. line = re.sub(rel + r'\(', '', line)
  402. line = re.sub(r'\)\.$', '', line)
  403. record = line.split(',')
  404. recs.append(record)
  405. return recs
  406. def unary_concept(label, subj, records):
  407. """
  408. Make a unary concept out of the primary key in a record.
  409. A record is a list of entities in some relation, such as
  410. ``['france', 'paris']``, where ``'france'`` is acting as the primary
  411. key.
  412. :param label: the preferred label for the concept
  413. :type label: string
  414. :param subj: position in the record of the subject of the predicate
  415. :type subj: int
  416. :param records: a list of records
  417. :type records: list of lists
  418. :return: ``Concept`` of arity 1
  419. :rtype: Concept
  420. """
  421. c = Concept(label, arity=1, extension=set())
  422. for record in records:
  423. c.augment(record[subj])
  424. return c
  425. def binary_concept(label, closures, subj, obj, records):
  426. """
  427. Make a binary concept out of the primary key and another field in a record.
  428. A record is a list of entities in some relation, such as
  429. ``['france', 'paris']``, where ``'france'`` is acting as the primary
  430. key, and ``'paris'`` stands in the ``'capital_of'`` relation to
  431. ``'france'``.
  432. More generally, given a record such as ``['a', 'b', 'c']``, where
  433. label is bound to ``'B'``, and ``obj`` bound to 1, the derived
  434. binary concept will have label ``'B_of'``, and its extension will
  435. be a set of pairs such as ``('a', 'b')``.
  436. :param label: the base part of the preferred label for the concept
  437. :type label: str
  438. :param closures: closure properties for the extension of the concept
  439. :type closures: list
  440. :param subj: position in the record of the subject of the predicate
  441. :type subj: int
  442. :param obj: position in the record of the object of the predicate
  443. :type obj: int
  444. :param records: a list of records
  445. :type records: list of lists
  446. :return: ``Concept`` of arity 2
  447. :rtype: Concept
  448. """
  449. if not label == 'border' and not label == 'contain':
  450. label = label + '_of'
  451. c = Concept(label, arity=2, closures=closures, extension=set())
  452. for record in records:
  453. c.augment((record[subj], record[obj]))
  454. # close the concept's extension according to the properties in closures
  455. c.close()
  456. return c
  457. def process_bundle(rels):
  458. """
  459. Given a list of relation metadata bundles, make a corresponding
  460. dictionary of concepts, indexed by the relation name.
  461. :param rels: bundle of metadata needed for constructing a concept
  462. :type rels: list(dict)
  463. :return: a dictionary of concepts, indexed by the relation name.
  464. :rtype: dict(str): Concept
  465. """
  466. concepts = {}
  467. for rel in rels:
  468. rel_name = rel['rel_name']
  469. closures = rel['closures']
  470. schema = rel['schema']
  471. filename = rel['filename']
  472. concept_list = clause2concepts(filename, rel_name, schema, closures)
  473. for c in concept_list:
  474. label = c.prefLabel
  475. if label in concepts:
  476. for data in c.extension:
  477. concepts[label].augment(data)
  478. concepts[label].close()
  479. else:
  480. concepts[label] = c
  481. return concepts
  482. def make_valuation(concepts, read=False, lexicon=False):
  483. """
  484. Convert a list of ``Concept`` objects into a list of (label, extension) pairs;
  485. optionally create a ``Valuation`` object.
  486. :param concepts: concepts
  487. :type concepts: list(Concept)
  488. :param read: if ``True``, ``(symbol, set)`` pairs are read into a ``Valuation``
  489. :type read: bool
  490. :rtype: list or Valuation
  491. """
  492. vals = []
  493. for c in concepts:
  494. vals.append((c.prefLabel, c.extension))
  495. if lexicon:
  496. read = True
  497. if read:
  498. from nltk.sem import Valuation
  499. val = Valuation({})
  500. val.update(vals)
  501. # add labels for individuals
  502. val = label_indivs(val, lexicon=lexicon)
  503. return val
  504. else:
  505. return vals
  506. def val_dump(rels, db):
  507. """
  508. Make a ``Valuation`` from a list of relation metadata bundles and dump to
  509. persistent database.
  510. :param rels: bundle of metadata needed for constructing a concept
  511. :type rels: list of dict
  512. :param db: name of file to which data is written.
  513. The suffix '.db' will be automatically appended.
  514. :type db: str
  515. """
  516. concepts = process_bundle(rels).values()
  517. valuation = make_valuation(concepts, read=True)
  518. db_out = shelve.open(db, 'n')
  519. db_out.update(valuation)
  520. db_out.close()
  521. def val_load(db):
  522. """
  523. Load a ``Valuation`` from a persistent database.
  524. :param db: name of file from which data is read.
  525. The suffix '.db' should be omitted from the name.
  526. :type db: str
  527. """
  528. dbname = db + ".db"
  529. if not os.access(dbname, os.R_OK):
  530. sys.exit("Cannot read file: %s" % dbname)
  531. else:
  532. db_in = shelve.open(db)
  533. from nltk.sem import Valuation
  534. val = Valuation(db_in)
  535. # val.read(db_in.items())
  536. return val
  537. # def alpha(str):
  538. # """
  539. # Utility to filter out non-alphabetic constants.
  540. #:param str: candidate constant
  541. #:type str: string
  542. #:rtype: bool
  543. # """
  544. # try:
  545. # int(str)
  546. # return False
  547. # except ValueError:
  548. ## some unknown values in records are labeled '?'
  549. # if not str == '?':
  550. # return True
  551. def label_indivs(valuation, lexicon=False):
  552. """
  553. Assign individual constants to the individuals in the domain of a ``Valuation``.
  554. Given a valuation with an entry of the form ``{'rel': {'a': True}}``,
  555. add a new entry ``{'a': 'a'}``.
  556. :type valuation: Valuation
  557. :rtype: Valuation
  558. """
  559. # collect all the individuals into a domain
  560. domain = valuation.domain
  561. # convert the domain into a sorted list of alphabetic terms
  562. # use the same string as a label
  563. pairs = [(e, e) for e in domain]
  564. if lexicon:
  565. lex = make_lex(domain)
  566. with open("chat_pnames.cfg", 'w') as outfile:
  567. outfile.writelines(lex)
  568. # read the pairs into the valuation
  569. valuation.update(pairs)
  570. return valuation
  571. def make_lex(symbols):
  572. """
  573. Create lexical CFG rules for each individual symbol.
  574. Given a valuation with an entry of the form ``{'zloty': 'zloty'}``,
  575. create a lexical rule for the proper name 'Zloty'.
  576. :param symbols: a list of individual constants in the semantic representation
  577. :type symbols: sequence -- set(str)
  578. :rtype: list(str)
  579. """
  580. lex = []
  581. header = """
  582. ##################################################################
  583. # Lexical rules automatically generated by running 'chat80.py -x'.
  584. ##################################################################
  585. """
  586. lex.append(header)
  587. template = "PropN[num=sg, sem=<\P.(P %s)>] -> '%s'\n"
  588. for s in symbols:
  589. parts = s.split('_')
  590. caps = [p.capitalize() for p in parts]
  591. pname = '_'.join(caps)
  592. rule = template % (s, pname)
  593. lex.append(rule)
  594. return lex
  595. ###########################################################################
  596. # Interface function to emulate other corpus readers
  597. ###########################################################################
  598. def concepts(items=items):
  599. """
  600. Build a list of concepts corresponding to the relation names in ``items``.
  601. :param items: names of the Chat-80 relations to extract
  602. :type items: list(str)
  603. :return: the ``Concept`` objects which are extracted from the relations
  604. :rtype: list(Concept)
  605. """
  606. if isinstance(items, string_types):
  607. items = (items,)
  608. rels = [item_metadata[r] for r in items]
  609. concept_map = process_bundle(rels)
  610. return concept_map.values()
  611. ###########################################################################
  612. def main():
  613. import sys
  614. from optparse import OptionParser
  615. description = """
  616. Extract data from the Chat-80 Prolog files and convert them into a
  617. Valuation object for use in the NLTK semantics package.
  618. """
  619. opts = OptionParser(description=description)
  620. opts.set_defaults(verbose=True, lex=False, vocab=False)
  621. opts.add_option(
  622. "-s", "--store", dest="outdb", help="store a valuation in DB", metavar="DB"
  623. )
  624. opts.add_option(
  625. "-l",
  626. "--load",
  627. dest="indb",
  628. help="load a stored valuation from DB",
  629. metavar="DB",
  630. )
  631. opts.add_option(
  632. "-c",
  633. "--concepts",
  634. action="store_true",
  635. help="print concepts instead of a valuation",
  636. )
  637. opts.add_option(
  638. "-r",
  639. "--relation",
  640. dest="label",
  641. help="print concept with label REL (check possible labels with '-v' option)",
  642. metavar="REL",
  643. )
  644. opts.add_option(
  645. "-q",
  646. "--quiet",
  647. action="store_false",
  648. dest="verbose",
  649. help="don't print out progress info",
  650. )
  651. opts.add_option(
  652. "-x",
  653. "--lex",
  654. action="store_true",
  655. dest="lex",
  656. help="write a file of lexical entries for country names, then exit",
  657. )
  658. opts.add_option(
  659. "-v",
  660. "--vocab",
  661. action="store_true",
  662. dest="vocab",
  663. help="print out the vocabulary of concept labels and their arity, then exit",
  664. )
  665. (options, args) = opts.parse_args()
  666. if options.outdb and options.indb:
  667. opts.error("Options --store and --load are mutually exclusive")
  668. if options.outdb:
  669. # write the valuation to a persistent database
  670. if options.verbose:
  671. outdb = options.outdb + ".db"
  672. print("Dumping a valuation to %s" % outdb)
  673. val_dump(rels, options.outdb)
  674. sys.exit(0)
  675. else:
  676. # try to read in a valuation from a database
  677. if options.indb is not None:
  678. dbname = options.indb + ".db"
  679. if not os.access(dbname, os.R_OK):
  680. sys.exit("Cannot read file: %s" % dbname)
  681. else:
  682. valuation = val_load(options.indb)
  683. # we need to create the valuation from scratch
  684. else:
  685. # build some concepts
  686. concept_map = process_bundle(rels)
  687. concepts = concept_map.values()
  688. # just print out the vocabulary
  689. if options.vocab:
  690. items = sorted([(c.arity, c.prefLabel) for c in concepts])
  691. for (arity, label) in items:
  692. print(label, arity)
  693. sys.exit(0)
  694. # show all the concepts
  695. if options.concepts:
  696. for c in concepts:
  697. print(c)
  698. print()
  699. if options.label:
  700. print(concept_map[options.label])
  701. sys.exit(0)
  702. else:
  703. # turn the concepts into a Valuation
  704. if options.lex:
  705. if options.verbose:
  706. print("Writing out lexical rules")
  707. make_valuation(concepts, lexicon=True)
  708. else:
  709. valuation = make_valuation(concepts, read=True)
  710. print(valuation)
  711. def sql_demo():
  712. """
  713. Print out every row from the 'city.db' database.
  714. """
  715. print()
  716. print("Using SQL to extract rows from 'city.db' RDB.")
  717. for row in sql_query('corpora/city_database/city.db', "SELECT * FROM city_table"):
  718. print(row)
  719. if __name__ == '__main__':
  720. main()
  721. sql_demo()