--
-- dict.sql
--
-- SQLite3 style SQL commands to create the tables holding the various
-- dictionary bits and pieces. Right now, this is just a very simple
-- pair of tables, one to hold word classes, and another to hold all
-- the disjuncts for that class.
--
-- To create a new database, simply say:
--    cat dict.sql | sqlite3 dict.db
-- To populate it with the demo data:
--    cat demo.sql | sqlite3 dict.db
--

CREATE TABLE Morphemes
(
    -- For English, the 'morpheme' is the 'word'. A given morpheme
    -- may appear multiple times in this table. This is the field that
    -- the tokenizer uses to determine if a token is in the dictionary.
    morpheme TEXT NOT NULL,

    -- The subscripted form of the above. The subscripted forms are
    -- always unique for the dictionary. They serve both as a debugging
    -- tool and as a unique identifier for the database.
    subscript TEXT UNIQUE NOT NULL,

    -- The classname is the set that the subscripted 'word' belongs to.
    -- All members of the class share a common set of disjuncts, with
    -- a common set of costs.
    classname TEXT NOT NULL
);

-- We want fast lookup of words.
CREATE INDEX morph_idx ON Morphemes(morpheme);

CREATE TABLE Disjuncts
(
    -- All words/morphemes sharing this classname also share this
    -- disjunct and cost.
    classname TEXT NOT NULL,

    -- The standard Link Grammar disjunct, expressed as an ASCII string.
    -- The disjunct can be composed of the & operator, the optional
    -- connectors, i.e. {}, and the multiple connector, i.e. @. The or
    -- operator is NOT allowed. This means that the grouping parens ()
    -- must also not appear in the expression. The cost operators [] are
    -- also not allowed; costs are to be expressed using the cost field.
    --
    -- An example of a valid disjunct:
    --     A+ & B- & {Ca*bc*f+} & @Mpqr-
    --
    -- An example of an INVALID disjunct:
    --     (A+ & B-) & {Ca*bc*f+ or [D-]} & @Mpqr-
    --
    disjunct TEXT NOT NULL,

    -- Cost of using this disjunct.
    cost REAL
);

-- We want fast lookup of classnames.
CREATE INDEX class_idx ON Disjuncts(classname);