--
-- dict.sql
--
-- SQLite3 style SQL commands to create the tables holding the various
-- dictionary bits and pieces.  Right now, this is just a very simple
-- pair of tables: one that assigns each word to a word class, and
-- another that holds all the disjuncts for each class.
--
-- To create a new database, simply say:
--    cat dict.sql | sqlite3 dict.db
-- To populate it with the demo data:
--    cat demo.sql | sqlite3 dict.db
--

-- Maps each dictionary word (morpheme) to the word class it belongs to.
-- One row per subscripted word form.
CREATE TABLE Morphemes
(
	-- For English, the 'morpheme' is the 'word'. A given morpheme
	-- may appear multiple times in this table.  This is the field that
	-- the tokenizer uses to determine if a token is in the dictionary.
	morpheme TEXT NOT NULL,

	-- The subscripted form of the above.  The subscripted forms are
	-- always unique for the dictionary (enforced by the UNIQUE
	-- constraint).  They serve as a debugging tool and as a unique
	-- identifier within the database.
	subscript TEXT UNIQUE NOT NULL,

	-- The classname is the set that the subscripted 'word' belongs to.
	-- All members of the class share a common set of disjuncts, with
	-- a common set of costs.  Note that no foreign key is declared, so
	-- the database itself does not check that the classname has any
	-- disjuncts recorded for it.
	classname TEXT NOT NULL
);

-- We want fast lookup of words.  The index is deliberately non-unique:
-- a given morpheme may appear in several rows of Morphemes.
CREATE INDEX morph_idx ON Morphemes(morpheme);

-- Holds the disjuncts, with their costs, for each word class.
-- One row per (classname, disjunct) pairing.
CREATE TABLE Disjuncts
(
	-- All words/morphemes sharing this classname also share this
	-- disjunct and cost.
	classname TEXT NOT NULL,

	-- The standard Link Grammar disjunct, expressed as an ASCII string.
	-- The disjunct can be composed of the & operator, the optional
	-- connectors i.e. {} and the multiple connector i.e. @. The 'or'
	-- operator is NOT allowed. This means that the grouping parens ()
	-- must also not appear in the expression.  The cost operators [] are
	-- also not allowed; costs are to be expressed using the cost field.
	-- None of these restrictions are enforced by the schema; they are
	-- a convention for whoever fills in the table.
	--
	-- An example of a valid disjunct:
	--     A+ & B- & {Ca*bc*f+} & @Mpqr-
	--
	-- An example of an INVALID disjunct:
	--     (A+ & B-) & {Ca*bc*f+ or [D-]} & @Mpqr-
	--
	disjunct TEXT NOT NULL,

	-- Cost of using this disjunct.  Unlike the other columns, this one
	-- is nullable (no NOT NULL constraint is declared).
	cost REAL
);

-- We want fast lookup of classnames.  The index is non-unique: a class
-- normally has many disjunct rows.
CREATE INDEX class_idx ON Disjuncts(classname);
