1--$Revision: 6.4 $
2--**********************************************************************
3--
4--  Biological Macromolecule 3-D Structure Data Types for MMDB,
5--                A Molecular Modeling Database
6--
7--  Definitions for structural features and biostruc addressing
8--
9--  By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant
10--
11--  National Center for Biotechnology Information
12--  National Institutes of Health
13--  Bethesda, MD 20894 USA
14--
15--  July, 1996
16--
17--**********************************************************************
18
19MMDB-Features DEFINITIONS ::=
20
21BEGIN
22
23EXPORTS Biostruc-feature-set, Chem-graph-pntrs, Atom-pntrs,
24	Chem-graph-alignment, Chem-graph-interaction, Sphere,
25	Cone, Cylinder, Brick, Transform, Biostruc-feature-set-id,
26	Biostruc-feature-id;
27
28IMPORTS Biostruc-id FROM MMDB
29	Molecule-id, Residue-id, Atom-id FROM MMDB-Chemical-graph
30	Model-id, Model-coordinate-set-id FROM MMDB-Structural-model
31	User-object FROM NCBI-General
32	Pub FROM NCBI-Pub;
33
34-- Named model features refer to sets of residues or atoms, or a region in
35-- the model space.  A few specific feature types are allowed for compatibility
36-- with PDB usage, but the purpose of a named model feature is simply to
37-- associate various types of information with a set of atoms or
38-- residues, or a spatially-defined region of the model structure.  They also
39-- support association of various properties with each residue or atom of a
40-- set.
41
42-- PDB-derived secondary structure defines a single feature, represented as a
43-- sequence of residue motifs, as are the contents of PDB SITE and
44-- FTNOTE records.  NCBI-assigned core and secondary structure descriptions
45-- are also represented as a sequence of residue motifs.
46
47Biostruc-feature-set ::= SEQUENCE {  -- a collection of features with its own id
48	id		Biostruc-feature-set-id,  -- INTEGER id; Other-feature.set refers to sets by this type
49	descr		SEQUENCE OF Biostruc-feature-set-descr OPTIONAL,  -- names, comments, attribution; may be absent
50	features	SEQUENCE OF Biostruc-feature }  -- the member features; required component
51
52Biostruc-feature-set-id ::= INTEGER  -- exported id type; used by Biostruc-feature-set.id and Other-feature.set
53
54Biostruc-feature-set-descr ::= CHOICE {  -- one descriptor of a feature set; exactly one alternative is present
55	name			VisibleString,  -- free-text name
56	pdb-comment		VisibleString,  -- comment text; presumably carried over from the PDB entry, TODO confirm
57	other-comment		VisibleString,  -- comment text from any other origin
58	attribution		Pub }  -- publication reference; Pub is imported from NCBI-Pub
59
60-- An explicitly specified type in Biostruc-feature allows for
61-- efficient extraction and indexing of feature sets of a specific type.
62-- Special types are provided for coloring and rendering, as
63-- needed by molecular graphics programs.
64
65Biostruc-feature ::= SEQUENCE {  -- a single feature; every component below is OPTIONAL
66	id		Biostruc-feature-id OPTIONAL,  -- INTEGER id; Other-feature.feature refers to features by this type
67	name		VisibleString OPTIONAL,  -- free-text name of the feature
68	type	INTEGER {	helix(1),  -- explicit type tag, allowing extraction and indexing by feature type
69				strand(2),
70				sheet(3),
71				turn(4),
72				site(5),
73				footnote(6),
74				comment(7),      -- new
75				interaction(8),  -- interaction Data
76				subgraph(100),   -- NCBI domain reserved
77				region(101),
78				core(102),       -- user core definition
79				supercore(103),  -- NCBI reserved
80				color(150),      -- new
81				render(151),     -- new
82				label(152),      -- new
83				transform(153),  -- new
84				camera(154),     -- new
85				script(155),      -- for scripts
86				alignment(200),  -- VAST reserved
87				similarity(201),
88				multalign(202),  -- multiple alignment
89                indirect(203),   -- new
90				cn3dstate(254),  -- Cn3D reserved
91				other(255) } OPTIONAL,  -- named numbers only; INTEGER itself is not restricted to these values
92	property	CHOICE {  -- information attached to the feature; at most one alternative
93				color		Color-prop,
94				render		Render-prop,
95				transform	Transform,
96				camera		Camera,
97				script		Biostruc-script,
98				user		User-object } OPTIONAL,  -- User-object is imported from NCBI-General
99	location	CHOICE {  -- what the feature applies to; at most one alternative
100				subgraph	Chem-graph-pntrs,
101				region		Region-pntrs,
102				alignment	Chem-graph-alignment,
103				interaction     Chem-graph-interaction,
104				similarity	Region-similarity,
105				indirect	Other-feature } OPTIONAL } -- new
106
107-- Other-feature allows for specifying location via reference to another
108-- Biostruc-feature and its location.
109
110Other-feature ::= SEQUENCE {  -- reference to another Biostruc-feature; all three components are required
111	biostruc-id		Biostruc-id,  -- Biostruc-id is imported from MMDB
112	set			Biostruc-feature-set-id,  -- id of the feature set holding the referenced feature
113	feature			Biostruc-feature-id }  -- id of the referenced feature
114
115Biostruc-feature-id ::= INTEGER  -- exported id type; used by Biostruc-feature.id and Other-feature.feature
116
117-- Atom, residue or molecule motifs describe a substructure defined by a set
118-- of nodes from the chemical graph. PDB secondary structure features are
119-- described as a residue motif, since they are not associated with any one of
120-- the multiple models that may be provided in a PDB file.  NCBI-assigned
121-- secondary structure is represented in the same way, even though it is
122-- model specific, since this allows for simple mapping of the structural
123-- feature onto a sequence-only representation. This addressing mode may also
124-- be used to describe features to be associated with particular atoms,
125-- as, for example, the chemical shift observed in an NMR experiment.
126
127Biostruc-molecule-pntr ::= SEQUENCE {  -- names one molecule of one biostruc; used by Chem-graph-interaction.interactors
128	biostruc-id		Biostruc-id,  -- Biostruc-id is imported from MMDB
129	molecule-id		Molecule-id }  -- Molecule-id is imported from MMDB-Chemical-graph
130
131Chem-graph-pntrs ::= CHOICE {  -- addresses chemical graph nodes at one of three levels
132	atoms			Atom-pntrs,  -- individual atoms
133	residues		Residue-pntrs,  -- residues, listed explicitly or as intervals
134	molecules		Molecule-pntrs }  -- whole molecules
135
136Atom-pntrs ::= SEQUENCE {  -- a set of atoms; presumably three parallel arrays indexed together, TODO confirm
137	number-of-ptrs		INTEGER,  -- presumably the length of each array below, TODO confirm
138	molecule-ids		SEQUENCE OF Molecule-id,
139	residue-ids		SEQUENCE OF Residue-id,
140	atom-ids		SEQUENCE OF Atom-id }  -- Molecule-id, Residue-id, Atom-id are imported from MMDB-Chemical-graph
141
142Residue-pntrs ::= CHOICE {  -- a set of residues in one of two encodings
143	explicit		Residue-explicit-pntrs,  -- every residue listed individually
144	interval		SEQUENCE OF Residue-interval-pntr }  -- list of from/to ranges, each within one molecule
145
146Residue-explicit-pntrs ::= SEQUENCE {  -- residues listed one by one; presumably two parallel arrays, TODO confirm
147	number-of-ptrs		INTEGER,  -- presumably the length of each array below, TODO confirm
148	molecule-ids		SEQUENCE OF Molecule-id,
149	residue-ids		SEQUENCE OF Residue-id }
150
151Residue-interval-pntr ::= SEQUENCE {  -- a range of residues within a single molecule
152	molecule-id		Molecule-id,  -- molecule containing the range
153	from			Residue-id,  -- first endpoint; inclusiveness is not stated here, TODO confirm
154	to			Residue-id }  -- second endpoint; inclusiveness is not stated here, TODO confirm
155
156Molecule-pntrs ::= SEQUENCE {  -- a set of whole molecules
157	number-of-ptrs		INTEGER,  -- presumably the length of molecule-ids, TODO confirm
158	molecule-ids		SEQUENCE OF Molecule-id }
159
160-- Region motifs describe features defined by spatial location, such as the
161-- site specified by a coordinate value, or a region within a bounding volume.
162
163Region-pntrs ::= SEQUENCE {  -- a spatially defined region within one model
164	model-id	Model-id,  -- Model-id is imported from MMDB-Structural-model
165	region		CHOICE {  -- required; exactly one alternative
166				site		SEQUENCE OF Region-coordinates,  -- region given by coordinate indices
167				boundary	SEQUENCE OF Region-boundary } }  -- region given by bounding solids
168
169-- Coordinate sites describe a region in space by reference to individual
170-- coordinates, in a particular model.  These coordinates may be either the
171-- x, y and z values of atomic coordinates, the triangles of a surface mesh,
172-- or the grid points of a density model. All are addressed in the same manner,
173-- as coordinate indices which give offsets from the beginning of the
174-- coordinate data arrays.  A coordinate-index of 5, for example, refers to
175-- the 5th x, y and z values of an atomic coordinate set, the 5th v1, v2, and v3
176-- values of a triangle mesh, or the 5th value in a density grid.
177
178-- PDB SITE and FTNOTE records refer to particular atomic coordinates, and they
179-- are represented as a region motif with addresses of type Region-coordinates.
180-- Any names or descriptions provided by PDB are thus associated with the
181-- indicated sites, in the indicated model.
182
183Region-coordinates ::= SEQUENCE {  -- addresses coordinates of one coordinate set by index
184	model-coord-set-id	Model-coordinate-set-id,  -- imported from MMDB-Structural-model
185	number-of-coords	INTEGER OPTIONAL,  -- presumably the length of coordinate-indices, TODO confirm
186	coordinate-indices	SEQUENCE OF INTEGER OPTIONAL }  -- offsets into the coordinate data arrays; meaning when absent is not stated here
187
188-- Region boundaries are defined by regular solids located in the model space.
189
190Region-boundary ::=	CHOICE {	sphere		Sphere,  -- one bounding solid; exactly one of four shapes
191					cone		Cone,
192					cylinder	Cylinder,
193					brick		Brick }  -- all four shape types are defined in this module
194
195-- A biostruc alignment establishes an equivalence of nodes in the chemical
196-- graphs of two or more biostrucs. This may be mapped to a sequence
197-- alignment in the case of biopolymers.
198-- The 'dimension' component indicates the number of participants
199-- in the alignment.  For pairwise alignments, such as VAST
200-- structure-structure alignments, the dimension will be always 2, with
201-- biostruc-ids, alignment, and domain each containing two entries for an
202-- aligned pair.  The 'alignment' component contains a pair of Chem-graph-pntrs
203-- specifying a like number of corresponding residues in each structure.
204-- The 'domain' component specifies a region of each structure considered
205-- in the alignment.  Only one transform (for the second structure) and
206-- one aligndata (for the pair) are provided for each VAST alignment.
207--
208-- For multiple alignments, a set of components are treated as
209-- parallel arrays of length 'dimension'.
210-- The 'transform' component moves each structure to align it with
211-- the structure specified as the first element in the "parallel" array,
212-- so necessarily the first transform is a NULL transform.
213-- Align-stats are placeholders for scores.
214
215Chem-graph-alignment ::= SEQUENCE {  -- equivalence of chemical graph nodes across biostrucs
216	dimension		INTEGER DEFAULT 2,  -- number of participants; omitted value means pairwise
217	biostruc-ids		SEQUENCE OF Biostruc-id,  -- the aligned biostrucs
218	alignment		SEQUENCE OF Chem-graph-pntrs,  -- corresponding residues, one entry per structure
219	domain			SEQUENCE OF Chem-graph-pntrs OPTIONAL,  -- region of each structure considered in the alignment
220	transform		SEQUENCE OF Transform OPTIONAL,  -- moves a structure onto the first one
221	aligndata		SEQUENCE OF Align-stats OPTIONAL }  -- score placeholders
222
223Chem-graph-interaction ::= SEQUENCE {  -- type of the interaction alternative of Biostruc-feature.location
224	type	INTEGER {	protein-protein(1),  -- kind of interacting partners; named values follow
225				protein-dna(2),
226				protein-rna(3),
227				protein-chemical(4),
228				dna-dna(5),
229				dna-rna(6),
230				dna-chemical(7),
231				rna-rna(8),
232				rna-chemical(9),
233				other(255) } OPTIONAL,
234	distance-threshold RealValue OPTIONAL,  -- scaled-integer real; units and exact use are not stated here, TODO confirm
235	interactors SEQUENCE OF Biostruc-molecule-pntr,  -- the interacting molecules; the only required component
236	residue-contacts SEQUENCE OF Chem-graph-pntrs OPTIONAL,  -- presumably contacting residues per interactor, TODO confirm
237	atom-contacts SEQUENCE OF Chem-graph-pntrs OPTIONAL,  -- presumably contacting atoms per interactor, TODO confirm
238	atom-distance SEQUENCE OF RealValue OPTIONAL}  -- presumably distances matching atom-contacts, TODO confirm
239
240Align-stats ::= SEQUENCE {  -- placeholders for alignment scores; every component is OPTIONAL
241	descr		VisibleString OPTIONAL,  -- free-text description
242	scale-factor	INTEGER OPTIONAL,  -- presumably the scaling applied to the integer scores below, TODO confirm
243	vast-score	INTEGER OPTIONAL,
244	vast-mlogp	INTEGER OPTIONAL,
245	align-res	INTEGER OPTIONAL,  -- presumably the number of aligned residues, TODO confirm
246 	rmsd		INTEGER OPTIONAL,
247	blast-score	INTEGER OPTIONAL,
248	blast-mlogp	INTEGER OPTIONAL,
249	other-score	INTEGER OPTIONAL }
250
251-- A biostruc similarity describes spatial features which are similar between
252-- two or more biostrucs.  Similarities are model dependent, and the model and
253-- coordinate set ids of the biostrucs must be specified.  They do not
254-- necessarily map to a sequence alignment, as the regions referenced may
255-- be pieces of a surface or grid, and thus not uniquely mappable to particular
256-- chemical components.
257
258Region-similarity ::= SEQUENCE {  -- spatial features that are similar between biostrucs
259	dimension		INTEGER DEFAULT 2,  -- omitted value means 2
260	biostruc-ids		SEQUENCE OF Biostruc-id,  -- the biostrucs being compared
261	similarity		SEQUENCE OF Region-pntrs,  -- the similar regions; each Region-pntrs carries its own model-id
262	transform		SEQUENCE OF Transform }  -- required here, unlike the OPTIONAL transform of Chem-graph-alignment
263
264-- Geometrical primitives are used in the definition of region motifs, and
265-- also non-atomic coordinates.  Spheres, cones, cylinders and bricks are
266-- defined by a few points in the model space.
267
268Sphere ::= SEQUENCE {  -- sphere in model space
269	center			Model-space-point,  -- center point
270	radius			RealValue }  -- radius as a scaled-integer real
271
272Cone ::= SEQUENCE {  -- cone in model space, given by its axis endpoints and one radius
273	axis-top		Model-space-point,  -- presumably the apex end of the axis, TODO confirm
274	axis-bottom		Model-space-point,  -- axis endpoint at which radius-bottom applies
275	radius-bottom		RealValue }  -- radius at the bottom end, as a scaled-integer real
276
277Cylinder ::= SEQUENCE {  -- cylinder in model space, given by its axis endpoints and radius
278	axis-top		Model-space-point,  -- one end of the axis
279	axis-bottom		Model-space-point,  -- other end of the axis
280	radius			RealValue }  -- radius as a scaled-integer real
281
282-- A brick is defined by the coordinates of eight corners.  These are assumed
283-- to appear in the order 000, 001, 010, 011, 100, 101, 110, 111, where the
284-- three digits (each 0 or 1) refer respectively to the x, y and z axes of a unit cube.
285-- Opposite edges are assumed to be parallel.
286
287Brick ::= SEQUENCE {  -- box given by its eight corners; name suffix digits are the x, y, z positions on a unit cube
288	corner-000		Model-space-point,
289	corner-001		Model-space-point,
290	corner-010		Model-space-point,
291	corner-011		Model-space-point,
292	corner-100		Model-space-point,
293	corner-101		Model-space-point,
294	corner-110		Model-space-point,
295	corner-111		Model-space-point }  -- all eight corners are required
296
297Model-space-point ::= SEQUENCE {  -- a point in model space stored as integers
298	scale-factor		INTEGER,  -- presumably real value = integer / scale-factor, TODO confirm
299	x			INTEGER,
300	y			INTEGER,
301	z			INTEGER }  -- x, y, z share the one scale-factor above
302
303RealValue ::= SEQUENCE {  -- a real number stored as an integer plus a scale factor
304	scale-factor		INTEGER,  -- presumably real value = scaled-integer-value / scale-factor, TODO confirm
305	scaled-integer-value	INTEGER }
306
307
308Transform ::=  SEQUENCE {  -- a list of rotation and translation moves
309            id  INTEGER,  -- identifier of this transform; what it refers to is not stated here, TODO confirm
310            moves SEQUENCE OF Move }  -- presumably applied in list order, TODO confirm
311
312Move ::= CHOICE {  -- one step of a Transform; exactly one alternative
313	rotate		Rot-matrix,  -- rotation, 9 numbers
314	translate	Trans-matrix }  -- translation, 3 numbers
315
316-- A rotation matrix is defined by 9 numbers, given by row, i.e.,
317-- with column indices varying fastest.
318-- Coordinates, as a matrix with columns x, y, and z, are rotated
319-- via multiplication with the rotation matrix.
320-- A translation matrix is defined by 3 numbers, which are added to
321-- the rotated coordinates to apply the specified amount of translation.
322
323Rot-matrix ::= SEQUENCE {  -- rotation matrix, 9 numbers stored by row
324	scale-factor		INTEGER,  -- presumably real entry = integer / scale-factor, TODO confirm
325	rot-11			INTEGER,  -- rot-RC: first digit is the row, second the column
326	rot-12			INTEGER,
327	rot-13			INTEGER,
328	rot-21			INTEGER,
329	rot-22			INTEGER,
330	rot-23			INTEGER,
331	rot-31			INTEGER,
332	rot-32			INTEGER,
333	rot-33			INTEGER }
334
335Trans-matrix ::= SEQUENCE {  -- translation, 3 numbers added to the rotated coordinates
336	scale-factor		INTEGER,  -- presumably real entry = integer / scale-factor, TODO confirm
337	tran-1			INTEGER,  -- presumably the x, y, z components in order, TODO confirm
338	tran-2			INTEGER,
339	tran-3			INTEGER }
340
341-- The camera is a position relative to the world coordinates
342-- of the structure referred to by a location.
343-- This is used to set the initial position of the
344-- camera using OpenGL.  The scale component is the value used to scale the
345-- other values from floating point to integer.
346
347Camera ::= SEQUENCE {  -- initial camera position relative to the structure's world coordinates
348	x		INTEGER,  -- scaled integer; see scale
349	y		INTEGER,  -- scaled integer; see scale
350	distance	INTEGER,  -- scaled integer; see scale
351	angle		INTEGER,  -- scaled integer; units are not stated here, TODO confirm
352	scale		INTEGER,  -- value used to scale the other values from floating point to integer
353    modelview   GL-matrix }  -- required; carries its own scale component
354
355
356GL-matrix ::= SEQUENCE {  -- sixteen INTEGER entries m11 to m44 plus a scale; type of Camera.modelview
357	scale		INTEGER,  -- presumably converts the integer entries back to floating point, TODO confirm
358	m11			INTEGER,  -- whether the digits mean row then column is not stated here, TODO confirm
359	m12			INTEGER,
360	m13			INTEGER,
361	m14			INTEGER,
362	m21			INTEGER,
363	m22			INTEGER,
364	m23			INTEGER,
365	m24			INTEGER,
366	m31			INTEGER,
367	m32			INTEGER,
368	m33			INTEGER,
369	m34			INTEGER,
370	m41			INTEGER,
371	m42			INTEGER,
372	m43			INTEGER,
373	m44			INTEGER }
374
375
376Color-prop ::= SEQUENCE {  -- color property; every component is OPTIONAL
377	r		INTEGER OPTIONAL,  -- presumably the red channel; value range is not stated here, TODO confirm
378	g		INTEGER OPTIONAL,  -- presumably the green channel, TODO confirm
379	b		INTEGER OPTIONAL,  -- presumably the blue channel, TODO confirm
380	name		VisibleString OPTIONAL }  -- color given by name
381
382-- Note that Render-prop is compatible with the Annmm specification,
383-- i.e., its numbering schemes do not clash with those in Annmm.
384
385Render-prop ::= INTEGER {  -- rendering directive; type of Biostruc-feature.property.render
386	default		(0),  -- Default view
387	wire		(1),  -- use wireframe
388	space		(2),  -- use spacefill
389	stick		(3),  -- use stick model (thin cylinders)
390	ballNStick	(4),  -- use ball & stick model
391	thickWire	(5),  -- thicker wireframe
392	hide		(9),  -- don't show this
393	name		(10), -- display its name next to it
394	number 		(11), -- display its number next to it
395	pdbNumber	(12), -- display its PDB number next to it
396	objWireFrame	(150), -- display MMDB surface object as wireframe
397	objPolygons	(151), -- display MMDB surface object as polygons
398	colorsetCPK	(225), -- color atoms like CPK models
399	colorsetbyChain	(226), -- color each chain different
400	colorsetbyTemp	(227), -- color using isotropic Temp factors
401	colorsetbyRes	(228), -- color using residue properties
402	colorsetbyLen	(229), -- color changes along chain length
403	colorsetbySStru	(230), -- color by secondary structure
404	colorsetbyHydro (231), -- color by hydrophobicity
405	colorsetbyObject(246), -- color each object differently
406	colorsetbyDomain(247), -- color each domain differently
407	other           (255)  -- any directive not named above
408	}
409
410--  When a Biostruc-feature with a Biostruc-script is initiated,
411--  it should play the specified steps one at a time, setting the feature-do
412--  list as the active display.
413--  The camera can be set using a feature-do,
414--  but it may be moved independently with
415--  camera-move, which specifies how to move
416--  the camera dynamically during the step along the path defined (e.g.,
417--  a zoom, a rotate).
418--  Any value of pause (in tenths of a second) will force a pause
419--  after an image is shown.
420--  If waitevent is TRUE, it will await a mouse or keypress and ignore
421--  the pause value.
422
423Biostruc-script ::= SEQUENCE OF Biostruc-script-step  -- ordered list of steps; type of Biostruc-feature.property.script
424
425Biostruc-script-step ::= SEQUENCE {  -- one step of a Biostruc-script
426	step-id			Step-id,  -- INTEGER id of this step
427	step-name		VisibleString OPTIONAL,  -- free-text name of the step
428	feature-do		SEQUENCE OF Other-feature OPTIONAL,  -- features set as the active display for this step
429	camera-move		Transform OPTIONAL,  -- how to move the camera dynamically during the step
430	pause			INTEGER DEFAULT 10,  -- pause after the image is shown, in tenths of a second
431	waitevent		BOOLEAN,  -- required; TRUE awaits a mouse or keypress and ignores pause
432	extra			INTEGER,  -- required; purpose is not documented in this file, TODO confirm
433	jump			Step-id OPTIONAL }  -- presumably the step to continue with afterwards, TODO confirm
434
435Step-id ::= INTEGER  -- id type used by Biostruc-script-step.step-id and Biostruc-script-step.jump
436
437END
438