Parent Directory
|
Revision Log
Revision 1.39 - (view) (download) (as text)
1 : | parrello | 1.1 | <?xml version="1.0" encoding="utf-8" ?> |
2 : | <Database> | ||
3 : | <Title>Sprout Genome and Subsystem Database</Title> | ||
4 : | <Entities> | ||
5 : | <Entity name="Genome" keyType="name-string"> | ||
6 : | <Notes>A [i]genome[/i] contains the sequence data for a particular individual organism.</Notes> | ||
7 : | <Fields> | ||
8 : | <Field name="genus" type="name-string"> | ||
9 : | <Notes>Genus of the relevant organism.</Notes> | ||
10 : | <DataGen pass="1">RandParam('streptococcus', 'staphyloccocus', 'felis', 'homo', 'ficticio', 'strangera', 'escherischia', 'carborunda')</DataGen> | ||
11 : | </Field> | ||
12 : | <Field name="species" type="name-string"> | ||
13 : | parrello | 1.8 | <Notes>Species of the relevant organism.</Notes> |
14 : | parrello | 1.1 | <DataGen pass="1">StringGen('PKVKVKVKVKV')</DataGen> |
15 : | parrello | 1.8 | </Field> |
16 : | parrello | 1.1 | <Field name="unique-characterization" type="medium-string"> |
17 : | parrello | 1.8 | <Notes>The unique characterization identifies the particular organism instance from which the |
18 : | genome is taken. It is possible to have in the database more than one genome for a | ||
19 : | parrello | 1.1 | particular species, and every individual organism has variations in its DNA.</Notes> |
20 : | parrello | 1.8 | <DataGen>StringGen('PKVKVK999')</DataGen> |
21 : | parrello | 1.1 | </Field> |
22 : | <Field name="access-code" type="key-string"> | ||
23 : | parrello | 1.8 | <Notes>The access code determines which users can look at the data relating to this genome. |
24 : | Each user is associated with a set of access codes. In order to view a genome, one of | ||
25 : | the user's access codes must match this value.</Notes> | ||
26 : | <DataGen>RandParam('low','medium','high')</DataGen> | ||
27 : | </Field> | ||
28 : | parrello | 1.15 | <Field name="complete" type="boolean"> |
29 : | <Notes>TRUE if the genome is complete, else FALSE</Notes> | ||
30 : | </Field> | ||
31 : | parrello | 1.8 | <Field name="taxonomy" type="text"> |
32 : | <Notes>The taxonomy string contains the full taxonomy of the organism, with individual elements | ||
33 : | separated by semi-colons (and optional white space), starting with the domain and ending with | ||
34 : | the disambiguated genus and species (which is the organism's scientific name plus an | ||
35 : | identifying string).</Notes> | ||
36 : | <DataGen pass="2">join('; ', (RandParam('bacteria', 'archaea', 'eukaryote', 'virus', 'environmental'), | ||
37 : | ListGen('PKVKVKVK', 5), $this->{genus}, $this->{species}))</DataGen> | ||
38 : | </Field> | ||
39 : | parrello | 1.37 | <Field name="primary-group" type="name-string"> |
40 : | <Notes>The primary NMPDR group for this organism. There is always exactly one NMPDR group | ||
41 : | (either based on the organism name or the default value "Supporting"), whereas there can be | ||
42 : | multiple named groups or even none.</Notes> | ||
43 : | </Field> | ||
44 : | parrello | 1.8 | <Field name="group-name" type="name-string" relation="GenomeGroups"> |
45 : | <Notes>The group identifies a special grouping of organisms that would be displayed on a particular | ||
46 : | page or of particular interest to a research group or web site. A single genome can belong to multiple | ||
47 : | such groups or none at all.</Notes> | ||
48 : | </Field> | ||
49 : | parrello | 1.1 | </Fields> |
50 : | <Indexes> | ||
51 : | parrello | 1.37 | <Index Unique="false"> |
52 : | parrello | 1.1 | <Notes>This index allows the applications to find all genomes associated with |
53 : | a specific access code, so that a complete list of the genomes users can view | ||
54 : | may be generated.</Notes> | ||
55 : | <IndexFields> | ||
56 : | <IndexField name="access-code" order="ascending" /> | ||
57 : | <IndexField name="genus" order="ascending" /> | ||
58 : | <IndexField name="species" order="ascending" /> | ||
59 : | <IndexField name="unique-characterization" order="ascending" /> | ||
60 : | </IndexFields> | ||
61 : | </Index> | ||
62 : | <Index Unique="false"> | ||
63 : | parrello | 1.37 | <Notes>This index allows the applications to find all genomes associated with |
64 : | a specific primary (NMPDR) group.</Notes> | ||
65 : | <IndexFields> | ||
66 : | <IndexField name="primary-group" order="ascending" /> | ||
67 : | <IndexField name="genus" order="ascending" /> | ||
68 : | <IndexField name="species" order="ascending" /> | ||
69 : | <IndexField name="unique-characterization" order="ascending" /> | ||
70 : | </IndexFields> | ||
71 : | </Index> | ||
72 : | <Index Unique="false"> | ||
73 : | parrello | 1.1 | <Notes>This index allows the applications to find all genomes for a particular |
74 : | species.</Notes> | ||
75 : | <IndexFields> | ||
76 : | <IndexField name="genus" order="ascending" /> | ||
77 : | <IndexField name="species" order="ascending" /> | ||
78 : | <IndexField name="unique-characterization" order="ascending" /> | ||
79 : | </IndexFields> | ||
80 : | </Index> | ||
81 : | </Indexes> | ||
82 : | </Entity> | ||
83 : | <Entity name="Source" keyType="medium-string"> | ||
84 : | <Notes>A [i]source[/i] describes a place from which genome data was taken. This can be an organization | ||
85 : | or a paper citation.</Notes> | ||
86 : | <Fields> | ||
87 : | <Field name="URL" type="string" relation="SourceURL"> | ||
88 : | parrello | 1.8 | <Notes>URL of the paper cited or of the organization's web site. This field is optional.</Notes> |
89 : | <DataGen>"http://www.conservativecat.com/Ferdy/TestTarget.php?Source=" . $this->{id}</DataGen> | ||
90 : | </Field> | ||
91 : | parrello | 1.1 | <Field name="description" type="text"> |
92 : | parrello | 1.8 | <Notes>Description of the source. The description can be a street address or a citation.</Notes> |
93 : | <DataGen>$this->{id} . ': ' . StringGen(IntGen(50,200))</DataGen> | ||
94 : | </Field> | ||
95 : | parrello | 1.1 | </Fields> |
96 : | </Entity> | ||
97 : | <Entity name="Contig" keyType="name-string"> | ||
98 : | <Notes>A [i]contig[/i] is a contiguous run of residues. The contig's ID consists of the | ||
99 : | genome ID followed by a name that identifies which contig this is for the parent genome. As | ||
100 : | is the case with all keys in this database, the individual components are separated by a | ||
101 : | period. | ||
102 : | [p]A contig can contain over a million residues. For performance reasons, therefore, | ||
103 : | the contig is split into multiple pieces called [i]sequences[/i]. The sequences | ||
104 : | contain the characters that represent the residues as well as data on the quality of | ||
105 : | the residue identification.</Notes> | ||
106 : | </Entity> | ||
107 : | <Entity name="Sequence" keyType="name-string"> | ||
108 : | <Notes>A [i]sequence[/i] is a continuous piece of a [i]contig[/i]. Contigs are split into | ||
109 : | sequences so that we don't have to have the entire contig in memory when we are | ||
110 : | manipulating it. The key of the sequence is the contig ID followed by the index of | ||
111 : | the begin point.</Notes> | ||
112 : | <Fields> | ||
113 : | <Field name="sequence" type="text"> | ||
114 : | parrello | 1.8 | <Notes>String consisting of the residues. Each residue is described by a single |
115 : | character in the string.</Notes> | ||
116 : | <DataGen>RandChars("ACGT", IntGen(100,400))</DataGen> | ||
117 : | </Field> | ||
118 : | parrello | 1.1 | <Field name="quality-vector" type="text"> |
119 : | parrello | 1.9 | <Notes>String describing the quality data for each base pair. Individual values will |
120 : | parrello | 1.8 | be separated by periods. The value represents the negative exponent of the probability |
121 : | of error. Thus, for example, a quality of 30 indicates the probability of error is | ||
122 : | 10^-30. A higher quality number indicates a better chance of a correct match. It is possible | ||
123 : | parrello | 1.9 | that the quality data is not known for a sequence. If that is the case, the quality |
124 : | parrello | 1.8 | vector will contain the string [b]unknown[/b].</Notes> |
125 : | <DataGen>unknown</DataGen> | ||
126 : | </Field> | ||
127 : | parrello | 1.1 | </Fields> |
128 : | </Entity> | ||
129 : | parrello | 1.25 | <Entity name="Feature" keyType="id-string"> |
130 : | parrello | 1.1 | <Notes>A [i]feature[/i] is a part of a genome that is of special interest. Features |
131 : | may be spread across multiple contigs of a genome, but never across more than | ||
132 : | one genome. Features can be assigned to roles via spreadsheet cells, | ||
133 : | and are the targets of annotation.</Notes> | ||
134 : | <Fields> | ||
135 : | <Field name="feature-type" type="string"> | ||
136 : | parrello | 1.8 | <Notes>Code indicating the type of this feature.</Notes> |
137 : | <DataGen>RandParam('peg','rna')</DataGen> | ||
138 : | </Field> | ||
139 : | parrello | 1.14 | <Field name="alias" type="medium-string" relation="FeatureAlias"> |
140 : | parrello | 1.9 | <Notes>Alternative name for this feature. A feature can have many aliases.</Notes> |
141 : | parrello | 1.8 | <DataGen testCount="3">StringGen('Pgi|99999', 'Puni|XXXXXX', 'PAAAAAA999')</DataGen> |
142 : | </Field> | ||
143 : | parrello | 1.1 | <Field name="translation" type="text" relation="FeatureTranslation"> |
144 : | parrello | 1.8 | <Notes>[i](optional)[/i] A translation of this feature's residues into character |
145 : | codes, formed by concatenating the pieces of the feature together. For a | ||
146 : | protein encoding group, this is the protein characters. For other types | ||
147 : | it is the DNA characters.</Notes> | ||
148 : | <DataGen testCount="0"></DataGen> | ||
149 : | </Field> | ||
150 : | parrello | 1.1 | <Field name="upstream-sequence" type="text" relation="FeatureUpstream"> |
151 : | parrello | 1.8 | <Notes>Upstream sequence of the feature. This includes residues preceding the feature as well as some of |
152 : | the feature's initial residues.</Notes> | ||
153 : | <DataGen testCount="0"></DataGen> | ||
154 : | </Field> | ||
155 : | parrello | 1.1 | <Field name="active" type="boolean"> |
156 : | parrello | 1.11 | <Notes>TRUE if this feature is still considered valid, FALSE if it has been logically deleted.</Notes> |
157 : | parrello | 1.8 | <DataGen>1</DataGen> |
158 : | </Field> | ||
159 : | parrello | 1.36 | <Field name="assignment" type="text"> |
160 : | <Notes>This is the primary functional assignment for the feature.</Notes> | ||
161 : | </Field> | ||
162 : | parrello | 1.8 | <Field name="link" type="text" relation="FeatureLink"> |
163 : | <Notes>Web hyperlink for this feature. A feature can have no hyperlinks or it can have many. The | ||
164 : | links are to other websites that have useful information about the gene that the feature represents, and | ||
165 : | are coded as raw HTML, using [b]<a href="[i]link[/i]">[i]text[/i]</a>[/b] notation.</Notes> | ||
166 : | <DataGen testCount="3">'http://www.conservativecat.com/Ferdy/TestTarget.php?Source=' . $this->{id} . | ||
167 : | "&Number=" . IntGen(1,99)</DataGen> | ||
168 : | </Field> | ||
169 : | parrello | 1.1 | </Fields> |
170 : | parrello | 1.8 | <Indexes> |
171 : | <Index> | ||
172 : | <Notes>This index allows the user to find the feature corresponding to | ||
173 : | the specified alias name.</Notes> | ||
174 : | <IndexFields> | ||
175 : | <IndexField name="alias" order="ascending" /> | ||
176 : | </IndexFields> | ||
177 : | </Index> | ||
178 : | </Indexes> | ||
179 : | parrello | 1.1 | </Entity> |
180 : | parrello | 1.27 | <Entity name="SynonymGroup" keyType="id-string"> |
181 : | <Notes>A [i]synonym group[/i] represents a group of features. Substantially identical features | ||
182 : | are mapped to the same synonym group, and this information is used to expand similarities.</Notes> | ||
183 : | </Entity> | ||
184 : | parrello | 1.1 | <Entity name="Role" keyType="string"> |
185 : | <Notes>A [i]role[/i] describes a biological function that may be fulfilled by a feature. | ||
186 : | One of the main goals of the database is to record the roles of the various features.</Notes> | ||
187 : | parrello | 1.8 | <Fields> |
188 : | parrello | 1.18 | <Field name="EC" type="string" relation="RoleEC"> |
189 : | <Notes>EC code for this role.</Notes> | ||
190 : | parrello | 1.8 | <DataGen testCount="1">StringGen(IntGen(20,40)) . "(" . $this->{id} . ")"</DataGen> |
191 : | </Field> | ||
192 : | parrello | 1.15 | <Field name="abbr" type="name-string"> |
193 : | <Notes>Abbreviated name for the role, generally non-unique, but useful | ||
194 : | in column headings for HTML tables.</Notes> | ||
195 : | </Field> | ||
196 : | parrello | 1.8 | </Fields> |
197 : | parrello | 1.18 | <Indexes> |
198 : | <Index> | ||
199 : | <Notes>This index allows the user to find the role corresponding to | ||
200 : | an EC number.</Notes> | ||
201 : | <IndexFields> | ||
202 : | <IndexField name="EC" order="ascending" /> | ||
203 : | </IndexFields> | ||
204 : | </Index> | ||
205 : | </Indexes> | ||
206 : | parrello | 1.1 | </Entity> |
207 : | <Entity name="Annotation" keyType="name-string"> | ||
208 : | <Notes>An [i]annotation[/i] contains supplementary information about a feature. Annotations | ||
209 : | parrello | 1.8 | are currently the only objects that may be inserted directly into the database. All other |
210 : | parrello | 1.24 | information is loaded from data exported by the SEED.</Notes> |
211 : | parrello | 1.8 | <Fields> |
212 : | <Field name="time" type="date"> | ||
213 : | <Notes>Date and time of the annotation.</Notes> | ||
214 : | </Field> | ||
215 : | <Field name="annotation" type="text"> | ||
216 : | <Notes>Text of the annotation.</Notes> | ||
217 : | </Field> | ||
218 : | parrello | 1.1 | </Fields> |
219 : | parrello | 1.26 | <Indexes> |
220 : | <Index> | ||
221 : | <Notes>This index allows the user to find recent annotations.</Notes> | ||
222 : | <IndexFields> | ||
223 : | <IndexField name="time" order="descending" /> | ||
224 : | </IndexFields> | ||
225 : | </Index> | ||
226 : | </Indexes> | ||
227 : | parrello | 1.1 | </Entity> |
228 : | parrello | 1.15 | <Entity name="Reaction" keyType="key-string"> |
229 : | <Notes>A [i]reaction[/i] is a chemical process catalyzed by a protein. The reaction ID | ||
230 : | is generally a small number preceded by a letter.</Notes> | ||
231 : | <Fields> | ||
232 : | <Field name="url" type="string" relation="ReactionURL"> | ||
233 : | <Notes>HTML string containing a link to a web location that describes the | ||
234 : | reaction. This field is optional.</Notes> | ||
235 : | </Field> | ||
236 : | <Field name="rev" type="boolean"> | ||
237 : | <Notes>TRUE if this reaction is reversible, else FALSE</Notes> | ||
238 : | </Field> | ||
239 : | </Fields> | ||
240 : | </Entity> | ||
241 : | <Entity name="Compound" keyType="name-string"> | ||
242 : | <Notes>A [i]compound[/i] is a chemical that participates in a reaction. | ||
243 : | All compounds have a unique ID and may also have one or more names.</Notes> | ||
244 : | <Fields> | ||
245 : | <Field name="name-priority" type="int" relation="CompoundName"> | ||
246 : | <Notes>Priority of a compound name. The name with the lowest | ||
247 : | priority is the main name of this compound.</Notes> | ||
248 : | </Field> | ||
249 : | <Field name="name" type="name-string" relation="CompoundName"> | ||
250 : | <Notes>Descriptive name for the compound. A compound may | ||
251 : | have several names.</Notes> | ||
252 : | </Field> | ||
253 : | <Field name="cas-id" type="name-string" relation="CompoundCAS"> | ||
254 : | <Notes>Chemical Abstract Service ID for this compound (optional).</Notes> | ||
255 : | </Field> | ||
256 : | parrello | 1.19 | <Field name="label" type="name-string"> |
257 : | <Notes>Name used in reaction display strings. | ||
258 : | It is the same as the name possessing a priority of 1, but it is placed | ||
259 : | here to speed up the query used to create the display strings.</Notes> | ||
260 : | </Field> | ||
261 : | parrello | 1.15 | </Fields> |
262 : | <Indexes> | ||
263 : | <Index> | ||
264 : | <Notes>This index allows the user to find the compound corresponding to | ||
265 : | the specified name.</Notes> | ||
266 : | <IndexFields> | ||
267 : | <IndexField name="name" order="ascending" /> | ||
268 : | </IndexFields> | ||
269 : | </Index> | ||
270 : | <Index> | ||
271 : | parrello | 1.17 | <Notes>This index allows the user to find the compound corresponding to |
272 : | the specified CAS ID.</Notes> | ||
273 : | <IndexFields> | ||
274 : | <IndexField name="cas-id" order="ascending" /> | ||
275 : | </IndexFields> | ||
276 : | </Index> | ||
277 : | <Index> | ||
278 : | parrello | 1.15 | <Notes>This index allows the user to access the compound names in |
279 : | priority order.</Notes> | ||
280 : | <IndexFields> | ||
281 : | <IndexField name="id" order="ascending" /> | ||
282 : | <IndexField name="name-priority" order="ascending" /> | ||
283 : | </IndexFields> | ||
284 : | </Index> | ||
285 : | </Indexes> | ||
286 : | </Entity> | ||
287 : | parrello | 1.5 | <Entity name="Subsystem" keyType="string"> |
288 : | parrello | 1.1 | <Notes>A [i]subsystem[/i] is a collection of roles that work together in a cell. Identification of subsystems |
289 : | is an important tool for recognizing parallel genetic features in different organisms.</Notes> | ||
290 : | parrello | 1.15 | <Fields> |
291 : | <Field name="curator" type="string"> | ||
292 : | <Notes>Name of the person currently in charge of the subsystem.</Notes> | ||
293 : | </Field> | ||
294 : | <Field name="notes" type="text"> | ||
295 : | <Notes>Descriptive notes about the subsystem.</Notes> | ||
296 : | </Field> | ||
297 : | parrello | 1.28 | <Field name="classification" type="string" relation="SubsystemClass"> |
298 : | <Notes>General classification data about the subsystem.</Notes> | ||
299 : | </Field> | ||
300 : | parrello | 1.15 | </Fields> |
301 : | </Entity> | ||
302 : | <Entity name="RoleSubset" keyType="string"> | ||
303 : | <Notes>A [i]role subset[/i] is a named collection of roles in a particular subsystem. The | ||
304 : | subset names are generally very short, non-unique strings. The ID of the parent | ||
305 : | subsystem is prefixed to the subset ID in order to make it unique.</Notes> | ||
306 : | </Entity> | ||
307 : | <Entity name="GenomeSubset" keyType="string"> | ||
308 : | <Notes>A [i]genome subset[/i] is a named collection of genomes that participate | ||
309 : | in a particular subsystem. The subset names are generally very short, non-unique | ||
310 : | strings. The ID of the parent subsystem is prefixed to the subset ID in order | ||
311 : | to make it unique.</Notes> | ||
312 : | parrello | 1.1 | </Entity> |
313 : | parrello | 1.24 | <Entity name="SSCell" keyType="hash-string"> |
314 : | parrello | 1.1 | <Notes>Part of the process of locating and assigning features is creating a spreadsheet of |
315 : | genomes and roles to which features are assigned. A [i]spreadsheet cell[/i] represents one | ||
316 : | of the positions on the spreadsheet.</Notes> | ||
317 : | </Entity> | ||
318 : | <Entity name="SproutUser" keyType="name-string"> | ||
319 : | <Notes>A [i]user[/i] is a person who can make annotations and view data in the database. The | ||
320 : | user object is keyed on the user's login name.</Notes> | ||
321 : | <Fields> | ||
322 : | parrello | 1.8 | <Field name="description" type="string"> |
323 : | <Notes>Full name or description of this user.</Notes> | ||
324 : | </Field> | ||
325 : | parrello | 1.1 | <Field name="access-code" type="key-string" relation="UserAccess"> |
326 : | parrello | 1.8 | <Notes>Access code possessed by this |
327 : | parrello | 1.1 | user. A user can have many access codes; a genome is accessible to the user if its |
328 : | access code matches any one of the user's access codes.</Notes> | ||
329 : | parrello | 1.8 | <DataGen testCount="2">RandParam('low', 'medium', 'high')</DataGen> |
330 : | </Field> | ||
331 : | parrello | 1.1 | </Fields> |
332 : | </Entity> | ||
333 : | parrello | 1.8 | <Entity name="Property" keyType="int"> |
334 : | <Notes>A [i]property[/i] is a type of assertion that could be made about the properties of | ||
335 : | a particular feature. Each property instance is a key/value pair and can be associated | ||
336 : | with many different features. Conversely, a feature can be associated with many key/value | ||
337 : | pairs, even some that notionally contradict each other. For example, there can be evidence | ||
338 : | that a feature is essential to the organism's survival and evidence that it is superfluous.</Notes> | ||
339 : | <Fields> | ||
340 : | <Field name="property-name" type="name-string"> | ||
341 : | <Notes>Name of this property.</Notes> | ||
342 : | </Field> | ||
343 : | <Field name="property-value" type="string"> | ||
344 : | <Notes>Value associated with this property. For each property | ||
345 : | name, there must be a property record for all of its possible | ||
346 : | values.</Notes> | ||
347 : | </Field> | ||
348 : | </Fields> | ||
349 : | <Indexes> | ||
350 : | <Index> | ||
351 : | <Notes>This index enables the application to find all values for a specified property | ||
352 : | name, or any given name/value pair.</Notes> | ||
353 : | <IndexFields> | ||
354 : | <IndexField name="property-name" order="ascending" /> | ||
355 : | <IndexField name="property-value" order="ascending" /> | ||
356 : | </IndexFields> | ||
357 : | </Index> | ||
358 : | </Indexes> | ||
359 : | </Entity> | ||
360 : | <Entity name="Diagram" keyType="name-string"> | ||
361 : | <Notes>A functional diagram describes the chemical reactions, often comprising a single | ||
362 : | subsystem. A diagram is identified by a short name and contains a longer descriptive name. | ||
363 : | The actual diagram shows which functional roles guide the reactions along with the inputs | ||
364 : | and outputs; the database, however, only indicates which roles belong to a particular | ||
365 : | map.</Notes> | ||
366 : | <Fields> | ||
367 : | <Field name="name" type="text"> | ||
368 : | <Notes>Descriptive name of this diagram.</Notes> | ||
369 : | </Field> | ||
370 : | </Fields> | ||
371 : | </Entity> | ||
372 : | <Entity name="ExternalAliasOrg" keyType="name-string"> | ||
373 : | <Notes>An external alias is a feature name for a functional assignment that is not a | ||
374 : | FIG ID. Functional assignments for external aliases are kept in a separate section of | ||
375 : | the database. This table contains a description of the relevant organism for an | ||
376 : | external alias functional assignment.</Notes> | ||
377 : | <Fields> | ||
378 : | <Field name="org" type="text"> | ||
379 : | <Notes>Descriptive name of the target organism for this external alias.</Notes> | ||
380 : | </Field> | ||
381 : | </Fields> | ||
382 : | </Entity> | ||
383 : | <Entity name="ExternalAliasFunc" keyType="name-string"> | ||
384 : | <Notes>An external alias is a feature name for a functional assignment that is not a | ||
385 : | FIG ID. Functional assignments for external aliases are kept in a separate section of | ||
386 : | the database. This table contains the functional role for the external alias functional | ||
387 : | assignment.</Notes> | ||
388 : | <Fields> | ||
389 : | <Field name="func" type="text"> | ||
390 : | <Notes>Functional role for this external alias.</Notes> | ||
391 : | </Field> | ||
392 : | </Fields> | ||
393 : | </Entity> | ||
394 : | parrello | 1.29 | <Entity name="Coupling" keyType="id-string"> |
395 : | parrello | 1.6 | <Notes>A coupling is a relationship between two features. The features are |
396 : | physically close on the contig, and there is evidence that they generally | ||
397 : | belong together. The key of this entity is formed by combining the coupled | ||
398 : | parrello | 1.8 | feature IDs with a space.</Notes> |
399 : | parrello | 1.6 | <Fields> |
400 : | <Field name="score" type="int"> | ||
401 : | <Notes>A number based on the set of PCHs (pairs of close homologs). A PCH | ||
402 : | indicates that two genes near each other on one genome are very similar to | ||
403 : | genes near each other on another genome. The score only counts PCHs for which | ||
404 : | the genomes are very different. (In other words, we have a pairing that persists | ||
405 : | between different organisms.) A higher score implies a stronger meaning to the | ||
406 : | clustering.</Notes> | ||
407 : | </Field> | ||
408 : | </Fields> | ||
409 : | </Entity> | ||
410 : | parrello | 1.30 | <Entity name="PCH" keyType="counter"> |
411 : | parrello | 1.6 | <Notes>A PCH (physically close homolog) connects a clustering (which is a |
412 : | pair of physically close features on a contig) to a second pair of physically | ||
413 : | close features that are similar to the first. Essentially, the PCH is a | ||
414 : | relationship between two clusterings in which the first clustering's features | ||
415 : | are similar to the second clustering's features. The simplest model for | ||
416 : | this would be to simply relate clusterings to each other; however, not all | ||
417 : | physically close pairs qualify as clusterings, so we relate a clustering to | ||
418 : | parrello | 1.30 | a pair of features. The key is a unique ID number.</Notes> |
419 : | parrello | 1.6 | <Fields> |
420 : | <Field name="used" type="boolean"> | ||
421 : | <Notes>TRUE if this PCH is used in scoring the attached clustering, | ||
422 : | else FALSE. If a clustering has a PCH for a particular genome and many | ||
423 : | similar genomes are present, then a PCH will probably exist for the | ||
424 : | similar genomes as well. When this happens, only one of the PCHs will | ||
425 : | be scored: the others are considered duplicates of the same evidence.</Notes> | ||
426 : | </Field> | ||
427 : | </Fields> | ||
428 : | </Entity> | ||
429 : | parrello | 1.31 | <Entity name="Family" keyType="id-string"> |
430 : | <Notes>A family is a group of homologous PEGs believed to have the same function. Protein | ||
431 : | families provide a mechanism for verifying the accuracy of functional assignments | ||
432 : | and are also used in determining phylogenetic trees.</Notes> | ||
433 : | <Fields> | ||
434 : | parrello | 1.32 | <Field name="function" type="text"> |
435 : | parrello | 1.31 | <Notes>The functional assignment expected for all PEGs in this family.</Notes> |
436 : | </Field> | ||
437 : | parrello | 1.33 | <Field name="size" type="int"> |
438 : | parrello | 1.31 | <Notes>The number of proteins in this family. This may be larger than the |
439 : | number of PEGs included in the family, since the family may also contain external | ||
440 : | IDs.</Notes> | ||
441 : | </Field> | ||
442 : | </Fields> | ||
443 : | </Entity> | ||
444 : | parrello | 1.1 | </Entities> |
445 : | <Relationships> | ||
446 : | parrello | 1.34 | <Relationship name="IsFamilyForFeature" from="Family" to="Feature" arity="MM"> |
447 : | parrello | 1.31 | <Notes>This relationship connects a protein family to all of its PEGs and connects |
448 : | each PEG to all of its protein families.</Notes> | ||
449 : | </Relationship> | ||
450 : | parrello | 1.6 | <Relationship name="ParticipatesInCoupling" from="Feature" to="Coupling" arity="MM"> |
451 : | <Notes>This relationship connects a feature to all the functional couplings | ||
452 : | in which it participates. A functional coupling is a recognition of the fact | ||
453 : | that the features are close to each other on a chromosome, and similar | ||
454 : | features in other genomes also tend to be close.</Notes> | ||
455 : | <Fields> | ||
456 : | <Field name="pos" type="int"> | ||
457 : | <Notes>Ordinal position of the feature in the coupling. Currently, | ||
458 : | this is either "1" or "2".</Notes> | ||
459 : | </Field> | ||
460 : | </Fields> | ||
461 : | <ToIndex> | ||
462 : | <Notes>This index enables the application to view the features of | ||
463 : | a coupling in the proper order. The order influences the way the | ||
464 : | PCHs are examined.</Notes> | ||
465 : | <IndexFields> | ||
466 : | <IndexField name="pos" order="ascending" /> | ||
467 : | </IndexFields> | ||
468 : | </ToIndex> | ||
469 : | </Relationship> | ||
470 : | parrello | 1.27 | <Relationship name="IsSynonymGroupFor" from="SynonymGroup" to="Feature" arity="1M"> |
471 : | <Notes>This relation connects a synonym group to the features that make it | ||
472 : | up.</Notes> | ||
473 : | </Relationship> | ||
474 : | parrello | 1.24 | <Relationship name="HasFeature" from="Genome" to="Feature" arity="1M"> |
475 : | <Notes>This relationship connects a genome to all of its features. This | ||
476 : | relationship is redundant in a sense, because the genome ID is part | ||
477 : | of the feature ID; however, it makes the creation of certain queries more | ||
478 : | convenient because you can drag in filtering information for a feature's | ||
479 : | genome.</Notes> | ||
480 : | <Fields> | ||
481 : | <Field name="type" type="key-string"> | ||
482 : | <Notes>Feature type (e.g. peg, rna)</Notes> | ||
483 : | </Field> | ||
484 : | </Fields> | ||
485 : | parrello | 1.38 | <FromIndex> |
486 : | parrello | 1.24 | <Notes>This index enables the application to view the features of a |
487 : | Genome sorted by type.</Notes> | ||
488 : | <IndexFields> | ||
489 : | <IndexField name="type" order="ascending" /> | ||
490 : | </IndexFields> | ||
491 : | parrello | 1.38 | </FromIndex> |
492 : | parrello | 1.24 | </Relationship> |
493 : | parrello | 1.6 | <Relationship name="IsEvidencedBy" from="Coupling" to="PCH" arity="1M"> |
494 : | <Notes>This relationship connects a functional coupling to the physically | ||
495 : | close homologs (PCHs) which affirm that the coupling is meaningful.</Notes> | ||
496 : | </Relationship> | ||
497 : | <Relationship name="UsesAsEvidence" from="PCH" to="Feature" arity="MM"> | ||
498 : | <Notes>This relationship connects a PCH to the features that represent its | ||
499 : | evidence. Each PCH is connected to a parent coupling that relates two features | ||
500 : | on a specific genome. The PCH's evidence that the parent coupling is functional | ||
501 : | is the existence of two physically close features on a different genome that | ||
502 : | correspond to the features in the coupling. Those features are found on the | ||
503 : | far side of this relationship.</Notes> | ||
504 : | <Fields> | ||
505 : | <Field name="pos" type="int"> | ||
506 : | <Notes>Ordinal position of the feature in the coupling that corresponds | ||
507 : | to our target feature. There is a one-to-one correspondence between the | ||
508 : | features connected to the PCH by this relationship and the features | ||
509 : | connected to the PCH's parent coupling. The ordinal position is used | ||
510 : | to decode that relationship. Currently, this field is either "1" or | ||
511 : | "2".</Notes> | ||
512 : | </Field> | ||
513 : | </Fields> | ||
514 : | <FromIndex> | ||
515 : | <Notes>This index enables the application to view the features of | ||
516 : | a PCH in the proper order.</Notes> | ||
517 : | <IndexFields> | ||
518 : | <IndexField name="pos" order="ascending" /> | ||
519 : | </IndexFields> | ||
520 : | </FromIndex> | ||
521 : | </Relationship> | ||
522 : | parrello | 1.1 | <Relationship name="HasContig" from="Genome" to="Contig" arity="1M"> |
523 : | <Notes>This relationship connects a genome to the contigs that contain the actual genetic | ||
524 : | information.</Notes> | ||
525 : | </Relationship> | ||
526 : | <Relationship name="ComesFrom" from="Genome" to="Source" arity="MM"> | ||
527 : | <Notes>This relationship connects a genome to the sources that mapped it. A genome can | ||
528 : | come from a single source or from a cooperation among multiple sources.</Notes> | ||
529 : | </Relationship> | ||
530 : | <Relationship name="IsMadeUpOf" from="Contig" to="Sequence" arity="1M"> | ||
531 : | <Notes>A contig is stored in the database as an ordered set of sequences. By splitting the | ||
532 : | contig into sequences, we get a performance boost from only needing to keep small portions | ||
533 : | of a contig in memory at any one time. This relationship connects the contig to its | ||
534 : | constituent sequences.</Notes> | ||
535 : | <Fields> | ||
536 : | <Field name="len" type="int"> | ||
537 : | parrello | 1.15 | <Notes>Length of the sequence.</Notes> |
538 : | </Field> | ||
539 : | parrello | 1.1 | <Field name="start-position" type="int"> |
540 : | parrello | 1.15 | <Notes>Index (1-based) of the point in the contig where this |
541 : | sequence starts.</Notes> | ||
542 : | </Field> | ||
543 : | parrello | 1.1 | </Fields> |
544 : | <FromIndex> | ||
545 : | <Notes>This index enables the application to find all of the sequences in | ||
546 : | parrello | 1.8 | a contig in order, and makes it easier to find a particular residue section.</Notes> |
547 : | parrello | 1.1 | <IndexFields> |
548 : | <IndexField name="start-position" order="ascending" /> | ||
549 : | <IndexField name="len" order="ascending" /> | ||
550 : | </IndexFields> | ||
551 : | </FromIndex> | ||
552 : | </Relationship> | ||
553 : | <Relationship name="IsTargetOfAnnotation" from="Feature" to="Annotation" arity="1M"> | ||
554 : | <Notes>This relationship connects a feature to its annotations.</Notes> | ||
555 : | </Relationship> | ||
556 : | <Relationship name="MadeAnnotation" from="SproutUser" to="Annotation" arity="1M"> | ||
557 : | <Notes>This relationship connects an annotation to the user who made it.</Notes> | ||
558 : | </Relationship> | ||
559 : | <Relationship name="ParticipatesIn" from="Genome" to="Subsystem" arity="MM"> | ||
560 : | <Notes>This relationship connects a subsystem to the genomes that use | ||
561 : | it. If the subsystem has been curated for the genome, then the subsystem's roles will also be | ||
562 : | connected to the genome features through the [b]SSCell[/b] object.</Notes> | ||
563 : | parrello | 1.15 | <Fields> |
564 : | <Field name="variant-code" type="key-string"> | ||
565 : | parrello | 1.20 | <Notes>Code indicating the subsystem variant to which this |
566 : | parrello | 1.15 | genome belongs. Each subsystem can have multiple variants. A variant |
567 : | parrello | 1.20 | code of [b]-1[/b] indicates that the genome does not have a functional |
568 : | variant of the subsystem. A variant code of [b]0[/b] indicates that | ||
569 : | the genome's participation is considered iffy.</Notes> | ||
570 : | parrello | 1.15 | </Field> |
571 : | </Fields> | ||
572 : | <ToIndex> | ||
573 : | <Notes>This index enables the application to find all of the genomes using | ||
574 : | a subsystem in order by variant code, which is how we wish to display them | ||
575 : | in the spreadsheets.</Notes> | ||
576 : | <IndexFields> | ||
577 : | <IndexField name="variant-code" order="ascending" /> | ||
578 : | </IndexFields> | ||
579 : | </ToIndex> | ||
580 : | parrello | 1.1 | </Relationship> |
581 : | <Relationship name="OccursInSubsystem" from="Role" to="Subsystem" arity="MM"> | ||
582 : | <Notes>This relationship connects roles to the subsystems that implement them. </Notes> | ||
583 : | parrello | 1.15 | <Fields> |
584 : | <Field name="column-number" type="int"> | ||
585 : | <Notes>Column number for this role in the specified subsystem's | ||
586 : | spreadsheet.</Notes> | ||
587 : | </Field> | ||
588 : | </Fields> | ||
589 : | <ToIndex> | ||
590 : | <Notes>This index enables the application to see the subsystem roles | ||
591 : | in column order. The ordering of the roles is usually significant, | ||
592 : | so it is important to preserve it.</Notes> | ||
593 : | <IndexFields> | ||
594 : | <IndexField name="column-number" order="ascending" /> | ||
595 : | </IndexFields> | ||
596 : | </ToIndex> | ||
597 : | parrello | 1.1 | </Relationship> |
598 : | <Relationship name="IsGenomeOf" from="Genome" to="SSCell" arity="1M"> | ||
599 : | <Notes>This relationship connects a subsystem's spreadsheet cell to the | ||
600 : | genome for the spreadsheet row.</Notes> | ||
601 : | </Relationship> | ||
602 : | <Relationship name="IsRoleOf" from="Role" to="SSCell" arity="1M"> | ||
603 : | <Notes>This relationship connects a subsystem's spreadsheet cell to the | ||
604 : | role for the spreadsheet column.</Notes> | ||
605 : | </Relationship> | ||
606 : | <Relationship name="ContainsFeature" from="SSCell" to="Feature" arity="MM"> | ||
607 : | <Notes>This relationship connects a subsystem's spreadsheet cell to the | ||
608 : | features assigned to it.</Notes> | ||
609 : | parrello | 1.15 | <Fields> |
610 : | <Field name="cluster-number" type="int"> | ||
611 : | <Notes>ID of this feature's cluster. Clusters represent families of | ||
612 : | related proteins participating in a subsystem.</Notes> | ||
613 : | </Field> | ||
614 : | </Fields> | ||
615 : | </Relationship> | ||
616 : | <Relationship name="IsAComponentOf" from="Compound" to="Reaction" arity="MM"> | ||
617 : | <Notes>This relationship connects a reaction to the compounds that participate | ||
618 : | in it.</Notes> | ||
619 : | <Fields> | ||
620 : | <Field name="product" type="boolean"> | ||
621 : | <Notes>TRUE if the compound is a product of the reaction, FALSE if | ||
622 : | it is a substrate. When a reaction is written on paper in | ||
623 : | chemical notation, the substrates are left of the arrow and the | ||
624 : | products are to the right. Sorting on this field will cause | ||
625 : | the substrates to appear first, followed by the products. If the | ||
626 : | reaction is reversible, then the notion of substrates and products | ||
627 : | is not as intuitive; however, a value here of FALSE still puts the | ||
628 : | compound left of the arrow and a value of TRUE still puts it to the | ||
629 : | right.</Notes> | ||
630 : | </Field> | ||
631 : | parrello | 1.19 | <Field name="stoichiometry" type="key-string"> |
632 : | parrello | 1.15 | <Notes>Number of molecules of the compound that participate in a |
633 : | single instance of the reaction. For example, if a reaction | ||
634 : | parrello | 1.19 | produces two water molecules, the stoichiometry of water for the |
635 : | parrello | 1.15 | reaction would be two. When a reaction is written on paper in |
636 : | parrello | 1.19 | chemical notation, the stoichiometry is the number next to the |
637 : | parrello | 1.15 | chemical formula of the compound.</Notes> |
638 : | </Field> | ||
639 : | <Field name="main" type="boolean"> | ||
640 : | <Notes>TRUE if this compound is one of the main participants in | ||
641 : | the reaction, else FALSE. It is permissible for none of the | ||
642 : | compounds in the reaction to be considered main, in which | ||
643 : | case this value would be FALSE for all of the relevant | ||
644 : | compounds.</Notes> | ||
645 : | </Field> | ||
646 : | <Field name="loc" type="key-string"> | ||
647 : | <Notes>An optional character string that indicates the relative | ||
648 : | position of this compound in the reaction's chemical formula. The | ||
649 : | location affects the way the compounds present as we cross the | ||
650 : | relationship from the reaction side. The product/substrate flag | ||
651 : | comes first, then the value of this field, then the main flag. | ||
652 : | The default value is an empty string; however, the empty string | ||
653 : | sorts first, so if this field is used, it should probably be | ||
654 : | used for every compound in the reaction.</Notes> | ||
655 : | </Field> | ||
656 : | parrello | 1.19 | <Field name="discriminator" type="int"> |
657 : | <Notes>A unique ID for this record. The discriminator does not | ||
658 : | provide any useful data, but it prevents identical records from | ||
659 : | being collapsed by the SELECT DISTINCT command used by ERDB to | ||
660 : | retrieve data.</Notes> | ||
661 : | </Field> | ||
662 : | parrello | 1.15 | </Fields> |
663 : | <ToIndex> | ||
664 : | <Notes>This index presents the compounds in the reaction in the | ||
665 : | order they should be displayed when writing it in chemical notation. | ||
666 : | All the substrates appear before all the products; within each group, | ||
667 : | compounds sort by location, and then the main compounds appear first.</Notes> | ||
668 : | parrello | 1.19 | <IndexFields> |
669 : | <IndexField name="product" order="ascending" /> | ||
670 : | <IndexField name="loc" order="ascending" /> | ||
671 : | <IndexField name="main" order="descending" /> | ||
672 : | </IndexFields> | ||
673 : | parrello | 1.15 | </ToIndex> |
674 : | parrello | 1.1 | </Relationship> |
675 : | <Relationship name="IsLocatedIn" from="Feature" to="Contig" arity="MM"> | ||
676 : | <Notes>This relationship connects a feature to the contig segments that work together | ||
677 : | to effect it. The segments are numbered sequentially starting from 1. The database is | ||
678 : | required to place an upper limit on the length of each segment. If a segment is longer | ||
679 : | than the maximum, it can be broken into smaller bits. | ||
680 : | [p]The upper limit enables applications to locate all features that contain a specific | ||
681 : | residue. For example, if the upper limit is 100 and we are looking for a feature that | ||
682 : | contains residue 234 of contig [b]ABC[/b], we can look for features with a begin point | ||
683 : | between 135 and 333. The results can then be filtered by direction and length of the | ||
684 : | segment.</Notes> | ||
685 : | <Fields> | ||
686 : | <Field name="locN" type="int"> | ||
687 : | parrello | 1.8 | <Notes>Sequence number of this segment.</Notes> |
688 : | </Field> | ||
689 : | parrello | 1.1 | <Field name="beg" type="int"> |
690 : | parrello | 1.8 | <Notes>Index (1-based) of the first residue in the contig that |
691 : | belongs to the segment.</Notes> | ||
692 : | </Field> | ||
693 : | parrello | 1.1 | <Field name="len" type="int"> |
694 : | parrello | 1.8 | <Notes>Number of residues in the segment. A length of 0 identifies |
695 : | a specific point between residues. This is the point before the residue if the direction | ||
696 : | is forward and the point after the residue if the direction is backward.</Notes> | ||
697 : | </Field> | ||
698 : | parrello | 1.1 | <Field name="dir" type="char"> |
699 : | parrello | 1.8 | <Notes>Direction of the segment: [b]+[/b] if it is forward and |
700 : | [b]-[/b] if it is backward.</Notes> | ||
701 : | </Field> | ||
702 : | parrello | 1.1 | </Fields> |
703 : | <FromIndex Unique="false"> | ||
704 : | <Notes>This index allows the application to find all the segments of a feature in | ||
705 : | parrello | 1.8 | the proper order.</Notes> |
706 : | parrello | 1.1 | <IndexFields> |
707 : | <IndexField name="locN" order="ascending" /> | ||
708 : | </IndexFields> | ||
709 : | </FromIndex> | ||
710 : | <ToIndex> | ||
711 : | <Notes>This index is the one used by applications to find all the feature | ||
712 : | segments that contain a specific residue.</Notes> | ||
713 : | <IndexFields> | ||
714 : | <IndexField name="beg" order="ascending" /> | ||
715 : | </IndexFields> | ||
716 : | </ToIndex> | ||
717 : | </Relationship> | ||
718 : | parrello | 1.8 | <Relationship name="HasProperty" from="Feature" to="Property" arity="MM"> |
719 : | <Notes>This relationship connects a feature to its known property values. | ||
720 : | The relationship contains text data that indicates the paper or organization | ||
721 : | that discovered evidence that the feature possesses the property. So, for | ||
722 : | example, if two papers presented evidence that a feature is essential, | ||
723 : | there would be an instance of this relationship for both.</Notes> | ||
724 : | <Fields> | ||
725 : | <Field name="evidence" type="text"> | ||
726 : | <Notes>URL or citation of the paper or | ||
727 : | institution that reported evidence of the relevant feature possessing | ||
728 : | the specified property value.</Notes> | ||
729 : | </Field> | ||
730 : | </Fields> | ||
731 : | </Relationship> | ||
732 : | <Relationship name="RoleOccursIn" from="Role" to="Diagram" arity="MM"> | ||
733 : | <Notes>This relationship connects a role to the diagrams on which it | ||
734 : | appears. A role frequently identifies an enzyme, and can appear in many | ||
735 : | diagrams. A diagram generally contains many different roles.</Notes> | ||
736 : | </Relationship> | ||
737 : | <Relationship name="HasSSCell" from="Subsystem" to="SSCell" arity="1M"> | ||
738 : | <Notes>This relationship connects a subsystem to the spreadsheet cells | ||
739 : | used to analyze and display it. The cells themselves can be thought of | ||
740 : | as a grid with Roles on one axis and Genomes on the other. The | ||
741 : | various features of the subsystem are then assigned to the cells.</Notes> | ||
742 : | </Relationship> | ||
743 : | <Relationship name="IsTrustedBy" from="SproutUser" to="SproutUser" arity="MM"> | ||
744 : | <Notes>This relationship identifies the users trusted by each | ||
745 : | particular user. When viewing functional assignments, the | ||
746 : | assignment displayed is the most recent one by a user trusted | ||
747 : | by the current user. The current user implicitly trusts himself. | ||
748 : | If no trusted users are specified in the database, the user | ||
749 : | also implicitly trusts the user [b]FIG[/b].</Notes> | ||
750 : | </Relationship> | ||
751 : | parrello | 1.15 | <Relationship name="ConsistsOfRoles" from="RoleSubset" to="Role" arity="MM"> |
752 : | <Notes>This relationship connects a role subset to the roles that it covers. | ||
753 : | A subset is, essentially, a named group of roles belonging to a specific | ||
754 : | subsystem, and this relationship effects that. Note that while a role | ||
755 : | may belong to many subsystems, a subset belongs to only one subsystem, | ||
756 : | and all roles in the subset must have that subsystem in common.</Notes> | ||
757 : | </Relationship> | ||
758 : | <Relationship name="ConsistsOfGenomes" from="GenomeSubset" to="Genome" arity="MM"> | ||
759 : | <Notes>This relationship connects a subset to the genomes that it covers. | ||
760 : | A subset is, essentially, a named group of genomes participating in a specific | ||
761 : | subsystem, and this relationship effects that. Note that while a genome | ||
762 : | may belong to many subsystems, a subset belongs to only one subsystem, | ||
763 : | and all genomes in the subset must have that subsystem in common.</Notes> | ||
764 : | </Relationship> | ||
765 : | <Relationship name="HasRoleSubset" from="Subsystem" to="RoleSubset" arity="1M"> | ||
766 : | <Notes>This relationship connects a subsystem to its constituent | ||
767 : | role subsets. Note that some roles in a subsystem may not belong to a | ||
768 : | subset, so the relationship between roles and subsystems cannot be | ||
769 : | derived from the relationships going through the subset.</Notes> | ||
770 : | </Relationship> | ||
771 : | <Relationship name="HasGenomeSubset" from="Subsystem" to="GenomeSubset" arity="1M"> | ||
772 : | <Notes>This relationship connects a subsystem to its constituent | ||
773 : | genome subsets. Note that some genomes in a subsystem may not belong to a | ||
774 : | subset, so the relationship between genomes and subsystems cannot be | ||
775 : | derived from the relationships going through the subset.</Notes> | ||
776 : | </Relationship> | ||
777 : | <Relationship name="Catalyzes" from="Role" to="Reaction" arity="MM"> | ||
778 : | <Notes>This relationship connects a role to the reactions it catalyzes. | ||
779 : | The purpose of a role is to create proteins that trigger certain | ||
780 : | chemical reactions. A single reaction can be triggered by many roles, | ||
781 : | and a role can trigger many reactions.</Notes> | ||
782 : | </Relationship> | ||
783 : | parrello | 1.39 | <Relationship name="HasRoleInSubsystem" from="Feature" to="Subsystem" arity="MM"> |
784 : | <Notes>This relationship connects a feature to the subsystems in which it | ||
785 : | participates. This is technically redundant information, but it is used | ||
786 : | so often that it deserves its own table.</Notes> | ||
787 : | </Relationship> | ||
788 : | parrello | 1.1 | </Relationships> |
789 : | </Database> |
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |