[Bio] / Sprout / SimBlocksDBD.xml Repository:
ViewVC logotype

Annotation of /Sprout/SimBlocksDBD.xml

Parent Directory | Revision Log


Revision 1.3 - (view) (download) (as text)

1 : parrello 1.1 <?xml version="1.0" encoding="UTF-8"?>
2 :     <Database>
3 :     <Title>Similarity Block Database</Title>
4 :     <Entities>
5 :     <Entity name="Genome" keyType="name-string">
6 :     <Notes>A [i]genome[/i] contains the sequence data for a particular
7 : parrello 1.3 individual organism.</Notes>
8 :     <Fields>
9 :     <Field name="description" type="string">
10 :     <Notes>Brief description of this genome.</Notes>
11 :     </Field>
12 :     </Fields>
13 :     </Entity>
14 : parrello 1.2 <Entity name="Contig" keyType="key-string">
15 : parrello 1.1 <Notes>A [i]contig[/i] is a contiguous run of nucleotides. The contig's
16 : parrello 1.3 ID consists of the genome ID followed by a name that identifies
17 :     which contig this is for the parent genome. The individual components
18 :     are separated by a colon.</Notes>
19 : parrello 1.1 </Entity>
20 : parrello 1.2 <Entity name="GroupBlock" keyType="int">
21 :     <Notes>A [i]group block[/i] is a set of similar genome regions.
22 : parrello 1.3 A group block can represent a gene or an inter-genic region.
23 :     The result is that every position in a contig belongs to at least
24 :     one block, though some will belong to several.</Notes>
25 : parrello 1.1 <Fields>
26 :     <Field name="len" type="int">
27 : parrello 1.3 <Notes>Number of nucleotides in the regions belonging to
28 :     this block. This may include insertion markers ([b]-[/b]).</Notes>
29 :     </Field>
30 :     <Field name="pattern" type="text">
31 :     <Notes>A representation of the nucleotides in the group,
32 :     with question marks substituted for positions that are
33 :     not identical for all group members.</Notes>
34 :     </Field>
35 :     <Field name="variance" type="float">
36 :     <Notes>The proportion of nucleotides that vary between
37 :     regions in this group. For example, a value of 0 means all
38 :     regions are identical at every position. A value of
39 :     0.5 means all regions are identical at exactly half of
40 :     the positions. For a block length of 100, a value
41 :     of 0.03 means all regions are identical at every position
42 :     but 3. The variance does not indicate the degree
43 :     of dissimilarity, just how much of each region needs to be
44 :     examined for SNPs.</Notes>
45 :     </Field>
46 :     <Field name="snip-count" type="int">
47 :     <Notes>The number of positions at which the nucleotides
48 :     vary between regions in this group. The variance value
49 :     is this number divided by the block length.</Notes>
50 :     </Field>
51 :     <Field name="description" type="string">
52 :     <Notes>Descriptive name of this block. This will be
53 :     the gene name for gene blocks, and a generated
54 :     string for inter-genic blocks.</Notes>
55 :     </Field>
56 : parrello 1.1 </Fields>
57 :     </Entity>
58 : parrello 1.3 <Entity name="Region" keyType="name-string">
59 :     <Notes>A [i]region[/i] describes a location in a contig, and
60 :     essentially bridges the gap between blocks and contigs. Each
61 :     instance of this object corresponds to a single segment on
62 :     a contig. The key is the region's sprout-style location
63 :     string.</Notes>
64 : parrello 1.1 <Fields>
65 : parrello 1.3 <Field name="contigID" type="key-string">
66 :     <Notes>Name of the contig containing this region.</Notes>
67 :     </Field>
68 : parrello 1.1 <Field name="position" type="int">
69 : parrello 1.3 <Notes>Index (1-based) of the region's leftmost nucleotide
70 :     in the contig.</Notes>
71 :     </Field>
72 : parrello 1.1 <Field name="direction" type="char">
73 : parrello 1.3 <Notes>[b]+[/b] for a forward region, [b]-[/b] for a reverse
74 :     region.</Notes>
75 :     </Field>
76 :     <Field name="content" type="text">
77 :     <Notes>Nucleotide sequence of variance in this region
78 :     (upper case). For a forward region, this is the exact
79 :     content of each position of variance in the region.
80 :     For a reverse region, it is the complement in
81 :     reverse order.</Notes>
82 :     </Field>
83 :     <Field name="len" type="int">
84 :     <Notes>Length of this region. This may be slightly smaller
85 :     than the block length.</Notes>
86 :     </Field>
87 :     <Field name="end" type="int">
88 :     <Notes>Index (1-based) of the region's rightmost nucleotide
89 :     in the contig.</Notes>
90 :     </Field>
91 :     <Field name="peg" type="name-string">
92 :     <Notes>PEG identifier for this block if it is a gene block,
93 :     or a string generated from the nearby PEGs if it is an
94 :     inter-genic block.</Notes>
95 :     </Field>
96 : parrello 1.1 </Fields>
97 : parrello 1.3 <Indexes>
98 :     <Index>
99 :     <Notes>This index enables the application to find regions
100 :     that overlap a specific section of the contig. The index
101 :     can be used to find the first region whose end point is at
102 :     or follows the start of the section in question. Because
103 :     every nucleotide is in at most one region, this guarantees
104 :     that if any region overlaps the section, the region found
105 :     by the index will.</Notes>
106 :     <IndexFields>
107 :     <IndexField name="end" order="ascending" />
108 :     </IndexFields>
109 :     </Index>
110 :     </Indexes>
111 :     </Entity>
112 : parrello 1.2 </Entities>
113 :     <Relationships>
114 :     <Relationship name="ContainsRegion" from="Contig" to="Region" arity="1M">
115 :     <Notes>This relationship connects contigs to the regions on
116 : parrello 1.3 them.</Notes>
117 :     <Fields>
118 : parrello 1.2 <Field name="position" type="int">
119 : parrello 1.3 <Notes>Index (1-based) of the region's leftmost nucleotide
120 :     in the contig.</Notes>
121 :     </Field>
122 :     <Field name="len" type="int">
123 :     <Notes>Length of this region. This may be slightly smaller
124 :     than the block length.</Notes>
125 :     </Field>
126 :     </Fields>
127 : parrello 1.2 <ToIndex>
128 : parrello 1.1 <Notes>This index enables the application to find all of the
129 : parrello 1.3 regions in a contig in the order they are present in the
130 :     contig.</Notes>
131 : parrello 1.1 <IndexFields>
132 :     <IndexField name="position" order="ascending" />
133 :     <IndexField name="len" order="descending" />
134 :     </IndexFields>
135 : parrello 1.2 </ToIndex>
136 : parrello 1.1 </Relationship>
137 : parrello 1.3 <Relationship name="IncludesRegion" from="GroupBlock" to="Region" arity="1M">
138 :     <Notes>This relationship connects a block to the regions it covers. Note
139 :     that since the ID of the region is its Sprout-style location string,
140 :     often it is not necessary to cross to the [b]Region[/b] table when
141 :     accessing this relationship.</Notes>
142 :     </Relationship>
143 : parrello 1.1 <Relationship name="HasInstanceOf" from="Genome" to="GroupBlock" arity="MM">
144 :     <Notes>This relationship connects a genome to the groups represented
145 : parrello 1.3 in its contigs. It provides a fast way to get an ordered list of
146 :     groups for a genome. The group lists for genomes can then be
147 :     merged to determine the common groups of a set of genomes.</Notes>
148 :     </Relationship>
149 :     <Relationship name="ConsistsOf" from="Genome" to="Contig" arity="1M">
150 :     <Notes>This relationship connects a genome to its contigs.</Notes>
151 : parrello 1.1 </Relationship>
152 :     </Relationships>
153 :     </Database>

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3