1 |
commit: 9410f41fefdcd2fb758ae0b6921fb66be1d98378 |
2 |
Author: Michał Górny <mgorny <AT> gentoo <DOT> org> |
3 |
AuthorDate: Sat Dec 9 23:20:58 2017 +0000 |
4 |
Commit: Michał Górny <mgorny <AT> gentoo <DOT> org> |
5 |
CommitDate: Sun Dec 10 00:05:28 2017 +0000 |
6 |
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=9410f41f |
7 |
|
8 |
sci-biology/goby-cpp: [QA] Move aux files to a distfile |
9 |
|
10 |
Closes: https://bugs.gentoo.org/620614 |
11 |
|
12 |
sci-biology/goby-cpp/Manifest | 7 +- |
13 |
sci-biology/goby-cpp/files/Alignments.proto | 597 --------------------- |
14 |
sci-biology/goby-cpp/files/Reads.proto | 96 ---- |
15 |
.../files/goby-cpp-2.0.1-underlinking.patch | 16 - |
16 |
sci-biology/goby-cpp/goby-cpp-2.0.1.ebuild | 9 +- |
17 |
5 files changed, 9 insertions(+), 716 deletions(-) |
18 |
|
19 |
diff --git a/sci-biology/goby-cpp/Manifest b/sci-biology/goby-cpp/Manifest |
20 |
index 88622e3c813..2bd240a998a 100644 |
21 |
--- a/sci-biology/goby-cpp/Manifest |
22 |
+++ b/sci-biology/goby-cpp/Manifest |
23 |
@@ -1,3 +1,4 @@ |
24 |
-DIST goby_1.9.7.3-cpp.zip 127215 SHA256 8493daa7c850732c6c48d4512bd26b7eec411a729b39d9861a4a6aae08faa674 SHA512 56bf190224b6f22e0578cea4cc950e52e746655c75ffc13675276787b4d0ced682f891f6ecf7af3cf124b535ac3afc8711b0ecff44d6fd25fe521de7371c3486 WHIRLPOOL ae7ead1b0364383b46d4ef8b59453146c68384b379c26498fc9b24d014ba096a99723bad42cfeb84d44c20e4fc14882bbad303ab8c981889f90dff88a882c5c0 |
25 |
-DIST goby_1.9.8.1-cpp.zip 134904 SHA256 2f1bd87f2870af178f34a8e7c11819aa9e42f35e20f1985d2ceb054f452e2a97 SHA512 d31cd7f0be19074bfe8da74d9f2510f0e0f15fe6c485bbed8520052468d2cd2f1bc5fcad8b0d6a1586f5acde73db326059f45994ecfbb5fb6c09692d8e155190 WHIRLPOOL 6ce51c46f8802d31068f510f6da13b2920086eafdae24506830b42d79e48eb6ed9cac48a96090a81964daebf4a0c8f21c490ca3b0af2f589ac57647bde1be79f |
26 |
-DIST goby_2.0.1-cpp.zip 177718 SHA256 5ec57b833cb1a0f53e975112d1c360b14a9b17cfff3fb0ad77dd70672c1881db SHA512 992bd10d5538dec1478820f26151dd311f4de13e7947b49f0b06d6cbdd4b71deeb3aa8a4c6a598fb92fbcb9cbf4ff97bf81205c9389d4a0da4443317e48aea9f WHIRLPOOL ab94cf674703917b6f0cde812d0fbcd94e18fb6055b30d6a1eefa1e4cb5b76bbe18c67388c66e25e87e522df9a9946b0eae5a164428abe874a382f5bc39a13d0 |
27 |
+DIST goby-cpp-2.0.1-files.tar.bz2 8354 BLAKE2B 0169e1bbcdc27f359cde47df708546dd6af0a68334295b247a6aac9122b7e9b1ee590fe0b57052c642b7e25478f5b118c70bec0c4b4af3694ab0f68c1c9ea73a SHA512 6f0cf466688cdbe9fe646cdff78dd0721fd0b0819c354c63e7c39c45895c319754cdadf23aeb9d544b0b2c68f1168583cb541ec160ba7f567fa0218dbad38e1e |
28 |
+DIST goby_1.9.7.3-cpp.zip 127215 BLAKE2B 0673c36b503a6daee5fdaaf96fb415277502c0a49e530eb39983d4718f4a1d8eb9a6ff0a3202413c358600aafc2bf73482be12462f798923c13e19a6bcd590b1 SHA512 56bf190224b6f22e0578cea4cc950e52e746655c75ffc13675276787b4d0ced682f891f6ecf7af3cf124b535ac3afc8711b0ecff44d6fd25fe521de7371c3486 |
29 |
+DIST goby_1.9.8.1-cpp.zip 134904 BLAKE2B 800f3bcbe9f721bfb636f514630fb1ceba3a1fe41616f63fc15f9f2a24394ef9be90419ccad0c9bd8b29100eeaea57659ba013042cf4a11b6038fc6dee782619 SHA512 d31cd7f0be19074bfe8da74d9f2510f0e0f15fe6c485bbed8520052468d2cd2f1bc5fcad8b0d6a1586f5acde73db326059f45994ecfbb5fb6c09692d8e155190 |
30 |
+DIST goby_2.0.1-cpp.zip 177718 BLAKE2B 666b50fdc199693f8a4f9b6007f6609e91ab6093b643da88e580c9a3438a150cd7be78d2b5dcdd2fe905263d32ebbac1e0e47dbc637fd5d59f877e7cbdaaeeb2 SHA512 992bd10d5538dec1478820f26151dd311f4de13e7947b49f0b06d6cbdd4b71deeb3aa8a4c6a598fb92fbcb9cbf4ff97bf81205c9389d4a0da4443317e48aea9f |
31 |
|
32 |
diff --git a/sci-biology/goby-cpp/files/Alignments.proto b/sci-biology/goby-cpp/files/Alignments.proto |
33 |
deleted file mode 100644 |
34 |
index fe7f5664764..00000000000 |
35 |
--- a/sci-biology/goby-cpp/files/Alignments.proto |
36 |
+++ /dev/null |
37 |
@@ -1,597 +0,0 @@ |
38 |
-package goby; |
39 |
- |
40 |
-option java_package = "edu.cornell.med.icb.goby.alignments"; |
41 |
- |
42 |
-option optimize_for = SPEED; |
43 |
- |
44 |
-/* |
45 |
- This message is written to 'basename'.entries as a very large chunked collection. |
46 |
-*/ |
47 |
-message AlignmentCollection { |
48 |
- repeated AlignmentEntry alignment_entries = 1; |
49 |
-} |
50 |
- |
51 |
- |
52 |
-message AlignmentEntry { |
53 |
- /* Multiplicity of this entry. The number of times this alignment entry would be repeated exactly the same if |
54 |
- query redundancy had not been removed by read factorization. |
55 |
- */ |
56 |
- optional uint32 multiplicity = 7; |
57 |
- |
58 |
- /* |
59 |
- Compressed stream of data. Removed since Goby 2.0 supports chunk codecs. Do not reuse field index 23 |
60 |
- optional bytes compressed_data = 23; |
61 |
- */ |
62 |
- |
63 |
- /* An integer that uniquely identifies the query (a short read) in a set of alignment runs. When several |
64 |
- alignment runs are made with the same set of query sequences, equality of query index means that the query |
65 |
- sequences were the same. (Comparing integers for equality is much faster than comparing strings.) |
66 |
- This field is required (enforced by semantic validation in Goby 2.0+). |
67 |
- */ |
68 |
- optional uint32 query_index = 1; |
69 |
- /* An integer that uniquely identifies the target (e.g., a chromosome) in a set of alignment runs. When several |
70 |
- alignment runs are made with the same set of target sequences, equality of target index means that the target |
71 |
- sequence was the same across the runs. (Comparing integers for equality is much faster than comparing strings.) |
72 |
- This field is required (enforced by semantic validation in Goby 2.0+). |
73 |
- */ |
74 |
- optional uint32 target_index = 2; |
75 |
- /* |
76 |
- The position on the target of the start of the alignment between the query and the target. |
77 |
- In the following example, position is 3 because the third base of the query 'C' was aligned with |
78 |
- position 3 of the reference (two read bases were soft clipped: "ct"). This example shows that the |
79 |
- alignment can start at a mismatch if it was so constructed by the aligner. |
80 |
- |
81 |
- 0123456789 |
82 |
- AAAAGTCAAA target |
83 |
- ctCGTC query |
84 |
- This field is required (enforced by semantic validation in Goby 2.0+). |
85 |
- */ |
86 |
- optional uint32 position = 3; |
87 |
- |
88 |
- /* |
89 |
- True when the query matches the target on the reverse strand |
90 |
- */ |
91 |
- optional bool matching_reverse_strand = 6; |
92 |
- |
93 |
- /* |
94 |
- The position on the query where the alignment starts. This value is different from zero |
95 |
- when some bases/residues of the query could not be aligned with the target. |
96 |
- TODO: Rename this to left_trim. Add a right_trim property. |
97 |
- */ |
98 |
- optional uint32 query_position = 5; |
99 |
- |
100 |
- /* |
101 |
- The score of the alignment, where larger scores indicate better matches between the query and the target. |
102 |
- If an aligner outputs only the number of mismatches between query and target, the score is taken to be |
103 |
- -(#mismatches(query,target)). |
104 |
- */ |
105 |
- optional float score = 4; |
106 |
- |
107 |
- /* |
108 |
- Number of bases/residues that differ in the alignment between query and target sequences. |
109 |
- */ |
110 |
- optional uint32 number_of_mismatches = 8; |
111 |
- |
112 |
- /* |
113 |
- Cumulative number of insertions and/or deletions present in the alignment. |
114 |
- */ |
115 |
- optional uint32 number_of_indels = 9; |
116 |
- |
117 |
- /* |
118 |
- Number of bases that have been aligned for the query. Please note that query_aligned_length must be |
119 |
- less than or equal to query_length. |
120 |
- */ |
121 |
- optional uint32 query_aligned_length = 11; |
122 |
- |
123 |
- /* |
124 |
- Number of bases that have been aligned for the target. |
125 |
- */ |
126 |
- optional uint32 target_aligned_length = 12; |
127 |
- |
128 |
- repeated SequenceVariation sequence_variations = 13; |
129 |
- |
130 |
- /* |
131 |
- Length of the query sequence. |
132 |
- */ |
133 |
- optional uint32 query_length = 10; |
134 |
- /* |
135 |
- Mapping Quality (phred-scaled posterior probability that the mapping |
136 |
- position of this read is incorrect). Please note that different aligners |
137 |
- may estimate mapping quality with different approaches, resulting in aligner |
138 |
- specific differences in the distribution of mapping quality. It is recommended |
139 |
- to condition mapping quality on the aligner that produced the specific alignment |
140 |
- being processed. See aligner name and version in the header. |
141 |
- Note that the following description is preliminary. A clear specification is |
142 |
- needed: |
143 |
- The mapping quality should be proportional to the |
144 |
- log of the probability that the given mapping is the "correct" one. |
145 |
- So if there are five equally good mappings of a read to the genome, |
146 |
- the probability of each would be 0.2, and the mapping quality would be |
147 |
- something like -10*log10(1-0.2) = 1. If a mapping is highly likely, |
148 |
- say a 1e-4 of it being wrong, then the mapping quality would be |
149 |
- -10*log10(1e-4) = 40. |
150 |
- */ |
151 |
- optional int32 mapping_quality = 14; |
152 |
- |
153 |
- /* |
154 |
- If this read was aligned with a pair, the flags for the pair alignment (based on SAM): |
155 |
- 000000001 paired |
156 |
- 000000010 properly paired |
157 |
- 000000100 read unmapped |
158 |
- 000001000 mate unmapped |
159 |
- 000010000 read reverse strand |
160 |
- 000100000 mate reverse strand |
161 |
- 001000000 first in pair |
162 |
- 010000000 second in pair |
163 |
- 100000000 not primary alignment |
164 |
- */ |
165 |
- optional uint32 pair_flags = 15; |
166 |
- |
167 |
- /* |
168 |
- If there is an alignment entry for the paired read (the paired read was mapped), a link to the entry is given. |
169 |
- */ |
170 |
- optional RelatedAlignmentEntry pair_alignment_link = 16; |
171 |
- |
172 |
- /* Index of the read fragment from which this alignment was obtained. */ |
173 |
- optional uint32 fragment_index = 17; |
174 |
- |
175 |
- /* If a read spans exon-exon junctions some aligners (e.g., GSNAP) will output two or more |
176 |
- alignment entries, one for each matching part of the read, and link these entries with |
177 |
- spliced_alignment_links. The field spliced_forward_alignment_link points to the next |
178 |
- AlignmentEntry in the chain of spliced alignments. |
179 |
- */ |
180 |
- optional RelatedAlignmentEntry spliced_forward_alignment_link = 18; |
181 |
- |
182 |
- /* If a read spans exon-exon junctions some aligners (e.g., GSNAP) will output two or more |
183 |
- alignment entries, one for each matching part of the read, and link these entries with |
184 |
- spliced_alignment_links. The field spliced_backward_alignment_link points to the previous |
185 |
- AlignmentEntry in the chain of spliced alignments. |
186 |
- */ |
187 |
- optional RelatedAlignmentEntry spliced_backward_alignment_link = 22; |
188 |
- |
189 |
- /* |
190 |
- If a read spans exon-exon junctions some aligners (e.g., GSNAP) will output two alignment entries, one for each |
191 |
- matching part of the read, and flag describes the spliced_alignment_link with these |
192 |
- binary flags: |
193 |
- 000000001 normal |
194 |
- 000000010 novel |
195 |
- */ |
196 |
- optional uint32 spliced_flags = 19; |
197 |
- |
198 |
- /* The size of the insert used when making the sequence library. This is the total size of the DNA |
199 |
- fragment to sequence, without the adapters. This is not the length of sequence that separates the reads. |
200 |
- See http://seqanswers.com/forums/showthread.php?t=8730 for details. Insert size is inferred for each pair |
201 |
- of reads by the aligner and is recorded here if it was estimated (i.e., for paired-end reads). |
202 |
- */ |
203 |
- optional sint32 insert_size = 20; |
204 |
- |
205 |
- /* |
206 |
- The sample index. Uniquely identifies the aligned sample this read was read from. Storing the sample index in the |
207 |
- alignment entry makes it possible to concat alignments from different origins and track what sample originally |
208 |
- contained each entry. |
209 |
- */ |
210 |
- optional uint32 sample_index = 21; |
211 |
- /* |
212 |
- The total number of times the query index associated with this entry occurs across the entire alignment file. |
213 |
- |
214 |
- This field is used to purge queryIndex->smallIndex associations after all instances of a queryindex have |
215 |
- been seen (see QueryIndexPermutation class). When each entry has a value for this field, the header field |
216 |
- query_index_occurrences is true. |
217 |
- This field is required (enforced by semantic validation in Goby 2.0+). |
218 |
- */ |
219 |
- optional uint32 query_index_occurrences = 25; |
220 |
- /* |
221 |
- The total number of times the read matches the reference across the entire alignment file. This differs from |
222 |
- query_index_occurrences because reads that are matching through splice and pair links count as one for ambiguity. |
223 |
- The field can be used to filter by ambiguity-threshold on the fly after an alignment has been done (to restrict |
224 |
- entries to smaller thresholds). When each entry has a value for this field, the header field |
225 |
- ambiguity_stored_in_entries is true. |
226 |
- |
227 |
- This field is required (enforced by semantic validation in Goby 2.0+). |
228 |
- */ |
229 |
- optional uint32 ambiguity = 27; |
230 |
- /* |
231 |
- List of BAM attributes, if the alignment was imported from BAM. The attributes are stored in exactly the format |
232 |
- allowed for BAM. For instance, X0:i:9 X1:i:1 MD:Z:68 RG:Z:SRR084825 will be stored as four strings: |
233 |
- "X0:i:9", "X1:i:1", "MD:Z:68", "RG:Z:SRR084825". Note that sam-to-compact will interpret some BAM attributes |
234 |
- and populate goby native fields. Such tags do not appear in bam_attributes, and are instead re-generated from |
235 |
- the corresponding goby native fields. |
236 |
- Since Goby 2.0. |
237 |
- */ |
238 |
- repeated string bam_attributes = 50; |
239 |
- /* |
240 |
- Quality scores for all bases of the read. |
241 |
- Since Goby 2.0. |
242 |
- */ |
243 |
- optional bytes read_quality_scores = 55; |
244 |
- |
245 |
- /* |
246 |
- Origin index. An integer that references a ReadOriginInfo message in the alignment header and |
247 |
- makes it possible to track the origin of the read (especially useful after several alignments |
248 |
- have been merged/concatenated). |
249 |
- (Since Goby 2.0). |
250 |
- */ |
251 |
- optional uint32 read_origin_index = 26; |
252 |
- /* |
253 |
- Bases that an aligner considered do not belong to the alignment of the read to the reference. Potentially |
254 |
- erroneous bases, or bases that belong to a different part of the reference genome. Left clipped bases are |
255 |
- stored in this field as character bases, or as an equal sign character '=' when the clipped base did match |
256 |
- the reference base. For instance "A=G" for three soft-clipped bases, the middle one matching the genome at |
257 |
- this position. The number of bases in softClippedBasesLeft is exactly equal to queryPosition. |
258 |
- */ |
259 |
- optional string softClippedBasesLeft = 30; |
260 |
- /* |
261 |
- Bases that an aligner considered do not belong to the alignment of the read to the reference. Potentially |
262 |
- erroneous bases, or bases that belong to a different part of the reference genome. Right clipped bases are |
263 |
- stored in this field as character bases, or as an equal sign character '=' when the clipped base did match |
264 |
- the reference base. The number of bases in softClippedBasesRight is exactly equal |
265 |
- to queryLength - queryAlignedLength - queryPosition. |
266 |
- */ |
267 |
- optional string softClippedBasesRight = 31; |
268 |
- |
269 |
- /* |
270 |
- Quality scores for bases in softClippedBasesLeft. Stored in Phred Units. |
271 |
- */ |
272 |
- optional bytes softClippedQualityLeft = 32; |
273 |
- /* |
274 |
- Quality scores for bases in softClippedBasesRight. Stored in Phred Units. |
275 |
- */ |
276 |
- optional bytes softClippedQualityRight = 33; |
277 |
- /* |
278 |
- Sequence for a read placed near this entry, but unmapped to the reference sequence. For instance, used to record |
279 |
- the sequence of a mate that did not map to the reference. We know that the mate maps in the proximity of this entry |
280 |
- (it is placed) but are unable to map it to a specific genomic position. The sequence is always given as obtained |
281 |
- from the reads file. |
282 |
- */ |
283 |
- optional string placedUnmappedSequence=40; |
284 |
- /* |
285 |
- Quality scores for a read placed near this entry. Phred units. |
286 |
- */ |
287 |
- optional bytes placedUnmappedQuality=41; |
288 |
- |
289 |
- /* |
290 |
- Read name. In SAM/BAM this is referred to as QNAME. Paired and segmented reads will have the same Read name. |
291 |
- */ |
292 |
- optional string readName=42; |
293 |
-} |
294 |
- |
295 |
-/* A link to another alignment entry. This message type is used to represent relations |
296 |
- between alignments, such as the relation between the two read fragments in a paired-end protocol, |
297 |
- or the relation between parts of reads that align through an exon exon junction and map in |
298 |
- different locations of the genome. |
299 |
- */ |
300 |
-message RelatedAlignmentEntry { |
301 |
- /* Target index of the location where the other alignment entry is mapped. |
302 |
- This field is required (enforced by semantic validation in Goby 2.0+). |
303 |
- */ |
304 |
- optional uint32 target_index = 1; |
305 |
- |
306 |
- /* Position on the reference where the other alignment entry is mapped. * |
307 |
- This field is required (enforced by semantic validation in Goby 2.0+). |
308 |
- */ |
309 |
- optional uint32 position = 2; |
310 |
- |
311 |
- /* Index of the fragment for the related alignment entry. This index |
312 |
- makes it possible to identify which of the read fragments mapped to the given |
313 |
- location is related to the source alignment entry. |
314 |
- This field is required (enforced by semantic validation in Goby 2.0+). |
315 |
- */ |
316 |
- optional uint32 fragment_index = 3; |
317 |
- |
318 |
- optional uint32 optimized_index=50; |
319 |
-} |
320 |
- |
321 |
-/* |
322 |
- Represents sequence variations between the query and the reference sequences. Many variations can be represented. |
323 |
- For instance, an insertion at position 5 in the reference would be represented as from="A", to="" position=5. |
324 |
- A mutation T->G at position 6 would be rendered as from="T", to="G" position=6. Padded alignments (see SAM description) |
325 |
- can be described by a combination of pair-wise alignments, where the gap character '-' is used to indicate that no |
326 |
- base exists in the sequence considered for the alignment position, for instance: |
327 |
- |
328 |
- - Padding example: |
329 |
- |
330 |
- 123 (<-positions) |
331 |
-ref A-C |
332 |
- A-T [from="-" to="" position=2] [from="C" to="T" position=3] |
333 |
- ACT [from="" to="C" position=2] [from="C" to="T" position=3] |
334 |
- A-T [from="-" to="" position=2] [from="C" to="T" position=3] |
335 |
- |
336 |
- - Mutation example: |
337 |
- 123 (<-positions) |
338 |
-ref ATT |
339 |
- ACT [from="T" to="C" position=2] |
340 |
- |
341 |
- -- Example of deletion in a read: |
342 |
- 123 (<-positions) |
343 |
-ref ATT |
344 |
- A-T [from="T" to="-" position=2] |
345 |
- |
346 |
- -- Example of insertion of two base pairs in a read: |
347 |
- 12345 (<-positions) |
348 |
-ref A--TT |
349 |
- ACCTT [from="" to="CC" position=2] |
350 |
- |
351 |
- */ |
352 |
-message SequenceVariation { |
353 |
- /* The reference bases. Can include one or more gap characters '-', to indicate that the reference sequence has |
354 |
- no base at this alignment position. |
355 |
- This field is required (enforced by semantic validation in Goby 2.0+). |
356 |
- */ |
357 |
- optional string from = 2; |
358 |
- /* The read bases that differ from the reference sequence. Can include one or more gap characters '-', to indicate |
359 |
- that the query sequence has no base at this alignment position. |
360 |
- This field is required (enforced by semantic validation in Goby 2.0+). |
361 |
- */ |
362 |
- optional string to = 1; |
363 |
- /* |
364 |
- The position of the variation on the read, as if the read always matched on the forward strand. |
365 |
- Adding position to the index where the reference starts aligning the read yields the position of the variation |
366 |
- in reference/target sequence space. Since position starts at one the resulting position will also be one based. |
367 |
- This field is required (enforced by semantic validation in Goby 2.0+). |
368 |
- */ |
369 |
- optional uint32 position = 3; |
370 |
- /* |
371 |
- The position of the variation, starting from the beginning of the aligned read (position 1), and up to the length |
372 |
- of the read (inclusive). Use this index if you need to know how far the variation is observed from the beginning |
373 |
- of the sequenced read. When the read has an insertion, this index records the position immediately before the base |
374 |
- where the bases are inserted (these bases are in the to field). |
375 |
- When the read has a deletion, read_index records the position in the read after which the bases that would align |
376 |
- in the reference are missing (these bases are in the from field). |
377 |
- This field is required (enforced by semantic validation in Goby 2.0+). |
378 |
- */ |
379 |
- optional uint32 read_index = 5; |
380 |
- |
381 |
- /** |
382 |
- The read base quality scores for those bases that are given in the to field. This field |
383 |
- is populated when the reads used to perform the search include quality scores, and when |
384 |
- the alignment parser can extract the information from the aligner's output. |
385 |
- (this option is currently not implemented in Goby.) |
386 |
- */ |
387 |
- optional bytes to_quality = 4; |
388 |
- |
389 |
-} |
390 |
-/* |
391 |
- This message is written to 'basename'.header |
392 |
-*/ |
393 |
- |
394 |
-message AlignmentHeader { |
395 |
- /* |
396 |
- The smallest possible query index in this alignment. Data stored as an array where |
397 |
- queryIndex is the array index will be stored with only the elements in the inclusive |
398 |
- range [smallestSplitQueryIndex largestSplitQueryIndex] |
399 |
- Such data structures include queryLength and some arrays in the TooManyHits data |
400 |
- structure. |
401 |
- */ |
402 |
- optional uint32 smallest_split_query_index = 9; |
403 |
- /* |
404 |
- The largest possible query index in this alignment. Data stored as an array where |
405 |
- queryIndex is the array index will be stored with only the elements in the inclusive |
406 |
- range [smallestSplitQueryIndex largestSplitQueryIndex] |
407 |
- Such data structures include queryLength and some arrays in the TooManyHits data |
408 |
- structure. |
409 |
- */ |
410 |
- optional uint32 largest_split_query_index = 11; |
411 |
- |
412 |
- /* Mapping from query identifier name to query index (as used in alignment entries). |
413 |
- */ |
414 |
- optional IdentifierMapping query_name_mapping = 1; |
415 |
- |
416 |
- /* Mapping from target identifier name to target index (as used in alignment entries). |
417 |
- */ |
418 |
- optional IdentifierMapping target_name_mapping = 2; |
419 |
- |
420 |
- /* |
421 |
- The number of query sequences |
422 |
- */ |
423 |
- optional uint32 number_of_queries = 5; |
424 |
- /* |
425 |
- The number of target sequences |
426 |
- */ |
427 |
- optional uint32 number_of_targets = 6; |
428 |
- /* |
429 |
- The number of reads that were aligned to the reference and are represented in this alignment archive. |
430 |
- */ |
431 |
- optional uint32 number_of_aligned_reads = 7; |
432 |
- |
433 |
- /* |
434 |
- Length of the query sequences. One number per query, in the order of increasing query index. |
435 |
- This information has been moved to the individual alignment entries. |
436 |
- */ |
437 |
- repeated uint32 query_length = 3 [deprecated = true]; |
438 |
- /* |
439 |
- If query length is constant across all the queries, this field contains the constant length. |
440 |
- In such cases, query_length will be empty. |
441 |
- */ |
442 |
- optional uint32 constant_query_length = 10; |
443 |
- |
444 |
- /* |
445 |
- Length of the target sequences. One number per target, in the order of increasing target index. |
446 |
- The target indexes must be 0..(number of targets - 1). |
447 |
- */ |
448 |
- repeated uint32 target_length = 8; |
449 |
- /* |
450 |
- Indicates whether this alignment is sorted by position. True: the alignment entries occur in sorted |
451 |
- order, such that entry a occurs before entry b if a.targetIndex< b.targetIndex or, when entries |
452 |
- have the same target, when a.position < b.position. |
453 |
- */ |
454 |
- optional bool sorted = 13; |
455 |
- |
456 |
- /* |
457 |
- Indicates whether this alignment is indexed by position. When this attribute is true, a file called |
458 |
- 'basename'.index exists that contains the AlignmentIndex message (GZip compressed). |
459 |
- */ |
460 |
- optional bool indexed = 14; |
461 |
- /* |
462 |
- True when query lengths are stored in alignment entries (Goby 1.7+). |
463 |
- */ |
464 |
- optional bool query_lengths_stored_in_entries = 15; |
465 |
- /* |
466 |
- Name of the aligner that produced this alignment. |
467 |
- */ |
468 |
- optional string aligner_name = 17; |
469 |
- /* |
470 |
- Version number for the aligner implementation that produced this alignment. |
471 |
- */ |
472 |
- optional string aligner_version = 18; |
473 |
- /* |
474 |
- The version of Goby that created this alignment file. |
475 |
- */ |
476 |
- optional string version = 25; |
477 |
- |
478 |
- /* |
479 |
- Sample basenames, in the order of increasing sampleIndex, starting with sampleIndex=0. |
480 |
- */ |
481 |
- |
482 |
- repeated string sample_basename = 30; |
483 |
- |
484 |
- /* |
485 |
- This field is true when the query indices of alignment entries were permuted to smaller indices. Only sorted |
486 |
- alignments can have query_indices_were_permuted=true. When the field is true, and you need to retrieve the |
487 |
- original query-index of an alignment (because you want to retrieve the specific read(s) from a read file for |
488 |
- instance), you will need the information in the permutation file (extension basename.perm) and transform back |
489 |
- each small index of interest to the original query index. |
490 |
- */ |
491 |
- optional bool query_indices_were_permuted = 26; |
492 |
- /* |
493 |
- This field is true when entries in the alignment .entries file all have the query_index_occurrences field populated |
494 |
- (Since Goby 2.0). |
495 |
- */ |
496 |
- optional bool query_index_occurrences = 35; |
497 |
- |
498 |
- /* |
499 |
- This field is true when entries in the alignment .entries file all have the ambiguity field populated |
500 |
- (Since Goby 2.0). |
501 |
- */ |
502 |
- optional bool ambiguity_stored_in_entries = 36; |
503 |
- /* |
504 |
- This field is true when entries in the alignment .entries file all have the read_quality_score field populated. |
505 |
- (Since Goby 2.0). |
506 |
- */ |
507 |
- optional bool all_read_quality_scores = 40; |
508 |
- /* |
509 |
- A description of the origin of sets of reads. Serves a similar function to BAM read groups, but more flexible and |
510 |
- efficient. Instead of storing strings, we use integers in the entries. |
511 |
- Alignment entries will link to a specific ReadOriginInfo with the origin_index field. |
512 |
- (Since Goby 2.0). |
513 |
- */ |
514 |
- repeated ReadOriginInfo read_origin = 27; |
515 |
-} |
516 |
- |
517 |
-message IdentifierMapping { |
518 |
- repeated IdentifierInfo mappings = 1; |
519 |
-} |
520 |
- |
521 |
-message IdentifierInfo { |
522 |
- required string name = 1; |
523 |
- required uint32 index = 2; |
524 |
-} |
525 |
- |
526 |
- |
527 |
-/* |
528 |
- A description of the origin of sets of reads. Stored in the Goby alignment header and linked |
529 |
- from alignment entries. Goby makes it possible to adapt origin equivalence rules on the fly |
530 |
- efficiently. To do this, it is sufficient to read the header of the alignment, decide which |
531 |
- ReadOriginInfo instances are equivalent (e.g., by looking at sample, platform, library, or |
532 |
- other fields in the message), then construct a function e(a):int. This function takes |
533 |
- one originIndex parameter and returns another integer that maps to an equivalent class. The |
534 |
- equivalence class can be used to estimate error models for entries that belong to each class, |
535 |
- for instance. |
536 |
- (Since Goby 2.0). |
537 |
- */ |
538 |
-message ReadOriginInfo { |
539 |
- /* |
540 |
- Origin index. An integer that links alignment entries to their origin information. |
541 |
- */ |
542 |
- required uint32 origin_index = 1; |
543 |
- /* |
544 |
- Identifier that describes the origin of the reads. This field is compatible with the ID/platform field of BAM read |
545 |
- groups. Free text. |
546 |
- */ |
547 |
- required string origin_id = 2; |
548 |
- /* |
549 |
- The sample from which the reads were sequenced. This field is compatible with the SM/sample field of BAM read |
550 |
- groups. Free text. |
551 |
- */ |
552 |
- optional string sample = 4; |
553 |
- /* |
554 |
- The platform on which the reads were sequenced. This field is compatible with the PL/platform field of BAM read |
555 |
- groups. Valid values: ILLUMINA, SOLID, LS454, HELICOS and PACBIO. |
556 |
- */ |
557 |
- optional string platform = 5; |
558 |
- /* |
559 |
- The library from which the reads were sequenced. This field is compatible with the LB/library field of BAM read |
560 |
- groups. Free text. |
561 |
- */ |
562 |
- optional string library = 8; |
563 |
- /* |
564 |
- The platform unit on which the reads were sequenced. This field exists for compatibility with samtools. |
565 |
- */ |
566 |
- optional string platform_unit = 12; |
567 |
- /* |
568 |
- The date the reads were sequenced. Useful to identify batch effects, in the format dd:MMM:yyyy. |
569 |
- The month is Jan, Feb, etc. to avoid all confusion with days when day<=12. |
570 |
- */ |
571 |
- optional string run_date = 6; |
572 |
-} |
573 |
- |
574 |
-/* |
575 |
- This message is written to 'basename'.tmh |
576 |
-*/ |
577 |
- |
578 |
-message AlignmentTooManyHits { |
579 |
- /* |
580 |
- The threshold used by the aligner to determine that a query is ambiguous and should be dropped. |
581 |
- Referred to as parameter k below. |
582 |
- */ |
583 |
- required uint32 aligner_threshold = 2; |
584 |
- /* |
585 |
- The hits that are assigned to several (>k) reference locations. |
586 |
- */ |
587 |
- repeated AmbiguousLocation hits = 1; |
588 |
- |
589 |
-} |
590 |
- |
591 |
-message AmbiguousLocation { |
592 |
- /* |
593 |
- The index of the query that matched too many times. |
594 |
- */ |
595 |
- required uint32 query_index = 1; |
596 |
- /* |
597 |
- The number of hits that triggered membership in the too many hits list. The query may hit more |
598 |
- locations than reported here, since some alignment tools will just drop queries that match above |
599 |
- a threshold and stop counting. This number can be >=k. |
600 |
- */ |
601 |
- required uint32 at_least_number_of_hits = 2; |
602 |
- /** |
603 |
-The length of the part of the query sequence that could be matched to the target (also called depth). |
604 |
-May be less than the length of the query sequence, in which case the match was not perfect. When merging |
605 |
-alignments produced by searching different reference sequences, consider only at_least_number_of_hits |
606 |
-from alignments that have exactly the longer depth for the query. */ |
607 |
- optional uint32 length_of_match = 3; |
608 |
-} |
609 |
- |
610 |
-/* |
611 |
- This message is written to 'basename'.index |
612 |
- */ |
613 |
-message AlignmentIndex { |
614 |
- /* |
615 |
- Stores one element by target sequence. Each element is the cumulative target length for the target |
616 |
- stored at index i. Assume there are four target sequences, with lengths {10, 12, 15, 34}. The field |
617 |
- targetPositionOffsets will contain: {0,10,22,37}. Such offsets can be used to calculate the absolute |
618 |
- position of a genomic location. Given targetIndex and positionOnReference, the absolute location |
619 |
- is defined as targetPositionOffsets[targetIndex]+positionOnReference. |
620 |
- */ |
621 |
- repeated uint32 target_position_offsets = 1 [packed = true]; |
622 |
- /* |
623 |
- The byte offsets into the compressed entries file. Byte offsets are matched with absolute position |
624 |
- by index. There should be as many elements in offsets as there are in absolutePosition |
625 |
- where chunks start which represent entries whose absolute positions are less than |
626 |
- */ |
627 |
- repeated uint64 offsets = 2 [packed = true]; |
628 |
- /* |
629 |
- The absolute positions of the first entry in the chunk that immediately start at offset. One element |
630 |
- per chunk in the 'basename'.entries file. |
631 |
- */ |
632 |
- repeated uint64 absolute_positions = 3 [packed = true]; |
633 |
- |
634 |
-} |
635 |
|
636 |
diff --git a/sci-biology/goby-cpp/files/Reads.proto b/sci-biology/goby-cpp/files/Reads.proto |
637 |
deleted file mode 100644 |
638 |
index 32c1244a3eb..00000000000 |
639 |
--- a/sci-biology/goby-cpp/files/Reads.proto |
640 |
+++ /dev/null |
641 |
@@ -1,96 +0,0 @@ |
642 |
-package goby; |
643 |
- |
644 |
-option java_package = "edu.cornell.med.icb.goby.reads"; |
645 |
-option optimize_for = SPEED; |
646 |
- |
647 |
-message ReadCollection { |
648 |
- repeated ReadEntry reads = 1; |
649 |
-} |
650 |
- |
651 |
-message ReadEntry { |
652 |
- /* |
653 |
- Index of a read. |
654 |
- */ |
655 |
- required uint32 read_index = 1; |
656 |
- /* |
657 |
- Index of the barcode, if any. |
658 |
- */ |
659 |
- optional uint32 barcode_index = 10; |
660 |
- /* |
661 |
- Read identifier/name may be present. |
662 |
- */ |
663 |
- optional string read_identifier = 23; |
664 |
- /* |
665 |
- Additional description about the read (from Fasta/Q format). |
666 |
- */ |
667 |
- optional string description = 22; |
668 |
- /* |
669 |
- Length of the sequence. |
670 |
- */ |
671 |
- required uint32 read_length = 2; |
672 |
- /* |
673 |
- Sequence, encoded as ascii characters stored in single bytes. |
674 |
- */ |
675 |
- optional bytes sequence = 3; |
676 |
- /* |
677 |
- The second sequence in a pair. Stored the same way as the sequence attribute. |
678 |
- */ |
679 |
- optional bytes sequence_pair = 5; |
680 |
- /* |
681 |
- Length of the second sequence in a pair. |
682 |
- */ |
683 |
- optional uint32 read_length_pair = 6; |
684 |
- /* |
685 |
- Quality scores in Phred units, stored as single bytes (0-255). |
686 |
- */ |
687 |
- optional bytes quality_scores = 4; |
688 |
- /* |
689 |
- Quality scores for the second sequence in a pair. Stored as the 'qualityScores' attribute. |
690 |
- */ |
691 |
- optional bytes quality_scores_pair = 7; |
692 |
- /* |
693 |
- Compressed stream of data. The first byte indicates the compression/decompression method (codec). The remaining bytes are |
694 |
- content compressed with the codec. |
695 |
- */ |
696 |
- optional bytes compressed_data = 8; |
697 |
- /* |
698 |
- Stores meta-data about the reads. Typically meta-data is stored in the very first read of a |
699 |
- read collection, with the understanding that the meta-data applies to all the reads in the |
700 |
- collection. Meta-data can be used to store information about when the sample was sequenced, |
701 |
- or other information of interest. The key-value pair format is sufficiently flexible to |
702 |
- accomodate a variety of needs. The following keys are pre-defined. Please use pre-defined |
703 |
- keys so that automated tools can use metadata in relatively standard way. Please note that |
704 |
- some keys provide a format for the value. This format should also be followed to garantee |
705 |
- that meta data can be used computationally in fully automatic manner. |
706 |
- |
707 |
- key="sequencing-run-start-date" value="MM/DD/YYYY" Used to record when the sequencing run |
708 |
- was initiated on the instrument. Can be used to detect batch effect in a large set of samples. |
709 |
- key="platform" value="<free-text>". Value is free text, but the following terms are pre-defined. |
710 |
- Illumina GaIIx |
711 |
- Illumina HiSeq 1000 |
712 |
- Illumina HiSeq 2000 |
713 |
- Helicos Heliscope |
714 |
- LifeTech 5500 SOLiD |
715 |
- LifeTech 5500xl SOLiD |
716 |
- Roche 454 GS FLX Ti |
717 |
- |
718 |
- key="organism" value="species name" |
719 |
- Since Goby 1.9.1 |
720 |
- */ |
721 |
- repeated MetaData meta_data = 25; |
722 |
- |
723 |
-} |
724 |
-/* |
725 |
- A message to store a key/value pair and represent metadata about reads. |
726 |
- Since Goby 1.9.1 |
727 |
- */ |
728 |
-message MetaData { |
729 |
- /* |
730 |
- Provides the key. See examples in the documentation of meta_data for ReadEntry. |
731 |
- */ |
732 |
- required string key=1; |
733 |
- /* |
734 |
- Describes the value associated with the key. See examples in the documentation of meta_data for ReadEntry. |
735 |
- */ |
736 |
- required string value=2; |
737 |
-} |
738 |
|
739 |
diff --git a/sci-biology/goby-cpp/files/goby-cpp-2.0.1-underlinking.patch b/sci-biology/goby-cpp/files/goby-cpp-2.0.1-underlinking.patch |
740 |
deleted file mode 100644 |
741 |
index 415785466af..00000000000 |
742 |
--- a/sci-biology/goby-cpp/files/goby-cpp-2.0.1-underlinking.patch |
743 |
+++ /dev/null |
744 |
@@ -1,16 +0,0 @@ |
745 |
- src/Makefile.am | 2 +- |
746 |
- 1 file changed, 1 insertion(+), 1 deletion(-) |
747 |
- |
748 |
-diff --git a/src/Makefile.am b/src/Makefile.am |
749 |
-index 1033382..33ca906 100644 |
750 |
---- a/src/Makefile.am |
751 |
-+++ b/src/Makefile.am |
752 |
-@@ -84,7 +84,7 @@ GobyReadsStats_LDADD = libgoby.la ${BOOST_LDFLAGS} ${BOOST_SYSTEM_LIB} ${BOOST_D |
753 |
- GobyReadsStats_SOURCES = \ |
754 |
- GobyReadsStats.cc |
755 |
- |
756 |
--GobyFastaToCompact_LDADD = libgoby.la ${BOOST_LDFLAGS} ${BOOST_SYSTEM_LIB} ${BOOST_DATE_TIME_LIB} ${BOOST_FILESYSTEM_LIB} ${BOOST_PROGRAM_OPTIONS_LIB} |
757 |
-+GobyFastaToCompact_LDADD = libgoby.la ${BOOST_LDFLAGS} ${BOOST_SYSTEM_LIB} ${BOOST_DATE_TIME_LIB} ${BOOST_FILESYSTEM_LIB} ${BOOST_PROGRAM_OPTIONS_LIB} -lz |
758 |
- GobyFastaToCompact_SOURCES = \ |
759 |
- GobyFastaToCompact.cc |
760 |
- |
761 |
|
762 |
diff --git a/sci-biology/goby-cpp/goby-cpp-2.0.1.ebuild b/sci-biology/goby-cpp/goby-cpp-2.0.1.ebuild |
763 |
index fcf8971fceb..e74dd6ecede 100644 |
764 |
--- a/sci-biology/goby-cpp/goby-cpp-2.0.1.ebuild |
765 |
+++ b/sci-biology/goby-cpp/goby-cpp-2.0.1.ebuild |
766 |
@@ -1,4 +1,4 @@ |
767 |
-# Copyright 1999-2015 Gentoo Foundation |
768 |
+# Copyright 1999-2017 Gentoo Foundation |
769 |
# Distributed under the terms of the GNU General Public License v2 |
770 |
|
771 |
EAPI=5 |
772 |
@@ -9,7 +9,8 @@ inherit autotools-utils |
773 |
|
774 |
DESCRIPTION="A DNA sequencing data management framework - C/C++ API" |
775 |
HOMEPAGE="http://campagnelab.org/software/goby/" |
776 |
-SRC_URI="http://chagall.med.cornell.edu/goby/releases/archive/release-goby_${PV}/goby_${PV}-cpp.zip" |
777 |
+SRC_URI="http://chagall.med.cornell.edu/goby/releases/archive/release-goby_${PV}/goby_${PV}-cpp.zip |
778 |
+ https://dev.gentoo.org/~mgorny/dist/${P}-files.tar.bz2" |
779 |
|
780 |
LICENSE="GPL-3" |
781 |
SLOT="0" |
782 |
@@ -24,7 +25,7 @@ RDEPEND="${DEPEND}" |
783 |
S="${WORKDIR}/${PV}/cpp" |
784 |
|
785 |
PATCHES=( |
786 |
- "${FILESDIR}"/${P}-underlinking.patch |
787 |
+ "${WORKDIR}"/${P}-files/${P}-underlinking.patch |
788 |
) |
789 |
|
790 |
src_prepare() { |
791 |
@@ -33,7 +34,7 @@ src_prepare() { |
792 |
-i src/Makefile.am || die |
793 |
|
794 |
pushd src/goby > /dev/null || die |
795 |
- cp "${FILESDIR}"/*.proto . || die |
796 |
+ cp "${WORKDIR}"/${P}-files/*.proto . || die |
797 |
protoc --cpp_out=. *.proto || die |
798 |
popd > /dev/null || die |