1 |
commit: a2df729d9eae2e016883572c2fbfe941184f421d |
2 |
Author: Martin Mokrejš <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz> |
3 |
AuthorDate: Tue Jan 31 16:44:12 2017 +0000 |
4 |
Commit: Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz> |
5 |
CommitDate: Tue Jan 31 16:44:12 2017 +0000 |
6 |
URL: https://gitweb.gentoo.org/proj/sci.git/commit/?id=a2df729d |
7 |
|
8 |
sci-biology/trf-bin: renamed to reflect it is an upstream's binary |
9 |
|
10 |
A lot of fixes in the ebuild as well: |
11 |
- install snapshot copies of HTML docs to prevent checksum errors |
12 |
- properly set MY_PV version number |
13 |
- fix install location |
14 |
- distinguish between 32 and 64bit binaries |
15 |
|
16 |
Package-Manager: Portage-2.3.3, Repoman-2.3.1 |
17 |
|
18 |
sci-biology/trf-bin/files/trf.definitions.txt | 159 +++++++++++++++++ |
19 |
sci-biology/trf-bin/files/trf.txt | 176 +++++++++++++++++++ |
20 |
sci-biology/trf-bin/files/trf.whatsnew.txt | 243 ++++++++++++++++++++++++++ |
21 |
sci-biology/trf-bin/metadata.xml | 8 + |
22 |
sci-biology/trf-bin/trf-bin-4.09.ebuild | 53 ++++++ |
23 |
5 files changed, 639 insertions(+) |
24 |
|
25 |
diff --git a/sci-biology/trf-bin/files/trf.definitions.txt b/sci-biology/trf-bin/files/trf.definitions.txt |
26 |
new file mode 100644 |
27 |
index 0000000..ddedf76 |
28 |
--- /dev/null |
29 |
+++ b/sci-biology/trf-bin/files/trf.definitions.txt |
30 |
@@ -0,0 +1,159 @@ |
31 |
+ [1][trflogo.png] |
32 |
+ |
33 |
+ |
34 |
+ |
35 |
+FASTA Format: |
36 |
+ |
37 |
+ The FASTA format is a plain text format which looks something like |
38 |
+ this: |
39 |
+ |
40 |
+ >myseq |
41 |
+ AGTCGTCGCT AGCTAGCTAG CATCGAGTCT TTTCGATCGA GGACTAGACT TCTAGCTAGC |
42 |
+ TAGCATAGCA TACGAGCATA TCGGTCATGA GACTGATTGG GCTTTAGCTA GCTAGCATAG |
43 |
+ CATACGAGCA TATCGGTAGA CTGATTGGGT TTAGGTTACC |
44 |
+ |
45 |
+ The first line starts with a greater than sign ">" and contains a name |
46 |
+ or other identifier for the sequence. This is the sequence header and |
47 |
+ must be in a single line. The remaining lines contain the sequence |
48 |
+ data. The sequence can be in upper or lower case letters. Anything |
49 |
+ other than letters (numbers for example) is ignored. Multiple sequences |
50 |
+ can be present in the same file as long as each sequence has its own |
51 |
+ header. |
52 |
+ |
53 |
+Table Explanation: |
54 |
+ |
55 |
+ The summary table includes the following information: |
56 |
+ 1. Indices of the repeat relative to the start of the sequence. |
57 |
+ 2. Period size of the repeat. |
58 |
+ 3. Number of copies aligned with the consensus pattern. |
59 |
+ 4. Size of consensus pattern (may differ slightly from the period |
60 |
+ size). |
61 |
+ 5. Percent of matches between adjacent copies overall. |
62 |
+ 6. Percent of indels between adjacent copies overall. |
63 |
+ 7. Alignment score. |
64 |
+ 8. Percent composition for each of the four nucleotides. |
65 |
+ 9. Entropy measure based on percent composition. |
66 |
+ |
67 |
+ If the output contains more than 120 repeats, multiple linked tables |
68 |
+ are produced. The links to the other tables appear at the top and |
69 |
+ bottom of each table. |
70 |
+ |
71 |
+ Note: If you save multiple linked summary table files, use the default |
72 |
+ names supplied by your browser to preserve the automatic linking. |
73 |
+ |
74 |
+Alignment Explanation: |
75 |
+ |
76 |
+ The alignment is presented as follows: |
77 |
+ 1. In each pair of lines, the actual sequence is on the top and a |
78 |
+ consensus sequence for all the copies is on the bottom. |
79 |
+ 2. Each pair of lines is one period except for very small patterns. |
80 |
+ 3. The 10 sequence characters before and after a repeat are shown. |
81 |
+ 4. Symbol * indicates a mismatch. |
82 |
+ 5. Symbol - indicates an insertion or deletion. |
83 |
+ 6. Statistics refers to the matches, mismatches and indels overall |
84 |
+ between adjacent copies in the sequence, not between the sequence |
85 |
+ and the consensus pattern. |
86 |
+ 7. Distances between matching characters at corresponding positions |
87 |
+ are listed as distance, number at that distance, percentage of all |
88 |
+ matches. |
89 |
+ 8. ACGTcount is percentage of each nucleotide in the repeat sequence. |
90 |
+ 9. Consensus sequence is shown by itself. |
91 |
+ 10. If chosen as an option, 500 characters of flanking sequence on each |
92 |
+ side of the repeat are shown. |
93 |
+ |
94 |
+ Note: If you save the alignment file, use the default name supplied by |
95 |
+ your browser to preserve the automatic cross-referencing with the |
96 |
+ summary table. |
97 |
+ |
98 |
+Program Parameters: |
99 |
+ |
100 |
+ Input to the program consists of a sequence file and the following |
101 |
+ parameters: |
102 |
+ 1. Alignment Parameters. Weights for match, mismatch and indels. These |
103 |
+ parameters are for Smith-Waterman style local alignment using |
104 |
+ wraparound dynamic programming. Lower weights allow alignments with |
105 |
+ more mismatches and indels. Match weight is +2 in all options here. |
106 |
+ Mismatch and indel weights (interpreted as negative numbers) are |
107 |
+ either 3, 5, or 7. A 3 is more permissive and a 7 less permissive |
108 |
+ of these types of alignment choices. |
109 |
+ 2. Minimum Alignment Score. The alignment score must meet or exceed |
110 |
+ this value for the repeat to be reported. |
111 |
+ 3. Maximum Period Size. The period size must be no larger than this |
112 |
+ value for the repeat to be reported. Period size is the program's |
113 |
+ best guess at the pattern size of the tandem repeat. The program |
114 |
+ will find all repeats with period size between 1 and 2000. |
115 |
+ 4. Maximum TR array size. Specifies the longest TR array (the complete |
116 |
+ repeating sequence) expected to be found in the input, in millions |
117 |
+ of base pairs. Some sequences have very long TR arrays, such as |
118 |
+ chromosome 18 in HG38 which has an array measuring over 5.3 million |
119 |
+ base pairs. |
120 |
+ 5. Detection Parameters. Matching probability Pm and indel probability |
121 |
+ Pi. Pm = .80 and Pi = .10 by default and cannot be modified in this |
122 |
+ version of the program. |
123 |
+ |
124 |
+Options: |
125 |
+ |
126 |
+ 1. Flanking sequence. Flanking sequence consists of the 500 |
127 |
+ nucleotides on each side of a repeat. Flanking sequence is recorded |
128 |
+ in the alignment file. This may be useful for PCR primer |
129 |
+ determination. |
130 |
+ 2. Masked Sequence File. The masked sequence file is a [2]FASTA format |
131 |
+ file containing a copy of the sequence with every character that |
132 |
+ occurred in a tandem repeat changed to the letter 'N'. The word |
133 |
+ "masked" is added to the sequence description line just after the |
134 |
+ '>' character. |
135 |
+ 3. Data File. The data file is a text file which contains the same |
136 |
+ information, in the same order, as the repeat [3]table file, plus |
137 |
+ consensus and repeat sequences. This file contains no labeling and |
138 |
+ is suitable for additional processing, for example with a perl |
139 |
+ script, outside of the program. |
140 |
+ |
141 |
+ |
142 |
+ |
143 |
+ [4][USEMAP:buttonarrow.png] [5]Home |
144 |
+ |
145 |
+ [6][USEMAP:buttonarrow.png] [7]What's New |
146 |
+ |
147 |
+ [8][USEMAP:buttonarrow.png] [9]Submit Page |
148 |
+ |
149 |
+ [10][USEMAP:buttonarrow.png] [11]Downloads |
150 |
+ |
151 |
+ |
152 |
+ |
153 |
+ __________________________________________________________________ |
154 |
+ |
155 |
+ [12][bu.gif] Last revised February 22, 2016 |
156 |
+ Send any questions or comments to: |
157 |
+ [13]Yozen Hernandez |
158 |
+ |
159 |
+References |
160 |
+ |
161 |
+ 1. http://tandem.bu.edu/trf/trf.html |
162 |
+ 2. http://tandem.bu.edu/trf/trf.definitions.html#fasta |
163 |
+ 3. http://tandem.bu.edu/trf/trf.definitions.html#table |
164 |
+ 4. LYNXIMGMAP:http://tandem.bu.edu/trf/trf.definitions.html#FPMap0 |
165 |
+ 5. http://tandem.bu.edu/trf/trf.html |
166 |
+ 6. LYNXIMGMAP:http://tandem.bu.edu/trf/trf.definitions.html#FPMap3 |
167 |
+ 7. http://tandem.bu.edu/trf/trf.whatnew.html |
168 |
+ 8. LYNXIMGMAP:http://tandem.bu.edu/trf/trf.definitions.html#FPMap1 |
169 |
+ 9. http://tandem.bu.edu/trf/trf.submit.options.html |
170 |
+ 10. LYNXIMGMAP:http://tandem.bu.edu/trf/trf.definitions.html#FPMap2 |
171 |
+ 11. http://tandem.bu.edu/trf/trf.download.html |
172 |
+ 12. http://www.bu.edu/ |
173 |
+ 13. javascript:spamGuard('yhernand','bu.edu') |
174 |
+ |
175 |
+[USEMAP] |
176 |
+http://tandem.bu.edu/trf/trf.definitions.html#FPMap2 |
177 |
+ 1. http://tandem.bu.edu/trf/trf.download.html |
178 |
+ |
179 |
+[USEMAP] |
180 |
+http://tandem.bu.edu/trf/trf.definitions.html#FPMap1 |
181 |
+ 1. http://tandem.bu.edu/trf/trf.submit.options.html |
182 |
+ |
183 |
+[USEMAP] |
184 |
+http://tandem.bu.edu/trf/trf.definitions.html#FPMap3 |
185 |
+ 1. http://tandem.bu.edu/trf/trf.whatnew.html |
186 |
+ |
187 |
+[USEMAP] |
188 |
+http://tandem.bu.edu/trf/trf.definitions.html#FPMap0 |
189 |
+ 1. http://tandem.bu.edu/trf/trf.html |
190 |
|
191 |
diff --git a/sci-biology/trf-bin/files/trf.txt b/sci-biology/trf-bin/files/trf.txt |
192 |
new file mode 100644 |
193 |
index 0000000..f75ec5f |
194 |
--- /dev/null |
195 |
+++ b/sci-biology/trf-bin/files/trf.txt |
196 |
@@ -0,0 +1,176 @@ |
197 |
+ [1][trflogo.png] |
198 |
+ |
199 |
+ |
200 |
+Using Command Line Version of Tandem Repeats Finder |
201 |
+ |
202 |
+ Once the program is installed you can run it with no parameters to |
203 |
+ obtain information on proper usage syntax. |
204 |
+ |
205 |
+ If you installed the program as trf then by typing trf at the command |
206 |
+ line you will see the following output: |
207 |
+ |
208 |
+Please use: trf File Match Mismatch Delta PM PI Minscore MaxPeriod [options] |
209 |
+ |
210 |
+Where: (all weights, penalties, and scores are positive) |
211 |
+ File = sequences input file |
212 |
+ Match = matching weight |
213 |
+ Mismatch = mismatching penalty |
214 |
+ Delta = indel penalty |
215 |
+ PM = match probability (whole number) |
216 |
+ PI = indel probability (whole number) |
217 |
+ Minscore = minimum alignment score to report |
218 |
+ MaxPeriod = maximum period size to report |
219 |
+ [options] = one or more of the following: |
220 |
+ -m masked sequence file |
221 |
+ -f flanking sequence |
222 |
+ -d data file |
223 |
+ -h suppress html output |
224 |
+ -r no redundancy elimination |
225 |
+ -l <n> maximum TR length expected (in millions) (eg, -l 3 or -l=3 for |
226 |
+ 3 million) |
227 |
+ |
228 |
+Note the sequence file should be in FASTA format: |
229 |
+ |
230 |
+>Name of sequence |
231 |
+aggaaacctgccatggcctcctggtgagctgtcctcatccactgctcgctgcctctccag |
232 |
+atactctgacccatggatcccctgggtgcagccaagccacaatggccatggcgccgctgt |
233 |
+actcccacccgccccaccctcctgatcctgctatggacatggcctttccacatccctgtg |
234 |
+ |
235 |
+ |
236 |
+ The program accepts a minimum of eight parameters. Options can be |
237 |
+ specified to generate additional files. |
238 |
+ |
239 |
+ The following is a more detailed description of the parameters: |
240 |
+ * File: The sequence file to be analyzed in FASTA format ([2]see for |
241 |
+ details). Multiple sequences in the same file are allowed. |
242 |
+ * Match, Mismatch, and Delta: Weights for match, mismatch and indels. |
243 |
+ These parameters are for Smith-Waterman style local alignment using |
244 |
+ wraparound dynamic programming. Lower weights allow alignments with |
245 |
+ more mismatches and indels. A match weight of 2 has proven |
246 |
+ effective with mismatch and indel penalties in the range of 3 to 7. |
247 |
+ Mismatch and indel weights are interpreted as negative numbers. A 3 |
248 |
+ is more permissive and a 7 less permissive. The recommended values |
249 |
+ for Match, Mismatch, and Delta are 2, 7, and 7 respectively. |
250 |
+ * PM and PI: Probabilistic data is available for PM values of 80 and |
251 |
+ 75 and PI values of 10 and 20. The best performance can be achieved |
252 |
+ with values of PM=80 and PI=10. Values of PM=75 and PI=20 give |
253 |
+ results which are very similar, but often require as much as ten |
254 |
+ times the processing time when compared with values of PM=80 and |
255 |
+ PI=10. |
256 |
+ * Minscore: The alignment of a tandem repeat must meet or exceed this |
257 |
+ alignment score to be reported. For example, if we set the matching |
258 |
+ weight to 2 and the minimum score to 50, assuming perfect |
259 |
+ alignment, we will need to align at least 25 characters to meet the |
260 |
+ minimum score (for example 5 copies with a period of size 5). |
261 |
+ * Maxperiod: Period size is the program's best guess at the pattern |
262 |
+ size of the tandem repeat. The program will find all repeats with |
263 |
+ period size between 1 and 2000, but the output can be limited to a |
264 |
+ smaller range. |
265 |
+ * -m: This is an optional parameter and when present instructs the |
266 |
+ program to generate a masked sequence file. The masked sequence |
267 |
+ file is a FASTA format file containing a copy of the sequence with |
268 |
+ every location that occurred in a tandem repeat changed to the |
269 |
+ letter 'N'. The word "masked" is added to the sequence description |
270 |
+ line just after the '>' character. |
271 |
+ * -f: If this option is present, flanking sequence around each repeat |
272 |
+ is recorded in the alignment file. This may be useful for PCR |
273 |
+ primer determination. Flanking sequence consists of the 500 |
274 |
+ nucleotides on each side of a repeat. |
275 |
+ * -d: A data file is produced if this option is present. This file is |
276 |
+ a text file which contains the same information, in the same order, |
277 |
+ as the summary table file, plus consensus pattern and repeat |
278 |
+ sequences. This file contains no labeling and is suitable for |
279 |
+ additional processing, for example with a perl script, outside of |
280 |
+ the program. |
281 |
+ * -h: suppress HTML output (this automatically switches -d to ON) |
282 |
+ * -l <n>: Specifies that the longest TR array expected in the input |
283 |
+ is at most n million bp long. The default is 2 (for 2 million). |
284 |
+ Setting this option too high may result in an error message if you |
285 |
+ do not have enough available memory. We have only tested this |
286 |
+ option up to a value of 29. |
287 |
+ * -u: Prints the help/usage message above |
288 |
+ * -v: Prints the version information |
289 |
+ * -ngs: More compact .dat output on multisequence files, returns 0 on |
290 |
+ success. You may pipe input in with this option using - for file |
291 |
+ name. Short 50 flanks are appended to .dat output. .dat output |
292 |
+ actually goes to stdout instead of file. Sequence headers are |
293 |
+ displayed in output as @header. Only headers containing repeats are |
294 |
+ shown. |
295 |
+ |
296 |
+ Using recommended parameters the command line will look something like: |
297 |
+ trf yoursequence.txt 2 7 7 80 10 50 500 -f -d -m |
298 |
+ |
299 |
+ Once the program starts running it will print update messages to the |
300 |
+ screen. The word "Done" will be printed when the program finishes. |
301 |
+ |
302 |
+ For single sequence input files there will be at least two HTML format |
303 |
+ output files, a repeat table file and an alignment file. If the number |
304 |
+ of repeats found is greater than 120, multiple linked repeat tables are |
305 |
+ produced. The links to the other tables appear at the top and the |
306 |
+ bottom of each table. To view the results start by opening the first |
307 |
+ repeat table file with your web browser. This file has the extension |
308 |
+ ".1.html". Alignment files can be accessed from the repeat table files. |
309 |
+ Alignment files end with the ".txt.html" extension. |
310 |
+ |
311 |
+ For input files containing multiple sequences a summary page is |
312 |
+ produced that links to the output of individual sequences. This file |
313 |
+ has the extension "summary.html". You should start by opening this file |
314 |
+ if your input had multiple sequences in the same file. Also note that |
315 |
+ the output files of individual sequences will have an identifier of the |
316 |
+ form ".sn." ( n an integer) embedded in the name indicating the index |
317 |
+ of the sequence in the input file. The identifier is omitted for single |
318 |
+ sequence input files. |
319 |
+ |
320 |
+ For more information on the output please see [3]Table Explanation and |
321 |
+ [4]Alignment Explanation. |
322 |
+ |
323 |
+ |
324 |
+ |
325 |
+ |
326 |
+ [5][USEMAP:buttonarrow.png] [6]Home |
327 |
+ |
328 |
+ [7][USEMAP:buttonarrow.png] [8]What's New |
329 |
+ |
330 |
+ [9][USEMAP:buttonarrow.png] [10]Submit Page |
331 |
+ |
332 |
+ [11][USEMAP:buttonarrow.png] [12]Downloads |
333 |
+ |
334 |
+ |
335 |
+ __________________________________________________________________ |
336 |
+ |
337 |
+ [13][bu.gif] Last revised September 11, 2003 |
338 |
+ Send any questions or comments to: |
339 |
+ [14]Yozen Hernandez |
340 |
+ |
341 |
+References |
342 |
+ |
343 |
+ 1. http://tandem.bu.edu/trf/trf.html |
344 |
+ 2. http://tandem.bu.edu/trf/trf.definitions.html#fasta |
345 |
+ 3. http://tandem.bu.edu/trf/trf.definitions.html#table |
346 |
+ 4. http://tandem.bu.edu/trf/trf.definitions.html#alignment |
347 |
+ 5. LYNXIMGMAP:http://tandem.bu.edu/trf/trf.unix.help.html#FPMap0 |
348 |
+ 6. http://tandem.bu.edu/trf/trf.html |
349 |
+ 7. LYNXIMGMAP:http://tandem.bu.edu/trf/trf.unix.help.html#FPMap3 |
350 |
+ 8. http://tandem.bu.edu/trf/trf.whatnew.html |
351 |
+ 9. LYNXIMGMAP:http://tandem.bu.edu/trf/trf.unix.help.html#FPMap1 |
352 |
+ 10. http://tandem.bu.edu/trf/trf.submit.options.html |
353 |
+ 11. LYNXIMGMAP:http://tandem.bu.edu/trf/trf.unix.help.html#FPMap2 |
354 |
+ 12. http://tandem.bu.edu/trf/trf.download.html |
355 |
+ 13. http://www.bu.edu/ |
356 |
+ 14. javascript:spamGuard('yhernand','bu.edu') |
357 |
+ |
358 |
+[USEMAP] |
359 |
+http://tandem.bu.edu/trf/trf.unix.help.html#FPMap2 |
360 |
+ 1. http://tandem.bu.edu/trf/trf.download.html |
361 |
+ |
362 |
+[USEMAP] |
363 |
+http://tandem.bu.edu/trf/trf.unix.help.html#FPMap1 |
364 |
+ 1. http://tandem.bu.edu/trf/trf.submit.options.html |
365 |
+ |
366 |
+[USEMAP] |
367 |
+http://tandem.bu.edu/trf/trf.unix.help.html#FPMap3 |
368 |
+ 1. http://tandem.bu.edu/trf/trf.whatnew.html |
369 |
+ |
370 |
+[USEMAP] |
371 |
+http://tandem.bu.edu/trf/trf.unix.help.html#FPMap0 |
372 |
+ 1. http://tandem.bu.edu/trf/trf.html |
373 |
|
374 |
diff --git a/sci-biology/trf-bin/files/trf.whatsnew.txt b/sci-biology/trf-bin/files/trf.whatsnew.txt |
375 |
new file mode 100644 |
376 |
index 0000000..8967a7d |
377 |
--- /dev/null |
378 |
+++ b/sci-biology/trf-bin/files/trf.whatsnew.txt |
379 |
@@ -0,0 +1,243 @@ |
380 |
+ [1][trflogo.png] |
381 |
+ |
382 |
+ |
383 |
+[newspaper.png] What's New? |
384 |
+ |
385 |
+New For Version 4.09 (Feb 22, 2016) |
386 |
+ |
387 |
+ [checkmark.png] new -l/-L flag allows the user to specify the length of |
388 |
+ the longest expected TR array in the input sequence, in millions. The |
389 |
+ default value is 2, for 2 million bp. For HG38, a value of 6 is |
390 |
+ necessary. |
391 |
+ |
392 |
+ Example usage: |
393 |
+trf409.linux64.exe hg38.fasta 2 5 7 80 10 50 2000 -l 6 |
394 |
+ |
395 |
+ [checkmark.png] Setting a sufficiently high value may result in a |
396 |
+ crash, very long execution time, or a sharp drop in available memory on |
397 |
+ your system. We have only tested up to a value of 25. |
398 |
+ |
399 |
+ [checkmark.png] For workloads requiring over 3GB of RAM (any value of |
400 |
+ -l above 5), the 32-bit builds cannot be used. |
401 |
+ |
402 |
+ [checkmark.png] New -v/-V flag allows you to quickly check your version |
403 |
+ of TRF. |
404 |
+ |
405 |
+ [checkmark.png] New -u/-U flag allows you to quickly display the usage |
406 |
+ help text. |
407 |
+ |
408 |
+ |
409 |
+New For Version 4.07b |
410 |
+ |
411 |
+ [checkmark.png] new -ngs flag allows more compact output and piping of |
412 |
+ input on linux systems, returns standard linux exit value of 0 on |
413 |
+ success |
414 |
+ |
415 |
+ [checkmark.png] temporary output is no longer written to disk |
416 |
+ |
417 |
+ [checkmark.png] changed alignment to go further when score drops to 0 |
418 |
+ on first pass, more repeats are reported now |
419 |
+ |
420 |
+ [checkmark.png] fixed bestlist structure deallocations, this |
421 |
+ significantly improves run speed on multi-sequence fasta files |
422 |
+ |
423 |
+ [checkmark.png] fixed line in alignment files "file K of N", K was off |
424 |
+ by 1 before |
425 |
+ |
426 |
+ [checkmark.png] Added check to make sure all required parameters are |
427 |
+ entered from the commandline |
428 |
+ |
429 |
+ |
430 |
+New For Version 4.04 |
431 |
+ |
432 |
+ [checkmark.png] Widened radius of narrowband alignment to avoid losing |
433 |
+ alignment in some cases |
434 |
+ |
435 |
+ |
436 |
+New For Version 4.03 |
437 |
+ |
438 |
+ [checkmark.png] Added -R switch to produce output without redundancy if |
439 |
+ desired |
440 |
+ |
441 |
+ [checkmark.png] Fixed a bug in the redundancy algorithm which sometimes |
442 |
+ caused repeats to vanish |
443 |
+ |
444 |
+ [checkmark.png] Fixed a bug where N characters that are part of a pattern |
445 |
+ caused problems |
446 |
+ |
447 |
+ |
448 |
+New For Version 4.00 |
449 |
+ |
450 |
+ [checkmark.png] Improved longer period detection and period 1 detection |
451 |
+ |
452 |
+ [checkmark.png] Improved alignment |
453 |
+ |
454 |
+ [checkmark.png] Added a flag to suppress HTML output (-h) |
455 |
+ |
456 |
+ [checkmark.png] Fixed a loading sequence problem with incomplete FASTA |
457 |
+ format files |
458 |
+ |
459 |
+ [checkmark.png] Increased minimum period for larger repeats from 1.9 to |
460 |
+ 1.8 (patternsize > 50) |
461 |
+ |
462 |
+ [checkmark.png] Added Linux GUI version (GTK+) |
463 |
+ |
464 |
+ |
465 |
+New For Version 3.21 |
466 |
+ |
467 |
+ [checkmark.png] Fixed Sequence Name Bug: This bug affected the parsing |
468 |
+ of FASTA header on Windows versions of the program. The bug caused the |
469 |
+ program to report a sequence name that had a control character appended |
470 |
+ at the end and a missing parameters line in the output data file. This |
471 |
+ bug surfaced as a result of version 3.20 fixes. |
472 |
+ |
473 |
+ |
474 |
+New For Version 3.20 |
475 |
+ |
476 |
+ [checkmark.png] Improved Redundancy Control: We have improved the |
477 |
+ program's ability to remove redundant versions of the same tandem |
478 |
+ repeat. On earlier versions certain conditions could cause the |
479 |
+ algorithm to leave a redundant version in the output. The new version |
480 |
+ properly identifies these and removes them. |
481 |
+ |
482 |
+ [checkmark.png] Improved End-of-Line Identification: Different |
483 |
+ operating systems use different conventions for end of line (EOL) |
484 |
+ character sequence in text files. We have made improvements in the |
485 |
+ routines that allow TRF to read sequence text from files using various |
486 |
+ EOL conventions. |
487 |
+ |
488 |
+ [checkmark.png] Fixed Memory Overrun Bug: We have corrected a problem |
489 |
+ where some large-pattern, low-scoring repeats could cause a memory |
490 |
+ fault in previous versions. Thanks to Angie Hinrichs at UC Santa Cruz's |
491 |
+ Genome Bioinformatics group for the bug report and the offending |
492 |
+ sequence. |
493 |
+ |
494 |
+ |
495 |
+ |
496 |
+Previous Update (Version 3.01) |
497 |
+ |
498 |
+ [checkmark.png] Unlimited Sequence Size: We have eliminated the |
499 |
+ sequence size restriction of previous versions. In this version you are |
500 |
+ only limited by the memory available in your system. |
501 |
+ |
502 |
+ [checkmark.png] Multi-Sequence Files: The program handles files |
503 |
+ containing multiple sequences. Each sequence must contain its own FASTA |
504 |
+ header. A summary page is produced which links to the results of |
505 |
+ individual sequences. |
506 |
+ |
507 |
+ [checkmark.png] Data File Includes Repeat Sequence: We now include |
508 |
+ complete repeat sequence for each repeat record reported in the data |
509 |
+ file. |
510 |
+ |
511 |
+ [checkmark.png] Smaller Scores: The downloadable versions of the |
512 |
+ program are now able to report matches with scores as low as 20. We |
513 |
+ recommend caution when using this feature since very large output files |
514 |
+ can be generated at this score level. |
515 |
+ |
516 |
+ [checkmark.png] Longer Pattern Sizes: The program finds repeats with |
517 |
+ period size as large as 2000 base pairs. |
518 |
+ |
519 |
+ [checkmark.png] New File Naming Convention: For input files containing |
520 |
+ a single sequence the naming convention for output files has not been |
521 |
+ changed. For input files containing multiple sequences a summary page |
522 |
+ is produced. This file has the extension "summary.html" and contains |
523 |
+ links to the repeat tables of the individual sequences. In the name of |
524 |
+ each of those repeat tables and their alignment files, an additional |
525 |
+ identifier ".sn." ( n an integer, for example: ".s3.") has been |
526 |
+ inserted before the parameters to indicate the sequence index in the |
527 |
+ input file. |
528 |
+ |
529 |
+ [checkmark.png] Repeat Table Changes: Each table now shows the total |
530 |
+ number of repeats found in the sequence and links to other tables have |
531 |
+ been added at the bottom of the page. |
532 |
+ |
533 |
+ [checkmark.png] Longer Flanking Sequence: 500 characters of flanking |
534 |
+ sequence on each side of the repeat are now reported. |
535 |
+ |
536 |
+ |
537 |
+ Previous Update (Version 2.02) |
538 |
+ |
539 |
+ [checkmark.png] Multiple Repeat Tables: If the output contains more |
540 |
+ than 140 repeats, multiple linked repeat tables and alignment files |
541 |
+ will be produced. This will speed downloading time and overcome |
542 |
+ problems with tables too big for web browsers to format. |
543 |
+ |
544 |
+ [checkmark.png] Consensus Sequence: The program prints the consensus |
545 |
+ sequence for each repeat in the alignment file, below the alignment. |
546 |
+ |
547 |
+ [checkmark.png] Flanking Sequence: As an option, the program prints 200 |
548 |
+ characters of flanking sequence from each side of the repeat. This may |
549 |
+ be useful for PCR primer determination. Find it in the alignment file. |
550 |
+ |
551 |
+ [checkmark.png] Masked Sequence File: As an option, the program returns |
552 |
+ a copy of the original sequence with the tandem repeats "masked" out. |
553 |
+ The masked sequence file is a [2]FASTA format file with every tandem |
554 |
+ repeat character changed to the letter 'N'. The word "masked" is added |
555 |
+ to the sequence description line just after the '>' character. |
556 |
+ |
557 |
+ [checkmark.png] Data File: As an option, the program returns a text |
558 |
+ file containing the same data, in the same order, as the summary table |
559 |
+ file, plus consensus sequences, but without any labels or formatting |
560 |
+ instructions. This file is suitable for automated processing, for |
561 |
+ example with a perl script. |
562 |
+ |
563 |
+ [checkmark.png] Select Parameters: Now you can select parameters when |
564 |
+ you submit a sequence, or simply use the default parameters. Visit the |
565 |
+ [3]Submit Options Page for more details. |
566 |
+ |
567 |
+ [checkmark.png] Sequence Alphabet: The program now handles sequences |
568 |
+ containing letters other than A, C, G, and T. |
569 |
+ |
570 |
+ [checkmark.png] Enhanced Alignment File: We have modified the |
571 |
+ presentation of the alignment file. The output should be easier to view |
572 |
+ and print. |
573 |
+ |
574 |
+ [checkmark.png] Automatic Redundancy Removal: The program now reports |
575 |
+ only the smallest period size for a repeat unless a larger period size |
576 |
+ has a significantly higher score. |
577 |
+ |
578 |
+ [checkmark.png] Windows Version Now Available: A Windows version of the |
579 |
+ program is now available for download. This version of the program |
580 |
+ can be run under Windows 95/98 and Windows NT 4. Please visit our |
581 |
+ [4]Download Page for more details. |
582 |
+ |
583 |
+ |
584 |
+ |
585 |
+ |
586 |
+ [5][USEMAP:buttonarrow.png] [6]Home |
587 |
+ |
588 |
+ [7][USEMAP:buttonarrow.png] [8]Submit Page |
589 |
+ |
590 |
+ [9][USEMAP:buttonarrow.png] [10]Downloads |
591 |
+ __________________________________________________________________ |
592 |
+ |
593 |
+ [11][bu.gif] Last revised February 22, 2016 |
594 |
+ Send any questions or comments to: |
595 |
+ [12]Yozen Hernandez |
596 |
+ |
597 |
+References |
598 |
+ |
599 |
+ 1. http://tandem.bu.edu/trf/trf.html |
600 |
+ 2. http://tandem.bu.edu/trf/trf.definitions.html#fasta |
601 |
+ 3. http://tandem.bu.edu/trf/trf.submit.options.html |
602 |
+ 4. http://tandem.bu.edu/trf/trf.download.html |
603 |
+ 5. LYNXIMGMAP:http://tandem.bu.edu/trf/trf.whatnew.html#FPMap0 |
604 |
+ 6. http://tandem.bu.edu/trf/trf.html |
605 |
+ 7. LYNXIMGMAP:http://tandem.bu.edu/trf/trf.whatnew.html#FPMap1 |
606 |
+ 8. http://tandem.bu.edu/trf/trf.submit.options.html |
607 |
+ 9. LYNXIMGMAP:http://tandem.bu.edu/trf/trf.whatnew.html#FPMap2 |
608 |
+ 10. http://tandem.bu.edu/trf/trf.download.html |
609 |
+ 11. http://www.bu.edu/ |
610 |
+ 12. javascript:spamGuard('yhernand','bu.edu') |
611 |
+ |
612 |
+[USEMAP] |
613 |
+http://tandem.bu.edu/trf/trf.whatnew.html#FPMap2 |
614 |
+ 1. http://tandem.bu.edu/trf/trf.download.html |
615 |
+ |
616 |
+[USEMAP] |
617 |
+http://tandem.bu.edu/trf/trf.whatnew.html#FPMap1 |
618 |
+ 1. http://tandem.bu.edu/trf/trf.submit.options.html |
619 |
+ |
620 |
+[USEMAP] |
621 |
+http://tandem.bu.edu/trf/trf.whatnew.html#FPMap0 |
622 |
+ 1. http://tandem.bu.edu/trf/trf.html |
623 |
|
624 |
diff --git a/sci-biology/trf-bin/metadata.xml b/sci-biology/trf-bin/metadata.xml |
625 |
new file mode 100644 |
626 |
index 0000000..959160f |
627 |
--- /dev/null |
628 |
+++ b/sci-biology/trf-bin/metadata.xml |
629 |
@@ -0,0 +1,8 @@ |
630 |
+<?xml version="1.0" encoding="UTF-8"?> |
631 |
+<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd"> |
632 |
+<pkgmetadata> |
633 |
+ <maintainer type="project"> |
634 |
+ <email>sci-biology@g.o</email> |
635 |
+ <name>Gentoo Biology Project</name> |
636 |
+ </maintainer> |
637 |
+</pkgmetadata> |
638 |
|
639 |
diff --git a/sci-biology/trf-bin/trf-bin-4.09.ebuild b/sci-biology/trf-bin/trf-bin-4.09.ebuild |
640 |
new file mode 100644 |
641 |
index 0000000..11fae9c |
642 |
--- /dev/null |
643 |
+++ b/sci-biology/trf-bin/trf-bin-4.09.ebuild |
644 |
@@ -0,0 +1,53 @@ |
645 |
+# Copyright 1999-2017 Gentoo Foundation |
646 |
+# Distributed under the terms of the GNU General Public License v2 |
647 |
+# $Id$ |
648 |
+ |
649 |
+EAPI=6 |
650 |
+ |
651 |
+inherit eutils |
652 |
+ |
653 |
+MY_PV="${PV/.}" # drop the dot |
654 |
+MY_PN="trf" |
655 |
+MY_P="trf${MY_PV}" |
656 |
+ |
657 |
+DESCRIPTION="Tandem Repeats Finder" |
658 |
+HOMEPAGE="http://tandem.bu.edu/trf/trf.html" |
659 |
+SRC_URI="x86? ( http://tandem.bu.edu/trf/downloads/${MY_P}.linux32 ) |
660 |
+ amd64? ( http://tandem.bu.edu/trf/downloads/${MY_P}.linux64 )" |
661 |
+ |
662 |
+LICENSE="trf" # http://tandem.bu.edu/trf/trf.license.html |
663 |
+SLOT="0" |
664 |
+KEYWORDS="~amd64 ~x86" |
665 |
+RESTRICT="mirror bindist" |
666 |
+ |
667 |
+S="${WORKDIR}" |
668 |
+ |
669 |
+QA_PREBUILT="opt/${MY_PN}/.*" |
670 |
+ |
671 |
+src_unpack() { |
672 |
+ if use x86; then |
673 |
+ cp "${DISTDIR}/${MY_P}".linux32 "${S}/${MY_PN}" || die |
674 |
+ elif use amd64; then |
675 |
+ cp "${DISTDIR}/${MY_P}".linux64 "${S}/${MY_PN}" || die |
676 |
+ else |
677 |
+ eerror "Unsupported platform, check http://tandem.bu.edu/trf/downloads/" |
678 |
+ fi |
679 |
+ default |
680 |
+} |
681 |
+ |
682 |
+src_install() { |
683 |
+ # Put the prebuilt upstream executable into /opt/${MY_PN}/bin. |
684 |
+ exeinto "/opt/${MY_PN}/bin" |
685 |
+ doexe "${MY_PN}" |
686 |
+ # Disabled for now: the GTK build has broken linking |
687 |
+ # (http://tandem.bu.edu/trf/downloads/trf400.linuxgtk.exe). |
688 |
+ #if use gtk; then |
689 |
+ # doexe trf400.linuxgtk.exe |
690 |
+ # make_desktop_entry /opt/${PN}/trf400.linuxgtk.exe "Tandem Repeats Finder" || die |
691 |
+ #fi |
692 |
+ # Install the text snapshots kept in FILESDIR; their upstream pages are: |
693 |
+ #   trf.txt             <- http://tandem.bu.edu/trf/trf.unix.help.html |
694 |
+ #   trf.definitions.txt <- http://tandem.bu.edu/trf/trf.definitions.html |
695 |
+ #   trf.whatsnew.txt    <- http://tandem.bu.edu/trf/trf.whatnew.html |
696 |
+ dodoc "${FILESDIR}"/trf.{txt,definitions.txt,whatsnew.txt} |
697 |
+} |