1 |
commit: cb4121a23d10b5a0879682b261c0e55e483bc218 |
2 |
Author: Martin Mokrejš <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz> |
3 |
AuthorDate: Thu Apr 30 22:46:00 2015 +0000 |
4 |
Commit: Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz> |
5 |
CommitDate: Thu Apr 30 22:46:00 2015 +0000 |
6 |
URL: https://gitweb.gentoo.org/proj/sci.git/commit/?id=cb4121a2 |
7 |
|
8 |
sci-biology/TransDecoder: renamed package to keep original letter-casing |
9 |
|
10 |
Package-Manager: portage-2.2.18 |
11 |
|
12 |
sci-biology/TransDecoder/ChangeLog | 54 ++++++++ |
13 |
sci-biology/TransDecoder/TransDecoder-2.0.1.ebuild | 69 +++++++++++ |
14 |
sci-biology/TransDecoder/files/TransDecoder.patch | 136 +++++++++++++++++++++ |
15 |
.../TransDecoder/files/pfam_runner.pl.patch | 20 +++ |
16 |
sci-biology/TransDecoder/metadata.xml | 9 ++ |
17 |
5 files changed, 288 insertions(+) |
18 |
|
19 |
diff --git a/sci-biology/TransDecoder/ChangeLog b/sci-biology/TransDecoder/ChangeLog |
20 |
new file mode 100644 |
21 |
index 0000000..6fd8f5c |
22 |
--- /dev/null |
23 |
+++ b/sci-biology/TransDecoder/ChangeLog |
24 |
@@ -0,0 +1,54 @@ |
25 |
+# ChangeLog for sci-biology/transdecoder |
26 |
+# Copyright 1999-2015 Gentoo Foundation; Distributed under the GPL v2 |
27 |
+# $Header: $ |
28 |
+ |
29 |
+ 27 Apr 2015; Martin Mokrejs <mmokrejs@×××××××××××××××.cz> |
30 |
+ transdecoder-2.0.1.ebuild: |
31 |
+ sci-biology/transdecoder: more ebuild cleanup |
32 |
+ |
33 |
+ 27 Apr 2015; Martin Mokrejs <mmokrejs@×××××××××××××××.cz> |
34 |
+ transdecoder-2.0.1.ebuild: |
35 |
+ sci-biology/transdecoder: ebuild cleanup |
36 |
+ |
37 |
+ 27 Apr 2015; Martin Mokrejs <mmokrejs@×××××××××××××××.cz> |
38 |
+ -transdecoder-20140704.ebuild, transdecoder-2.0.1.ebuild: |
39 |
+ sci-biology/transdecoder: drop old |
40 |
+ |
41 |
+ 17 Apr 2015; Martin Mokrejs <mmokrejs@×××××××××××××××.cz> |
42 |
+ transdecoder-2.0.1.ebuild, transdecoder-20140704.ebuild: |
43 |
+ sci-biology/transdecoder: fixed installation of perl-related files |
44 |
+ |
45 |
+ 19 Mar 2015; Martin Mokrejs <mmokrejs@×××××××××××××××.cz> |
46 |
+ transdecoder-2.0.1.ebuild: |
47 |
+ dropped hmmer dependency altogether, added pkg_postinst() with |
48 |
+ usage/dependency info |
49 |
+ |
50 |
+*transdecoder-2.0.1 (19 Mar 2015) |
51 |
+ |
52 |
+ 19 Mar 2015; Martin Mokrejs <mmokrejs@×××××××××××××××.cz> |
53 |
+ +transdecoder-2.0.1.ebuild, transdecoder-20140704.ebuild: |
54 |
+ removing KEYWORDS until the perl files are installed to some other place. At |
55 |
+ the moment I get: 'perl-module.eclass: Suspicious environment values found. |
56 |
+ PERL5LIB="/usr/lib64/perl5/vendor_perl/5.18.2/TransDecoder"' |
57 |
+ |
58 |
+ 15 Feb 2015; Martin Mokrejs <mmokrejs@×××××××××××××××.cz> |
59 |
+ transdecoder-20140704.ebuild: |
60 |
+ drop hmmer-3 dependency, it is an optional dependency only |
61 |
+ |
62 |
+ 10 Jan 2015; Martin Mokrejs <mmokrejs@×××××××××××××××.cz> |
63 |
+ transdecoder-20140704.ebuild: |
64 |
+ install *.pm into PERL5LIB/TransDecoder and pass that via env.d |
65 |
+ |
66 |
+ 09 Jan 2015; Martin Mokrejs <mmokrejs@×××××××××××××××.cz> |
67 |
+ +files/TransDecoder.patch, +files/pfam_runner.pl.patch, |
68 |
+ transdecoder-20140704.ebuild: |
69 |
+ sci-biology/transdecoder: added patches so that we use PATH to locate |
70 |
+ binaries and not in a local subdirectory named 'util', drop sys- |
71 |
+ cluster/openmpi requirement, it does not link against it at all, this is a |
72 |
+ bunch of perl and shell scripts |
73 |
+ |
74 |
+*transdecoder-20140704 (08 Jan 2015) |
75 |
+ |
76 |
+ 08 Jan 2015; Martin Mokrejs <mmokrejs@×××××××××××××××.cz> +metadata.xml, |
77 |
+ +transdecoder-20140704.ebuild: |
78 |
+ sci-biology/transdecoder: new package |
79 |
|
80 |
diff --git a/sci-biology/TransDecoder/TransDecoder-2.0.1.ebuild b/sci-biology/TransDecoder/TransDecoder-2.0.1.ebuild |
81 |
new file mode 100644 |
82 |
index 0000000..e8f5134 |
83 |
--- /dev/null |
84 |
+++ b/sci-biology/TransDecoder/TransDecoder-2.0.1.ebuild |
85 |
@@ -0,0 +1,69 @@ |
86 |
+# Copyright 1999-2015 Gentoo Foundation |
87 |
+# Distributed under the terms of the GNU General Public License v2 |
88 |
+# $Header: $ |
89 |
+ |
90 |
+EAPI=5 |
91 |
+ |
92 |
+PERL_EXPORT_PHASE_FUNCTIONS=no |
93 |
+inherit perl-module eutils toolchain-funcs |
94 |
+ |
95 |
+DESCRIPTION="Extract ORF/CDS regions from FASTA sequences" |
96 |
+HOMEPAGE="http://sourceforge.net/projects/transdecoder/" |
97 |
+SRC_URI="https://github.com/TransDecoder/TransDecoder/archive/"${PV}".tar.gz -> ${P}.tar.gz" |
98 |
+ |
99 |
+LICENSE="BSD-BroadInstitute" |
100 |
+SLOT="0" |
101 |
+KEYWORDS="~amd64" |
102 |
+IUSE="" |
103 |
+ |
104 |
+DEPEND="" |
105 |
+RDEPEND="${DEPEND} |
106 |
+ sci-biology/cd-hit |
107 |
+ sci-biology/hmmer |
108 |
+ sci-biology/parafly |
109 |
+ sci-biology/ffindex" |
110 |
+# cdhit-4.6.1 is a real dependency, at least hmmer is optional (also ncbi-tools++ is now used for ORF searches) |
111 |
+ |
112 |
+S="${WORKDIR}"/TransDecoder-2.0.1 |
113 |
+ |
114 |
+##src_prepare(){ |
115 |
+# #mv Makefile Makefile.old |
116 |
+# #epatch "${FILESDIR}"/TransDecoder.patch |
117 |
+# #epatch "${FILESDIR}"/pfam_runner.pl.patch |
118 |
+#} |
119 |
+ |
120 |
+# avoid fetching 1.5TB "${S}"/pfam/Pfam-AB.hmm.bin, see |
121 |
+# "Re: [Transdecoder-users] Announcement: Transdecoder release r20140704" thread in archives |
122 |
+# |
123 |
+# you can get it from http://downloads.sourceforge.net/project/transdecoder/Pfam-AB.hmm.bin |
124 |
+ |
125 |
+src_install(){ # Install the two TransDecoder launchers, the util/*.pl helpers and the PerlLib/*.pm modules, then print Pfam download hints |
126 |
+ dobin TransDecoder.Predict TransDecoder.LongOrfs |
127 |
+ insinto /usr/share/${PN}/util # NOTE(review): insinto only affects doins; the dobin on the next line does not use this path |
128 |
+ dobin util/*.pl |
129 |
+ # zap the bundled cdhit binaries copied from transdecoder_plugins/cdhit/ to util/bin |
130 |
+ rm -rf util/bin |
131 |
+ # NOTE(review): the list below looks like a file-collision report (the same *.pm names owned by trinityrnaseq in /usr/bin) - confirm |
132 |
+ # * sci-biology/trinityrnaseq-20140413:0::science |
133 |
+ # * /usr/bin/Fasta_reader.pm |
134 |
+ # * /usr/bin/GFF3_utils.pm |
135 |
+ # * /usr/bin/Gene_obj.pm |
136 |
+ # * /usr/bin/Gene_obj_indexer.pm |
137 |
+ # * /usr/bin/Longest_orf.pm |
138 |
+ # * /usr/bin/Nuc_translator.pm |
139 |
+ # * /usr/bin/TiedHash.pm |
140 |
+ # |
141 |
+ perl_set_version |
142 |
+ insinto ${VENDOR_LIB}/${PN} |
143 |
+ dobin PerlLib/*.pm # BUG: dobin installs into /usr/bin and ignores the insinto above; the intent was to have these files readable and executable in ${VENDOR_LIB}/${PN} instead |
144 |
+ einfo "Fetch on your own:" |
145 |
+ einfo "wget --mirror -nH -nd http://downloads.sourceforge.net/project/transdecoder/Pfam-AB.hmm.bin" |
146 |
+ einfo "hmmpress Pfam-AB.hmm.bin" |
147 |
+} |
148 |
+ |
149 |
+pkg_postinst(){ # Print post-install hints: recommended search tools (hmmer-3 or NCBI blast) and the minimum requirement (cd-hit) |
150 |
+ einfo "It is recommended to use TransDecoder with hmmer-3 or at least NCBI blast" |
151 |
+ einfo "from either sci-biology/ncbi-blast+ (released more often) or" |
152 |
+ einfo "from sci-biology/ncbi-toolkit++ (huge bundle with releases and less frequent bugfixes)" |
153 |
+ einfo "Author says the minimum requirement is sci-biology/cd-hit" |
154 |
+} |
155 |
|
156 |
diff --git a/sci-biology/TransDecoder/files/TransDecoder.patch b/sci-biology/TransDecoder/files/TransDecoder.patch |
157 |
new file mode 100644 |
158 |
index 0000000..c0cff94 |
159 |
--- /dev/null |
160 |
+++ b/sci-biology/TransDecoder/files/TransDecoder.patch |
161 |
@@ -0,0 +1,136 @@ |
162 |
+--- /usr/bin/TransDecoder 2015-01-09 11:22:55.000000000 +0100 |
163 |
++++ TransDecoder 2015-01-09 14:31:44.095839522 +0100 |
164 |
+@@ -48,7 +48,7 @@ |
165 |
+ --prepare_pfam Prepare data for PFAM search and then quit (for running PFAM on HPC/computing cluster |
166 |
+ with or without MPI ) |
167 |
+ |
168 |
+- --CPU <int> number of threads to use; (default: 2) |
169 |
++ --CPU <int> number of threads to use; (default: 1) |
170 |
+ |
171 |
+ --MPI use MPI w/ execution of hmmscan |
172 |
+ |
173 |
+@@ -76,7 +76,7 @@ |
174 |
+ |
175 |
+ =head1 PFAM |
176 |
+ |
177 |
+-You will need hmmer installed. Use hmmpress to prepare the database for hmmer. |
178 |
++You will need hmmer installed. Use hmmpress from >=hmmer-3.0 to prepare the database for hmmer. |
179 |
+ L<See|https://sourceforge.net/projects/transdecoder/files/Pfam-AB.hmm.bin> for downloading the database. |
180 |
+ |
181 |
+ =head1 CD-HIT |
182 |
+@@ -105,7 +105,6 @@ |
183 |
+ use Longest_orf; |
184 |
+ |
185 |
+ my $UTIL_DIR = "$FindBin::RealBin/util"; |
186 |
+-$ENV{PATH} = "$UTIL_DIR/bin:$ENV{PATH}"; |
187 |
+ $ENV{LD_LIBRARY_PATH} .= ":$FindBin::RealBin/util/lib64"; |
188 |
+ |
189 |
+ my ($cd_hit_est_exec) = &check_program('cd-hit-est'); |
190 |
+@@ -124,7 +123,7 @@ |
191 |
+ my $verbose; |
192 |
+ my $search_pfam = ""; |
193 |
+ my ($reuse,$pfam_out); |
194 |
+-my $CPU = 2; |
195 |
++my $CPU = 1; |
196 |
+ my $RETAIN_LONG_ORFS = 900; |
197 |
+ my $MPI = 0; |
198 |
+ |
199 |
+@@ -330,15 +329,15 @@ |
200 |
+ my $top_cds_file = $train_file && -s $train_file ? $train_file : "$cds_file.top_${top_ORFs_train}_longest"; |
201 |
+ if (!-s $top_cds_file) { |
202 |
+ # get longest entries |
203 |
+- my $cmd = "$UTIL_DIR/get_top_longest_fasta_entries.pl $cds_file $top_ORFs_train > $top_cds_file"; |
204 |
++ my $cmd = "get_top_longest_fasta_entries.pl $cds_file $top_ORFs_train > $top_cds_file"; |
205 |
+ |
206 |
+ unless ($reuse && -s $top_cds_file){ |
207 |
+ if ($cd_hit_est_exec){ |
208 |
+ # to speed things up only check for redundancy up to 4x the number of entries we want |
209 |
+ my $red_num = $top_ORFs_train * 4 ; |
210 |
+- &process_cmd("$UTIL_DIR/get_top_longest_fasta_entries.pl $cds_file $red_num > $workdir/redundant_top"); |
211 |
++ &process_cmd("get_top_longest_fasta_entries.pl $cds_file $red_num > $workdir/redundant_top"); |
212 |
+ &process_cmd("$cd_hit_est_exec -r 1 -i $workdir/redundant_top -o $workdir/redundant_top.nr90 -M 0 -T $CPU >/dev/null 2>/dev/null"); |
213 |
+- &process_cmd("$UTIL_DIR/get_top_longest_fasta_entries.pl $workdir/redundant_top.nr90 $top_ORFs_train > $top_cds_file"); |
214 |
++ &process_cmd("get_top_longest_fasta_entries.pl $workdir/redundant_top.nr90 $top_ORFs_train > $top_cds_file"); |
215 |
+ unlink("$workdir/redundant_top"); |
216 |
+ unlink("$workdir/redundant_top.nr90"); |
217 |
+ unlink("$workdir/redundant_top.nr90.bak.clstr"); |
218 |
+@@ -349,20 +348,20 @@ |
219 |
+ } |
220 |
+ } |
221 |
+ |
222 |
+-$cmd = "$UTIL_DIR/compute_base_probs.pl $transcripts_file $TOP_STRAND_ONLY > $workdir/base_freqs.dat"; |
223 |
++$cmd = "compute_base_probs.pl $transcripts_file $TOP_STRAND_ONLY > $workdir/base_freqs.dat"; |
224 |
+ &process_cmd($cmd) unless $reuse && -s "$workdir/base_freqs.dat"; |
225 |
+ |
226 |
+ |
227 |
+ # get hexamer scores |
228 |
+-#$cmd = "$UTIL_DIR/seq_n_background_to_logliklihood_vals.pl $top_cds_file $transcripts_file.random > hexamer.scores"; |
229 |
++#$cmd = "seq_n_background_to_logliklihood_vals.pl $top_cds_file $transcripts_file.random > hexamer.scores"; |
230 |
+ #&process_cmd($cmd) unless ($reuse && -s "hexamer.scores"); |
231 |
+ |
232 |
+-$cmd = "$UTIL_DIR/seq_n_baseprobs_to_logliklihood_vals.pl $top_cds_file $workdir/base_freqs.dat > $workdir/hexamer.scores"; |
233 |
++$cmd = "seq_n_baseprobs_to_logliklihood_vals.pl $top_cds_file $workdir/base_freqs.dat > $workdir/hexamer.scores"; |
234 |
+ &process_cmd($cmd) unless $reuse && -s "$workdir/hexamer.scores"; |
235 |
+ |
236 |
+ |
237 |
+ # score all cds entries |
238 |
+-$cmd = "$UTIL_DIR/score_CDS_liklihood_all_6_frames.pl $cds_file $workdir/hexamer.scores > $cds_file.scores"; |
239 |
++$cmd = "score_CDS_liklihood_all_6_frames.pl $cds_file $workdir/hexamer.scores > $cds_file.scores"; |
240 |
+ &process_cmd($cmd) unless ($reuse && -s "$cds_file.scores"); |
241 |
+ |
242 |
+ |
243 |
+@@ -440,18 +439,18 @@ |
244 |
+ } |
245 |
+ |
246 |
+ # index the current gff file: |
247 |
+-$cmd = "$UTIL_DIR/index_gff3_files_by_isoform.pl $gff3_file"; |
248 |
++$cmd = "index_gff3_files_by_isoform.pl $gff3_file"; |
249 |
+ &process_cmd($cmd); |
250 |
+ |
251 |
+ # retrieve the best entries: |
252 |
+-$cmd = "$UTIL_DIR/gene_list_to_gff.pl $acc_file $gff3_file.inx > $cds_file.best_candidates.gff3"; |
253 |
++$cmd = "gene_list_to_gff.pl $acc_file $gff3_file.inx > $cds_file.best_candidates.gff3"; |
254 |
+ &process_cmd($cmd); |
255 |
+ |
256 |
+ { |
257 |
+ my $final_output_prefix = basename($transcripts_file) . ".transdecoder"; |
258 |
+ |
259 |
+ # exclude shadow orfs (smaller orfs in different reading frame that are eclipsed by longer orfs) |
260 |
+- $cmd = "$UTIL_DIR/remove_eclipsed_ORFs.pl $cds_file.best_candidates.gff3 > $final_output_prefix.gff3"; |
261 |
++ $cmd = "remove_eclipsed_ORFs.pl $cds_file.best_candidates.gff3 > $final_output_prefix.gff3"; |
262 |
+ &process_cmd($cmd); |
263 |
+ |
264 |
+ |
265 |
+@@ -462,14 +461,14 @@ |
266 |
+ my $gff3_file = "$final_output_prefix.gff3"; |
267 |
+ my $bed_file = $gff3_file; |
268 |
+ $bed_file =~ s/\.gff3$/\.bed/; |
269 |
+- $cmd = "$UTIL_DIR/gff3_file_to_bed.pl $gff3_file > $bed_file"; |
270 |
++ $cmd = "gff3_file_to_bed.pl $gff3_file > $bed_file"; |
271 |
+ &process_cmd($cmd); |
272 |
+ |
273 |
+ |
274 |
+ # make a peptide file: |
275 |
+ my $best_pep_file = $gff3_file; |
276 |
+ $best_pep_file =~ s/\.gff3$/\.pep/; |
277 |
+- $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file > $best_pep_file"; |
278 |
++ $cmd = "gff3_file_to_proteins.pl $gff3_file $transcripts_file > $best_pep_file"; |
279 |
+ &process_cmd($cmd); |
280 |
+ |
281 |
+ |
282 |
+@@ -477,13 +476,13 @@ |
283 |
+ # make a CDS file: |
284 |
+ my $best_cds_file = $best_pep_file; |
285 |
+ $best_cds_file =~ s/\.pep$/\.cds/; |
286 |
+- $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file CDS > $best_cds_file"; |
287 |
++ $cmd = "gff3_file_to_proteins.pl $gff3_file $transcripts_file CDS > $best_cds_file"; |
288 |
+ &process_cmd($cmd); |
289 |
+ |
290 |
+ # make a CDS file: |
291 |
+ my $best_cdna_file = $best_pep_file; |
292 |
+ $best_cdna_file =~ s/\.pep$/\.mRNA/; |
293 |
+- $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file cDNA > $best_cdna_file"; |
294 |
++ $cmd = "gff3_file_to_proteins.pl $gff3_file $transcripts_file cDNA > $best_cdna_file"; |
295 |
+ &process_cmd($cmd); |
296 |
+ |
297 |
+ } |
298 |
|
299 |
diff --git a/sci-biology/TransDecoder/files/pfam_runner.pl.patch b/sci-biology/TransDecoder/files/pfam_runner.pl.patch |
300 |
new file mode 100644 |
301 |
index 0000000..7809b1a |
302 |
--- /dev/null |
303 |
+++ b/sci-biology/TransDecoder/files/pfam_runner.pl.patch |
304 |
@@ -0,0 +1,20 @@ |
305 |
+--- /usr/bin/pfam_runner.pl 2015-01-09 11:22:55.000000000 +0100 |
306 |
++++ pfam_runner.pl 2015-01-09 14:25:43.385838579 +0100 |
307 |
+@@ -24,7 +24,7 @@ |
308 |
+ my $workdir; |
309 |
+ my $verbose; |
310 |
+ my ($reuse,$pfam_out); |
311 |
+-my $CPU = 2; |
312 |
++my $CPU = 1; |
313 |
+ |
314 |
+ my $usage = <<_EOH_; |
315 |
+ |
316 |
+@@ -59,7 +59,7 @@ |
317 |
+ # -h print this option menu and quit |
318 |
+ # -v verbose |
319 |
+ # |
320 |
+-# --CPU <int> number of threads to use; (default: 2) |
321 |
++# --CPU <int> number of threads to use; (default: 1) |
322 |
+ # |
323 |
+ # --MPI use MPI (via ffindex_apply_mpi) |
324 |
+ # |
325 |
|
326 |
diff --git a/sci-biology/TransDecoder/metadata.xml b/sci-biology/TransDecoder/metadata.xml |
327 |
new file mode 100644 |
328 |
index 0000000..2bc8930 |
329 |
--- /dev/null |
330 |
+++ b/sci-biology/TransDecoder/metadata.xml |
331 |
@@ -0,0 +1,9 @@ |
332 |
+<?xml version="1.0" encoding="UTF-8"?> |
333 |
+<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd"> |
334 |
+<pkgmetadata> |
335 |
+ <herd>sci-biology</herd> |
336 |
+ <maintainer> |
337 |
+ <email>mmokrejs@×××××××××××××××.cz</email> |
338 |
+ <name>Martin Mokrejs</name> |
339 |
+ </maintainer> |
340 |
+</pkgmetadata> |