Gentoo Archives: gentoo-commits

From: Martin Mokrejs <mmokrejs@×××××××××××××××.cz>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/sci:master commit in: sci-biology/TransDecoder/files/, sci-biology/TransDecoder/
Date: Thu, 30 Apr 2015 22:48:24
Message-Id: 1430433960.cb4121a23d10b5a0879682b261c0e55e483bc218.mmokrejs@gentoo
1 commit: cb4121a23d10b5a0879682b261c0e55e483bc218
2 Author: Martin Mokrejš <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
3 AuthorDate: Thu Apr 30 22:46:00 2015 +0000
4 Commit: Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
5 CommitDate: Thu Apr 30 22:46:00 2015 +0000
6 URL: https://gitweb.gentoo.org/proj/sci.git/commit/?id=cb4121a2
7
8 sci-biology/TransDecoder: renamed package to keep original letter-casing
9
10 Package-Manager: portage-2.2.18
11
12 sci-biology/TransDecoder/ChangeLog | 54 ++++++++
13 sci-biology/TransDecoder/TransDecoder-2.0.1.ebuild | 69 +++++++++++
14 sci-biology/TransDecoder/files/TransDecoder.patch | 136 +++++++++++++++++++++
15 .../TransDecoder/files/pfam_runner.pl.patch | 20 +++
16 sci-biology/TransDecoder/metadata.xml | 9 ++
17 5 files changed, 288 insertions(+)
18
19 diff --git a/sci-biology/TransDecoder/ChangeLog b/sci-biology/TransDecoder/ChangeLog
20 new file mode 100644
21 index 0000000..6fd8f5c
22 --- /dev/null
23 +++ b/sci-biology/TransDecoder/ChangeLog
24 @@ -0,0 +1,54 @@
25 +# ChangeLog for sci-biology/transdecoder
26 +# Copyright 1999-2015 Gentoo Foundation; Distributed under the GPL v2
27 +# $Header: $
28 +
29 + 27 Apr 2015; Martin Mokrejs <mmokrejs@×××××××××××××××.cz>
30 + transdecoder-2.0.1.ebuild:
31 + sci-biology/transdecoder: more ebuild cleanup
32 +
33 + 27 Apr 2015; Martin Mokrejs <mmokrejs@×××××××××××××××.cz>
34 + transdecoder-2.0.1.ebuild:
35 + sci-biology/transdecoder: ebuild cleanup
36 +
37 + 27 Apr 2015; Martin Mokrejs <mmokrejs@×××××××××××××××.cz>
38 + -transdecoder-20140704.ebuild, transdecoder-2.0.1.ebuild:
39 + sci-biology/transdecoder: drop old
40 +
41 + 17 Apr 2015; Martin Mokrejs <mmokrejs@×××××××××××××××.cz>
42 + transdecoder-2.0.1.ebuild, transdecoder-20140704.ebuild:
43 + sci-biology/transdecoder: fixed installation of perl-related files
44 +
45 + 19 Mar 2015; Martin Mokrejs <mmokrejs@×××××××××××××××.cz>
46 + transdecoder-2.0.1.ebuild:
47 + dropped hmmer dependency altogether, added pkg_postinst() with
48 + usage/dependency info
49 +
50 +*transdecoder-2.0.1 (19 Mar 2015)
51 +
52 + 19 Mar 2015; Martin Mokrejs <mmokrejs@×××××××××××××××.cz>
53 + +transdecoder-2.0.1.ebuild, transdecoder-20140704.ebuild:
54 + removing KEYWORDS until the perl files are installed to some other place. At
55 + the moment I get: 'perl-module.eclass: Suspicious environment values found.
56 + PERL5LIB="/usr/lib64/perl5/vendor_perl/5.18.2/TransDecoder"'
57 +
58 + 15 Feb 2015; Martin Mokrejs <mmokrejs@×××××××××××××××.cz>
59 + transdecoder-20140704.ebuild:
60 + drop hmmer-3 dependency, it is an optional dependency only
61 +
62 + 10 Jan 2015; Martin Mokrejs <mmokrejs@×××××××××××××××.cz>
63 + transdecoder-20140704.ebuild:
64 + install *.pm into PERL5LIB/TransDecoder and pass that via env.d
65 +
66 + 09 Jan 2015; Martin Mokrejs <mmokrejs@×××××××××××××××.cz>
67 + +files/TransDecoder.patch, +files/pfam_runner.pl.patch,
68 + transdecoder-20140704.ebuild:
69 + sci-biology/transdecoder: added patches so that we use PATH to locate
70 + binaries and not in a local subdirectory named 'util', drop sys-
71 + cluster/openmpi requirement, it does not link against it at all, this is a
72 + bunch of perl and shell scripts
73 +
74 +*transdecoder-20140704 (08 Jan 2015)
75 +
76 + 08 Jan 2015; Martin Mokrejs <mmokrejs@×××××××××××××××.cz> +metadata.xml,
77 + +transdecoder-20140704.ebuild:
78 + sci-biology/transdecoder: new package
79
80 diff --git a/sci-biology/TransDecoder/TransDecoder-2.0.1.ebuild b/sci-biology/TransDecoder/TransDecoder-2.0.1.ebuild
81 new file mode 100644
82 index 0000000..e8f5134
83 --- /dev/null
84 +++ b/sci-biology/TransDecoder/TransDecoder-2.0.1.ebuild
85 @@ -0,0 +1,69 @@
86 +# Copyright 1999-2015 Gentoo Foundation
87 +# Distributed under the terms of the GNU General Public License v2
88 +# $Header: $
89 +
90 +EAPI=5
91 +
92 +PERL_EXPORT_PHASE_FUNCTIONS=no # set before inherit: only perl-module helpers (perl_set_version, VENDOR_LIB) are used, not its phase functions
93 +inherit perl-module eutils toolchain-funcs
94 +
95 +DESCRIPTION="Extract ORF/CDS regions from FASTA sequences"
96 +HOMEPAGE="http://sourceforge.net/projects/transdecoder/"
97 +SRC_URI="https://github.com/TransDecoder/TransDecoder/archive/${PV}.tar.gz -> ${P}.tar.gz" # one quoted string; renamed so the distfile carries the package name
98 +
99 +LICENSE="BSD-BroadInstitute"
100 +SLOT="0"
101 +KEYWORDS="~amd64"
102 +IUSE=""
103 +
104 +DEPEND=""
105 +RDEPEND="${DEPEND}
106 + sci-biology/cd-hit
107 + sci-biology/hmmer
108 + sci-biology/parafly
109 + sci-biology/ffindex"
110 +# cdhit-4.6.1 is a real dependency, at least hmmer is optional (also ncbi-tools++ is now used for ORF searches)
111 +
112 +S="${WORKDIR}/${P}" # was hard-coded to TransDecoder-2.0.1; ${P} yields the same path here and follows version bumps
113 +
114 +##src_prepare(){
115 +# #mv Makefile Makefile.old
116 +# #epatch "${FILESDIR}"/TransDecoder.patch
117 +# #epatch "${FILESDIR}"/pfam_runner.pl.patch
118 +#}
119 +
120 +# avoid fetching 1.5TB "${S}"/pfam/Pfam-AB.hmm.bin, see
121 +# "Re: [Transdecoder-users] Announcement: Transdecoder release r20140704" thread in archives
122 +#
123 +# you can get it from http://downloads.sourceforge.net/project/transdecoder/Pfam-AB.hmm.bin
124 +
125 +src_install(){
126 + dobin TransDecoder.Predict TransDecoder.LongOrfs
127 + # no insinto for the helper scripts: dobin ignores it and always installs into the bin directory
128 + dobin util/*.pl
129 + # zap the bundled cdhit binaries copied from transdecoder_plugins/cdhit/ to util/bin
130 + rm -rf util/bin
131 + # presumably the /usr/bin file collisions that motivated moving the *.pm modules elsewhere (TODO confirm):
132 + # * sci-biology/trinityrnaseq-20140413:0::science
133 + # * /usr/bin/Fasta_reader.pm
134 + # * /usr/bin/GFF3_utils.pm
135 + # * /usr/bin/Gene_obj.pm
136 + # * /usr/bin/Gene_obj_indexer.pm
137 + # * /usr/bin/Longest_orf.pm
138 + # * /usr/bin/Nuc_translator.pm
139 + # * /usr/bin/TiedHash.pm
140 + #
141 + perl_set_version
142 + exeinto "${VENDOR_LIB}/${PN}"
143 + doexe PerlLib/*.pm # doexe honours exeinto, so the modules land readable and executable in ${VENDOR_LIB}/${PN} instead of /usr/bin
144 + einfo "Fetch on your own:"
145 + einfo "wget --mirror -nH -nd http://downloads.sourceforge.net/project/transdecoder/Pfam-AB.hmm.bin"
146 + einfo "hmmpress Pfam-AB.hmm.bin"
147 +}
148 +
149 +pkg_postinst(){ # prints four einfo hints naming the recommended search tools and the minimum requirement
150 + einfo "It is recommended to use TransDecoder with hmmer-3 or at least NCBI blast"
151 + einfo "from either sci-biology/ncbi-blast+ (released more often) or"
152 + einfo "from sci-biology/ncbi-toolkit++ (huge bundle with releases and less frequent bugfixes)"
153 + einfo "Author says the minimum requirement is sci-biology/cd-hit"
154 +}
155
156 diff --git a/sci-biology/TransDecoder/files/TransDecoder.patch b/sci-biology/TransDecoder/files/TransDecoder.patch
157 new file mode 100644
158 index 0000000..c0cff94
159 --- /dev/null
160 +++ b/sci-biology/TransDecoder/files/TransDecoder.patch
161 @@ -0,0 +1,136 @@
162 +--- /usr/bin/TransDecoder 2015-01-09 11:22:55.000000000 +0100
163 ++++ TransDecoder 2015-01-09 14:31:44.095839522 +0100
164 +@@ -48,7 +48,7 @@
165 + --prepare_pfam Prepare data for PFAM search and then quit (for running PFAM on HPC/computing cluster
166 + with or without MPI )
167 +
168 +- --CPU <int> number of threads to use; (default: 2)
169 ++ --CPU <int> number of threads to use; (default: 1)
170 +
171 + --MPI use MPI w/ execution of hmmscan
172 +
173 +@@ -76,7 +76,7 @@
174 +
175 + =head1 PFAM
176 +
177 +-You will need hmmer installed. Use hmmpress to prepare the database for hmmer.
178 ++You will need hmmer installed. Use hmmpress from >=hmmer-3.0 to prepare the database for hmmer.
179 + L<See|https://sourceforge.net/projects/transdecoder/files/Pfam-AB.hmm.bin> for downloading the database.
180 +
181 + =head1 CD-HIT
182 +@@ -105,7 +105,6 @@
183 + use Longest_orf;
184 +
185 + my $UTIL_DIR = "$FindBin::RealBin/util";
186 +-$ENV{PATH} = "$UTIL_DIR/bin:$ENV{PATH}";
187 + $ENV{LD_LIBRARY_PATH} .= ":$FindBin::RealBin/util/lib64";
188 +
189 + my ($cd_hit_est_exec) = &check_program('cd-hit-est');
190 +@@ -124,7 +123,7 @@
191 + my $verbose;
192 + my $search_pfam = "";
193 + my ($reuse,$pfam_out);
194 +-my $CPU = 2;
195 ++my $CPU = 1;
196 + my $RETAIN_LONG_ORFS = 900;
197 + my $MPI = 0;
198 +
199 +@@ -330,15 +329,15 @@
200 + my $top_cds_file = $train_file && -s $train_file ? $train_file : "$cds_file.top_${top_ORFs_train}_longest";
201 + if (!-s $top_cds_file) {
202 + # get longest entries
203 +- my $cmd = "$UTIL_DIR/get_top_longest_fasta_entries.pl $cds_file $top_ORFs_train > $top_cds_file";
204 ++ my $cmd = "get_top_longest_fasta_entries.pl $cds_file $top_ORFs_train > $top_cds_file";
205 +
206 + unless ($reuse && -s $top_cds_file){
207 + if ($cd_hit_est_exec){
208 + # to speed things up only check for redundancy up to 4x the number of entries we want
209 + my $red_num = $top_ORFs_train * 4 ;
210 +- &process_cmd("$UTIL_DIR/get_top_longest_fasta_entries.pl $cds_file $red_num > $workdir/redundant_top");
211 ++ &process_cmd("get_top_longest_fasta_entries.pl $cds_file $red_num > $workdir/redundant_top");
212 + &process_cmd("$cd_hit_est_exec -r 1 -i $workdir/redundant_top -o $workdir/redundant_top.nr90 -M 0 -T $CPU >/dev/null 2>/dev/null");
213 +- &process_cmd("$UTIL_DIR/get_top_longest_fasta_entries.pl $workdir/redundant_top.nr90 $top_ORFs_train > $top_cds_file");
214 ++ &process_cmd("get_top_longest_fasta_entries.pl $workdir/redundant_top.nr90 $top_ORFs_train > $top_cds_file");
215 + unlink("$workdir/redundant_top");
216 + unlink("$workdir/redundant_top.nr90");
217 + unlink("$workdir/redundant_top.nr90.bak.clstr");
218 +@@ -349,20 +348,20 @@
219 + }
220 + }
221 +
222 +-$cmd = "$UTIL_DIR/compute_base_probs.pl $transcripts_file $TOP_STRAND_ONLY > $workdir/base_freqs.dat";
223 ++$cmd = "compute_base_probs.pl $transcripts_file $TOP_STRAND_ONLY > $workdir/base_freqs.dat";
224 + &process_cmd($cmd) unless $reuse && -s "$workdir/base_freqs.dat";
225 +
226 +
227 + # get hexamer scores
228 +-#$cmd = "$UTIL_DIR/seq_n_background_to_logliklihood_vals.pl $top_cds_file $transcripts_file.random > hexamer.scores";
229 ++#$cmd = "seq_n_background_to_logliklihood_vals.pl $top_cds_file $transcripts_file.random > hexamer.scores";
230 + #&process_cmd($cmd) unless ($reuse && -s "hexamer.scores");
231 +
232 +-$cmd = "$UTIL_DIR/seq_n_baseprobs_to_logliklihood_vals.pl $top_cds_file $workdir/base_freqs.dat > $workdir/hexamer.scores";
233 ++$cmd = "seq_n_baseprobs_to_logliklihood_vals.pl $top_cds_file $workdir/base_freqs.dat > $workdir/hexamer.scores";
234 + &process_cmd($cmd) unless $reuse && -s "$workdir/hexamer.scores";
235 +
236 +
237 + # score all cds entries
238 +-$cmd = "$UTIL_DIR/score_CDS_liklihood_all_6_frames.pl $cds_file $workdir/hexamer.scores > $cds_file.scores";
239 ++$cmd = "score_CDS_liklihood_all_6_frames.pl $cds_file $workdir/hexamer.scores > $cds_file.scores";
240 + &process_cmd($cmd) unless ($reuse && -s "$cds_file.scores");
241 +
242 +
243 +@@ -440,18 +439,18 @@
244 + }
245 +
246 + # index the current gff file:
247 +-$cmd = "$UTIL_DIR/index_gff3_files_by_isoform.pl $gff3_file";
248 ++$cmd = "index_gff3_files_by_isoform.pl $gff3_file";
249 + &process_cmd($cmd);
250 +
251 + # retrieve the best entries:
252 +-$cmd = "$UTIL_DIR/gene_list_to_gff.pl $acc_file $gff3_file.inx > $cds_file.best_candidates.gff3";
253 ++$cmd = "gene_list_to_gff.pl $acc_file $gff3_file.inx > $cds_file.best_candidates.gff3";
254 + &process_cmd($cmd);
255 +
256 + {
257 + my $final_output_prefix = basename($transcripts_file) . ".transdecoder";
258 +
259 + # exclude shadow orfs (smaller orfs in different reading frame that are eclipsed by longer orfs)
260 +- $cmd = "$UTIL_DIR/remove_eclipsed_ORFs.pl $cds_file.best_candidates.gff3 > $final_output_prefix.gff3";
261 ++ $cmd = "remove_eclipsed_ORFs.pl $cds_file.best_candidates.gff3 > $final_output_prefix.gff3";
262 + &process_cmd($cmd);
263 +
264 +
265 +@@ -462,14 +461,14 @@
266 + my $gff3_file = "$final_output_prefix.gff3";
267 + my $bed_file = $gff3_file;
268 + $bed_file =~ s/\.gff3$/\.bed/;
269 +- $cmd = "$UTIL_DIR/gff3_file_to_bed.pl $gff3_file > $bed_file";
270 ++ $cmd = "gff3_file_to_bed.pl $gff3_file > $bed_file";
271 + &process_cmd($cmd);
272 +
273 +
274 + # make a peptide file:
275 + my $best_pep_file = $gff3_file;
276 + $best_pep_file =~ s/\.gff3$/\.pep/;
277 +- $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file > $best_pep_file";
278 ++ $cmd = "gff3_file_to_proteins.pl $gff3_file $transcripts_file > $best_pep_file";
279 + &process_cmd($cmd);
280 +
281 +
282 +@@ -477,13 +476,13 @@
283 + # make a CDS file:
284 + my $best_cds_file = $best_pep_file;
285 + $best_cds_file =~ s/\.pep$/\.cds/;
286 +- $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file CDS > $best_cds_file";
287 ++ $cmd = "gff3_file_to_proteins.pl $gff3_file $transcripts_file CDS > $best_cds_file";
288 + &process_cmd($cmd);
289 +
290 + # make a CDS file:
291 + my $best_cdna_file = $best_pep_file;
292 + $best_cdna_file =~ s/\.pep$/\.mRNA/;
293 +- $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file cDNA > $best_cdna_file";
294 ++ $cmd = "gff3_file_to_proteins.pl $gff3_file $transcripts_file cDNA > $best_cdna_file";
295 + &process_cmd($cmd);
296 +
297 + }
298
299 diff --git a/sci-biology/TransDecoder/files/pfam_runner.pl.patch b/sci-biology/TransDecoder/files/pfam_runner.pl.patch
300 new file mode 100644
301 index 0000000..7809b1a
302 --- /dev/null
303 +++ b/sci-biology/TransDecoder/files/pfam_runner.pl.patch
304 @@ -0,0 +1,20 @@
305 +--- /usr/bin/pfam_runner.pl 2015-01-09 11:22:55.000000000 +0100
306 ++++ pfam_runner.pl 2015-01-09 14:25:43.385838579 +0100
307 +@@ -24,7 +24,7 @@
308 + my $workdir;
309 + my $verbose;
310 + my ($reuse,$pfam_out);
311 +-my $CPU = 2;
312 ++my $CPU = 1;
313 +
314 + my $usage = <<_EOH_;
315 +
316 +@@ -59,7 +59,7 @@
317 + # -h print this option menu and quit
318 + # -v verbose
319 + #
320 +-# --CPU <int> number of threads to use; (default: 2)
321 ++# --CPU <int> number of threads to use; (default: 1)
322 + #
323 + # --MPI use MPI (via ffindex_apply_mpi)
324 + #
325
326 diff --git a/sci-biology/TransDecoder/metadata.xml b/sci-biology/TransDecoder/metadata.xml
327 new file mode 100644
328 index 0000000..2bc8930
329 --- /dev/null
330 +++ b/sci-biology/TransDecoder/metadata.xml
331 @@ -0,0 +1,9 @@
332 +<?xml version="1.0" encoding="UTF-8"?>
333 +<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd">
334 +<pkgmetadata>
335 + <herd>sci-biology</herd>
336 + <maintainer>
337 + <email>mmokrejs@×××××××××××××××.cz</email>
338 + <name>Martin Mokrejs</name>
339 + </maintainer>
340 +</pkgmetadata>