1 |
commit: aab87747b89b64107677056a3d4874d8f5ee7bbf |
2 |
Author: Martin Mokrejš <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz> |
3 |
AuthorDate: Tue Nov 21 16:11:28 2017 +0000 |
4 |
Commit: Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz> |
5 |
CommitDate: Tue Nov 21 16:11:28 2017 +0000 |
6 |
URL: https://gitweb.gentoo.org/proj/sci.git/commit/?id=aab87747 |
7 |
|
8 |
sci-biology/SEECER: execute jellyfish1 instead of jellyfish |
9 |
|
10 |
Also I wrote a few cleanup patches to expose THREADS |
11 |
variable and clean up the code. |
12 |
|
13 |
Package-Manager: Portage-2.3.14, Repoman-2.3.6 |
14 |
|
15 |
sci-biology/SEECER/SEECER-0.1.3-r2.ebuild | 7 ++- |
16 |
.../SEECER/files/rename_jellyfish_binary.patch | 11 ++++ |
17 |
sci-biology/SEECER/files/run_jellyfish.sh.patch | 72 ++++++++++++++++++++++ |
18 |
sci-biology/SEECER/files/run_seecer.sh.patch | 42 +++++++++++++ |
19 |
4 files changed, 130 insertions(+), 2 deletions(-) |
20 |
|
21 |
diff --git a/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild b/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild |
22 |
index 60862d8c8..0b7ec3bfa 100644 |
23 |
--- a/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild |
24 |
+++ b/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild |
25 |
@@ -5,7 +5,7 @@ EAPI=6 |
26 |
|
27 |
inherit eutils |
28 |
|
29 |
-DESCRIPTION="SEquencing Error Corrector for RNA-Seq reads" |
30 |
+DESCRIPTION="SEquence Error Corrector for RNA-Seq reads" |
31 |
HOMEPAGE="http://sb.cs.cmu.edu/seecer/" |
32 |
SRC_URI=" |
33 |
http://sb.cs.cmu.edu/seecer/downloads/"${P}".tar.gz |
34 |
@@ -22,12 +22,15 @@ DEPEND=" |
35 |
sci-libs/gsl:0= |
36 |
sci-biology/seqan:0=" |
37 |
RDEPEND="${DEPEND} |
38 |
- =sci-biology/jellyfish-1.1.11" |
39 |
+ =sci-biology/jellyfish-1.1.11-r1" |
40 |
|
41 |
S="${S}"/SEECER |
42 |
|
43 |
PATCHES=( |
44 |
"${FILESDIR}"/remove-hardcoded-paths.patch |
45 |
+ "${FILESDIR}"/run_seecer.sh.patch |
46 |
+ "${FILESDIR}"/run_jellyfish.sh.patch |
47 |
+ "${FILESDIR}"/rename_jellyfish_binary.patch |
48 |
) |
49 |
|
50 |
src_prepare(){ |
51 |
|
52 |
diff --git a/sci-biology/SEECER/files/rename_jellyfish_binary.patch b/sci-biology/SEECER/files/rename_jellyfish_binary.patch |
53 |
new file mode 100644 |
54 |
index 000000000..c6548cee1 |
55 |
--- /dev/null |
56 |
+++ b/sci-biology/SEECER/files/rename_jellyfish_binary.patch |
57 |
@@ -0,0 +1,11 @@ |
58 |
+--- SEECER/bin/run_seecer.sh.ori 2017-11-21 16:56:28.808767468 +0100 |
59 |
++++ SEECER/bin/run_seecer.sh 2017-11-21 16:57:07.469835728 +0100 |
60 |
+@@ -26,7 +26,7 @@ |
61 |
+ |
62 |
+ |
63 |
+ BINDIR='' #this can be hardcoded to /absolute/path/to/SEECER/bin/ |
64 |
+-JF="jellyfish" #this may be hardcoded to /absolute/path/to/jellyfish/bin/ |
65 |
++JF="jellyfish1" #this may be hardcoded to /absolute/path/to/jellyfish/bin/jellyfish |
66 |
+ |
67 |
+ K=17 |
68 |
+ SEECER_PARAMS="" |
69 |
|
70 |
diff --git a/sci-biology/SEECER/files/run_jellyfish.sh.patch b/sci-biology/SEECER/files/run_jellyfish.sh.patch |
71 |
new file mode 100644 |
72 |
index 000000000..7631f5a4c |
73 |
--- /dev/null |
74 |
+++ b/sci-biology/SEECER/files/run_jellyfish.sh.patch |
75 |
@@ -0,0 +1,72 @@ |
76 |
+--- SEECER-0.1.3/bin/run_jellyfish.sh.ori 2017-11-21 16:41:54.164599838 +0100 |
77 |
++++ SEECER-0.1.3/bin/run_jellyfish.sh 2017-11-21 16:46:28.022166903 +0100 |
78 |
+@@ -1,18 +1,45 @@ |
79 |
+ #!/bin/bash |
80 |
++ |
81 |
+# Usage: run_jellyfish.sh jellyfish_binpath tempfile_prefix kmersize mincount tmpdir infile1 [infile2] [threads] (threads is read from $8, default 32, so it can only be given together with infile2) |
82 |
+ JF=$1 |
83 |
+ LCOUNT=$4 |
84 |
+ TMPDIR=$5 |
85 |
++THREADS=${8:-32} |
86 |
+ |
87 |
+ if [ -z "$JF" ]; then |
88 |
+ echo "No path to jellyfish binary provided, exiting."; |
89 |
+ exit 255; |
90 |
+ fi |
91 |
+ |
92 |
++# Usage: jellyfish count [options] file:path+ |
93 |
++# |
94 |
++# Count k-mers or qmers in fasta or fastq files |
95 |
++# |
96 |
++# Options (default value in (), *required): |
97 |
++# -m, --mer-len=uint32 *Length of mer |
98 |
++# -s, --size=uint64 *Hash size |
99 |
++# -t, --threads=uint32 Number of threads (1) |
100 |
++# -o, --output=string Output prefix (mer_counts) |
101 |
++# -c, --counter-len=Length in bits Length of counting field (7) |
102 |
++# --out-counter-len=Length in bytes Length of counter field in output (4) |
103 |
++# -C, --both-strands Count both strand, canonical representation (false) |
104 |
++# -p, --reprobes=uint32 Maximum number of reprobes (62) |
105 |
++# -r, --raw Write raw database (false) |
106 |
++# -q, --quake Quake compatibility mode (false) |
107 |
++# --quality-start=uint32 Starting ASCII for quality values (64) |
108 |
++# --min-quality=uint32 Minimum quality. A base with lesser quality becomes an N (0) |
109 |
++# -L, --lower-count=uint64 Don't output k-mer with count < lower-count |
110 |
++# -U, --upper-count=uint64 Don't output k-mer with count > upper-count |
111 |
++# --invalid-char=warn|ignore|error How to treat invalid characters. The char is changed to a N. (warn) |
112 |
++# --matrix=Matrix file Hash function binary matrix |
113 |
++# --timing=Timing file Print timing information |
114 |
++# --stats=Stats file Print stats |
115 |
++# |
116 |
+ if [ "$#" -eq "4" ]; |
117 |
+ then |
118 |
+-$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t 32 --both-strands $6 || exit 255 |
119 |
++$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t $THREADS --both-strands $6 || exit 255 |
120 |
+ else |
121 |
+-$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t 32 --both-strands $6 $7 || exit 255 |
122 |
++$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t $THREADS --both-strands $6 $7 || exit 255 |
123 |
+ fi; |
124 |
+ |
125 |
+ # merge |
126 |
+@@ -25,5 +52,21 @@ |
127 |
+ rm $TMPDIR/jf_tmp_* |
128 |
+ fi |
129 |
+ |
130 |
++# |
131 |
++# Usage: jellyfish dump [options] db:path |
132 |
++# |
133 |
++# Dump k-mer counts |
134 |
++# |
135 |
++# By default, dump in a fasta format where the header is the count and |
136 |
++# the sequence is the sequence of the k-mer. The column format is a 2 |
137 |
++# column output: k-mer count. |
138 |
++# |
139 |
++# Options (default value in (), *required): |
140 |
++# -c, --column Column format (false) |
141 |
++# -t, --tab Tab separator (false) |
142 |
++# -L, --lower-count=uint64 Don't output k-mer with count < lower-count |
143 |
++# -U, --upper-count=uint64 Don't output k-mer with count > upper-count |
144 |
++# -o, --output=string Output file |
145 |
++# |
146 |
+ $JF dump --lower-count=$LCOUNT -o $2 -c $TMPDIR/jf_merged_$3 || exit 255 |
147 |
+ rm $TMPDIR/jf_merged_$3 |
148 |
|
149 |
diff --git a/sci-biology/SEECER/files/run_seecer.sh.patch b/sci-biology/SEECER/files/run_seecer.sh.patch |
150 |
new file mode 100644 |
151 |
index 000000000..a20c7917f |
152 |
--- /dev/null |
153 |
+++ b/sci-biology/SEECER/files/run_seecer.sh.patch |
154 |
@@ -0,0 +1,42 @@ |
155 |
+--- SEECER/bin/run_seecer.sh.old 2013-10-02 18:55:24.000000000 +0200 |
156 |
++++ SEECER/bin/run_seecer.sh 2017-11-21 16:24:24.065584149 +0100 |
157 |
+@@ -33,6 +33,7 @@ |
158 |
+ SeecerStep=1 |
159 |
+ LCOUNT=3 |
160 |
+ TMPDIR='' |
161 |
++THREADS=32 |
162 |
+ |
163 |
+ usage=$(cat << EOF |
164 |
+ # This script runs the SEECER pipeline of 4 steps: |
165 |
+@@ -54,11 +55,12 @@ |
166 |
+ -j <v> : specify the location of JELLYFISH binary (default = $JF). |
167 |
+ -p <v> : specify extra SEECER parameters (default = ''). |
168 |
+ -s <v> : specify the starting step ( default = 1). Values = 1,2,3,4. |
169 |
++ -c <v> : number of threads (default = 32). |
170 |
+ -h : help message |
171 |
+ EOF |
172 |
+ ); |
173 |
+ |
174 |
+-while getopts ":j:p:k:s:t:h" opt; do |
175 |
++while getopts ":j:p:k:s:t:c:h" opt; do |
176 |
+ case $opt in |
177 |
+ t) |
178 |
+ TMPDIR=$OPTARG |
179 |
+@@ -75,6 +77,8 @@ |
180 |
+ s) |
181 |
+ SeecerStep=$OPTARG |
182 |
+ ;; |
183 |
++ c) |
184 |
++ THREADS=$OPTARG |
185 |
+ \?) |
186 |
+ echo "Invalid option: -$OPTARG" >&2 |
187 |
+ echo "$usage" |
188 |
+@@ -170,7 +177,7 @@ |
189 |
+ then |
190 |
+ echo "++ Step 2: Running JELLYFISH to count kmers ..." |
191 |
+ echo |
192 |
+- bash "${BINDIR}"run_jellyfish.sh $JF $TMPDIR/counts_${K}_${LCOUNT} $K $LCOUNT $TMPDIR $Read1_N $Read2_N || exit 255 |
193 |
++ bash "${BINDIR}"run_jellyfish.sh $JF $TMPDIR/counts_${K}_${LCOUNT} $K $LCOUNT $TMPDIR $Read1_N $Read2_N $THREADS || exit 255 |
194 |
+ fi; |
195 |
+ |
196 |
+ if [ ! -r $TMPDIR/counts_${K}_${LCOUNT} ]; |