1 |
commit: b6bc96d05888bcfb2a2ebac3a477663d1915c57c |
2 |
Author: Martin Mokrejš <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz> |
3 |
AuthorDate: Sun Mar 23 15:52:18 2014 +0000 |
4 |
Commit: Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz> |
5 |
CommitDate: Sun Mar 23 15:52:18 2014 +0000 |
6 |
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/sci.git;a=commit;h=b6bc96d0 |
7 |
|
8 |
sci-biology/biopython-1.63-r1: version bump and an upstream patch to improve error message |
9 |
|
10 |
Package-Manager: portage-2.2.7 |
11 |
|
12 |
--- |
13 |
sci-biology/biopython/ChangeLog | 17 +++ |
14 |
sci-biology/biopython/biopython-1.62-r3.ebuild | 51 +++++++ |
15 |
sci-biology/biopython/biopython-1.62-r4.ebuild | 51 +++++++ |
16 |
sci-biology/biopython/biopython-1.63-r1.ebuild | 51 +++++++ |
17 |
sci-biology/biopython/biopython-1.63.ebuild | 50 +++++++ |
18 |
sci-biology/biopython/files/SeqRecord.py.patch | 148 +++++++++++++++++++++ |
19 |
.../biopython/files/SffIO_error_in_check_eof.patch | 14 ++ |
20 |
.../biopython/files/adjust-trimpoints.patch | 76 +++++++++++ |
21 |
.../biopython/files/biopython-1.51-flex.patch | 21 +++ |
22 |
.../biopython/files/biopython-1.62-SffIO.patch | 36 +++++ |
23 |
sci-biology/biopython/metadata.xml | 5 + |
24 |
11 files changed, 520 insertions(+) |
25 |
|
26 |
diff --git a/sci-biology/biopython/ChangeLog b/sci-biology/biopython/ChangeLog |
27 |
new file mode 100644 |
28 |
index 0000000..6dfe5e2 |
29 |
--- /dev/null |
30 |
+++ b/sci-biology/biopython/ChangeLog |
31 |
@@ -0,0 +1,17 @@ |
32 |
+# ChangeLog for sci-biology/biopython |
33 |
+# Copyright 1999-2014 Gentoo Foundation; Distributed under the GPL v2 |
34 |
+# $Header: $ |
35 |
+ |
36 |
+*biopython-1.62-r3 (23 Mar 2014) |
37 |
+*biopython-1.62-r4 (23 Mar 2014) |
38 |
+*biopython-1.63-r1 (23 Mar 2014) |
39 |
+*biopython-1.63 (23 Mar 2014) |
40 |
+ |
41 |
+ 23 Mar 2014; Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz> |
42 |
+ +biopython-1.62-r3.ebuild, +biopython-1.62-r4.ebuild, |
43 |
+ +biopython-1.63-r1.ebuild, +biopython-1.63.ebuild, +files/SeqRecord.py.patch, |
44 |
+ +files/SffIO_error_in_check_eof.patch, +files/adjust-trimpoints.patch, |
45 |
+ +files/biopython-1.51-flex.patch, +files/biopython-1.62-SffIO.patch, |
46 |
+ +metadata.xml: |
47 |
+ sci-biology/biopython-1.63-r1: version bump and an upstream patch to improve |
48 |
+ error message |
49 |
|
50 |
diff --git a/sci-biology/biopython/biopython-1.62-r3.ebuild b/sci-biology/biopython/biopython-1.62-r3.ebuild |
51 |
new file mode 100644 |
52 |
index 0000000..1eed5a9 |
53 |
--- /dev/null |
54 |
+++ b/sci-biology/biopython/biopython-1.62-r3.ebuild |
55 |
@@ -0,0 +1,51 @@ |
56 |
+# Copyright 1999-2014 Gentoo Foundation |
57 |
+# Distributed under the terms of the GNU General Public License v2 |
58 |
+# $Header: /var/cvsroot/gentoo-x86/sci-biology/biopython/biopython-1.62.ebuild,v 1.1 2013/09/17 16:07:56 jlec Exp $ |
59 |
+ |
60 |
+EAPI=5 |
61 |
+ |
62 |
+PYTHON_COMPAT=( python{2_6,2_7} ) |
63 |
+ |
64 |
+inherit distutils-r1 eutils |
65 |
+ |
66 |
+DESCRIPTION="Python modules for computational molecular biology" |
67 |
+HOMEPAGE="http://www.biopython.org/ http://pypi.python.org/pypi/biopython/" |
68 |
+SRC_URI="http://www.biopython.org/DIST/${P}.tar.gz" |
69 |
+ |
70 |
+LICENSE="HPND" |
71 |
+SLOT="0" |
72 |
+KEYWORDS="~amd64 ~ppc ~x86 ~amd64-linux ~x86-linux" |
73 |
+IUSE="mysql postgres" |
74 |
+ |
75 |
+REQUIRED_USE="${PYTHON_REQUIRED_USE}" |
76 |
+ |
77 |
+RDEPEND="${PYTHON_DEPS} |
78 |
+ dev-python/matplotlib[${PYTHON_USEDEP}] |
79 |
+ dev-python/networkx[${PYTHON_USEDEP}] |
80 |
+ dev-python/numpy[${PYTHON_USEDEP}] |
81 |
+ dev-python/pygraphviz[${PYTHON_USEDEP}] |
82 |
+ dev-python/reportlab[${PYTHON_USEDEP}] |
83 |
+ media-gfx/pydot[${PYTHON_USEDEP}] |
84 |
+ mysql? ( dev-python/mysql-python[${PYTHON_USEDEP}] ) |
85 |
+ postgres? ( dev-python/psycopg[${PYTHON_USEDEP}] )" |
86 |
+DEPEND="${RDEPEND} |
87 |
+ sys-devel/flex" |
88 |
+ |
89 |
+DOCS=( CONTRIB DEPRECATED NEWS README Doc/. ) |
90 |
+ |
91 |
+src_prepare() { |
92 |
+ distutils-r1_src_prepare |
93 |
+ epatch "${FILESDIR}/${PN}-1.62-SffIO.patch" |
94 |
+} |
95 |
+ |
96 |
+python_test() { |
97 |
+ cd Tests || die |
98 |
+ ${PYTHON} run_tests.py || die |
99 |
+} |
100 |
+ |
101 |
+python_install_all() { |
102 |
+ distutils-r1_python_install_all |
103 |
+ |
104 |
+ dodir /usr/share/${PN} |
105 |
+ cp -r --preserve=mode Scripts Tests "${ED}"/usr/share/${PN} || die |
106 |
+} |
107 |
|
108 |
diff --git a/sci-biology/biopython/biopython-1.62-r4.ebuild b/sci-biology/biopython/biopython-1.62-r4.ebuild |
109 |
new file mode 100644 |
110 |
index 0000000..1eed5a9 |
111 |
--- /dev/null |
112 |
+++ b/sci-biology/biopython/biopython-1.62-r4.ebuild |
113 |
@@ -0,0 +1,51 @@ |
114 |
+# Copyright 1999-2014 Gentoo Foundation |
115 |
+# Distributed under the terms of the GNU General Public License v2 |
116 |
+# $Header: /var/cvsroot/gentoo-x86/sci-biology/biopython/biopython-1.62.ebuild,v 1.1 2013/09/17 16:07:56 jlec Exp $ |
117 |
+ |
118 |
+EAPI=5 |
119 |
+ |
120 |
+PYTHON_COMPAT=( python{2_6,2_7} ) |
121 |
+ |
122 |
+inherit distutils-r1 eutils |
123 |
+ |
124 |
+DESCRIPTION="Python modules for computational molecular biology" |
125 |
+HOMEPAGE="http://www.biopython.org/ http://pypi.python.org/pypi/biopython/" |
126 |
+SRC_URI="http://www.biopython.org/DIST/${P}.tar.gz" |
127 |
+ |
128 |
+LICENSE="HPND" |
129 |
+SLOT="0" |
130 |
+KEYWORDS="~amd64 ~ppc ~x86 ~amd64-linux ~x86-linux" |
131 |
+IUSE="mysql postgres" |
132 |
+ |
133 |
+REQUIRED_USE="${PYTHON_REQUIRED_USE}" |
134 |
+ |
135 |
+RDEPEND="${PYTHON_DEPS} |
136 |
+ dev-python/matplotlib[${PYTHON_USEDEP}] |
137 |
+ dev-python/networkx[${PYTHON_USEDEP}] |
138 |
+ dev-python/numpy[${PYTHON_USEDEP}] |
139 |
+ dev-python/pygraphviz[${PYTHON_USEDEP}] |
140 |
+ dev-python/reportlab[${PYTHON_USEDEP}] |
141 |
+ media-gfx/pydot[${PYTHON_USEDEP}] |
142 |
+ mysql? ( dev-python/mysql-python[${PYTHON_USEDEP}] ) |
143 |
+ postgres? ( dev-python/psycopg[${PYTHON_USEDEP}] )" |
144 |
+DEPEND="${RDEPEND} |
145 |
+ sys-devel/flex" |
146 |
+ |
147 |
+DOCS=( CONTRIB DEPRECATED NEWS README Doc/. ) |
148 |
+ |
149 |
+src_prepare() { |
150 |
+ distutils-r1_src_prepare |
151 |
+ epatch "${FILESDIR}/${PN}-1.62-SffIO.patch" |
152 |
+} |
153 |
+ |
154 |
+python_test() { |
155 |
+ cd Tests || die |
156 |
+ ${PYTHON} run_tests.py || die |
157 |
+} |
158 |
+ |
159 |
+python_install_all() { |
160 |
+ distutils-r1_python_install_all |
161 |
+ |
162 |
+ dodir /usr/share/${PN} |
163 |
+ cp -r --preserve=mode Scripts Tests "${ED}"/usr/share/${PN} || die |
164 |
+} |
165 |
|
166 |
diff --git a/sci-biology/biopython/biopython-1.63-r1.ebuild b/sci-biology/biopython/biopython-1.63-r1.ebuild |
167 |
new file mode 100644 |
168 |
index 0000000..aac2bdf |
169 |
--- /dev/null |
170 |
+++ b/sci-biology/biopython/biopython-1.63-r1.ebuild |
171 |
@@ -0,0 +1,51 @@ |
172 |
+# Copyright 1999-2014 Gentoo Foundation |
173 |
+# Distributed under the terms of the GNU General Public License v2 |
174 |
+# $Header: /var/cvsroot/gentoo-x86/sci-biology/biopython/biopython-1.62.ebuild,v 1.1 2013/09/17 16:07:56 jlec Exp $ |
175 |
+ |
176 |
+EAPI=5 |
177 |
+ |
178 |
+PYTHON_COMPAT=( python{2_6,2_7} ) |
179 |
+ |
180 |
+inherit distutils-r1 eutils |
181 |
+ |
182 |
+DESCRIPTION="Python modules for computational molecular biology" |
183 |
+HOMEPAGE="http://www.biopython.org/ http://pypi.python.org/pypi/biopython/" |
184 |
+SRC_URI="http://www.biopython.org/DIST/${P}.tar.gz" |
185 |
+ |
186 |
+LICENSE="HPND" |
187 |
+SLOT="0" |
188 |
+KEYWORDS="~amd64 ~ppc ~x86 ~amd64-linux ~x86-linux" |
189 |
+IUSE="mysql postgres" |
190 |
+ |
191 |
+REQUIRED_USE="${PYTHON_REQUIRED_USE}" |
192 |
+ |
193 |
+RDEPEND="${PYTHON_DEPS} |
194 |
+ dev-python/matplotlib[${PYTHON_USEDEP}] |
195 |
+ dev-python/networkx[${PYTHON_USEDEP}] |
196 |
+ dev-python/numpy[${PYTHON_USEDEP}] |
197 |
+ dev-python/pygraphviz[${PYTHON_USEDEP}] |
198 |
+ dev-python/reportlab[${PYTHON_USEDEP}] |
199 |
+ media-gfx/pydot[${PYTHON_USEDEP}] |
200 |
+ mysql? ( dev-python/mysql-python[${PYTHON_USEDEP}] ) |
201 |
+ postgres? ( dev-python/psycopg[${PYTHON_USEDEP}] )" |
202 |
+DEPEND="${RDEPEND} |
203 |
+ sys-devel/flex" |
204 |
+ |
205 |
+DOCS=( CONTRIB DEPRECATED NEWS README Doc/. ) |
206 |
+ |
207 |
+src_prepare() { |
208 |
+ epatch "${FILESDIR}"/SffIO_error_in_check_eof.patch |
209 |
+ distutils-r1_src_prepare |
210 |
+} |
211 |
+ |
212 |
+python_test() { |
213 |
+ cd Tests || die |
214 |
+ ${PYTHON} run_tests.py || die |
215 |
+} |
216 |
+ |
217 |
+python_install_all() { |
218 |
+ distutils-r1_python_install_all |
219 |
+ |
220 |
+ dodir /usr/share/${PN} |
221 |
+ cp -r --preserve=mode Scripts Tests "${ED}"/usr/share/${PN} || die |
222 |
+} |
223 |
|
224 |
diff --git a/sci-biology/biopython/biopython-1.63.ebuild b/sci-biology/biopython/biopython-1.63.ebuild |
225 |
new file mode 100644 |
226 |
index 0000000..5180b33 |
227 |
--- /dev/null |
228 |
+++ b/sci-biology/biopython/biopython-1.63.ebuild |
229 |
@@ -0,0 +1,50 @@ |
230 |
+# Copyright 1999-2014 Gentoo Foundation |
231 |
+# Distributed under the terms of the GNU General Public License v2 |
232 |
+# $Header: /var/cvsroot/gentoo-x86/sci-biology/biopython/biopython-1.62.ebuild,v 1.1 2013/09/17 16:07:56 jlec Exp $ |
233 |
+ |
234 |
+EAPI=5 |
235 |
+ |
236 |
+PYTHON_COMPAT=( python{2_6,2_7} ) |
237 |
+ |
238 |
+inherit distutils-r1 eutils |
239 |
+ |
240 |
+DESCRIPTION="Python modules for computational molecular biology" |
241 |
+HOMEPAGE="http://www.biopython.org/ http://pypi.python.org/pypi/biopython/" |
242 |
+SRC_URI="http://www.biopython.org/DIST/${P}.tar.gz" |
243 |
+ |
244 |
+LICENSE="HPND" |
245 |
+SLOT="0" |
246 |
+KEYWORDS="~amd64 ~ppc ~x86 ~amd64-linux ~x86-linux" |
247 |
+IUSE="mysql postgres" |
248 |
+ |
249 |
+REQUIRED_USE="${PYTHON_REQUIRED_USE}" |
250 |
+ |
251 |
+RDEPEND="${PYTHON_DEPS} |
252 |
+ dev-python/matplotlib[${PYTHON_USEDEP}] |
253 |
+ dev-python/networkx[${PYTHON_USEDEP}] |
254 |
+ dev-python/numpy[${PYTHON_USEDEP}] |
255 |
+ dev-python/pygraphviz[${PYTHON_USEDEP}] |
256 |
+ dev-python/reportlab[${PYTHON_USEDEP}] |
257 |
+ media-gfx/pydot[${PYTHON_USEDEP}] |
258 |
+ mysql? ( dev-python/mysql-python[${PYTHON_USEDEP}] ) |
259 |
+ postgres? ( dev-python/psycopg[${PYTHON_USEDEP}] )" |
260 |
+DEPEND="${RDEPEND} |
261 |
+ sys-devel/flex" |
262 |
+ |
263 |
+DOCS=( CONTRIB DEPRECATED NEWS README Doc/. ) |
264 |
+ |
265 |
+src_prepare() { |
266 |
+ distutils-r1_src_prepare |
267 |
+} |
268 |
+ |
269 |
+python_test() { |
270 |
+ cd Tests || die |
271 |
+ ${PYTHON} run_tests.py || die |
272 |
+} |
273 |
+ |
274 |
+python_install_all() { |
275 |
+ distutils-r1_python_install_all |
276 |
+ |
277 |
+ dodir /usr/share/${PN} |
278 |
+ cp -r --preserve=mode Scripts Tests "${ED}"/usr/share/${PN} || die |
279 |
+} |
280 |
|
281 |
diff --git a/sci-biology/biopython/files/SeqRecord.py.patch b/sci-biology/biopython/files/SeqRecord.py.patch |
282 |
new file mode 100644 |
283 |
index 0000000..ac3785f |
284 |
--- /dev/null |
285 |
+++ b/sci-biology/biopython/files/SeqRecord.py.patch |
286 |
@@ -0,0 +1,148 @@ |
287 |
+diff --git a/Bio/SeqIO/SffIO.py b/Bio/SeqIO/SffIO.py |
288 |
+index 1971dba..43b38fd 100644 |
289 |
+--- a/Bio/SeqIO/SffIO.py |
290 |
++++ b/Bio/SeqIO/SffIO.py |
291 |
+@@ -539,8 +539,15 @@ _valid_UAN_read_name = re.compile(r'^[a-zA-Z0-9]{14}$') |
292 |
+ |
293 |
+ |
294 |
+ def _sff_read_seq_record(handle, number_of_flows_per_read, flow_chars, |
295 |
+- key_sequence, alphabet, trim=False): |
296 |
+- """Parse the next read in the file, return data as a SeqRecord (PRIVATE).""" |
297 |
++ key_sequence, alphabet, trim=False, interpret_qual_trims=True, interpret_adapter_trims=False): |
298 |
++ """Parse the next read in the file, return data as a SeqRecord (PRIVATE). |
299 |
++ Allow user to specify which type of clipping values should be applied |
300 |
++ while reading the SFF stream. To be backwards compatible, we interpret |
301 |
++ only the quality-based trim points by default. That results in lower-cased |
302 |
++ sequences in the low-qual region, regardless what adapter-based clip points |
303 |
++ say. This should be the desired behavior. More discussion at |
304 |
++ https://redmine.open-bio.org/issues/3437 |
305 |
++ """ |
306 |
+ #Now on to the reads... |
307 |
+ #the read header format (fixed part): |
308 |
+ #read_header_length H |
309 |
+@@ -589,20 +596,41 @@ def _sff_read_seq_record(handle, number_of_flows_per_read, flow_chars, |
310 |
+ warnings.warn("Post quality %i byte padding region contained data, SFF data is not broken" |
311 |
+ % padding) |
312 |
+ #Follow Roche and apply most aggressive of qual and adapter clipping. |
313 |
+- #Note Roche seems to ignore adapter clip fields when writing SFF, |
314 |
+- #and uses just the quality clipping values for any clipping. |
315 |
+- clip_left = max(clip_qual_left, clip_adapter_left) |
316 |
+- #Right clipping of zero means no clipping |
317 |
+- if clip_qual_right: |
318 |
+- if clip_adapter_right: |
319 |
+- clip_right = min(clip_qual_right, clip_adapter_right) |
320 |
++ #Note Roche does not use adapter clip fields when writing SFF files |
321 |
++ #but instead combines the adapter clipping information with quality-based |
322 |
++ #values and writes the most aggressive combination into clip fields (as |
323 |
++ #allowed by SFF specs). |
324 |
++ |
325 |
++ if interpret_qual_trims: |
326 |
++ if interpret_adapter_trims: |
327 |
++ clip_left = max(clip_qual_left, clip_adapter_left) |
328 |
++ #Right clipping of zero means no clipping |
329 |
++ if clip_qual_right: |
330 |
++ if clip_adapter_right: |
331 |
++ clip_right = min(clip_qual_right, clip_adapter_right) |
332 |
++ else: |
333 |
++ #Typical case with Roche SFF files |
334 |
++ clip_right = clip_qual_right |
335 |
++ elif clip_adapter_right: |
336 |
++ clip_right = clip_adapter_right |
337 |
++ else: |
338 |
++ clip_right = seq_len |
339 |
+ else: |
340 |
+- #Typical case with Roche SFF files |
341 |
+- clip_right = clip_qual_right |
342 |
+- elif clip_adapter_right: |
343 |
+- clip_right = clip_adapter_right |
344 |
++ clip_left = clip_qual_left |
345 |
++ if clip_qual_right: |
346 |
++ clip_right = clip_qual_right |
347 |
++ else: |
348 |
++ clip_right = seq_len |
349 |
++ elif interpret_adapter_trims: |
350 |
++ clip_left = clip_adapter_left |
351 |
++ if clip_adapter_right: |
352 |
++ clip_right = clip_adapter_right |
353 |
++ else: |
354 |
++ clip_right = seq_len |
355 |
+ else: |
356 |
+- clip_right = seq_len |
357 |
++ clip_left = 0 |
358 |
++ clip_right = seq_len |
359 |
++ |
360 |
+ #Now build a SeqRecord |
361 |
+ if trim: |
362 |
+ seq = seq[clip_left:clip_right].upper() |
363 |
+diff --git a/Bio/SeqRecord.py b/Bio/SeqRecord.py |
364 |
+index c90e13b..66bdea0 100644 |
365 |
+--- a/Bio/SeqRecord.py |
366 |
++++ b/Bio/SeqRecord.py |
367 |
+@@ -14,6 +14,8 @@ __docformat__ = "epytext en" # Simple markup to show doctests nicely |
368 |
+ # also BioSQL.BioSeq.DBSeq which is the "Database Seq" class) |
369 |
+ |
370 |
+ |
371 |
++from Bio.Seq import Seq |
372 |
++ |
373 |
+ class _RestrictedDict(dict): |
374 |
+ """Dict which only allows sequences of given length as values (PRIVATE). |
375 |
+ |
376 |
+@@ -76,7 +78,7 @@ class _RestrictedDict(dict): |
377 |
+ if not hasattr(value, "__len__") or not hasattr(value, "__getitem__") \ |
378 |
+ or (hasattr(self, "_length") and len(value) != self._length): |
379 |
+ raise TypeError("We only allow python sequences (lists, tuples or " |
380 |
+- "strings) of length %i." % self._length) |
381 |
++ "strings) of length %i whereas you passed an object of length %s." % (self._length, str(len(value)))) |
382 |
+ dict.__setitem__(self, key, value) |
383 |
+ |
384 |
+ def update(self, new_dict): |
385 |
+@@ -290,10 +292,11 @@ class SeqRecord(object): |
386 |
+ """) |
387 |
+ |
388 |
+ def _set_seq(self, value): |
389 |
+- #TODO - Add a deprecation warning that the seq should be write only? |
390 |
+- if self._per_letter_annotations: |
391 |
+- #TODO - Make this a warning? Silently empty the dictionary? |
392 |
+- raise ValueError("You must empty the letter annotations first!") |
393 |
++ # we should be much more user friendly and accept even a plain sequence string |
394 |
++ # and make the Seq or MutableSeq object ourselves |
395 |
++ if not isinstance(value, Seq): |
396 |
++ raise ValueError("You must pass a Seq object containing the new sequence instead of just plain string.") |
397 |
++ else: |
398 |
+ self._seq = value |
399 |
+ try: |
400 |
+ self._per_letter_annotations = _RestrictedDict(length=len(self.seq)) |
401 |
+@@ -696,7 +699,7 @@ class SeqRecord(object): |
402 |
+ SeqIO.write(self, handle, format_spec) |
403 |
+ return handle.getvalue() |
404 |
+ |
405 |
+- def __len__(self): |
406 |
++ def __len__(self, trim=False, interpret_qual_trims=True, interpret_adapter_trims=False): |
407 |
+ """Returns the length of the sequence. |
408 |
+ |
409 |
+ For example, using Bio.SeqIO to read in a FASTA nucleotide file: |
410 |
+@@ -707,6 +710,10 @@ class SeqRecord(object): |
411 |
+ 309 |
412 |
+ >>> len(record.seq) |
413 |
+ 309 |
414 |
++ |
415 |
++ It should be possible to get length of a raw object, of trimmed |
416 |
++ object by quality or adapter criteria or both, whenever user wants |
417 |
++ to, not only when data is parsed from input. |
418 |
+ """ |
419 |
+ return len(self.seq) |
420 |
+ |
421 |
+@@ -725,6 +732,13 @@ class SeqRecord(object): |
422 |
+ """ |
423 |
+ return True |
424 |
+ |
425 |
++ def apply_trimpoints(self, trim=False, interpret_qual_trims=False, interpret_adapter_trims=False): |
426 |
++ """We should apply either of the quality-based or adapter-based annotated |
427 |
++ trim points and return a new, sliced object. |
428 |
++ """ |
429 |
++ pass |
430 |
++ |
431 |
++ |
432 |
+ def __add__(self, other): |
433 |
+ """Add another sequence or string to this sequence. |
434 |
+ |
435 |
|
436 |
diff --git a/sci-biology/biopython/files/SffIO_error_in_check_eof.patch b/sci-biology/biopython/files/SffIO_error_in_check_eof.patch |
437 |
new file mode 100644 |
438 |
index 0000000..9059604 |
439 |
--- /dev/null |
440 |
+++ b/sci-biology/biopython/files/SffIO_error_in_check_eof.patch |
441 |
@@ -0,0 +1,14 @@ |
442 |
+diff --git a/Bio/SeqIO/SffIO.py b/Bio/SeqIO/SffIO.py |
443 |
+index 2bb0dac..735d55b 100644 |
444 |
+--- a/Bio/SeqIO/SffIO.py |
445 |
++++ b/Bio/SeqIO/SffIO.py |
446 |
+@@ -941,7 +941,8 @@ def _check_eof(handle, index_offset, index_length): |
447 |
+ BiopythonParserWarning) |
448 |
+ |
449 |
+ offset = handle.tell() |
450 |
+- assert offset % 8 == 0 |
451 |
++ assert offset % 8 == 0, \ |
452 |
++ "Wanted offset %i %% 8 = %i to be zero" % (offset, offset % 8) |
453 |
+ # Should now be at the end of the file... |
454 |
+ extra = handle.read(4) |
455 |
+ if extra == _sff: |
456 |
|
457 |
diff --git a/sci-biology/biopython/files/adjust-trimpoints.patch b/sci-biology/biopython/files/adjust-trimpoints.patch |
458 |
new file mode 100644 |
459 |
index 0000000..dd6d548 |
460 |
--- /dev/null |
461 |
+++ b/sci-biology/biopython/files/adjust-trimpoints.patch |
462 |
@@ -0,0 +1,76 @@ |
463 |
+diff --git a/Bio/SeqIO/SffIO.py b/Bio/SeqIO/SffIO.py |
464 |
+index 1971dba..43b38fd 100644 |
465 |
+--- a/Bio/SeqIO/SffIO.py |
466 |
++++ b/Bio/SeqIO/SffIO.py |
467 |
+@@ -539,8 +539,15 @@ _valid_UAN_read_name = re.compile(r'^[a-zA-Z0-9]{14}$') |
468 |
+ |
469 |
+ |
470 |
+ def _sff_read_seq_record(handle, number_of_flows_per_read, flow_chars, |
471 |
+- key_sequence, alphabet, trim=False): |
472 |
+- """Parse the next read in the file, return data as a SeqRecord (PRIVATE).""" |
473 |
++ key_sequence, alphabet, trim=False, interpret_qual_trims=True, interpret_adapter_trims=False): |
474 |
++ """Parse the next read in the file, return data as a SeqRecord (PRIVATE). |
475 |
++ Allow user to specify which type of clipping values should be applied |
476 |
++ while reading the SFF stream. To be backwards compatible, we interpret |
477 |
++ only the quality-based trim points by default. That results in lower-cased |
478 |
++ sequences in the low-qual region, regardless what adapter-based clip points |
479 |
++ say. This should be the desired behavior. More discussion at |
480 |
++ https://redmine.open-bio.org/issues/3437 |
481 |
++ """ |
482 |
+ #Now on to the reads... |
483 |
+ #the read header format (fixed part): |
484 |
+ #read_header_length H |
485 |
+@@ -589,20 +596,41 @@ def _sff_read_seq_record(handle, number_of_flows_per_read, flow_chars, |
486 |
+ warnings.warn("Post quality %i byte padding region contained data, SFF data is not broken" |
487 |
+ % padding) |
488 |
+ #Follow Roche and apply most aggressive of qual and adapter clipping. |
489 |
+- #Note Roche seems to ignore adapter clip fields when writing SFF, |
490 |
+- #and uses just the quality clipping values for any clipping. |
491 |
+- clip_left = max(clip_qual_left, clip_adapter_left) |
492 |
+- #Right clipping of zero means no clipping |
493 |
+- if clip_qual_right: |
494 |
+- if clip_adapter_right: |
495 |
+- clip_right = min(clip_qual_right, clip_adapter_right) |
496 |
++ #Note Roche does not use adapter clip fields when writing SFF files |
497 |
++ #but instead combines the adapter clipping information with quality-based |
498 |
++ #values and writes the most aggressive combination into clip fields (as |
499 |
++ #allowed by SFF specs). |
500 |
++ |
501 |
++ if interpret_qual_trims: |
502 |
++ if interpret_adapter_trims: |
503 |
++ clip_left = max(clip_qual_left, clip_adapter_left) |
504 |
++ #Right clipping of zero means no clipping |
505 |
++ if clip_qual_right: |
506 |
++ if clip_adapter_right: |
507 |
++ clip_right = min(clip_qual_right, clip_adapter_right) |
508 |
++ else: |
509 |
++ #Typical case with Roche SFF files |
510 |
++ clip_right = clip_qual_right |
511 |
++ elif clip_adapter_right: |
512 |
++ clip_right = clip_adapter_right |
513 |
++ else: |
514 |
++ clip_right = seq_len |
515 |
+ else: |
516 |
+- #Typical case with Roche SFF files |
517 |
+- clip_right = clip_qual_right |
518 |
+- elif clip_adapter_right: |
519 |
+- clip_right = clip_adapter_right |
520 |
++ clip_left = clip_qual_left |
521 |
++ if clip_qual_right: |
522 |
++ clip_right = clip_qual_right |
523 |
++ else: |
524 |
++ clip_right = seq_len |
525 |
++ elif interpret_adapter_trims: |
526 |
++ clip_left = clip_adapter_left |
527 |
++ if clip_adapter_right: |
528 |
++ clip_right = clip_adapter_right |
529 |
++ else: |
530 |
++ clip_right = seq_len |
531 |
+ else: |
532 |
+- clip_right = seq_len |
533 |
++ clip_left = 0 |
534 |
++ clip_right = seq_len |
535 |
++ |
536 |
+ #Now build a SeqRecord |
537 |
+ if trim: |
538 |
+ seq = seq[clip_left:clip_right].upper() |
539 |
|
540 |
diff --git a/sci-biology/biopython/files/biopython-1.51-flex.patch b/sci-biology/biopython/files/biopython-1.51-flex.patch |
541 |
new file mode 100644 |
542 |
index 0000000..afd5094 |
543 |
--- /dev/null |
544 |
+++ b/sci-biology/biopython/files/biopython-1.51-flex.patch |
545 |
@@ -0,0 +1,21 @@ |
546 |
+--- setup.py.old 2008-11-25 18:03:16.000000000 +0100 |
547 |
++++ setup.py 2008-11-25 18:04:14.000000000 +0100 |
548 |
+@@ -341,12 +341,12 @@ |
549 |
+ include_dirs=["Bio"] |
550 |
+ ), |
551 |
+ #Commented out due to the build dependency on flex, see Bug 2619 |
552 |
+-# Extension('Bio.PDB.mmCIF.MMCIFlex', |
553 |
+-# ['Bio/PDB/mmCIF/lex.yy.c', |
554 |
+-# 'Bio/PDB/mmCIF/MMCIFlexmodule.c'], |
555 |
+-# include_dirs=["Bio"], |
556 |
+-# libraries=["fl"] |
557 |
+-# ), |
558 |
++ Extension('Bio.PDB.mmCIF.MMCIFlex', |
559 |
++ ['Bio/PDB/mmCIF/lex.yy.c', |
560 |
++ 'Bio/PDB/mmCIF/MMCIFlexmodule.c'], |
561 |
++ include_dirs=["Bio"], |
562 |
++ libraries=["fl"] |
563 |
++ ), |
564 |
+ Extension('Bio.Nexus.cnexus', |
565 |
+ ['Bio/Nexus/cnexus.c'] |
566 |
+ ), |
567 |
|
568 |
diff --git a/sci-biology/biopython/files/biopython-1.62-SffIO.patch b/sci-biology/biopython/files/biopython-1.62-SffIO.patch |
569 |
new file mode 100644 |
570 |
index 0000000..7f2208e |
571 |
--- /dev/null |
572 |
+++ b/sci-biology/biopython/files/biopython-1.62-SffIO.patch |
573 |
@@ -0,0 +1,36 @@ |
574 |
+--- Bio/SeqIO/SffIO.py.ori 2013-09-25 13:28:51.000000000 +0200 |
575 |
++++ Bio/SeqIO/SffIO.py 2013-09-25 13:37:44.000000000 +0200 |
576 |
+@@ -383,7 +383,14 @@ |
577 |
+ if padding: |
578 |
+ padding = 8 - padding |
579 |
+ if handle.read(padding).count(_null) != padding: |
580 |
+- raise ValueError("Post quality %i byte padding region contained data" |
581 |
++ import warnings |
582 |
++ from Bio import BiopythonParserWarning |
583 |
++ warnings.warn("Your SFF file is valid but post quality %i byte " |
584 |
++ "padding region contains UNUSED data. Was the " |
585 |
++ "SFF file created by SRA sff-dump >2.1.7 and <2.1.10? " |
586 |
++ "It did not clear some internal buffer while writing " |
587 |
++ "out new data so that previous values remained in the" |
588 |
++ "output unless overwritten by new real values." |
589 |
+ % padding) |
590 |
+ #print read, name, record_offset |
591 |
+ yield name, record_offset |
592 |
+--- Bio/SeqIO/SffIO.py.ori 2013-09-25 14:07:14.000000000 +0200 |
593 |
++++ Bio/SeqIO/SffIO.py 2013-09-25 14:08:59.000000000 +0200 |
594 |
+@@ -596,7 +596,14 @@ |
595 |
+ if padding: |
596 |
+ padding = 8 - padding |
597 |
+ if handle.read(padding).count(_null) != padding: |
598 |
+- raise ValueError("Post quality %i byte padding region contained data" |
599 |
++ import warnings |
600 |
++ from Bio import BiopythonParserWarning |
601 |
++ warnings.warn("Your SFF file is valid but post quality %i byte " |
602 |
++ "padding region contains UNUSED data. Was the " |
603 |
++ "SFF file created by SRA sff-dump >2.1.7 and <2.1.10? " |
604 |
++ "It did not clear some internal buffer while writing " |
605 |
++ "out new data so that previous values remained in the" |
606 |
++ "output unless overwritten by new real values." |
607 |
+ % padding) |
608 |
+ #Follow Roche and apply most aggressive of qual and adapter clipping. |
609 |
+ #Note Roche seems to ignore adapter clip fields when writing SFF, |
610 |
|
611 |
diff --git a/sci-biology/biopython/metadata.xml b/sci-biology/biopython/metadata.xml |
612 |
new file mode 100644 |
613 |
index 0000000..f17a827 |
614 |
--- /dev/null |
615 |
+++ b/sci-biology/biopython/metadata.xml |
616 |
@@ -0,0 +1,5 @@ |
617 |
+<?xml version="1.0" encoding="UTF-8"?> |
618 |
+<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd"> |
619 |
+<pkgmetadata> |
620 |
+ <herd>sci-biology</herd> |
621 |
+</pkgmetadata> |