Gentoo Archives: gentoo-commits

From: Martin Mokrejs <mmokrejs@×××××××××××××××.cz>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/sci:master commit in: sci-biology/biopython/, sci-biology/biopython/files/
Date: Sun, 23 Mar 2014 16:00:06
Message-Id: 1395589938.b6bc96d05888bcfb2a2ebac3a477663d1915c57c.mmokrejs@gentoo
1 commit: b6bc96d05888bcfb2a2ebac3a477663d1915c57c
2 Author: Martin Mokrejš <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
3 AuthorDate: Sun Mar 23 15:52:18 2014 +0000
4 Commit: Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
5 CommitDate: Sun Mar 23 15:52:18 2014 +0000
6 URL: http://git.overlays.gentoo.org/gitweb/?p=proj/sci.git;a=commit;h=b6bc96d0
7
8 sci-biology/biopython-1.63-r1: version bump and an upstream patch to improve error message
9
10 Package-Manager: portage-2.2.7
11
12 ---
13 sci-biology/biopython/ChangeLog | 17 +++
14 sci-biology/biopython/biopython-1.62-r3.ebuild | 51 +++++++
15 sci-biology/biopython/biopython-1.62-r4.ebuild | 51 +++++++
16 sci-biology/biopython/biopython-1.63-r1.ebuild | 51 +++++++
17 sci-biology/biopython/biopython-1.63.ebuild | 50 +++++++
18 sci-biology/biopython/files/SeqRecord.py.patch | 148 +++++++++++++++++++++
19 .../biopython/files/SffIO_error_in_check_eof.patch | 14 ++
20 .../biopython/files/adjust-trimpoints.patch | 76 +++++++++++
21 .../biopython/files/biopython-1.51-flex.patch | 21 +++
22 .../biopython/files/biopython-1.62-SffIO.patch | 36 +++++
23 sci-biology/biopython/metadata.xml | 5 +
24 11 files changed, 520 insertions(+)
25
26 diff --git a/sci-biology/biopython/ChangeLog b/sci-biology/biopython/ChangeLog
27 new file mode 100644
28 index 0000000..6dfe5e2
29 --- /dev/null
30 +++ b/sci-biology/biopython/ChangeLog
31 @@ -0,0 +1,17 @@
32 +# ChangeLog for sci-biology/biopython
33 +# Copyright 1999-2014 Gentoo Foundation; Distributed under the GPL v2
34 +# $Header: $
35 +
36 +*biopython-1.62-r3 (23 Mar 2014)
37 +*biopython-1.62-r4 (23 Mar 2014)
38 +*biopython-1.63-r1 (23 Mar 2014)
39 +*biopython-1.63 (23 Mar 2014)
40 +
41 + 23 Mar 2014; Martin Mokrejs <mmokrejs@×××××××××××××××.cz>
42 + +biopython-1.62-r3.ebuild, +biopython-1.62-r4.ebuild,
43 + +biopython-1.63-r1.ebuild, +biopython-1.63.ebuild, +files/SeqRecord.py.patch,
44 + +files/SffIO_error_in_check_eof.patch, +files/adjust-trimpoints.patch,
45 + +files/biopython-1.51-flex.patch, +files/biopython-1.62-SffIO.patch,
46 + +metadata.xml:
47 + sci-biology/biopython-1.63-r1: version bump and an upstream patch to improve
48 + error message
49
50 diff --git a/sci-biology/biopython/biopython-1.62-r3.ebuild b/sci-biology/biopython/biopython-1.62-r3.ebuild
51 new file mode 100644
52 index 0000000..1eed5a9
53 --- /dev/null
54 +++ b/sci-biology/biopython/biopython-1.62-r3.ebuild
55 @@ -0,0 +1,51 @@
56 +# Copyright 1999-2014 Gentoo Foundation
57 +# Distributed under the terms of the GNU General Public License v2
58 +# $Header: /var/cvsroot/gentoo-x86/sci-biology/biopython/biopython-1.62.ebuild,v 1.1 2013/09/17 16:07:56 jlec Exp $
59 +
60 +EAPI=5
61 +
62 +PYTHON_COMPAT=( python{2_6,2_7} )
63 +
64 +inherit distutils-r1 eutils
65 +
66 +DESCRIPTION="Python modules for computational molecular biology"
67 +HOMEPAGE="http://www.biopython.org/ http://pypi.python.org/pypi/biopython/"
68 +SRC_URI="http://www.biopython.org/DIST/${P}.tar.gz"
69 +
70 +LICENSE="HPND"
71 +SLOT="0"
72 +KEYWORDS="~amd64 ~ppc ~x86 ~amd64-linux ~x86-linux"
73 +IUSE="mysql postgres"
74 +
75 +REQUIRED_USE="${PYTHON_REQUIRED_USE}"
76 +
77 +RDEPEND="${PYTHON_DEPS}
78 + dev-python/matplotlib[${PYTHON_USEDEP}]
79 + dev-python/networkx[${PYTHON_USEDEP}]
80 + dev-python/numpy[${PYTHON_USEDEP}]
81 + dev-python/pygraphviz[${PYTHON_USEDEP}]
82 + dev-python/reportlab[${PYTHON_USEDEP}]
83 + media-gfx/pydot[${PYTHON_USEDEP}]
84 + mysql? ( dev-python/mysql-python[${PYTHON_USEDEP}] )
85 + postgres? ( dev-python/psycopg[${PYTHON_USEDEP}] )"
86 +DEPEND="${RDEPEND}
87 + sys-devel/flex"
88 +
89 +DOCS=( CONTRIB DEPRECATED NEWS README Doc/. )
90 +
91 +src_prepare() {
92 + distutils-r1_src_prepare
93 + epatch "${FILESDIR}/${PN}-1.62-SffIO.patch"
94 +}
95 +
96 +python_test() {
97 + cd Tests || die
98 + ${PYTHON} run_tests.py || die
99 +}
100 +
101 +python_install_all() {
102 + distutils-r1_python_install_all
103 +
104 + dodir /usr/share/${PN}
105 + cp -r --preserve=mode Scripts Tests "${ED}"/usr/share/${PN} || die
106 +}
107
108 diff --git a/sci-biology/biopython/biopython-1.62-r4.ebuild b/sci-biology/biopython/biopython-1.62-r4.ebuild
109 new file mode 100644
110 index 0000000..1eed5a9
111 --- /dev/null
112 +++ b/sci-biology/biopython/biopython-1.62-r4.ebuild
113 @@ -0,0 +1,51 @@
114 +# Copyright 1999-2014 Gentoo Foundation
115 +# Distributed under the terms of the GNU General Public License v2
116 +# $Header: /var/cvsroot/gentoo-x86/sci-biology/biopython/biopython-1.62.ebuild,v 1.1 2013/09/17 16:07:56 jlec Exp $
117 +
118 +EAPI=5
119 +
120 +PYTHON_COMPAT=( python{2_6,2_7} )
121 +
122 +inherit distutils-r1 eutils
123 +
124 +DESCRIPTION="Python modules for computational molecular biology"
125 +HOMEPAGE="http://www.biopython.org/ http://pypi.python.org/pypi/biopython/"
126 +SRC_URI="http://www.biopython.org/DIST/${P}.tar.gz"
127 +
128 +LICENSE="HPND"
129 +SLOT="0"
130 +KEYWORDS="~amd64 ~ppc ~x86 ~amd64-linux ~x86-linux"
131 +IUSE="mysql postgres"
132 +
133 +REQUIRED_USE="${PYTHON_REQUIRED_USE}"
134 +
135 +RDEPEND="${PYTHON_DEPS}
136 + dev-python/matplotlib[${PYTHON_USEDEP}]
137 + dev-python/networkx[${PYTHON_USEDEP}]
138 + dev-python/numpy[${PYTHON_USEDEP}]
139 + dev-python/pygraphviz[${PYTHON_USEDEP}]
140 + dev-python/reportlab[${PYTHON_USEDEP}]
141 + media-gfx/pydot[${PYTHON_USEDEP}]
142 + mysql? ( dev-python/mysql-python[${PYTHON_USEDEP}] )
143 + postgres? ( dev-python/psycopg[${PYTHON_USEDEP}] )"
144 +DEPEND="${RDEPEND}
145 + sys-devel/flex"
146 +
147 +DOCS=( CONTRIB DEPRECATED NEWS README Doc/. )
148 +
149 +src_prepare() {
150 + distutils-r1_src_prepare
151 + epatch "${FILESDIR}/${PN}-1.62-SffIO.patch"
152 +}
153 +
154 +python_test() {
155 + cd Tests || die
156 + ${PYTHON} run_tests.py || die
157 +}
158 +
159 +python_install_all() {
160 + distutils-r1_python_install_all
161 +
162 + dodir /usr/share/${PN}
163 + cp -r --preserve=mode Scripts Tests "${ED}"/usr/share/${PN} || die
164 +}
165
166 diff --git a/sci-biology/biopython/biopython-1.63-r1.ebuild b/sci-biology/biopython/biopython-1.63-r1.ebuild
167 new file mode 100644
168 index 0000000..aac2bdf
169 --- /dev/null
170 +++ b/sci-biology/biopython/biopython-1.63-r1.ebuild
171 @@ -0,0 +1,51 @@
172 +# Copyright 1999-2014 Gentoo Foundation
173 +# Distributed under the terms of the GNU General Public License v2
174 +# $Header: /var/cvsroot/gentoo-x86/sci-biology/biopython/biopython-1.62.ebuild,v 1.1 2013/09/17 16:07:56 jlec Exp $
175 +
176 +EAPI=5
177 +
178 +PYTHON_COMPAT=( python{2_6,2_7} )
179 +
180 +inherit distutils-r1 eutils
181 +
182 +DESCRIPTION="Python modules for computational molecular biology"
183 +HOMEPAGE="http://www.biopython.org/ http://pypi.python.org/pypi/biopython/"
184 +SRC_URI="http://www.biopython.org/DIST/${P}.tar.gz"
185 +
186 +LICENSE="HPND"
187 +SLOT="0"
188 +KEYWORDS="~amd64 ~ppc ~x86 ~amd64-linux ~x86-linux"
189 +IUSE="mysql postgres"
190 +
191 +REQUIRED_USE="${PYTHON_REQUIRED_USE}"
192 +
193 +RDEPEND="${PYTHON_DEPS}
194 + dev-python/matplotlib[${PYTHON_USEDEP}]
195 + dev-python/networkx[${PYTHON_USEDEP}]
196 + dev-python/numpy[${PYTHON_USEDEP}]
197 + dev-python/pygraphviz[${PYTHON_USEDEP}]
198 + dev-python/reportlab[${PYTHON_USEDEP}]
199 + media-gfx/pydot[${PYTHON_USEDEP}]
200 + mysql? ( dev-python/mysql-python[${PYTHON_USEDEP}] )
201 + postgres? ( dev-python/psycopg[${PYTHON_USEDEP}] )"
202 +DEPEND="${RDEPEND}
203 + sys-devel/flex"
204 +
205 +DOCS=( CONTRIB DEPRECATED NEWS README Doc/. )
206 +
207 +src_prepare() {
208 + epatch "${FILESDIR}"/SffIO_error_in_check_eof.patch
209 + distutils-r1_src_prepare
210 +}
211 +
212 +python_test() {
213 + cd Tests || die
214 + ${PYTHON} run_tests.py || die
215 +}
216 +
217 +python_install_all() {
218 + distutils-r1_python_install_all
219 +
220 + dodir /usr/share/${PN}
221 + cp -r --preserve=mode Scripts Tests "${ED}"/usr/share/${PN} || die
222 +}
223
224 diff --git a/sci-biology/biopython/biopython-1.63.ebuild b/sci-biology/biopython/biopython-1.63.ebuild
225 new file mode 100644
226 index 0000000..5180b33
227 --- /dev/null
228 +++ b/sci-biology/biopython/biopython-1.63.ebuild
229 @@ -0,0 +1,50 @@
230 +# Copyright 1999-2014 Gentoo Foundation
231 +# Distributed under the terms of the GNU General Public License v2
232 +# $Header: /var/cvsroot/gentoo-x86/sci-biology/biopython/biopython-1.62.ebuild,v 1.1 2013/09/17 16:07:56 jlec Exp $
233 +
234 +EAPI=5
235 +
236 +PYTHON_COMPAT=( python{2_6,2_7} )
237 +
238 +inherit distutils-r1 eutils
239 +
240 +DESCRIPTION="Python modules for computational molecular biology"
241 +HOMEPAGE="http://www.biopython.org/ http://pypi.python.org/pypi/biopython/"
242 +SRC_URI="http://www.biopython.org/DIST/${P}.tar.gz"
243 +
244 +LICENSE="HPND"
245 +SLOT="0"
246 +KEYWORDS="~amd64 ~ppc ~x86 ~amd64-linux ~x86-linux"
247 +IUSE="mysql postgres"
248 +
249 +REQUIRED_USE="${PYTHON_REQUIRED_USE}"
250 +
251 +RDEPEND="${PYTHON_DEPS}
252 + dev-python/matplotlib[${PYTHON_USEDEP}]
253 + dev-python/networkx[${PYTHON_USEDEP}]
254 + dev-python/numpy[${PYTHON_USEDEP}]
255 + dev-python/pygraphviz[${PYTHON_USEDEP}]
256 + dev-python/reportlab[${PYTHON_USEDEP}]
257 + media-gfx/pydot[${PYTHON_USEDEP}]
258 + mysql? ( dev-python/mysql-python[${PYTHON_USEDEP}] )
259 + postgres? ( dev-python/psycopg[${PYTHON_USEDEP}] )"
260 +DEPEND="${RDEPEND}
261 + sys-devel/flex"
262 +
263 +DOCS=( CONTRIB DEPRECATED NEWS README Doc/. )
264 +
265 +src_prepare() {
266 + distutils-r1_src_prepare
267 +}
268 +
269 +python_test() {
270 + cd Tests || die
271 + ${PYTHON} run_tests.py || die
272 +}
273 +
274 +python_install_all() {
275 + distutils-r1_python_install_all
276 +
277 + dodir /usr/share/${PN}
278 + cp -r --preserve=mode Scripts Tests "${ED}"/usr/share/${PN} || die
279 +}
280
281 diff --git a/sci-biology/biopython/files/SeqRecord.py.patch b/sci-biology/biopython/files/SeqRecord.py.patch
282 new file mode 100644
283 index 0000000..ac3785f
284 --- /dev/null
285 +++ b/sci-biology/biopython/files/SeqRecord.py.patch
286 @@ -0,0 +1,148 @@
287 +diff --git a/Bio/SeqIO/SffIO.py b/Bio/SeqIO/SffIO.py
288 +index 1971dba..43b38fd 100644
289 +--- a/Bio/SeqIO/SffIO.py
290 ++++ b/Bio/SeqIO/SffIO.py
291 +@@ -539,8 +539,15 @@ _valid_UAN_read_name = re.compile(r'^[a-zA-Z0-9]{14}$')
292 +
293 +
294 + def _sff_read_seq_record(handle, number_of_flows_per_read, flow_chars,
295 +- key_sequence, alphabet, trim=False):
296 +- """Parse the next read in the file, return data as a SeqRecord (PRIVATE)."""
297 ++ key_sequence, alphabet, trim=False, interpret_qual_trims=True, interpret_adapter_trims=False):
298 ++ """Parse the next read in the file, return data as a SeqRecord (PRIVATE).
299 ++ Allow user to specify which type of clipping values should be applied
300 ++ while reading the SFF stream. To be backwards compatible, we interpret
301 ++ only the quality-based trim points by default. That results in lower-cased
302 ++ sequences in the low-qual region, regardless of what adapter-based clip points
303 ++ say. This should be the desired behavior. More discussion at
304 ++ https://redmine.open-bio.org/issues/3437
305 ++ """
306 + #Now on to the reads...
307 + #the read header format (fixed part):
308 + #read_header_length H
309 +@@ -589,20 +596,41 @@ def _sff_read_seq_record(handle, number_of_flows_per_read, flow_chars,
310 + warnings.warn("Post quality %i byte padding region contained data, SFF data is not broken"
311 + % padding)
312 + #Follow Roche and apply most aggressive of qual and adapter clipping.
313 +- #Note Roche seems to ignore adapter clip fields when writing SFF,
314 +- #and uses just the quality clipping values for any clipping.
315 +- clip_left = max(clip_qual_left, clip_adapter_left)
316 +- #Right clipping of zero means no clipping
317 +- if clip_qual_right:
318 +- if clip_adapter_right:
319 +- clip_right = min(clip_qual_right, clip_adapter_right)
320 ++ #Note Roche does not use adapter clip fields when writing SFF files
321 ++ #but instead combines the adapter clipping information with quality-based
322 ++ #values and writes the most aggressive combination into clip fields (as
323 ++ #allowed by SFF specs).
324 ++
325 ++ if interpret_qual_trims:
326 ++ if interpret_adapter_trims:
327 ++ clip_left = max(clip_qual_left, clip_adapter_left)
328 ++ #Right clipping of zero means no clipping
329 ++ if clip_qual_right:
330 ++ if clip_adapter_right:
331 ++ clip_right = min(clip_qual_right, clip_adapter_right)
332 ++ else:
333 ++ #Typical case with Roche SFF files
334 ++ clip_right = clip_qual_right
335 ++ elif clip_adapter_right:
336 ++ clip_right = clip_adapter_right
337 ++ else:
338 ++ clip_right = seq_len
339 + else:
340 +- #Typical case with Roche SFF files
341 +- clip_right = clip_qual_right
342 +- elif clip_adapter_right:
343 +- clip_right = clip_adapter_right
344 ++ clip_left = clip_qual_left
345 ++ if clip_qual_right:
346 ++ clip_right = clip_qual_right
347 ++ else:
348 ++ clip_right = seq_len
349 ++ elif interpret_adapter_trims:
350 ++ clip_left = clip_adapter_left
351 ++ if clip_adapter_right:
352 ++ clip_right = clip_adapter_right
353 ++ else:
354 ++ clip_right = seq_len
355 + else:
356 +- clip_right = seq_len
357 ++ clip_left = 0
358 ++ clip_right = seq_len
359 ++
360 + #Now build a SeqRecord
361 + if trim:
362 + seq = seq[clip_left:clip_right].upper()
363 +diff --git a/Bio/SeqRecord.py b/Bio/SeqRecord.py
364 +index c90e13b..66bdea0 100644
365 +--- a/Bio/SeqRecord.py
366 ++++ b/Bio/SeqRecord.py
367 +@@ -14,6 +14,8 @@ __docformat__ = "epytext en" # Simple markup to show doctests nicely
368 + # also BioSQL.BioSeq.DBSeq which is the "Database Seq" class)
369 +
370 +
371 ++from Bio.Seq import Seq
372 ++
373 + class _RestrictedDict(dict):
374 + """Dict which only allows sequences of given length as values (PRIVATE).
375 +
376 +@@ -76,7 +78,7 @@ class _RestrictedDict(dict):
377 + if not hasattr(value, "__len__") or not hasattr(value, "__getitem__") \
378 + or (hasattr(self, "_length") and len(value) != self._length):
379 + raise TypeError("We only allow python sequences (lists, tuples or "
380 +- "strings) of length %i." % self._length)
381 ++ "strings) of length %i whereas you passed an object of length %s." % (self._length, str(len(value))))
382 + dict.__setitem__(self, key, value)
383 +
384 + def update(self, new_dict):
385 +@@ -290,10 +292,11 @@ class SeqRecord(object):
386 + """)
387 +
388 + def _set_seq(self, value):
389 +- #TODO - Add a deprecation warning that the seq should be write only?
390 +- if self._per_letter_annotations:
391 +- #TODO - Make this a warning? Silently empty the dictionary?
392 +- raise ValueError("You must empty the letter annotations first!")
393 ++ # we should be much more user friendly and accept even a plain sequence string
394 ++ # and make the Seq or MutableSeq object ourselves
395 ++ if not isinstance(value, Seq):
396 ++ raise ValueError("You must pass a Seq object containing the new sequence instead of just plain string.")
397 ++ else:
398 + self._seq = value
399 + try:
400 + self._per_letter_annotations = _RestrictedDict(length=len(self.seq))
401 +@@ -696,7 +699,7 @@ class SeqRecord(object):
402 + SeqIO.write(self, handle, format_spec)
403 + return handle.getvalue()
404 +
405 +- def __len__(self):
406 ++ def __len__(self, trim=False, interpret_qual_trims=True, interpret_adapter_trims=False):
407 + """Returns the length of the sequence.
408 +
409 + For example, using Bio.SeqIO to read in a FASTA nucleotide file:
410 +@@ -707,6 +710,10 @@ class SeqRecord(object):
411 + 309
412 + >>> len(record.seq)
413 + 309
414 ++
415 ++ It should be possible to get the length of a raw object, or of an
416 ++ object trimmed by quality or adapter criteria or both, whenever the
417 ++ user wants to, not only when the data is parsed from input.
418 + """
419 + return len(self.seq)
420 +
421 +@@ -725,6 +732,13 @@ class SeqRecord(object):
422 + """
423 + return True
424 +
425 ++ def apply_trimpoints(self, trim=False, interpret_qual_trims=False, interpret_adapter_trims=False):
426 ++ """We should apply either of the quality-based or adapter-based annotated
427 ++ trim points and return a new, sliced object.
428 ++ """
429 ++ pass
430 ++
431 ++
432 + def __add__(self, other):
433 + """Add another sequence or string to this sequence.
434 +
435
436 diff --git a/sci-biology/biopython/files/SffIO_error_in_check_eof.patch b/sci-biology/biopython/files/SffIO_error_in_check_eof.patch
437 new file mode 100644
438 index 0000000..9059604
439 --- /dev/null
440 +++ b/sci-biology/biopython/files/SffIO_error_in_check_eof.patch
441 @@ -0,0 +1,14 @@
442 +diff --git a/Bio/SeqIO/SffIO.py b/Bio/SeqIO/SffIO.py
443 +index 2bb0dac..735d55b 100644
444 +--- a/Bio/SeqIO/SffIO.py
445 ++++ b/Bio/SeqIO/SffIO.py
446 +@@ -941,7 +941,8 @@ def _check_eof(handle, index_offset, index_length):
447 + BiopythonParserWarning)
448 +
449 + offset = handle.tell()
450 +- assert offset % 8 == 0
451 ++ assert offset % 8 == 0, \
452 ++ "Wanted offset %i %% 8 = %i to be zero" % (offset, offset % 8)
453 + # Should now be at the end of the file...
454 + extra = handle.read(4)
455 + if extra == _sff:
456
457 diff --git a/sci-biology/biopython/files/adjust-trimpoints.patch b/sci-biology/biopython/files/adjust-trimpoints.patch
458 new file mode 100644
459 index 0000000..dd6d548
460 --- /dev/null
461 +++ b/sci-biology/biopython/files/adjust-trimpoints.patch
462 @@ -0,0 +1,76 @@
463 +diff --git a/Bio/SeqIO/SffIO.py b/Bio/SeqIO/SffIO.py
464 +index 1971dba..43b38fd 100644
465 +--- a/Bio/SeqIO/SffIO.py
466 ++++ b/Bio/SeqIO/SffIO.py
467 +@@ -539,8 +539,15 @@ _valid_UAN_read_name = re.compile(r'^[a-zA-Z0-9]{14}$')
468 +
469 +
470 + def _sff_read_seq_record(handle, number_of_flows_per_read, flow_chars,
471 +- key_sequence, alphabet, trim=False):
472 +- """Parse the next read in the file, return data as a SeqRecord (PRIVATE)."""
473 ++ key_sequence, alphabet, trim=False, interpret_qual_trims=True, interpret_adapter_trims=False):
474 ++ """Parse the next read in the file, return data as a SeqRecord (PRIVATE).
475 ++ Allow user to specify which type of clipping values should be applied
476 ++ while reading the SFF stream. To be backwards compatible, we interpret
477 ++ only the quality-based trim points by default. That results in lower-cased
478 ++ sequences in the low-qual region, regardless of what adapter-based clip points
479 ++ say. This should be the desired behavior. More discussion at
480 ++ https://redmine.open-bio.org/issues/3437
481 ++ """
482 + #Now on to the reads...
483 + #the read header format (fixed part):
484 + #read_header_length H
485 +@@ -589,20 +596,41 @@ def _sff_read_seq_record(handle, number_of_flows_per_read, flow_chars,
486 + warnings.warn("Post quality %i byte padding region contained data, SFF data is not broken"
487 + % padding)
488 + #Follow Roche and apply most aggressive of qual and adapter clipping.
489 +- #Note Roche seems to ignore adapter clip fields when writing SFF,
490 +- #and uses just the quality clipping values for any clipping.
491 +- clip_left = max(clip_qual_left, clip_adapter_left)
492 +- #Right clipping of zero means no clipping
493 +- if clip_qual_right:
494 +- if clip_adapter_right:
495 +- clip_right = min(clip_qual_right, clip_adapter_right)
496 ++ #Note Roche does not use adapter clip fields when writing SFF files
497 ++ #but instead combines the adapter clipping information with quality-based
498 ++ #values and writes the most aggressive combination into clip fields (as
499 ++ #allowed by SFF specs).
500 ++
501 ++ if interpret_qual_trims:
502 ++ if interpret_adapter_trims:
503 ++ clip_left = max(clip_qual_left, clip_adapter_left)
504 ++ #Right clipping of zero means no clipping
505 ++ if clip_qual_right:
506 ++ if clip_adapter_right:
507 ++ clip_right = min(clip_qual_right, clip_adapter_right)
508 ++ else:
509 ++ #Typical case with Roche SFF files
510 ++ clip_right = clip_qual_right
511 ++ elif clip_adapter_right:
512 ++ clip_right = clip_adapter_right
513 ++ else:
514 ++ clip_right = seq_len
515 + else:
516 +- #Typical case with Roche SFF files
517 +- clip_right = clip_qual_right
518 +- elif clip_adapter_right:
519 +- clip_right = clip_adapter_right
520 ++ clip_left = clip_qual_left
521 ++ if clip_qual_right:
522 ++ clip_right = clip_qual_right
523 ++ else:
524 ++ clip_right = seq_len
525 ++ elif interpret_adapter_trims:
526 ++ clip_left = clip_adapter_left
527 ++ if clip_adapter_right:
528 ++ clip_right = clip_adapter_right
529 ++ else:
530 ++ clip_right = seq_len
531 + else:
532 +- clip_right = seq_len
533 ++ clip_left = 0
534 ++ clip_right = seq_len
535 ++
536 + #Now build a SeqRecord
537 + if trim:
538 + seq = seq[clip_left:clip_right].upper()
539
540 diff --git a/sci-biology/biopython/files/biopython-1.51-flex.patch b/sci-biology/biopython/files/biopython-1.51-flex.patch
541 new file mode 100644
542 index 0000000..afd5094
543 --- /dev/null
544 +++ b/sci-biology/biopython/files/biopython-1.51-flex.patch
545 @@ -0,0 +1,21 @@
546 +--- setup.py.old 2008-11-25 18:03:16.000000000 +0100
547 ++++ setup.py 2008-11-25 18:04:14.000000000 +0100
548 +@@ -341,12 +341,12 @@
549 + include_dirs=["Bio"]
550 + ),
551 + #Commented out due to the build dependency on flex, see Bug 2619
552 +-# Extension('Bio.PDB.mmCIF.MMCIFlex',
553 +-# ['Bio/PDB/mmCIF/lex.yy.c',
554 +-# 'Bio/PDB/mmCIF/MMCIFlexmodule.c'],
555 +-# include_dirs=["Bio"],
556 +-# libraries=["fl"]
557 +-# ),
558 ++ Extension('Bio.PDB.mmCIF.MMCIFlex',
559 ++ ['Bio/PDB/mmCIF/lex.yy.c',
560 ++ 'Bio/PDB/mmCIF/MMCIFlexmodule.c'],
561 ++ include_dirs=["Bio"],
562 ++ libraries=["fl"]
563 ++ ),
564 + Extension('Bio.Nexus.cnexus',
565 + ['Bio/Nexus/cnexus.c']
566 + ),
567
568 diff --git a/sci-biology/biopython/files/biopython-1.62-SffIO.patch b/sci-biology/biopython/files/biopython-1.62-SffIO.patch
569 new file mode 100644
570 index 0000000..7f2208e
571 --- /dev/null
572 +++ b/sci-biology/biopython/files/biopython-1.62-SffIO.patch
573 @@ -0,0 +1,36 @@
574 +--- Bio/SeqIO/SffIO.py.ori 2013-09-25 13:28:51.000000000 +0200
575 ++++ Bio/SeqIO/SffIO.py 2013-09-25 13:37:44.000000000 +0200
576 +@@ -383,7 +383,14 @@
577 + if padding:
578 + padding = 8 - padding
579 + if handle.read(padding).count(_null) != padding:
580 +- raise ValueError("Post quality %i byte padding region contained data"
581 ++ import warnings
582 ++ from Bio import BiopythonParserWarning
583 ++ warnings.warn("Your SFF file is valid but post quality %i byte "
584 ++ "padding region contains UNUSED data. Was the "
585 ++ "SFF file created by SRA sff-dump >2.1.7 and <2.1.10? "
586 ++ "It did not clear some internal buffer while writing "
587 ++ "out new data so that previous values remained in the "
588 ++ "output unless overwritten by new real values."
589 + % padding)
590 + #print read, name, record_offset
591 + yield name, record_offset
592 +--- Bio/SeqIO/SffIO.py.ori 2013-09-25 14:07:14.000000000 +0200
593 ++++ Bio/SeqIO/SffIO.py 2013-09-25 14:08:59.000000000 +0200
594 +@@ -596,7 +596,14 @@
595 + if padding:
596 + padding = 8 - padding
597 + if handle.read(padding).count(_null) != padding:
598 +- raise ValueError("Post quality %i byte padding region contained data"
599 ++ import warnings
600 ++ from Bio import BiopythonParserWarning
601 ++ warnings.warn("Your SFF file is valid but post quality %i byte "
602 ++ "padding region contains UNUSED data. Was the "
603 ++ "SFF file created by SRA sff-dump >2.1.7 and <2.1.10? "
604 ++ "It did not clear some internal buffer while writing "
605 ++ "out new data so that previous values remained in the "
606 ++ "output unless overwritten by new real values."
607 + % padding)
608 + #Follow Roche and apply most aggressive of qual and adapter clipping.
609 + #Note Roche seems to ignore adapter clip fields when writing SFF,
610
611 diff --git a/sci-biology/biopython/metadata.xml b/sci-biology/biopython/metadata.xml
612 new file mode 100644
613 index 0000000..f17a827
614 --- /dev/null
615 +++ b/sci-biology/biopython/metadata.xml
616 @@ -0,0 +1,5 @@
617 +<?xml version="1.0" encoding="UTF-8"?>
618 +<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd">
619 +<pkgmetadata>
620 + <herd>sci-biology</herd>
621 +</pkgmetadata>