Gentoo Archives: gentoo-commits

From: Bernard Cafarelli <voyageur@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] repo/gentoo:master commit in: app-text/tesseract/files/, app-text/tesseract/
Date: Tue, 04 Jun 2019 14:42:56
Message-Id: 1559659193.98f4080a82c54d39d0a6c646649ca47fe9c7d649.voyageur@gentoo
1 commit: 98f4080a82c54d39d0a6c646649ca47fe9c7d649
2 Author: Bernard Cafarelli <voyageur <AT> gentoo <DOT> org>
3 AuthorDate: Tue Jun 4 14:35:30 2019 +0000
4 Commit: Bernard Cafarelli <voyageur <AT> gentoo <DOT> org>
5 CommitDate: Tue Jun 4 14:39:53 2019 +0000
6 URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=98f4080a
7
8 app-text/tesseract: 4.0.0 bump
9
10 Thanks marecki and Chris Mayo for the help
11 This version does not provide scrollview anymore, see bug for details
12
13 Closes: https://bugs.gentoo.org/686944
14 Package-Manager: Portage-2.3.67, Repoman-2.3.14
15 Signed-off-by: Bernard Cafarelli <voyageur <AT> gentoo.org>
16
17 app-text/tesseract/Manifest | 1 +
18 .../tesseract/files/tesseract-4.0.0-manpages.patch | 49 ++++++++
19 app-text/tesseract/tesseract-4.0.0.ebuild | 129 +++++++++++++++++++++
20 3 files changed, 179 insertions(+)
21
22 diff --git a/app-text/tesseract/Manifest b/app-text/tesseract/Manifest
23 index 039ffa60657..c36c7265429 100644
24 --- a/app-text/tesseract/Manifest
25 +++ b/app-text/tesseract/Manifest
26 @@ -123,6 +123,7 @@ DIST tam.traineddata-4.00 17333471 BLAKE2B 67e4b10d8e9fab5df8455b35483972d5543cc
27 DIST tel.traineddata-4.00 54446537 BLAKE2B 0ecb044b58017e36a6d9e28927242ecb6ac8975f079c42c78a661faa87ac1ed7f9d38fd59383d3ca1b484fbf371aeb5e872765921d4b79066a938b88671a3d9f SHA512 5e5c2ca84f095ec4dd3184d6cb75702482c699b5cd04f8750ab8958f578f7b24cf3253a83d19a4a1f3716466b95a81737d473339e3593538e46372c4588febb6
28 DIST tesseract-3.05.01.tar.gz 3574810 BLAKE2B 0af97d0a58c05ce4d6f8fb3f76302344f75b5894b036adc327ecc79f90f2b41da58c8145509dc131706e806e291355e221482980294e2c9caff4be93d9b448b6 SHA512 a49c20c98386684cd89582e57b772811204fad8e5ff18214fb0da109f73629c70845054985e31e8deeb49107fbcf56e546aff661f08eb5dd60fbf83dbe976e81
29 DIST tesseract-3.05.02.tar.gz 3571750 BLAKE2B f8c856449ab763d66a50d4bc65450f3c71132e66b4306f5a022c3df65c62646ec202256e12b5ce91888330a5ef10a3966f91eddc241cf306809ce40abc165c82 SHA512 4cb23a6981dd5ec9eefea7b9674847ae88a411a7308ee6d946a920c76eefcf5fe7a90f6cb3ff00493a0e69b5c327d052fa8514d7f3ed506bccbe4b0163065793
30 +DIST tesseract-4.0.0.tar.gz 1961372 BLAKE2B 5d684ec58deddc2cad6d61f5a3e22e9bccc1fa96b57d5bcad5239d5ba6d0805978b94b4b44a871c0796a756173b8072327f8a44a6a84da5604a3b50d9e425d21 SHA512 69e57d4ba1fc43d212fd0fff69a2b5d48a3b37cfee7054fdc083cbb7e04d92317609a32e457229661d70ce8d9b16c9d25e81bfc3861db660dd2c8f292202d447
31 DIST tesseract-4.0.0_beta4.tar.gz 1855821 BLAKE2B 8273fefc5c07526c87adcbe1d9d81204f622f42396564d8e5fae5ed5275c47bcefab1bc5df1560dee722a1236fbd2211f622e61b3f279219a877b3bb3bad822e SHA512 37347c461c288cbaf839b65ac1283ab5b60e6723726442a5d067e6a4e2f48d6847d7393206f1ddd4de3c5dbd8a8d9febcc42d68567a46a4665fbf678bab232d4
32 DIST tgk.traineddata-4.00 6555909 BLAKE2B a3023ec0fa10e3f52e5059b20da48a81eaa90b6665b55b50ac74d7f1f4acea3d7b6eae9f00f2a0a1f58798eb15d976367145d72976db1dd019fe249cae36a1b2 SHA512 abc5f2c4ac91b17a250f2f4847c9528a7673d51230415b7a069dc7243e2c210564400d34d5c38da6b727c8c24af26cbde3e7ef8a9674fa1605da001b4b1b60bb
33 DIST tgl.traineddata-3.04.00 4114554 BLAKE2B 1cbd5e20fcf5fa7ea183daa76e1d09d7bd4427ce2456de49f23e374af8ae48219a32f71a25d0a98c26c79ac87dc4f8f68621c76fad85b6105561f5285d635c8d SHA512 8c205fd0d4b2fc774e5b6a19cb56dcdb91b7d001acd881e34363e437d5eeae8615b853ae09f93c4957328d3a423300b9a20a443ca971a14ede1867de6194a2bb
34
35 diff --git a/app-text/tesseract/files/tesseract-4.0.0-manpages.patch b/app-text/tesseract/files/tesseract-4.0.0-manpages.patch
36 new file mode 100644
37 index 00000000000..dfa8e9fde34
38 --- /dev/null
39 +++ b/app-text/tesseract/files/tesseract-4.0.0-manpages.patch
40 @@ -0,0 +1,49 @@
41 +From 39ed30ad834a43cf403f88158c6db7a96f1bed29 Mon Sep 17 00:00:00 2001
42 +From: Stefan Weil <sw@××××××××.de>
43 +Date: Fri, 1 Feb 2019 19:47:46 +0100
44 +Subject: Fix build rule for manpages
45 +
46 +This is similar to commit 2106cba0a98a90451df835f3ab7b2aaf54826442
47 +which fixed doc/generate_manpages.sh.
48 +
49 +Signed-off-by: Stefan Weil <sw@××××××××.de>
50 +---
51 + doc/Makefile.am | 8 ++++----
52 + 1 file changed, 4 insertions(+), 4 deletions(-)
53 +
54 +diff --git a/doc/Makefile.am b/doc/Makefile.am
55 +index a218aca3..c0eeda84 100644
56 +--- a/doc/Makefile.am
57 ++++ b/doc/Makefile.am
58 +@@ -2,9 +2,6 @@
59 +
60 + if ASCIIDOC
61 +
62 +-asciidoc=asciidoc -d manpage
63 +-
64 +-
65 + man_MANS = \
66 + combine_lang_model.1 \
67 + combine_tessdata.1 \
68 +@@ -29,6 +26,8 @@ man_MANS += \
69 + unicharset.5
70 + endif
71 +
72 ++man_xslt = http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl
73 ++
74 + EXTRA_DIST = $(man_MANS) Doxyfile
75 +
76 + .PHONY: html
77 +@@ -36,7 +35,8 @@ EXTRA_DIST = $(man_MANS) Doxyfile
78 + html: $(patsubst %,%.html,$(man_MANS))
79 +
80 + %: %.asc
81 +- $(asciidoc) -o $@ $<
82 ++ asciidoc -b docbook -d manpage -o - $< | \
83 ++ xsltproc --nonet $(man_xslt) -
84 +
85 + %.html: %.asc
86 + asciidoc -b html5 -o $@ $<
87 +--
88 +cgit v1.2.1
89 +
90
91 diff --git a/app-text/tesseract/tesseract-4.0.0.ebuild b/app-text/tesseract/tesseract-4.0.0.ebuild
92 new file mode 100644
93 index 00000000000..d4ba59d9b35
94 --- /dev/null
95 +++ b/app-text/tesseract/tesseract-4.0.0.ebuild
96 @@ -0,0 +1,129 @@
97 +# Copyright 1999-2019 Gentoo Authors
98 +# Distributed under the terms of the GNU General Public License v2
99 +
100 +EAPI=7
101 +
102 +MY_PN="tesseract-ocr" # first path component of the upstream GitHub URLs below
103 +LANGPACKV="4.00" # tessdata ref the language files are fetched from
104 +URI_PREFIX="https://github.com/${MY_PN}/tessdata/raw/${LANGPACKV}/"
105 +
106 +inherit autotools toolchain-funcs
107 +
108 +DESCRIPTION="An OCR Engine, originally developed at HP, now open source"
109 +HOMEPAGE="https://github.com/tesseract-ocr"
110 +SRC_URI="https://github.com/${MY_PN}/${PN}/archive/${PV}.tar.gz -> ${P}.tar.gz
111 +	${URI_PREFIX}eng.traineddata -> eng.traineddata-${LANGPACKV}
112 +	math? ( ${URI_PREFIX}equ.traineddata -> equ.traineddata-${LANGPACKV} )
113 +	osd? ( ${URI_PREFIX}osd.traineddata -> osd.traineddata-${LANGPACKV} )"
114 +
115 +LICENSE="Apache-2.0"
116 +SLOT="0"
117 +KEYWORDS="~alpha ~amd64 ~arm ~arm64 ~mips ~ppc ~ppc64 ~sparc ~x86"
118 +IUSE="doc jpeg math opencl openmp osd png static-libs tiff training webp"
119 +
120 +# List of supported Gentoo linguas and their upstream mapping
121 +# https://github.com/tesseract-ocr/tesseract/wiki/Data-Files
122 +# "old" variants were regrouped in the matching modern locale
123 +LANGUAGES="af:afr am:amh ar:ara as:asm az:aze,aze_cyrl be:bel bn:ben bo:bod bs:bos bg:bul ca:cat cs:ces zh:chi_sim,chi_tra cy:cym da:dan de:deu,frk dz:dzo el:ell,grc en:enm eo:epo et:est eu:eus fa:fas fi:fin fr:fra,frm ga:gle gl:glg gu:guj he:heb hi:hin hr:hrv hu:hun id:ind is:isl it:ita,ita_old ja:jpn kn:kan ka:kat,kat_old kk:kaz km:khm ky:kir ko:kor ku:kur lo:lao la:lat lv:lav lt:lit ml:mal mr:mar mk:mkd ms:msa my:mya ne:nep nl:nld no:nor or:ori pa:pan pl:pol pt:por ro:ron ru:rus sa:san si:sin sk:slk sl:slv es:spa,spa_old sq:sqi sr:srp,srp_latn sw:swa sv:swe syc:syr ta:tam te:tel tg:tgk tl:tgl th:tha tr:tur ug:uig uk:ukr uz:uzb,uzb_cyrl vi:vie"
124 +# Missing matches:
125 +# ceb Cebuano
126 +# chr Cherokee
127 +# hat Haitian; Haitian Creole
128 +# iku Inuktitut
129 +# jav Javanese
130 +# mlt Maltese
131 +# pus Pushto; Pashto
132 +# tir Tigrinya
133 +# urd Urdu
134 +# yid Yiddish
135 +# l10n_en provides the additional data:
136 +# enm English, Middle (1100-1500)
137 +
138 +for lang in ${LANGUAGES}; do # entries look like "<gentoo code>:<tesseract code>[,<tesseract code>...]"
139 +	gentoo_lang=${lang%:*} tess_langs=${lang#*:}
140 +	for tess_lang in ${tess_langs//,/ }; do
141 +		SRC_URI+=" l10n_${gentoo_lang}? ( ${URI_PREFIX}${tess_lang}.traineddata -> ${tess_lang}.traineddata-${LANGPACKV} )"
142 +	done
143 +	IUSE+=" l10n_${gentoo_lang}"
144 +done
145 +unset lang gentoo_lang tess_langs tess_lang # loop helpers must not linger in the global ebuild scope
146 +
147 +# With opencl USE=tiff is necessary in leptonica
148 +RDEPEND=">=media-libs/leptonica-1.74:=[zlib,tiff?,jpeg?,png?,webp?]
149 +	opencl? (
150 +		virtual/opencl
151 +		media-libs/tiff:0=
152 +		media-libs/leptonica:=[tiff]
153 +	)
154 +	training? (
155 +		dev-libs/icu:=
156 +		x11-libs/pango:=
157 +		x11-libs/cairo:=
158 +	)"
159 +# Documentation generators only run on the build host, so with EAPI 7 they belong in BDEPEND
160 +DEPEND="${RDEPEND}"
161 +BDEPEND="app-text/asciidoc
162 +	app-text/docbook-xsl-stylesheets
163 +	dev-libs/libxslt
164 +	doc? ( app-doc/doxygen )"
165 +
166 +DOCS=( AUTHORS ChangeLog README.md )
167 +
168 +PATCHES=(
169 +	"${FILESDIR}"/${P}-manpages.patch
170 +)
171 +
172 +pkg_pretend() {
173 +	use openmp && [[ "${MERGE_TYPE}" != "binary" ]] && tc-check-openmp # OpenMP check only for source builds with USE=openmp
174 +}
175 +
176 +pkg_setup() {
177 +	use openmp && [[ "${MERGE_TYPE}" != "binary" ]] && tc-check-openmp # same guard as pkg_pretend: skipped for binary merges
178 +}
179 +
180 +src_unpack() {
181 +	local file # loop variable stays private to this phase
182 +	unpack ${P}.tar.gz # the source tarball is the only real archive in ${A}
183 +	for file in ${A}; do
184 +		[[ ${file} == *traineddata* ]] || continue # everything else was handled by unpack above
185 +		cp "${DISTDIR}/${file}" "${S}/tessdata/${file%-*}" || die # drop the "-<version>" distfile suffix
186 +	done
187 +}
188 +
189 +src_prepare() {
190 +	default_src_prepare # default phase: applies PATCHES and user patches
191 +	eautoreconf # the manpages patch edits doc/Makefile.am, so the autotools output must be regenerated
192 +}
193 +
194 +src_configure() {
195 +	# scrollview stays disabled for now, see bug #686944
196 +	local -a conf_opts=( --enable-shared --disable-graphics )
197 +
198 +	# Optional features follow their USE flags
199 +	conf_opts+=(
200 +		$(use_enable opencl)
201 +		$(use_enable openmp)
202 +		$(use_enable static-libs static)
203 +	)
204 +	econf "${conf_opts[@]}"
205 +}
206 +
207 +src_compile() {
208 +	default_src_compile
209 +	local extra # the optional make targets share their names with the USE flags
210 +	for extra in doc training; do use ${extra} && emake ${extra}; done
211 +}
212 +
213 +src_install() {
214 +	use doc && HTML_DOCS=( doc/html/. )
215 +	default
216 +	if use training; then
217 +		emake DESTDIR="${D}" training-install
218 +	fi
219 +
220 +	insinto /usr/share/tessdata
221 +	doins tessdata/*traineddata* # language files
222 +
223 +	# Prune libtool archives last so that every install step above is covered
224 +	find "${D}" -name '*.la' -type f -delete || die
225 +}