Gentoo Archives: gentoo-commits

From: "Michał Górny" <mgorny@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] repo/gentoo:master commit in: dev-python/nltk-data/
Date: Mon, 04 May 2020 11:26:38
Message-Id: 1588591570.d6d068bfb0046fad9c2b2ddf686c69721ec6f4a5.mgorny@gentoo
1 commit: d6d068bfb0046fad9c2b2ddf686c69721ec6f4a5
2 Author: Michał Górny <mgorny <AT> gentoo <DOT> org>
3 AuthorDate: Mon May 4 11:26:10 2020 +0000
4 Commit: Michał Górny <mgorny <AT> gentoo <DOT> org>
5 CommitDate: Mon May 4 11:26:10 2020 +0000
6 URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=d6d068bf
7
8 dev-python/nltk-data: Remove old
9
10 Signed-off-by: Michał Górny <mgorny <AT> gentoo.org>
11
12 dev-python/nltk-data/nltk-data-20200312.ebuild | 184 -------------------------
13 1 file changed, 184 deletions(-)
14
15 diff --git a/dev-python/nltk-data/nltk-data-20200312.ebuild b/dev-python/nltk-data/nltk-data-20200312.ebuild
16 deleted file mode 100644
17 index 1d02afe6be7..00000000000
18 --- a/dev-python/nltk-data/nltk-data-20200312.ebuild
19 +++ /dev/null
20 @@ -1,184 +0,0 @@
21 -# Copyright 2020 Gentoo Authors
22 -# Distributed under the terms of the GNU General Public License v2
23 -
24 -EAPI=7
25 -
26 -inherit check-reqs
27 -
28 -DESCRIPTION="Data files for NLTK"
29 -HOMEPAGE="https://www.nltk.org/nltk_data/"
30 -
31 -# at least some of the files have poorly documented licenses
32 -# TODO: create a USE flag for free-ish subset
33 -LICENSE="all-rights-reserved"
34 -SLOT="0"
35 -KEYWORDS="~amd64 ~x86"
36 -IUSE="extra"
37 -RESTRICT="bindist mirror"
38 -
39 -BDEPEND="app-arch/unzip"
40 -
41 -PACKAGES_ZIP=(
42 - # wget -O - https://www.nltk.org/nltk_data/ | xml sel -t -m '//package[@unzip=0]' -v @subdir -o "/" -v @id -n - | sort
43 - corpora/comtrans
44 - corpora/conll2007
45 - corpora/jeita
46 - corpora/knbc
47 - corpora/machado
48 - corpora/masc_tagged
49 - corpora/nombank.1.0
50 - corpora/panlex_swadesh
51 - corpora/propbank
52 - corpora/reuters
53 - corpora/semcor
54 - corpora/universal_treebanks_v20
55 - sentiment/vader_lexicon
56 - stemmers/snowball_data
57 -)
58 -
59 -PACKAGES_UNPACK=(
60 - # wget -O - https://www.nltk.org/nltk_data/ | xml sel -t -m '//package[@unzip=1]' -v @subdir -o "/" -v @id -n - | sort
61 - corpora/abc
62 - corpora/alpino
63 - corpora/brown
64 - corpora/cess_cat
65 - corpora/cess_esp
66 - corpora/chat80
67 - corpora/city_database
68 - corpora/cmudict
69 - corpora/comparative_sentences
70 - corpora/conll2000
71 - corpora/conll2002
72 - corpora/crubadan
73 - corpora/dependency_treebank
74 - corpora/dolch
75 - corpora/europarl_raw
76 - corpora/floresta
77 - corpora/framenet_v15
78 - corpora/framenet_v17
79 - corpora/gazetteers
80 - corpora/genesis
81 - corpora/gutenberg
82 - corpora/ieer
83 - corpora/inaugural
84 - corpora/indian
85 - corpora/lin_thesaurus
86 - corpora/mac_morpho
87 - corpora/movie_reviews
88 - corpora/mte_teip5
89 - corpora/names
90 - corpora/nonbreaking_prefixes
91 - corpora/nps_chat
92 - corpora/omw
93 - corpora/opinion_lexicon
94 - corpora/ppattach
95 - corpora/product_reviews_1
96 - corpora/product_reviews_2
97 - corpora/pros_cons
98 - corpora/ptb
99 - corpora/qc
100 - corpora/rte
101 - corpora/senseval
102 - corpora/sentence_polarity
103 - corpora/sentiwordnet
104 - corpora/shakespeare
105 - corpora/sinica_treebank
106 - corpora/state_union
107 - corpora/stopwords
108 - corpora/subjectivity
109 - corpora/swadesh
110 - corpora/switchboard
111 - corpora/timit
112 - corpora/toolbox
113 - corpora/treebank
114 - corpora/twitter_samples
115 - corpora/udhr
116 - corpora/udhr2
117 - corpora/verbnet
118 - corpora/webtext
119 - corpora/wordnet
120 - corpora/wordnet_ic
121 - corpora/words
122 - grammars/book_grammars
123 - grammars/large_grammars
124 - grammars/sample_grammars
125 - misc/perluniprops
126 - models/bllip_wsj_no_aux
127 - models/moses_sample
128 - models/wmt15_eval
129 - models/word2vec_sample
130 - stemmers/porter_test
131 - stemmers/rslp
132 - taggers/averaged_perceptron_tagger
133 - taggers/averaged_perceptron_tagger_ru
134 - taggers/universal_tagset
135 - tokenizers/punkt
136 -)
137 -
138 -PACKAGES_UNPACK_EXTRA=(
139 - chunkers/maxent_ne_chunker
140 - corpora/biocreative_ppi
141 - corpora/brown_tei
142 - corpora/kimmo
143 - corpora/paradigms
144 - corpora/pe08
145 - corpora/pil
146 - corpora/pl196x
147 - corpora/problem_reports
148 - corpora/smultron
149 - corpora/unicode_samples
150 - corpora/verbnet3
151 - corpora/ycoe
152 - grammars/basque_grammars
153 - grammars/spanish_grammars
154 - help/tagsets
155 - misc/mwa_ppdb
156 - taggers/maxent_treebank_pos_tagger
157 -)
158 -
159 -add_data() {
160 - local x
161 - for x; do
162 - SRC_URI+="
163 - https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/${x}.zip
164 - -> nltk-${x#*/}-${PV}.zip"
165 - done
166 -}
167 -
168 -add_data "${PACKAGES_ZIP[@]}" "${PACKAGES_UNPACK[@]}"
169 -SRC_URI+="
170 - extra? ("
171 -add_data "${PACKAGES_UNPACK_EXTRA[@]}"
172 -SRC_URI+="
173 - )"
174 -
175 -CHECKREQS_DISK_USR=3G
176 -CHECKREQS_DISK_BUILD=${CHECKREQS_DISK_USR}
177 -
178 -src_unpack() {
179 - local x
180 - local to_unpack=( "${PACKAGES_UNPACK[@]}" )
181 - use extra && to_unpack+=( "${PACKAGES_UNPACK_EXTRA[@]}" )
182 - for x in "${to_unpack[@]}"; do
183 - local cat=${x%/*}
184 - local pkg=${x#*/}
185 -
186 - mkdir -p "${S}/${cat}" || die
187 - cd "${S}/${cat}" || die
188 - unpack "nltk-${pkg}-${PV}.zip"
189 - done
190 -}
191 -
192 -src_install() {
193 - dodir /usr/share/nltk_data
194 - mv * "${ED}/usr/share/nltk_data/" || die
195 -
196 - local x
197 - for x in "${PACKAGES_ZIP[@]}"; do
198 - local cat=${x%/*}
199 - local pkg=${x#*/}
200 -
201 - insinto "/usr/share/nltk_data/${cat}"
202 - newins "${DISTDIR}/nltk-${pkg}-${PV}.zip" "${pkg}.zip"
203 - done
204 -}