1 |
commit: 2ca0adba706af006c13a385fd0357fcb66fb5da0 |
2 |
Author: Akinori Hattori <hattya <AT> gentoo <DOT> org> |
3 |
AuthorDate: Sat Oct 9 06:48:40 2021 +0000 |
4 |
Commit: Akinori Hattori <hattya <AT> gentoo <DOT> org> |
5 |
CommitDate: Sat Oct 9 06:54:10 2021 +0000 |
6 |
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=2ca0adba |
7 |
|
8 |
app-i18n/libkkc-data: initial import |
9 |
|
10 |
Package-Manager: Portage-3.0.20, Repoman-3.0.3 |
11 |
Signed-off-by: Akinori Hattori <hattya <AT> gentoo.org> |
12 |
|
13 |
app-i18n/libkkc-data/Manifest | 1 + |
14 |
.../libkkc-data/files/libkkc-data-python3.patch | 138 +++++++++++++++++++++ |
15 |
app-i18n/libkkc-data/libkkc-data-0.2.7.ebuild | 20 +++ |
16 |
app-i18n/libkkc-data/metadata.xml | 11 ++ |
17 |
4 files changed, 170 insertions(+) |
18 |
|
19 |
diff --git a/app-i18n/libkkc-data/Manifest b/app-i18n/libkkc-data/Manifest |
20 |
new file mode 100644 |
21 |
index 00000000000..c0fc91c8c73 |
22 |
--- /dev/null |
23 |
+++ b/app-i18n/libkkc-data/Manifest |
24 |
@@ -0,0 +1 @@ |
25 |
+DIST libkkc-data-0.2.7.tar.xz 22262552 BLAKE2B 2c735ee9fabf8f8f201591c9ed584cece22ddcd15da5f36b39bb422b1bce1dbcbcd66f71b5713e2dd4c5e2862b06b014c24a4a3db63c86ecee20519434da9261 SHA512 61c0cd8c0fa41ed8df49cac6709eebb245cc965d7e192b1ba945e95f2fc46aca8aa48c16e1977a12c157c55dab6b9f4c30f4905806725eca6e697b762eb7cbd7 |
26 |
|
27 |
diff --git a/app-i18n/libkkc-data/files/libkkc-data-python3.patch b/app-i18n/libkkc-data/files/libkkc-data-python3.patch |
28 |
new file mode 100644 |
29 |
index 00000000000..46109a92dff |
30 |
--- /dev/null |
31 |
+++ b/app-i18n/libkkc-data/files/libkkc-data-python3.patch |
32 |
@@ -0,0 +1,138 @@ |
33 |
+From ba1c1bd3eb86d887fc3689c3142732658071b5f7 Mon Sep 17 00:00:00 2001 |
34 |
+From: Takao Fujiwara <tfujiwar@××××××.com> |
35 |
+Date: Mon, 30 Jul 2018 15:26:37 +0900 |
36 |
+Subject: [PATCH] build: Enable python3 |
37 |
+ |
38 |
+--- |
39 |
+ tools/genfilter.py | 18 +++++++-------- |
40 |
+ tools/sortlm.py | 23 ++++++++----------- |
41 |
+ 2 files changed, 19 insertions(+), 22 deletions(-) |
42 |
+ |
43 |
+diff --git a/tools/genfilter.py b/tools/genfilter.py |
44 |
+index 5ffab32..0c5f75a 100644 |
45 |
+--- a/tools/genfilter.py |
46 |
++++ b/tools/genfilter.py |
47 |
+@@ -84,24 +84,24 @@ def __init__(self, infile, outfile, record_size): |
48 |
+ |
49 |
+ def generate(self): |
50 |
+ size = os.fstat(self.infile.fileno()).st_size |
51 |
+- n = size / self.record_size |
52 |
++ n = size // self.record_size |
53 |
+ m = int(math.ceil(-n*math.log10(ERROR_RATE) / |
54 |
+ math.pow(math.log10(2), 2))) |
55 |
+- m = (m/8 + 1)*8 |
56 |
++ m = (m//8 + 1)*8 |
57 |
+ inmem = mmap.mmap(self.infile.fileno(), |
58 |
+ size, |
59 |
+ access=mmap.ACCESS_READ) |
60 |
+- outmem = bytearray(m/8) |
61 |
+- for i in xrange(0, n): |
62 |
++ outmem = bytearray(m//8) |
63 |
++ for i in range(0, n): |
64 |
+ offset = i*self.record_size |
65 |
+ b0, b1 = struct.unpack("=LL", inmem[offset:offset+8]) |
66 |
+- for k in xrange(0, 4): |
67 |
++ for k in range(0, 4): |
68 |
+ h = murmur_hash3_32(b0, b1, k) |
69 |
+ h = int(h * (m / float(0xFFFFFFFF))) |
70 |
+- outmem[h/8] |= (1 << (h%8)) |
71 |
++ outmem[h//8] |= (1 << (h%8)) |
72 |
+ inmem.close() |
73 |
+- # Convert bytearray to str, for Python 2.6 compatibility. |
74 |
+- self.outfile.write(str(outmem)) |
75 |
++ # Convert bytearray to bytes, for Python 3 compatibility. |
76 |
++ self.outfile.write(bytes(outmem)) |
77 |
+ |
78 |
+ if __name__ == '__main__': |
79 |
+ import sys |
80 |
+@@ -110,7 +110,7 @@ def generate(self): |
81 |
+ parser = argparse.ArgumentParser(description='filter') |
82 |
+ parser.add_argument('infile', type=argparse.FileType('r'), |
83 |
+ help='input file') |
84 |
+- parser.add_argument('outfile', type=argparse.FileType('w'), |
85 |
++ parser.add_argument('outfile', type=argparse.FileType('wb'), |
86 |
+ help='output file') |
87 |
+ parser.add_argument('record_size', type=int, |
88 |
+ help='record size') |
89 |
+diff --git a/tools/sortlm.py b/tools/sortlm.py |
90 |
+index a0dd8fe..40f0837 100644 |
91 |
+--- a/tools/sortlm.py |
92 |
++++ b/tools/sortlm.py |
93 |
+@@ -40,10 +40,10 @@ def __init__(self, infile, output_prefix): |
94 |
+ self.__min_cost = 0.0 |
95 |
+ |
96 |
+ def read(self): |
97 |
+- print "reading N-grams" |
98 |
++ print("reading N-grams") |
99 |
+ self.__read_tries() |
100 |
+ self.__read_ngrams() |
101 |
+- print "min cost = %lf" % self.__min_cost |
102 |
++ print("min cost = %lf" % self.__min_cost) |
103 |
+ |
104 |
+ def __read_tries(self): |
105 |
+ while True: |
106 |
+@@ -58,7 +58,7 @@ def __read_tries(self): |
107 |
+ line = self.__infile.readline() |
108 |
+ if line == "": |
109 |
+ break |
110 |
+- line = line.strip() |
111 |
++ line = line.strip('\n') |
112 |
+ if line == "": |
113 |
+ break |
114 |
+ match = self.__ngram_line_regex.match(line) |
115 |
+@@ -89,7 +89,7 @@ def __read_ngrams(self): |
116 |
+ line = self.__infile.readline() |
117 |
+ if line == "": |
118 |
+ break |
119 |
+- line = line.strip() |
120 |
++ line = line.strip('\n') |
121 |
+ if line == "": |
122 |
+ break |
123 |
+ match = self.__ngram_line_regex.match(line) |
124 |
+@@ -125,14 +125,11 @@ def __write_ngrams(self): |
125 |
+ def quantize(cost, min_cost): |
126 |
+ return max(0, min(65535, int(cost * 65535 / min_cost))) |
127 |
+ |
128 |
+- def cmp_header(a, b): |
129 |
+- return cmp(a[0], b[0]) |
130 |
+- |
131 |
+- print "writing 1-gram file" |
132 |
++ print("writing 1-gram file") |
133 |
+ unigram_offsets = {} |
134 |
+ unigram_file = open("%s.1gram" % self.__output_prefix, "wb") |
135 |
+ offset = 0 |
136 |
+- for ids, value in sorted(self.__ngram_entries[0].iteritems()): |
137 |
++ for ids, value in sorted(self.__ngram_entries[0].items()): |
138 |
+ unigram_offsets[ids[0]] = offset |
139 |
+ s = struct.pack("=HHH", |
140 |
+ quantize(value[0], self.__min_cost), |
141 |
+@@ -143,13 +140,13 @@ def cmp_header(a, b): |
142 |
+ offset += 1 |
143 |
+ unigram_file.close() |
144 |
+ |
145 |
+- print "writing 2-gram file" |
146 |
++ print("writing 2-gram file") |
147 |
+ bigram_offsets = {} |
148 |
+ bigram_file = open("%s.2gram" % self.__output_prefix, "wb") |
149 |
+ keys = self.__ngram_entries[1].keys() |
150 |
+ items = [(struct.pack("=LL", ids[1], unigram_offsets[ids[0]]), ids) for ids in keys] |
151 |
+ offset = 0 |
152 |
+- for header, ids in sorted(items, cmp=cmp_header): |
153 |
++ for header, ids in sorted(items, key=lambda x: x[0]): |
154 |
+ value = self.__ngram_entries[1][ids] |
155 |
+ bigram_offsets[ids] = offset |
156 |
+ s = struct.pack("=HH", |
157 |
+@@ -160,11 +157,11 @@ def cmp_header(a, b): |
158 |
+ bigram_file.close() |
159 |
+ |
160 |
+ if len(self.__ngram_entries[2]) > 0: |
161 |
+- print "writing 3-gram file" |
162 |
++ print("writing 3-gram file") |
163 |
+ trigram_file = open("%s.3gram" % self.__output_prefix, "wb") |
164 |
+ keys = self.__ngram_entries[2].keys() |
165 |
+ items = [(struct.pack("=LL", ids[2], bigram_offsets[(ids[0], ids[1])]), ids) for ids in keys] |
166 |
+- for header, ids in sorted(items, cmp=cmp_header): |
167 |
++ for header, ids in sorted(items, key=lambda x: x[0]): |
168 |
+ value = self.__ngram_entries[2][ids] |
169 |
+ s = struct.pack("=H", |
170 |
+ quantize(value[0], self.__min_cost)) |
171 |
|
172 |
diff --git a/app-i18n/libkkc-data/libkkc-data-0.2.7.ebuild b/app-i18n/libkkc-data/libkkc-data-0.2.7.ebuild |
173 |
new file mode 100644 |
174 |
index 00000000000..eff62dbe39f |
175 |
--- /dev/null |
176 |
+++ b/app-i18n/libkkc-data/libkkc-data-0.2.7.ebuild |
177 |
@@ -0,0 +1,20 @@ |
178 |
+# Copyright 1999-2021 Gentoo Authors |
179 |
+# Distributed under the terms of the GNU General Public License v2 |
180 |
+ |
181 |
+EAPI="7" |
182 |
+PYTHON_COMPAT=( python3_{7..10} ) |
183 |
+ |
184 |
+inherit python-any-r1 |
185 |
+ |
186 |
+DESCRIPTION="Language model data for libkkc" |
187 |
+HOMEPAGE="https://github.com/ueno/libkkc" |
188 |
+SRC_URI="https://github.com/ueno/${PN%-*}/releases/download/v0.3.5/${P}.tar.xz" |
189 |
+ |
190 |
+LICENSE="GPL-3+" |
191 |
+SLOT="0" |
192 |
+KEYWORDS="~amd64 ~x86" |
193 |
+ |
194 |
+RDEPEND="" |
195 |
+BDEPEND="${PYTHON_DEPS}" |
196 |
+ |
197 |
+PATCHES=( "${FILESDIR}"/${PN}-python3.patch ) |
198 |
|
199 |
diff --git a/app-i18n/libkkc-data/metadata.xml b/app-i18n/libkkc-data/metadata.xml |
200 |
new file mode 100644 |
201 |
index 00000000000..6853c12be89 |
202 |
--- /dev/null |
203 |
+++ b/app-i18n/libkkc-data/metadata.xml |
204 |
@@ -0,0 +1,11 @@ |
205 |
+<?xml version="1.0" encoding="UTF-8"?> |
206 |
+<!DOCTYPE pkgmetadata SYSTEM "https://www.gentoo.org/dtd/metadata.dtd"> |
207 |
+<pkgmetadata> |
208 |
+ <maintainer type="project"> |
209 |
+ <email>cjk@gentoo.org</email> |
210 |
+ <name>Cjk</name> |
211 |
+ </maintainer> |
212 |
+ <upstream> |
213 |
+ <remote-id type="github">ueno/libkkc</remote-id> |
214 |
+ </upstream> |
215 |
+</pkgmetadata> |