Gentoo Archives: gentoo-commits

From: Akinori Hattori <hattya@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] repo/gentoo:master commit in: app-i18n/libkkc-data/files/, app-i18n/libkkc-data/
Date: Sat, 09 Oct 2021 06:54:23
Message-Id: 1633762450.2ca0adba706af006c13a385fd0357fcb66fb5da0.hattya@gentoo
1 commit: 2ca0adba706af006c13a385fd0357fcb66fb5da0
2 Author: Akinori Hattori <hattya <AT> gentoo <DOT> org>
3 AuthorDate: Sat Oct 9 06:48:40 2021 +0000
4 Commit: Akinori Hattori <hattya <AT> gentoo <DOT> org>
5 CommitDate: Sat Oct 9 06:54:10 2021 +0000
6 URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=2ca0adba
7
8 app-i18n/libkkc-data: initial import
9
10 Package-Manager: Portage-3.0.20, Repoman-3.0.3
11 Signed-off-by: Akinori Hattori <hattya <AT> gentoo.org>
12
13 app-i18n/libkkc-data/Manifest | 1 +
14 .../libkkc-data/files/libkkc-data-python3.patch | 138 +++++++++++++++++++++
15 app-i18n/libkkc-data/libkkc-data-0.2.7.ebuild | 20 +++
16 app-i18n/libkkc-data/metadata.xml | 11 ++
17 4 files changed, 170 insertions(+)
18
19 diff --git a/app-i18n/libkkc-data/Manifest b/app-i18n/libkkc-data/Manifest
20 new file mode 100644
21 index 00000000000..c0fc91c8c73
22 --- /dev/null
23 +++ b/app-i18n/libkkc-data/Manifest
24 @@ -0,0 +1 @@
25 +DIST libkkc-data-0.2.7.tar.xz 22262552 BLAKE2B 2c735ee9fabf8f8f201591c9ed584cece22ddcd15da5f36b39bb422b1bce1dbcbcd66f71b5713e2dd4c5e2862b06b014c24a4a3db63c86ecee20519434da9261 SHA512 61c0cd8c0fa41ed8df49cac6709eebb245cc965d7e192b1ba945e95f2fc46aca8aa48c16e1977a12c157c55dab6b9f4c30f4905806725eca6e697b762eb7cbd7
26
27 diff --git a/app-i18n/libkkc-data/files/libkkc-data-python3.patch b/app-i18n/libkkc-data/files/libkkc-data-python3.patch
28 new file mode 100644
29 index 00000000000..46109a92dff
30 --- /dev/null
31 +++ b/app-i18n/libkkc-data/files/libkkc-data-python3.patch
32 @@ -0,0 +1,138 @@
33 +From ba1c1bd3eb86d887fc3689c3142732658071b5f7 Mon Sep 17 00:00:00 2001
34 +From: Takao Fujiwara <tfujiwar@××××××.com>
35 +Date: Mon, 30 Jul 2018 15:26:37 +0900
36 +Subject: [PATCH] build: Enable python3
37 +
38 +---
39 + tools/genfilter.py | 18 +++++++--------
40 + tools/sortlm.py | 23 ++++++++-----------
41 + 2 files changed, 19 insertions(+), 22 deletions(-)
42 +
43 +diff --git a/tools/genfilter.py b/tools/genfilter.py
44 +index 5ffab32..0c5f75a 100644
45 +--- a/tools/genfilter.py
46 ++++ b/tools/genfilter.py
47 +@@ -84,24 +84,24 @@ def __init__(self, infile, outfile, record_size):
48 +
49 + def generate(self):
50 + size = os.fstat(self.infile.fileno()).st_size
51 +- n = size / self.record_size
52 ++ n = size // self.record_size
53 + m = int(math.ceil(-n*math.log10(ERROR_RATE) /
54 + math.pow(math.log10(2), 2)))
55 +- m = (m/8 + 1)*8
56 ++ m = (m//8 + 1)*8
57 + inmem = mmap.mmap(self.infile.fileno(),
58 + size,
59 + access=mmap.ACCESS_READ)
60 +- outmem = bytearray(m/8)
61 +- for i in xrange(0, n):
62 ++ outmem = bytearray(m//8)
63 ++ for i in range(0, n):
64 + offset = i*self.record_size
65 + b0, b1 = struct.unpack("=LL", inmem[offset:offset+8])
66 +- for k in xrange(0, 4):
67 ++ for k in range(0, 4):
68 + h = murmur_hash3_32(b0, b1, k)
69 + h = int(h * (m / float(0xFFFFFFFF)))
70 +- outmem[h/8] |= (1 << (h%8))
71 ++ outmem[h//8] |= (1 << (h%8))
72 + inmem.close()
73 +- # Convert bytearray to str, for Python 2.6 compatibility.
74 +- self.outfile.write(str(outmem))
75 ++ # Convert bytearray to bytes, for Python 3 compatibility.
76 ++ self.outfile.write(bytes(outmem))
77 +
78 + if __name__ == '__main__':
79 + import sys
80 +@@ -110,7 +110,7 @@ def generate(self):
81 + parser = argparse.ArgumentParser(description='filter')
82 + parser.add_argument('infile', type=argparse.FileType('r'),
83 + help='input file')
84 +- parser.add_argument('outfile', type=argparse.FileType('w'),
85 ++ parser.add_argument('outfile', type=argparse.FileType('wb'),
86 + help='output file')
87 + parser.add_argument('record_size', type=int,
88 + help='record size')
89 +diff --git a/tools/sortlm.py b/tools/sortlm.py
90 +index a0dd8fe..40f0837 100644
91 +--- a/tools/sortlm.py
92 ++++ b/tools/sortlm.py
93 +@@ -40,10 +40,10 @@ def __init__(self, infile, output_prefix):
94 + self.__min_cost = 0.0
95 +
96 + def read(self):
97 +- print "reading N-grams"
98 ++ print("reading N-grams")
99 + self.__read_tries()
100 + self.__read_ngrams()
101 +- print "min cost = %lf" % self.__min_cost
102 ++ print("min cost = %lf" % self.__min_cost)
103 +
104 + def __read_tries(self):
105 + while True:
106 +@@ -58,7 +58,7 @@ def __read_tries(self):
107 + line = self.__infile.readline()
108 + if line == "":
109 + break
110 +- line = line.strip()
111 ++ line = line.strip('\n')
112 + if line == "":
113 + break
114 + match = self.__ngram_line_regex.match(line)
115 +@@ -89,7 +89,7 @@ def __read_ngrams(self):
116 + line = self.__infile.readline()
117 + if line == "":
118 + break
119 +- line = line.strip()
120 ++ line = line.strip('\n')
121 + if line == "":
122 + break
123 + match = self.__ngram_line_regex.match(line)
124 +@@ -125,14 +125,11 @@ def __write_ngrams(self):
125 + def quantize(cost, min_cost):
126 + return max(0, min(65535, int(cost * 65535 / min_cost)))
127 +
128 +- def cmp_header(a, b):
129 +- return cmp(a[0], b[0])
130 +-
131 +- print "writing 1-gram file"
132 ++ print("writing 1-gram file")
133 + unigram_offsets = {}
134 + unigram_file = open("%s.1gram" % self.__output_prefix, "wb")
135 + offset = 0
136 +- for ids, value in sorted(self.__ngram_entries[0].iteritems()):
137 ++ for ids, value in sorted(self.__ngram_entries[0].items()):
138 + unigram_offsets[ids[0]] = offset
139 + s = struct.pack("=HHH",
140 + quantize(value[0], self.__min_cost),
141 +@@ -143,13 +140,13 @@ def cmp_header(a, b):
142 + offset += 1
143 + unigram_file.close()
144 +
145 +- print "writing 2-gram file"
146 ++ print("writing 2-gram file")
147 + bigram_offsets = {}
148 + bigram_file = open("%s.2gram" % self.__output_prefix, "wb")
149 + keys = self.__ngram_entries[1].keys()
150 + items = [(struct.pack("=LL", ids[1], unigram_offsets[ids[0]]), ids) for ids in keys]
151 + offset = 0
152 +- for header, ids in sorted(items, cmp=cmp_header):
153 ++ for header, ids in sorted(items, key=lambda x: x[0]):
154 + value = self.__ngram_entries[1][ids]
155 + bigram_offsets[ids] = offset
156 + s = struct.pack("=HH",
157 +@@ -160,11 +157,11 @@ def cmp_header(a, b):
158 + bigram_file.close()
159 +
160 + if len(self.__ngram_entries[2]) > 0:
161 +- print "writing 3-gram file"
162 ++ print("writing 3-gram file")
163 + trigram_file = open("%s.3gram" % self.__output_prefix, "wb")
164 + keys = self.__ngram_entries[2].keys()
165 + items = [(struct.pack("=LL", ids[2], bigram_offsets[(ids[0], ids[1])]), ids) for ids in keys]
166 +- for header, ids in sorted(items, cmp=cmp_header):
167 ++ for header, ids in sorted(items, key=lambda x: x[0]):
168 + value = self.__ngram_entries[2][ids]
169 + s = struct.pack("=H",
170 + quantize(value[0], self.__min_cost))
171
172 diff --git a/app-i18n/libkkc-data/libkkc-data-0.2.7.ebuild b/app-i18n/libkkc-data/libkkc-data-0.2.7.ebuild
173 new file mode 100644
174 index 00000000000..eff62dbe39f
175 --- /dev/null
176 +++ b/app-i18n/libkkc-data/libkkc-data-0.2.7.ebuild
177 @@ -0,0 +1,20 @@
178 +# Copyright 1999-2021 Gentoo Authors
179 +# Distributed under the terms of the GNU General Public License v2
180 +
181 +EAPI="7"
182 +PYTHON_COMPAT=( python3_{7..10} )
183 +
184 +inherit python-any-r1
185 +
186 +DESCRIPTION="Language model data for libkkc"
187 +HOMEPAGE="https://github.com/ueno/libkkc"
188 +SRC_URI="https://github.com/ueno/${PN%-*}/releases/download/v0.3.5/${P}.tar.xz"
189 +
190 +LICENSE="GPL-3+"
191 +SLOT="0"
192 +KEYWORDS="~amd64 ~x86"
193 +
194 +RDEPEND=""
195 +BDEPEND="${PYTHON_DEPS}"
196 +
197 +PATCHES=( "${FILESDIR}"/${PN}-python3.patch )
198
199 diff --git a/app-i18n/libkkc-data/metadata.xml b/app-i18n/libkkc-data/metadata.xml
200 new file mode 100644
201 index 00000000000..6853c12be89
202 --- /dev/null
203 +++ b/app-i18n/libkkc-data/metadata.xml
204 @@ -0,0 +1,11 @@
205 +<?xml version="1.0" encoding="UTF-8"?>
206 +<!DOCTYPE pkgmetadata SYSTEM "https://www.gentoo.org/dtd/metadata.dtd">
207 +<pkgmetadata>
208 + <maintainer type="project">
209 + <email>cjk@g.o</email>
210 + <name>Cjk</name>
211 + </maintainer>
212 + <upstream>
213 + <remote-id type="github">ueno/libkkc</remote-id>
214 + </upstream>
215 +</pkgmetadata>