Gentoo Archives: gentoo-commits

From: "Lars Wendler (polynomial-c)" <polynomial-c@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] gentoo commit in src/patchsets/xulrunner/1.9.2: 1009-armv4t-nanojit.patch
Date: Sun, 01 Aug 2010 13:03:26
Message-Id: 20100801122903.DA2B32CE15@corvid.gentoo.org
1 polynomial-c 10/08/01 12:29:03
2
3 Added: 1009-armv4t-nanojit.patch
4 Log:
5 New nanojit patch for armv4t. As per request from armin76 in bug #552624
6
7 Revision Changes Path
8 1.1 src/patchsets/xulrunner/1.9.2/1009-armv4t-nanojit.patch
9
10 file : http://sources.gentoo.org/viewvc.cgi/gentoo/src/patchsets/xulrunner/1.9.2/1009-armv4t-nanojit.patch?rev=1.1&view=markup
11 plain: http://sources.gentoo.org/viewvc.cgi/gentoo/src/patchsets/xulrunner/1.9.2/1009-armv4t-nanojit.patch?rev=1.1&content-type=text/plain
12
13 Index: 1009-armv4t-nanojit.patch
14 ===================================================================
15 diff --git a/js/src/nanojit/NativeARM.cpp b/js/src/nanojit/NativeARM.cpp
16 index 9387191..a50898c 100644
17 --- a/js/src/nanojit/NativeARM.cpp
18 +++ b/js/src/nanojit/NativeARM.cpp
19 @@ -61,6 +61,8 @@ extern "C" void __clear_cache(void *BEG, void *END);
20
21 #ifdef FEATURE_NANOJIT
22
23 +#define ARM_ARCH_AT_LEAST(wanted) ((NJ_COMPILER_ARM_ARCH >= wanted) || (ARM_ARCH >= wanted))
24 +
25 namespace nanojit
26 {
27
28 @@ -114,49 +116,50 @@ Assembler::CountLeadingZeroes(uint32_t data)
29 {
30 uint32_t leading_zeroes;
31
32 - // We can't do CLZ on anything earlier than ARMv5. Architectures as early
33 - // as that aren't supported, but assert that we aren't running on one
34 - // anyway.
35 - // If ARMv4 support is required in the future for some reason, we can do a
36 - // run-time check on config.arch and fall back to the C routine, but for
37 - // now we can avoid the cost of the check as we don't intend to support
38 - // ARMv4 anyway.
39 - NanoAssert(ARM_ARCH >= 5);
40 -
41 #if defined(__ARMCC__)
42 // ARMCC can do this with an intrinsic.
43 leading_zeroes = __clz(data);
44
45 -// current Android GCC compiler incorrectly refuses to compile 'clz' for armv5
46 -// (even though this is a legal instruction there). Since we currently only compile for ARMv5
47 -// for emulation, we don't care too much (but we DO care for ARMv6+ since those are "real"
48 -// devices).
49 -#elif defined(__GNUC__) && !(defined(ANDROID) && __ARM_ARCH__ <= 5)
50 + if (0) // We don't need the fallback
51 +#elif defined(__GNUC__)
52 // GCC can use inline assembler to insert a CLZ instruction.
53 - __asm (
54 - " clz %0, %1 \n"
55 - : "=r" (leading_zeroes)
56 - : "r" (data)
57 - );
58 + // Targeting armv5t allows a toolchain whose default target is armv4t
59 + // to still build with clz. With the Android gcc compiler, clz is not
60 + // supported with a target lower than armv7.
61 + if (ARM_ARCH_AT_LEAST(5))
62 + __asm (
63 +#if defined(ANDROID) && NJ_COMPILER_ARM_ARCH <= 5
64 + ".arch armv7\n"
65 +#elif (NJ_COMPILER_ARM_ARCH < 5)
66 + ".arch armv5t\n"
67 +#endif
68 + " clz %0, %1 \n"
69 + : "=r" (leading_zeroes)
70 + : "r" (data)
71 + );
72 + else
73 #elif defined(WINCE)
74 // WinCE can do this with an intrinsic.
75 leading_zeroes = _CountLeadingZeros(data);
76 -#else
77 - // Other platforms must fall back to a C routine. This won't be as
78 - // efficient as the CLZ instruction, but it is functional.
79 - uint32_t try_shift;
80 -
81 - leading_zeroes = 0;
82 -
83 - // This loop does a bisection search rather than the obvious rotation loop.
84 - // This should be faster, though it will still be no match for CLZ.
85 - for (try_shift = 16; try_shift != 0; try_shift /= 2) {
86 - uint32_t shift = leading_zeroes + try_shift;
87 - if (((data << shift) >> shift) == data) {
88 - leading_zeroes = shift;
89 +
90 + if (0) // We don't need the fallback
91 +#endif
92 + {
93 + // Other platforms must fall back to a C routine. This won't be as
94 + // efficient as the CLZ instruction, but it is functional.
95 + uint32_t try_shift;
96 +
97 + leading_zeroes = 0;
98 +
99 + // This loop does a bisection search rather than the obvious rotation loop.
100 + // This should be faster, though it will still be no match for CLZ.
101 + for (try_shift = 16; try_shift != 0; try_shift /= 2) {
102 + uint32_t shift = leading_zeroes + try_shift;
103 + if (((data << shift) >> shift) == data) {
104 + leading_zeroes = shift;
105 + }
106 }
107 }
108 -#endif
109
110 // Assert that the operation worked!
111 NanoAssert(((0xffffffff >> leading_zeroes) & data) == data);
112 @@ -555,13 +558,18 @@ NIns*
113 Assembler::genEpilogue()
114 {
115 // On ARMv5+, loading directly to PC correctly handles interworking.
116 - // Note that we don't support anything older than ARMv5.
117 - NanoAssert(ARM_ARCH >= 5);
118 -
119 - RegisterMask savingMask = rmask(FP) | rmask(PC);
120 + // On ARMv4T, loading directly to PC does not handle interworking, so we
121 + // pop lr into ip and return with bx ip instead.
122 + if (ARM_ARCH_AT_LEAST(5)) {
123 + RegisterMask savingMask = rmask(FP) | rmask(PC);
124
125 - POP_mask(savingMask); // regs
126 + POP_mask(savingMask); // regs
127 + } else {
128 + RegisterMask savingMask = rmask(FP) | rmask(IP);
129
130 + BX(IP);
131 + POP_mask(savingMask); // regs
132 + }
133 return _nIns;
134 }
135
136 @@ -1502,7 +1510,7 @@ Assembler::BranchWithLink(NIns* addr)
137
138 // ARMv5 and above can use BLX <imm> for branches within ±32MB of the
139 // PC and BLX Rm for long branches.
140 - if (isS24(offs>>2)) {
141 + if (isS24(offs>>2) && (ARM_ARCH_AT_LEAST(5))) {
142 // the value we need to stick in the instruction; masked,
143 // because it will be sign-extended back to 32 bits.
144 intptr_t offs2 = (offs>>2) & 0xffffff;
145 @@ -1519,7 +1527,6 @@ Assembler::BranchWithLink(NIns* addr)
146 // We need to emit an ARMv5+ instruction, so assert that we have a
147 // suitable processor. Note that we don't support ARMv4(T), but
148 // this serves as a useful sanity check.
149 - NanoAssert(ARM_ARCH >= 5);
150
151 // The (pre-shifted) value of the "H" bit in the BLX encoding.
152 uint32_t H = (offs & 0x2) << 23;
153 @@ -1543,11 +1550,6 @@ Assembler::BranchWithLink(NIns* addr)
154 inline void
155 Assembler::BLX(Register addr, bool chk /* = true */)
156 {
157 - // We need to emit an ARMv5+ instruction, so assert that we have a suitable
158 - // processor. Note that we don't support ARMv4(T), but this serves as a
159 - // useful sanity check.
160 - NanoAssert(ARM_ARCH >= 5);
161 -
162 NanoAssert(IsGpReg(addr));
163 // There is a bug in the WinCE device emulator which stops "BLX LR" from
164 // working as expected. Assert that we never do that!
165 @@ -1558,8 +1560,15 @@ Assembler::BLX(Register addr, bool chk /* = true */)
166 }
167
168 // BLX IP
169 - *(--_nIns) = (NIns)( (COND_AL) | (0x12<<20) | (0xFFF<<8) | (0x3<<4) | (addr) );
170 - asm_output("blx ip");
171 + if (ARM_ARCH_AT_LEAST(5)) {
172 + *(--_nIns) = (NIns)( (COND_AL) | (0x12<<20) | (0xFFF<<8) | (0x3<<4) | (addr) );
173 + asm_output("blx %s", gpn(addr));
174 + } else {
175 + *(--_nIns) = (NIns)( (COND_AL) | (0x12fff1 << 4) | (addr) );
176 + asm_output("bx %s", gpn(addr));
177 + *(--_nIns) = (NIns)( (COND_AL) | (0x1A0 << 16) | (0xE << 12) | 0xF );
178 + asm_output("mov lr, pc");
179 + }
180 }
181
182 // Emit the code required to load a memory address into a register as follows:
183 @@ -2177,7 +2186,7 @@ Assembler::asm_arith(LInsp ins)
184 // common for (rr == ra) and is thus likely to be the most
185 // efficient case; if ra is no longer used after this LIR
186 // instruction, it is re-used for the result register (rr).
187 - if ((ARM_ARCH > 5) || (rr != rb)) {
188 + if ((ARM_ARCH_AT_LEAST(6)) || (rr != rb)) {
189 // Newer cores place no restrictions on the registers used in a
190 // MUL instruction (compared to other arithmetic instructions).
191 MUL(rr, rb, ra);
192 diff --git a/js/src/nanojit/avmplus.h b/js/src/nanojit/avmplus.h
193 index ffc0873..e86f22e 100644
194 --- a/js/src/nanojit/avmplus.h
195 +++ b/js/src/nanojit/avmplus.h
196 @@ -50,6 +50,8 @@
197 #include "jstypes.h"
198 #include "jsstdint.h"
199
200 +#include "njcpudetect.h"
201 +
202 #ifdef AVMPLUS_ARM
203 #define ARM_ARCH config.arch
204 #define ARM_VFP config.vfp
205 diff --git a/js/src/nanojit/njcpudetect.h b/js/src/nanojit/njcpudetect.h
206 new file mode 100644
207 index 0000000..79ea90b
208 --- /dev/null
209 +++ b/js/src/nanojit/njcpudetect.h
210 @@ -0,0 +1,110 @@
211 +/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
212 +/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
213 +/* ***** BEGIN LICENSE BLOCK *****
214 + * Version: MPL 1.1/GPL 2.0/LGPL 2.1
215 + *
216 + * The contents of this file are subject to the Mozilla Public License Version
217 + * 1.1 (the "License"); you may not use this file except in compliance with
218 + * the License. You may obtain a copy of the License at
219 + * http://www.mozilla.org/MPL/
220 + *
221 + * Software distributed under the License is distributed on an "AS IS" basis,
222 + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
223 + * for the specific language governing rights and limitations under the
224 + * License.
225 + *
226 + * The Original Code is [Open Source Virtual Machine].
227 + *
228 + * The Initial Developer of the Original Code is
229 + * Adobe System Incorporated.
230 + * Portions created by the Initial Developer are Copyright (C) 2004-2007
231 + * the Initial Developer. All Rights Reserved.
232 + *
233 + * Contributor(s):
234 + * Adobe AS3 Team
235 + *
236 + * Alternatively, the contents of this file may be used under the terms of
237 + * either the GNU General Public License Version 2 or later (the "GPL"), or
238 + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
239 + * in which case the provisions of the GPL or the LGPL are applicable instead
240 + * of those above. If you wish to allow use of your version of this file only
241 + * under the terms of either the GPL or the LGPL, and not to allow others to
242 + * use your version of this file under the terms of the MPL, indicate your
243 + * decision by deleting the provisions above and replace them with the notice
244 + * and other provisions required by the GPL or the LGPL. If you do not delete
245 + * the provisions above, a recipient may use your version of this file under
246 + * the terms of any one of the MPL, the GPL or the LGPL.
247 + *
248 + * ***** END LICENSE BLOCK ***** */
249 +
250 +#ifndef __njcpudetect__
251 +#define __njcpudetect__
252 +
253 +/***
254 + * Note: this file should not include *any* other files, nor should it wrap
255 + * itself in ifdef FEATURE_NANOJIT, nor should it do anything other than
256 + * define preprocessor symbols.
257 + */
258 +
259 +/***
260 + * NJ_COMPILER_ARM_ARCH attempts to identify the minimum ARM architecture
261 + * that the C++ compiler is targeting. Note that although Config::arm_arch
262 + * is initialized to this value by default, there is no requirement that they
263 + * be in sync.
264 + *
265 + * Note, this is done via #define so that downstream preprocessor usage can
266 + * examine it, but please don't attempt to redefine it.
267 + *
268 + * Note, this is deliberately not encased in "ifdef NANOJIT_ARM", as this file
269 + * may be included before that is defined. On non-ARM platforms we will hit the
270 + * "Unable to determine" case.
271 + */
272 +
273 +// GCC and RealView usually define __ARM_ARCH__
274 +#if defined(__ARM_ARCH__)
275 +
276 + #define NJ_COMPILER_ARM_ARCH __ARM_ARCH__
277 +
278 +// ok, try well-known GCC flags ( see http://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html )
279 +#elif defined(__ARM_ARCH_7__) || \
280 + defined(__ARM_ARCH_7A__) || \
281 + defined(__ARM_ARCH_7M__) || \
282 + defined(__ARM_ARCH_7R__) || \
283 + defined(_ARM_ARCH_7)
284 +
285 + #define NJ_COMPILER_ARM_ARCH 7
286 +
287 +#elif defined(__ARM_ARCH_6__) || \
288 + defined(__ARM_ARCH_6J__) || \
289 + defined(__ARM_ARCH_6T2__) || \
290 + defined(__ARM_ARCH_6Z__) || \
291 + defined(__ARM_ARCH_6ZK__) || \
292 + defined(__ARM_ARCH_6M__) || \
293 + defined(_ARM_ARCH_6)
294 +
295 + #define NJ_COMPILER_ARM_ARCH 6
296 +
297 +#elif defined(__ARM_ARCH_5__) || \
298 + defined(__ARM_ARCH_5T__) || \
299 + defined(__ARM_ARCH_5E__) || \
300 + defined(__ARM_ARCH_5TE__)
301 +
302 + #define NJ_COMPILER_ARM_ARCH 5
303 +
304 +#elif defined(__ARM_ARCH_4__)
305 +
306 + #define NJ_COMPILER_ARM_ARCH 4
307 +
308 +// Visual C has its own mojo
309 +#elif defined(_MSC_VER) && defined(_M_ARM)
310 +
311 + #define NJ_COMPILER_ARM_ARCH _M_ARM
312 +
313 +#else
314 +
315 + // non-numeric value
316 + #define NJ_COMPILER_ARM_ARCH "Unable to determine valid NJ_COMPILER_ARM_ARCH (nanojit only supports ARMv5 or later)"
317 +
318 +#endif
319 +
320 +#endif // __njcpudetect__