1 |
polynomial-c 10/08/01 12:29:03 |
2 |
|
3 |
Added: 1009-armv4t-nanojit.patch |
4 |
Log: |
5 |
New nanojit patch for armv4t, as requested by armin76 in bug #552624 |
6 |
|
7 |
Revision Changes Path |
8 |
1.1 src/patchsets/xulrunner/1.9.2/1009-armv4t-nanojit.patch |
9 |
|
10 |
file : http://sources.gentoo.org/viewvc.cgi/gentoo/src/patchsets/xulrunner/1.9.2/1009-armv4t-nanojit.patch?rev=1.1&view=markup |
11 |
plain: http://sources.gentoo.org/viewvc.cgi/gentoo/src/patchsets/xulrunner/1.9.2/1009-armv4t-nanojit.patch?rev=1.1&content-type=text/plain |
12 |
|
13 |
Index: 1009-armv4t-nanojit.patch |
14 |
=================================================================== |
15 |
diff --git a/js/src/nanojit/NativeARM.cpp b/js/src/nanojit/NativeARM.cpp |
16 |
index 9387191..a50898c 100644 |
17 |
--- a/js/src/nanojit/NativeARM.cpp |
18 |
+++ b/js/src/nanojit/NativeARM.cpp |
19 |
@@ -61,6 +61,8 @@ extern "C" void __clear_cache(void *BEG, void *END); |
20 |
|
21 |
#ifdef FEATURE_NANOJIT |
22 |
|
23 |
+#define ARM_ARCH_AT_LEAST(wanted) ((NJ_COMPILER_ARM_ARCH >= wanted) || (ARM_ARCH >= wanted)) |
24 |
+ |
25 |
namespace nanojit |
26 |
{ |
27 |
|
28 |
@@ -114,49 +116,50 @@ Assembler::CountLeadingZeroes(uint32_t data) |
29 |
{ |
30 |
uint32_t leading_zeroes; |
31 |
|
32 |
- // We can't do CLZ on anything earlier than ARMv5. Architectures as early |
33 |
- // as that aren't supported, but assert that we aren't running on one |
34 |
- // anyway. |
35 |
- // If ARMv4 support is required in the future for some reason, we can do a |
36 |
- // run-time check on config.arch and fall back to the C routine, but for |
37 |
- // now we can avoid the cost of the check as we don't intend to support |
38 |
- // ARMv4 anyway. |
39 |
- NanoAssert(ARM_ARCH >= 5); |
40 |
- |
41 |
#if defined(__ARMCC__) |
42 |
// ARMCC can do this with an intrinsic. |
43 |
leading_zeroes = __clz(data); |
44 |
|
45 |
-// current Android GCC compiler incorrectly refuses to compile 'clz' for armv5 |
46 |
-// (even though this is a legal instruction there). Since we currently only compile for ARMv5 |
47 |
-// for emulation, we don't care too much (but we DO care for ARMv6+ since those are "real" |
48 |
-// devices). |
49 |
-#elif defined(__GNUC__) && !(defined(ANDROID) && __ARM_ARCH__ <= 5) |
50 |
+ if (0) // We don't need the fallback |
51 |
+#elif defined(__GNUC__) |
52 |
// GCC can use inline assembler to insert a CLZ instruction. |
53 |
- __asm ( |
54 |
- " clz %0, %1 \n" |
55 |
- : "=r" (leading_zeroes) |
56 |
- : "r" (data) |
57 |
- ); |
58 |
+ // Targeting armv5t allows a toolchain with armv4t default target to |
59 |
+ // still build with clz. On Android gcc compiler, clz is not supported |
60 |
+ // with a target smaller than armv7. |
61 |
+ if (ARM_ARCH_AT_LEAST(5)) |
62 |
+ __asm ( |
63 |
+#if defined(ANDROID) && NJ_COMPILER_ARM_ARCH <= 5 |
64 |
+ ".arch armv7\n" |
65 |
+#elif (NJ_COMPILER_ARM_ARCH < 5) |
66 |
+ ".arch armv5t\n" |
67 |
+#endif |
68 |
+ " clz %0, %1 \n" |
69 |
+ : "=r" (leading_zeroes) |
70 |
+ : "r" (data) |
71 |
+ ); |
72 |
+ else |
73 |
#elif defined(WINCE) |
74 |
// WinCE can do this with an intrinsic. |
75 |
leading_zeroes = _CountLeadingZeros(data); |
76 |
-#else |
77 |
- // Other platforms must fall back to a C routine. This won't be as |
78 |
- // efficient as the CLZ instruction, but it is functional. |
79 |
- uint32_t try_shift; |
80 |
- |
81 |
- leading_zeroes = 0; |
82 |
- |
83 |
- // This loop does a bisection search rather than the obvious rotation loop. |
84 |
- // This should be faster, though it will still be no match for CLZ. |
85 |
- for (try_shift = 16; try_shift != 0; try_shift /= 2) { |
86 |
- uint32_t shift = leading_zeroes + try_shift; |
87 |
- if (((data << shift) >> shift) == data) { |
88 |
- leading_zeroes = shift; |
89 |
+ |
90 |
+ if (0) // We don't need the fallback |
91 |
+#endif |
92 |
+ { |
93 |
+ // Other platforms must fall back to a C routine. This won't be as |
94 |
+ // efficient as the CLZ instruction, but it is functional. |
95 |
+ uint32_t try_shift; |
96 |
+ |
97 |
+ leading_zeroes = 0; |
98 |
+ |
99 |
+ // This loop does a bisection search rather than the obvious rotation loop. |
100 |
+ // This should be faster, though it will still be no match for CLZ. |
101 |
+ for (try_shift = 16; try_shift != 0; try_shift /= 2) { |
102 |
+ uint32_t shift = leading_zeroes + try_shift; |
103 |
+ if (((data << shift) >> shift) == data) { |
104 |
+ leading_zeroes = shift; |
105 |
+ } |
106 |
} |
107 |
} |
108 |
-#endif |
109 |
|
110 |
// Assert that the operation worked! |
111 |
NanoAssert(((0xffffffff >> leading_zeroes) & data) == data); |
112 |
@@ -555,13 +558,18 @@ NIns* |
113 |
Assembler::genEpilogue() |
114 |
{ |
115 |
// On ARMv5+, loading directly to PC correctly handles interworking. |
116 |
- // Note that we don't support anything older than ARMv5. |
117 |
- NanoAssert(ARM_ARCH >= 5); |
118 |
- |
119 |
- RegisterMask savingMask = rmask(FP) | rmask(PC); |
120 |
+ // On ARMv4T, interworking is not handled properly; therefore, we pop |
121 |
+ // lr into ip and use bx ip to avoid that. |
122 |
+ if (ARM_ARCH_AT_LEAST(5)) { |
123 |
+ RegisterMask savingMask = rmask(FP) | rmask(PC); |
124 |
|
125 |
- POP_mask(savingMask); // regs |
126 |
+ POP_mask(savingMask); // regs |
127 |
+ } else { |
128 |
+ RegisterMask savingMask = rmask(FP) | rmask(IP); |
129 |
|
130 |
+ BX(IP); |
131 |
+ POP_mask(savingMask); // regs |
132 |
+ } |
133 |
return _nIns; |
134 |
} |
135 |
|
136 |
@@ -1502,7 +1510,7 @@ Assembler::BranchWithLink(NIns* addr) |
137 |
|
138 |
// ARMv5 and above can use BLX <imm> for branches within ±32MB of the |
139 |
// PC and BLX Rm for long branches. |
140 |
- if (isS24(offs>>2)) { |
141 |
+ if (isS24(offs>>2) && (ARM_ARCH_AT_LEAST(5))) { |
142 |
// the value we need to stick in the instruction; masked, |
143 |
// because it will be sign-extended back to 32 bits. |
144 |
intptr_t offs2 = (offs>>2) & 0xffffff; |
145 |
@@ -1519,7 +1527,6 @@ Assembler::BranchWithLink(NIns* addr) |
146 |
// We need to emit an ARMv5+ instruction, so assert that we have a |
147 |
// suitable processor. Note that we don't support ARMv4(T), but |
148 |
// this serves as a useful sanity check. |
149 |
- NanoAssert(ARM_ARCH >= 5); |
150 |
|
151 |
// The (pre-shifted) value of the "H" bit in the BLX encoding. |
152 |
uint32_t H = (offs & 0x2) << 23; |
153 |
@@ -1543,11 +1550,6 @@ Assembler::BranchWithLink(NIns* addr) |
154 |
inline void |
155 |
Assembler::BLX(Register addr, bool chk /* = true */) |
156 |
{ |
157 |
- // We need to emit an ARMv5+ instruction, so assert that we have a suitable |
158 |
- // processor. Note that we don't support ARMv4(T), but this serves as a |
159 |
- // useful sanity check. |
160 |
- NanoAssert(ARM_ARCH >= 5); |
161 |
- |
162 |
NanoAssert(IsGpReg(addr)); |
163 |
// There is a bug in the WinCE device emulator which stops "BLX LR" from |
164 |
// working as expected. Assert that we never do that! |
165 |
@@ -1558,8 +1560,15 @@ Assembler::BLX(Register addr, bool chk /* = true */) |
166 |
} |
167 |
|
168 |
// BLX IP |
169 |
- *(--_nIns) = (NIns)( (COND_AL) | (0x12<<20) | (0xFFF<<8) | (0x3<<4) | (addr) ); |
170 |
- asm_output("blx ip"); |
171 |
+ if (ARM_ARCH_AT_LEAST(5)) { |
172 |
+ *(--_nIns) = (NIns)( (COND_AL) | (0x12<<20) | (0xFFF<<8) | (0x3<<4) | (addr) ); |
173 |
+ asm_output("blx %s", gpn(addr)); |
174 |
+ } else { |
175 |
+ *(--_nIns) = (NIns)( (COND_AL) | (0x12fff1 << 4) | (addr) ); |
176 |
+ asm_output("bx %s", gpn(addr)); |
177 |
+ *(--_nIns) = (NIns)( (COND_AL) | (0x1A0 << 16) | (0xE << 12) | 0xF ); |
178 |
+ asm_output("mov lr, pc"); |
179 |
+ } |
180 |
} |
181 |
|
182 |
// Emit the code required to load a memory address into a register as follows: |
183 |
@@ -2177,7 +2186,7 @@ Assembler::asm_arith(LInsp ins) |
184 |
// common for (rr == ra) and is thus likely to be the most |
185 |
// efficient case; if ra is no longer used after this LIR |
186 |
// instruction, it is re-used for the result register (rr). |
187 |
- if ((ARM_ARCH > 5) || (rr != rb)) { |
188 |
+ if ((ARM_ARCH_AT_LEAST(6)) || (rr != rb)) { |
189 |
// Newer cores place no restrictions on the registers used in a |
190 |
// MUL instruction (compared to other arithmetic instructions). |
191 |
MUL(rr, rb, ra); |
192 |
diff --git a/js/src/nanojit/avmplus.h b/js/src/nanojit/avmplus.h |
193 |
index ffc0873..e86f22e 100644 |
194 |
--- a/js/src/nanojit/avmplus.h |
195 |
+++ b/js/src/nanojit/avmplus.h |
196 |
@@ -50,6 +50,8 @@ |
197 |
#include "jstypes.h" |
198 |
#include "jsstdint.h" |
199 |
|
200 |
+#include "njcpudetect.h" |
201 |
+ |
202 |
#ifdef AVMPLUS_ARM |
203 |
#define ARM_ARCH config.arch |
204 |
#define ARM_VFP config.vfp |
205 |
diff --git a/js/src/nanojit/njcpudetect.h b/js/src/nanojit/njcpudetect.h |
206 |
new file mode 100644 |
207 |
index 0000000..79ea90b |
208 |
--- /dev/null |
209 |
+++ b/js/src/nanojit/njcpudetect.h |
210 |
@@ -0,0 +1,110 @@ |
211 |
+/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */ |
212 |
+/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */ |
213 |
+/* ***** BEGIN LICENSE BLOCK ***** |
214 |
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1 |
215 |
+ * |
216 |
+ * The contents of this file are subject to the Mozilla Public License Version |
217 |
+ * 1.1 (the "License"); you may not use this file except in compliance with |
218 |
+ * the License. You may obtain a copy of the License at |
219 |
+ * http://www.mozilla.org/MPL/ |
220 |
+ * |
221 |
+ * Software distributed under the License is distributed on an "AS IS" basis, |
222 |
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License |
223 |
+ * for the specific language governing rights and limitations under the |
224 |
+ * License. |
225 |
+ * |
226 |
+ * The Original Code is [Open Source Virtual Machine]. |
227 |
+ * |
228 |
+ * The Initial Developer of the Original Code is |
229 |
+ * Adobe System Incorporated. |
230 |
+ * Portions created by the Initial Developer are Copyright (C) 2004-2007 |
231 |
+ * the Initial Developer. All Rights Reserved. |
232 |
+ * |
233 |
+ * Contributor(s): |
234 |
+ * Adobe AS3 Team |
235 |
+ * |
236 |
+ * Alternatively, the contents of this file may be used under the terms of |
237 |
+ * either the GNU General Public License Version 2 or later (the "GPL"), or |
238 |
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), |
239 |
+ * in which case the provisions of the GPL or the LGPL are applicable instead |
240 |
+ * of those above. If you wish to allow use of your version of this file only |
241 |
+ * under the terms of either the GPL or the LGPL, and not to allow others to |
242 |
+ * use your version of this file under the terms of the MPL, indicate your |
243 |
+ * decision by deleting the provisions above and replace them with the notice |
244 |
+ * and other provisions required by the GPL or the LGPL. If you do not delete |
245 |
+ * the provisions above, a recipient may use your version of this file under |
246 |
+ * the terms of any one of the MPL, the GPL or the LGPL. |
247 |
+ * |
248 |
+ * ***** END LICENSE BLOCK ***** */ |
249 |
+ |
250 |
+#ifndef __njcpudetect__ |
251 |
+#define __njcpudetect__ |
252 |
+ |
253 |
+/*** |
254 |
+ * Note: this file should not include *any* other files, nor should it wrap |
255 |
+ * itself in ifdef FEATURE_NANOJIT, nor should it do anything other than |
256 |
+ * define preprocessor symbols. |
257 |
+ */ |
258 |
+ |
259 |
+/*** |
260 |
+ * NJ_COMPILER_ARM_ARCH attempts to specify the minimum ARM architecture |
261 |
+ * that the C++ compiler has specified. Note that although Config::arm_arch |
262 |
+ * is initialized to this value by default, there is no requirement that they |
263 |
+ * be in sync. |
264 |
+ * |
265 |
+ * Note, this is done via #define so that downstream preprocessor usage can |
266 |
+ * examine it, but please don't attempt to redefine it. |
267 |
+ * |
268 |
+ * Note, this is deliberately not encased in "ifdef NANOJIT_ARM", as this file |
269 |
+ * may be included before that is defined. On non-ARM platforms we will hit the |
270 |
+ * "Unable to determine" case. |
271 |
+ */ |
272 |
+ |
273 |
+// GCC and RealView usually define __ARM_ARCH__ |
274 |
+#if defined(__ARM_ARCH__) |
275 |
+ |
276 |
+ #define NJ_COMPILER_ARM_ARCH __ARM_ARCH__ |
277 |
+ |
278 |
+// ok, try well-known GCC flags ( see http://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html ) |
279 |
+#elif defined(__ARM_ARCH_7__) || \ |
280 |
+ defined(__ARM_ARCH_7A__) || \ |
281 |
+ defined(__ARM_ARCH_7M__) || \ |
282 |
+ defined(__ARM_ARCH_7R__) || \ |
283 |
+ defined(_ARM_ARCH_7) |
284 |
+ |
285 |
+ #define NJ_COMPILER_ARM_ARCH 7 |
286 |
+ |
287 |
+#elif defined(__ARM_ARCH_6__) || \ |
288 |
+ defined(__ARM_ARCH_6J__) || \ |
289 |
+ defined(__ARM_ARCH_6T2__) || \ |
290 |
+ defined(__ARM_ARCH_6Z__) || \ |
291 |
+ defined(__ARM_ARCH_6ZK__) || \ |
292 |
+ defined(__ARM_ARCH_6M__) || \ |
293 |
+ defined(_ARM_ARCH_6) |
294 |
+ |
295 |
+ #define NJ_COMPILER_ARM_ARCH 6 |
296 |
+ |
297 |
+#elif defined(__ARM_ARCH_5__) || \ |
298 |
+ defined(__ARM_ARCH_5T__) || \ |
299 |
+ defined(__ARM_ARCH_5E__) || \ |
300 |
+ defined(__ARM_ARCH_5TE__) |
301 |
+ |
302 |
+ #define NJ_COMPILER_ARM_ARCH 5 |
303 |
+ |
304 |
+#elif defined(__ARM_ARCH_4__) |
305 |
+ |
306 |
+ #define NJ_COMPILER_ARM_ARCH 4 |
307 |
+ |
308 |
+// Visual C has its own mojo |
309 |
+#elif defined(_MSC_VER) && defined(_M_ARM) |
310 |
+ |
311 |
+ #define NJ_COMPILER_ARM_ARCH _M_ARM |
312 |
+ |
313 |
+#else |
314 |
+ |
315 |
+ // non-numeric value |
316 |
+ #define NJ_COMPILER_ARM_ARCH "Unable to determine valid NJ_COMPILER_ARM_ARCH (nanojit only supports ARMv5 or later)" |
317 |
+ |
318 |
+#endif |
319 |
+ |
320 |
+#endif // __njcpudetect__ |