#
# Copyright (c) 2004, 2024, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#

#include "defs.S.inc"

        # NOTE WELL!  The _Copy functions are called directly
        # from server-compiler-generated code via CallLeafNoFP,
        # which means that they *must* either not use floating
        # point or use it in the same manner as does the server
        # compiler.

        .text

# SpinPause: executes one spin-wait hint and returns 1.
# NOTE(review): the C prototype is not visible in this file; only the
# value left in %eax can be confirmed here.
#
# ABI:      32-bit x86 C convention, no stack arguments are read
# Out:      %eax = 1
# Clobbers: %eax
        .p2align 4,,15
DECLARE_FUNC(SpinPause):
        rep                           # "rep; nop" encodes as F3 90, the PAUSE hint
        nop
        movl     $1, %eax             # return value is always 1
        ret

# Support for void Copy::arrayof_conjoint_bytes(void* from,
#                                               void* to,
#                                               size_t count)
#
# Copies count bytes; the two ranges may overlap.  The copy runs from low
# to high addresses unless from < to <= from + count - 1, in which case it
# runs from high to low with the direction flag set.  Whole dwords are moved
# first (open-coded loop for up to 32 dwords, rep string move above that),
# then the remaining 0..3 bytes one at a time.
#
# ABI:      32-bit x86 C convention, arguments on the stack
# In:       4(%esp) = from, 8(%esp) = to, 12(%esp) = count (offsets at entry)
# Out:      none
# Clobbers: %eax, %ecx, %edx, flags (%esi and %edi are saved and restored;
#           the direction flag is cleared again on the high-to-low path)
        .p2align 4,,15
DECLARE_FUNC(_Copy_arrayof_conjoint_bytes):
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi            # to vs from; the leal below leaves flags alone
        leal     -1(%esi,%ecx),%eax   # from + count - 1
        jbe      acb_CopyRight        # to <= from: low-to-high copy is safe
        cmpl     %eax,%edi
        jbe      acb_CopyLeft         # to falls inside the source: go high-to-low
        # copy from low to high
acb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   # fewer than 4 bytes: byte loop only
1:      movl     %ecx,%eax            # keep the byte count for the suffix
        shrl     $2,%ecx              # dword count
        jz       4f
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        # copy aligned dwords
        subl     %esi,%edi            # %edi = to - from, so (%edi,%esi) follows to
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # turn %edi back into a destination pointer
        jmp      4f
        # copy aligned dwords
3:      rep;     smovl
4:      movl     %eax,%ecx
5:      andl     $3,%ecx              # 0..3 trailing bytes
        jz       7f
        # copy suffix
        xorl     %eax,%eax            # byte index into the suffix
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
acb_CopyLeft:
        std                           # string moves step downward from here on
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax            # keep the byte count for the suffix
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f                   # fewer than 4 bytes: byte loop only
1:      shrl     $2,%ecx              # dword count
        jz       4f
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        rep;     smovl
        jmp      4f
        .space 8                      # padding; presumably tunes placement of the loop
2:      subl     %esi,%edi            # %edi = to - from, so (%edi,%esi) follows to
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi            # turn %edi back into a destination pointer
4:      movl     %eax,%ecx
5:      andl     $3,%ecx              # 0..3 leading bytes still to copy
        jz       7f
        subl     %esi,%edi
        addl     $3,%esi              # from the dword slot to the highest uncopied byte
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld                           # restore the direction flag before returning
        popl     %edi
        popl     %esi
        ret

# Support for void Copy::conjoint_jshorts_atomic(void* from,
#                                                void* to,
#                                                size_t count)
#
# Copies count 16-bit elements; the two ranges may overlap.  The copy runs
# from low to high addresses unless from < to <= from + count*2 - 2, in
# which case it runs from high to low with the direction flag set.  On the
# low-to-high path one leading element is copied first when from is not
# dword aligned, then whole dwords, then at most one trailing element.
#
# ABI:      32-bit x86 C convention, arguments on the stack
# In:       4(%esp) = from, 8(%esp) = to, 12(%esp) = count (offsets at entry)
# Out:      none
# Clobbers: %eax, %ecx, %edx, flags (%esi and %edi are saved and restored;
#           the direction flag is cleared again on the high-to-low path)
        .p2align 4,,15
DECLARE_FUNC(_Copy_conjoint_jshorts_atomic):
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi            # to vs from; the leal below leaves flags alone
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      cs_CopyRight         # to <= from: low-to-high copy is safe
        cmpl     %eax,%edi
        jbe      cs_CopyLeft          # to falls inside the source: go high-to-low
        # copy from low to high
cs_CopyRight:
        # align source address at dword address boundary
        movl     %esi,%eax            # original from
        andl     $3,%eax              # either 0 or 2
        jz       1f                   # no prefix
        # copy prefix
        subl     $1,%ecx
        jl       5f                   # zero count
        movw     (%esi),%dx
        movw     %dx,(%edi)
        addl     %eax,%esi            # %eax == 2
        addl     %eax,%edi
1:      movl     %ecx,%eax            # word count less prefix
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f
        # copy aligned dwords
2:      subl     %esi,%edi            # %edi = to - from, so (%edi,%esi) follows to
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi            # turn %edi back into a destination pointer
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cs_CopyLeft:
        std                           # string moves step downward from here on
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax            # keep the word count for the suffix test
        subl     $2,%esi              # from + count*2 - 4
1:      sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi            # %edi = to - from, so (%edi,%esi) follows to
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # turn %edi back into a destination pointer
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        addl     $2,%esi              # from the dword slot to the one element left
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld                           # restore the direction flag before returning
        popl     %edi
        popl     %esi
        ret

# Support for void Copy::arrayof_conjoint_jshorts(void* from,
#                                                 void* to,
#                                                 size_t count)
#
# Copies count 16-bit elements; the two ranges may overlap.  The copy runs
# from low to high addresses unless from < to <= from + count*2 - 2, in
# which case it runs from high to low with the direction flag set.  Whole
# dwords are moved first (open-coded loop for up to 32 dwords, rep string
# move above that), then at most one remaining element.  Unlike
# _Copy_conjoint_jshorts_atomic there is no source-alignment prefix.
#
# ABI:      32-bit x86 C convention, arguments on the stack
# In:       4(%esp) = from, 8(%esp) = to, 12(%esp) = count (offsets at entry)
# Out:      none
# Clobbers: %eax, %ecx, %edx, flags (%esi and %edi are saved and restored;
#           the direction flag is cleared again on the high-to-low path)
        .p2align 4,,15
DECLARE_FUNC(_Copy_arrayof_conjoint_jshorts):
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi            # to vs from; the leal below leaves flags alone
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      acs_CopyRight        # to <= from: low-to-high copy is safe
        cmpl     %eax,%edi
        jbe      acs_CopyLeft         # to falls inside the source: go high-to-low
        # copy from low to high
acs_CopyRight:
        movl     %ecx,%eax            # word count
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f
        # copy aligned dwords
        .space 5                      # padding; presumably tunes placement of the loop
2:      subl     %esi,%edi            # %edi = to - from, so (%edi,%esi) follows to
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi            # turn %edi back into a destination pointer
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
acs_CopyLeft:
        std                           # string moves step downward from here on
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax            # keep the word count for the suffix test
        subl     $2,%esi              # from + count*2 - 4
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi            # %edi = to - from, so (%edi,%esi) follows to
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # turn %edi back into a destination pointer
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        addl     $2,%esi              # from the dword slot to the one element left
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld                           # restore the direction flag before returning
        popl     %edi
        popl     %esi
        ret

# Support for void Copy::conjoint_jints_atomic(void* from,
#                                              void* to,
#                                              size_t count)
# Equivalent to
#   arrayof_conjoint_jints
#
# Copies count 32-bit elements; the two ranges may overlap.  Both entry
# labels below name the same code.  The copy runs from low to high
# addresses unless from < to <= from + count*4 - 4, in which case it runs
# from high to low with the direction flag set.  Up to 32 elements are
# moved by an open-coded loop whose test (subl/jge) also covers a zero
# count; larger counts use a rep string move.
#
# ABI:      32-bit x86 C convention, arguments on the stack
# In:       4(%esp) = from, 8(%esp) = to, 12(%esp) = count (offsets at entry)
# Out:      none
# Clobbers: %eax, %ecx, %edx, flags (%esi and %edi are saved and restored;
#           the direction flag is cleared again on the high-to-low path)
        .p2align 4,,15
DECLARE_FUNC(_Copy_conjoint_jints_atomic):
DECLARE_FUNC(_Copy_arrayof_conjoint_jints):
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi            # to vs from; the leal below leaves flags alone
        leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
        jbe      ci_CopyRight         # to <= from: low-to-high copy is safe
        cmpl     %eax,%edi
        jbe      ci_CopyLeft          # to falls inside the source: go high-to-low
        # copy from low to high
ci_CopyRight:
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        rep;     smovl
        popl     %edi
        popl     %esi
        ret
        .space 10                     # padding; presumably tunes placement of the loop
2:      subl     %esi,%edi            # %edi = to - from, so (%edi,%esi) follows to
        jmp      4f                   # enter at the loop test
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
4:      subl     $1,%ecx
        jge      3b                   # signed test: loop body runs count times
        popl     %edi
        popl     %esi
        ret
        # copy from high to low
ci_CopyLeft:
        std                           # string moves step downward from here on
        leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
        cmpl     $32,%ecx
        ja       4f                   # > 32 dwords
        subl     %eax,%edi            # eax == from + count*4 - 4
        jmp      3f                   # enter at the loop test
        .p2align 4,,15
2:      movl     (%eax),%edx
        movl     %edx,(%edi,%eax,1)
        subl     $4,%eax
3:      subl     $1,%ecx
        jge      2b                   # signed test: loop body runs count times
        cld                           # restore the direction flag before returning
        popl     %edi
        popl     %esi
        ret
4:      movl     %eax,%esi            # from + count*4 - 4
        rep;     smovl
        cld                           # restore the direction flag before returning
        popl     %edi
        popl     %esi
        ret

# Support for void Copy::conjoint_jlongs_atomic(jlong* from,
#                                               jlong* to,
#                                               size_t count)
#
# 32-bit
#
# count treated as signed
/*
#
# if (from > to) {
#   while (--count >= 0) {
#     *to++ = *from++;
#   }
# } else {
#   while (--count >= 0) {
#     to[count] = from[count];
#   }
# }
*/
# Each element is moved by one fildll/fistpll pair, that is one 64-bit load
# and one 64-bit store through the x87 register stack; every load is popped
# by the matching store, so the x87 stack depth is unchanged on return.
#
# ABI:      32-bit x86 C convention, arguments on the stack
# In:       4(%esp) = from, 8(%esp) = to, 12(%esp) = count (offsets at entry)
# Out:      none
# Clobbers: %eax, %ecx, %edx, flags
        .p2align 4,,15
DECLARE_FUNC(_Copy_conjoint_jlongs_atomic):
        movl     4+8(%esp),%ecx       # count
        movl     4+0(%esp),%eax       # from
        movl     4+4(%esp),%edx       # to
        cmpl     %eax,%edx
        jae      cla_CopyLeft         # to >= from: copy from the highest index down
cla_CopyRight:
        subl     %eax,%edx            # %edx = to - from, so (%edx,%eax) follows to
        jmp      2f                   # enter at the loop test
        .p2align 4,,15
1:      fildll   (%eax)
        fistpll  (%edx,%eax,1)
        addl     $8,%eax
2:      subl     $1,%ecx
        jge      1b                   # signed test: loop body runs count times
        ret
        .p2align 4,,15
3:      fildll   (%eax,%ecx,8)        # from[count]
        fistpll  (%edx,%ecx,8)        # to[count]
cla_CopyLeft:
        subl     $1,%ecx              # --count; entry point is the loop test
        jge      3b
        ret

# Support for void Copy::arrayof_conjoint_jshorts(void* from,
#                                                 void* to,
#                                                 size_t count)
#
# MMX variant.  Copies count 16-bit elements; the two ranges may overlap.
# The copy runs from low to high addresses unless
# from < to <= from + count*2 - 2, in which case it runs from high to low
# with the direction flag set.  On the low-to-high path, 33 or more dwords
# are moved 64 bytes per iteration through %mm0..%mm2 after a single dword
# string move; fewer dwords, and whatever the MMX loop leaves over, go
# through the open-coded dword loop.  At most one element is copied last.
# The high-to-low path does not use MMX.
#
# ABI:      32-bit x86 C convention, arguments on the stack
# In:       4(%esp) = from, 8(%esp) = to, 12(%esp) = count (offsets at entry)
# Out:      none
# Clobbers: %eax, %ecx, %edx, %mm0, %mm1, %mm2, flags (%esi and %edi are
#           saved and restored; emms is executed after the MMX loop; the
#           direction flag is cleared again on the high-to-low path)
        .p2align 4,,15
DECLARE_FUNC(_mmx_Copy_arrayof_conjoint_jshorts):
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi            # to vs from; the leal below leaves flags alone
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      mmx_acs_CopyRight    # to <= from: low-to-high copy is safe
        cmpl     %eax,%edi
        jbe      mmx_acs_CopyLeft     # to falls inside the source: go high-to-low
        # copy from low to high
mmx_acs_CopyRight:
        movl     %ecx,%eax            # word count
        sarl     %ecx                 # dword count
        je       5f                   # no dwords to move
        cmpl     $33,%ecx
        jae      3f                   # >= 33 dwords: MMX path
1:      subl     %esi,%edi            # %edi = to - from, so (%edi,%esi) follows to
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # turn %edi back into a destination pointer
        jmp      5f
3:      smovl # align to 8 bytes, we know we are 4 byte aligned to start
        subl     $1,%ecx              # account for the dword just moved
4:      .p2align 4,,15
        movq     0(%esi),%mm0
        addl     $64,%edi             # stores below address the block via -64..-8
        movq     8(%esi),%mm1
        subl     $16,%ecx             # 16 dwords = 64 bytes per iteration
        movq     16(%esi),%mm2
        movq     %mm0,-64(%edi)
        movq     24(%esi),%mm0
        movq     %mm1,-56(%edi)
        movq     32(%esi),%mm1
        movq     %mm2,-48(%edi)
        movq     40(%esi),%mm2
        movq     %mm0,-40(%edi)
        movq     48(%esi),%mm0
        movq     %mm1,-32(%edi)
        movq     56(%esi),%mm1
        movq     %mm2,-24(%edi)
        movq     %mm0,-16(%edi)
        addl     $64,%esi
        movq     %mm1,-8(%edi)
        cmpl     $16,%ecx
        jge      4b                   # another full 64-byte block remains
        emms                          # leave MMX state before returning to x87 users
        testl    %ecx,%ecx
        ja       1b                   # leftover dwords: finish in the dword loop
5:      andl     $1,%eax              # suffix count
        je       7f                   # no suffix
6:      movw     (%esi),%dx
        movw     %dx,(%edi)
7:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
mmx_acs_CopyLeft:
        std                           # string moves step downward from here on
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax            # keep the word count for the suffix test
        subl     $2,%esi              # from + count*2 - 4
        sarl     %ecx                 # dword count
        je       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi            # %edi = to - from, so (%edi,%esi) follows to
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # turn %edi back into a destination pointer
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        je       6f                   # no suffix
        addl     $2,%esi              # from the dword slot to the one element left
        addl     $2,%edi
5:      movw     (%esi),%dx
        movw     %dx,(%edi)
6:      cld                           # restore the direction flag before returning
        popl     %edi
        popl     %esi
        ret


# Support for jlong Atomic::cmpxchg(volatile jlong* dest,
#                                   jlong compare_value,
#                                   jlong exchange_value)
#
# NOTE(review): the stack slots read below are laid out as
# (exchange_value, dest, compare_value), which is not the parameter order
# of the C++ signature quoted above; confirm against the C declaration
# of this stub.
#
# Atomically compares the 64-bit value at dest with compare_value and, when
# they are equal, stores exchange_value there (lock cmpxchg8b).
#
# ABI:      32-bit x86 C convention, arguments on the stack
# In:       4(%esp) = exchange_value, 12(%esp) = dest,
#           16(%esp) = compare_value (offsets at entry)
# Out:      %edx:%eax = value found at dest (cmpxchg8b loads it on a
#           mismatch and leaves compare_value in place on a match)
# Clobbers: %eax, %ecx, %edx, flags (%ebx and %edi are saved and restored)
        .p2align 4,,15
DECLARE_FUNC(_Atomic_cmpxchg_long):
                                   #  8(%esp) : return PC
        pushl    %ebx              #  4(%esp) : old %ebx
        pushl    %edi              #  0(%esp) : old %edi
        movl     12(%esp), %ebx    # 12(%esp) : exchange_value (low)
        movl     16(%esp), %ecx    # 16(%esp) : exchange_value (high)
        movl     24(%esp), %eax    # 24(%esp) : compare_value (low)
        movl     28(%esp), %edx    # 28(%esp) : compare_value (high)
        movl     20(%esp), %edi    # 20(%esp) : dest
        lock cmpxchg8b (%edi)      # compare %edx:%eax, store %ecx:%ebx if equal
        popl     %edi
        popl     %ebx
        ret


# Support for jlong Atomic::load and Atomic::store.
# void _Atomic_move_long(const volatile jlong* src, volatile jlong* dst)
#
# Moves one 64-bit value from src to dst with a single fildll load and a
# single fistpll store through the x87 register stack; the store pops the
# loaded value, so the x87 stack depth is unchanged on return.
#
# ABI:      32-bit x86 C convention, arguments on the stack
# In:       4(%esp) = src, 8(%esp) = dst (offsets at entry)
# Out:      none
# Clobbers: %eax
        .p2align 4,,15
DECLARE_FUNC(_Atomic_move_long):
        movl     4(%esp), %eax   # src
        fildll    (%eax)
        movl     8(%esp), %eax   # dest
        fistpll   (%eax)
        ret
519