/*
 * strcmp - compare two strings
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#include "../asmdefs.h"

/* Byte patterns replicated across all eight bytes of a 64-bit word.
   REP8_01 and REP8_7f feed the word-at-a-time NUL detection; REP8_80 is
   not referenced by the code visible in this file.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/* Parameters and result.  */
#define src1		x0	/* Address of the first string; advanced as it is read.  */
#define src2		x1	/* Address of the second string; advanced as it is read.  */
#define result		x0	/* Return value; shares x0 with src1.  */

/* Internal variables.  */
#define data1		x2	/* Bytes loaded from src1.  */
#define data1w		w2	/* 32-bit view of data1, used for byte loads.  */
#define data2		x3	/* Bytes loaded from src2.  */
#define data2w		w3	/* 32-bit view of data2, used for byte loads.  */
#define has_nul		x4	/* Non-zero if data1 contains a NUL byte.  */
#define diff		x5	/* data1 ^ data2; non-zero if they differ.  */
#define syndrome	x6	/* diff | has_nul; non-zero ends the word loop.  */
#define tmp1		x7	/* Scratch.  */
#define tmp2		x8	/* Scratch.  */
#define tmp3		x9	/* Scratch (big-endian tail only).  */
#define zeroones	x10	/* Holds REP8_01 for the whole function.  */
#define pos		x11	/* Leading-zero count of syndrome.  */

/* __strcmp_aarch64 - compare the NUL-terminated strings at src1 and src2.

   In:	 src1 (x0), src2 (x1) - addresses of the two strings.
   Out:	 result (x0) - zero when the strings are equal, negative when the
	 first mismatching byte of src1 (taken as unsigned) is the smaller
	 one, positive when it is the larger one.  On the word-at-a-time
	 paths only the sign is meaningful; the magnitude is not always the
	 plain byte difference.
   Uses: x2-x11 as scratch and the NZCV flags.  src1 and src2 are advanced
	 by the post-indexed loads.  The routine makes no calls and does not
	 touch the stack.

   Strategy:
   - If src1 and src2 have the same offset within an 8-byte word, compare
     8 bytes per iteration from 8-byte aligned addresses.  An unaligned
     start is handled by rounding both addresses down and forcing the
     bytes in front of the start point to 0xff in both words, so they can
     neither look like a NUL nor produce a difference.
   - Otherwise compare single bytes until src1 is 8-byte aligned, then
     compare 8 bytes per iteration with src2 unaligned, dropping back to
     single bytes whenever src2 lies in the last 8 bytes of a 4K page.  */

	/* Start of performance-critical section  -- one 64B cache line.  */
ENTRY (__strcmp_aarch64)
	eor	tmp1, src1, src2
	mov	zeroones, #REP8_01
	tst	tmp1, #7
	b.ne	L(misaligned8)		/* Offsets within a dword differ.  */
	ands	tmp1, src1, #7		/* tmp1 = bytes past alignment.  */
	b.ne	L(mutual_align)
	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	   can be done in parallel across the entire word.  */
L(loop_aligned):
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
L(start_realigned):
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
	orr	syndrome, diff, has_nul
	cbz	syndrome, L(loop_aligned)
	/* End of performance-critical section  -- one 64B cache line.  */

	/* Reached with a non-zero syndrome: data1/data2 hold the dword that
	   contains the first difference or the terminating NUL.  */
L(end):
#ifndef	__AARCH64EB__
	rev	syndrome, syndrome
	rev	data1, data1
	/* The MS-non-zero bit of the syndrome marks either the first bit
	   that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	clz	pos, syndrome
	rev	data2, data2
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#else
	/* For big-endian we cannot use the trick with the syndrome value
	   as carry-propagation can corrupt the upper bits if the trailing
	   bytes in the string contain 0x01.  */
	/* However, if there is no NUL byte in the dword, we can generate
	   the result directly.  We can't just subtract the bytes as the
	   MSB might be significant.  */
	cbnz	has_nul, 1f
	cmp	data1, data2
	cset	result, ne		/* result = 1 when the dwords differ.  */
	cneg	result, result, lo	/* Negate when data1 < data2 (unsigned).  */
	ret
1:
	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
	rev	tmp3, data1
	sub	tmp1, tmp3, zeroones
	orr	tmp2, tmp3, #REP8_7f
	bic	has_nul, tmp1, tmp2
	rev	has_nul, has_nul
	orr	syndrome, diff, has_nul
	clz	pos, syndrome
	/* The MS-non-zero bit of the syndrome marks either the first bit
	   that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#endif

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, #7
	bic	src2, src2, #7
	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
	ldr	data1, [src1], #8
	neg	tmp1, tmp1		/* Bits to alignment -64.  */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
#ifdef __AARCH64EB__
	/* Big-endian.  Early bytes are at MSB.  */
	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#else
	/* Little-endian.  Early bytes are at LSB.  */
	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#endif
	/* Set every bit of the bytes in front of the start point in both
	   words so they compare equal and cannot be taken for a NUL.  */
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond page boundary in
	   SRC2.  */
	tst	src1, #7
	b.eq	L(loop_misaligned)
L(do_misaligned):
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	/* Carry is set when data1w >= 1, i.e. the byte is not NUL.  In that
	   case ccmp compares the two bytes; otherwise it forces
	   NZCV = 0b0000, which reads as "not equal", so a NUL also exits.  */
	cmp	data1w, #1
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
	b.ne	L(done)
	tst	src1, #7
	b.ne	L(do_misaligned)

L(loop_misaligned):
	/* Test if we are within the last dword of the end of a 4K page.  If
	   yes then jump back to the misaligned loop to compare a byte at a
	   time.  */
	and	tmp1, src2, #0xff8
	eor	tmp1, tmp1, #0xff8
	cbz	tmp1, L(do_misaligned)
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8

	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
	orr	syndrome, diff, has_nul
	cbz	syndrome, L(loop_misaligned)
	b	L(end)

L(done):
	/* data1 and data2 each hold one zero-extended byte (from ldrb), so
	   this is the plain byte difference.  */
	sub	result, data1, data2
	ret

END (__strcmp_aarch64)