// Copyright (c) 2011, 2024, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

//----------Architecture Description Register Definitions----------------------
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
//
// Register Save Types:
//
//   NS = No-Save: The register allocator assumes that these registers
//        can be used without saving upon entry to the method, &
//        that they do not need to be saved at call sites.
//
//   SOC = Save-On-Call: The register allocator assumes that these registers
//        can be used without saving upon entry to the method,
//        but that they must be saved at call sites.
//
//   SOE = Save-On-Entry: The register allocator assumes that these registers
//        must be saved before using them upon entry to the
//        method, but they do not need to be saved at call
//        sites.
//
//   AS = Always-Save: The register allocator assumes that these registers
//        must be saved before using them upon entry to the
//        method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
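//
// For example, the first definition below,
//
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
//
// can be read as: XMM0 is save-on-call for both the register allocator and
// the C calling convention, is spilled/filled as a float (Op_RegF), and uses
// encoding 0 in generated opcodes (an illustrative reading of the existing
// definition, not an additional register).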
//
// XMM registers. 512-bit registers of 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters
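//
// For example, in the definitions that follow, XMM0 names word (a) of the
// first register and XMM0b through XMM0p name words (b) through (p); a Float
// lives in XMM0 alone, while a Double occupies the XMM0/XMM0b pair
// (an illustrative reading of the existing definitions, not new registers).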

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

// AVX3 Mask Registers.
reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p

alloc_class chunk2(K7, K7_H,
                   K6, K6_H,
                   K5, K5_H,
                   K4, K4_H,
                   K3, K3_H,
                   K2, K2_H,
                   K1, K1_H);

reg_class vectmask_reg(K1, K1_H,
                       K2, K2_H,
                       K3, K3_H,
                       K4, K4_H,
                       K5, K5_H,
                       K6, K6_H,
                       K7, K7_H);

reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);

// flags allocation class should be last.
alloc_class chunk3(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);
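
// An illustrative example (not part of this register block) of how such a
// class is typically consumed: an operand elsewhere in the x86 AD files
// constrains its allocation to a register class by name, along the lines of
//
//   operand rFlagsReg() %{
//     constraint(ALLOC_IN_RC(int_flags));
//     match(RegFlags);
//     format %{ "RFLAGS" %}
//     interface(REG_INTER);
//   %}
//
// (sketch only; the actual operand definitions live in the operand section,
// not here).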

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,

// Class for evex float registers
reg_class float_reg_evex(XMM0,

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
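
// Note (descriptive, not a directive): each reg_class_dynamic above resolves
// to one of the two static classes it names, based on the predicate in its
// %{ %} block. For example, float_reg behaves as float_reg_evex when
// VM_Version::supports_evex() is true and as float_reg_legacy otherwise.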

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0, XMM0b,

// Class for evex double registers
reg_class double_reg_evex(XMM0, XMM0b,

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 64bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,

// Class for all 64bit vector registers
reg_class vectord_reg_evex(XMM0, XMM0b,

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 128bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
XMM1, XMM1b, XMM1c, XMM1d,
XMM2, XMM2b, XMM2c, XMM2d,
XMM3, XMM3b, XMM3c, XMM3d,
XMM4, XMM4b, XMM4c, XMM4d,
XMM5, XMM5b, XMM5c, XMM5d,
XMM6, XMM6b, XMM6c, XMM6d,
XMM7, XMM7b, XMM7c, XMM7d
,XMM8, XMM8b, XMM8c, XMM8d,
XMM9, XMM9b, XMM9c, XMM9d,
XMM10, XMM10b, XMM10c, XMM10d,
XMM11, XMM11b, XMM11c, XMM11d,
XMM12, XMM12b, XMM12c, XMM12d,
XMM13, XMM13b, XMM13c, XMM13d,
XMM14, XMM14b, XMM14c, XMM14d,
XMM15, XMM15b, XMM15c, XMM15d

// Class for all 128bit vector registers
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
XMM1, XMM1b, XMM1c, XMM1d,
XMM2, XMM2b, XMM2c, XMM2d,
XMM3, XMM3b, XMM3c, XMM3d,
XMM4, XMM4b, XMM4c, XMM4d,
XMM5, XMM5b, XMM5c, XMM5d,
XMM6, XMM6b, XMM6c, XMM6d,
XMM7, XMM7b, XMM7c, XMM7d
,XMM8, XMM8b, XMM8c, XMM8d,
XMM9, XMM9b, XMM9c, XMM9d,
XMM10, XMM10b, XMM10c, XMM10d,
XMM11, XMM11b, XMM11c, XMM11d,
XMM12, XMM12b, XMM12c, XMM12d,
XMM13, XMM13b, XMM13c, XMM13d,
XMM14, XMM14b, XMM14c, XMM14d,
XMM15, XMM15b, XMM15c, XMM15d,
XMM16, XMM16b, XMM16c, XMM16d,
XMM17, XMM17b, XMM17c, XMM17d,
XMM18, XMM18b, XMM18c, XMM18d,
XMM19, XMM19b, XMM19c, XMM19d,
XMM20, XMM20b, XMM20c, XMM20d,
XMM21, XMM21b, XMM21c, XMM21d,
XMM22, XMM22b, XMM22c, XMM22d,
XMM23, XMM23b, XMM23c, XMM23d,
XMM24, XMM24b, XMM24c, XMM24d,
XMM25, XMM25b, XMM25c, XMM25d,
XMM26, XMM26b, XMM26c, XMM26d,
XMM27, XMM27b, XMM27c, XMM27d,
XMM28, XMM28b, XMM28c, XMM28d,
XMM29, XMM29b, XMM29c, XMM29d,
XMM30, XMM30b, XMM30c, XMM30d,
XMM31, XMM31b, XMM31c, XMM31d

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 256bit vector registers
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h

// Class for all 256bit vector registers
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1087
// Class for all 512bit vector registers
1088
reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1089
XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1090
XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1091
XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1092
XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1093
XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1094
XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1095
XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
1097
,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1098
XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1099
XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1100
XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1101
XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1102
XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1103
XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1104
XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
1105
,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1106
XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1107
XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1108
XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1109
XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1110
XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1111
XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1112
XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1113
XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1114
XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1115
XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1116
XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1117
XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1118
XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1119
XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1120
XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
1124
// Class for restricted 512bit vector registers
1125
reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1126
XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1127
XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1128
XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

#include "runtime/vm_version.hpp"
//--------------------------------------------------------------
//---< Used for optimization in Compile::shorten_branches >---
//--------------------------------------------------------------

// Size of call trampoline stub.
static uint size_call_trampoline() {
  return 0; // no call trampolines on this platform
}

// Number of relocations needed by a call trampoline stub.
static uint reloc_call_trampoline() {
  return 0; // no call trampolines on this platform
}
static int emit_exception_handler(C2_MacroAssembler *masm);
static int emit_deopt_handler(C2_MacroAssembler* masm);

static uint size_exception_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // The exception handler starts out as a jump and can be patched to
  // a call by deoptimization. (4932387)
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return NativeJump::instruction_size;
}

static uint size_deopt_handler() {
  // three 5 byte instructions plus one move for unreachable address.

static uint size_deopt_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // The exception handler starts out as a jump and can be patched to
  // a call by deoptimization. (4932387)
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return 5 + NativeJump::instruction_size; // pushl(); jmp;
}
inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch (bytes) {
    case 4: // fall-through
    case 8: // fall-through
    case 16: return Assembler::AVX_128bit;
    case 32: return Assembler::AVX_256bit;
    case 64: return Assembler::AVX_512bit;
    default: {
      ShouldNotReachHere();
      return Assembler::AVX_NoVec;
    }
  }
}
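
// Example of the mapping above: a 32-byte (256-bit) vector yields
// Assembler::AVX_256bit, which is what the AVX/EVEX forms of the macro
// assembler take as their 'vlen_enc' argument, e.g.
//   __ vandps(dst, src, ExternalAddress(float_signmask()), vector_length_encoding(32));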

static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
  return vector_length_encoding(Matcher::vector_length_in_bytes(n));
}

static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
  uint def_idx = use->operand_index(opnd);
  Node* def = use->in(def_idx);
  return vector_length_encoding(def);
}

static inline bool is_vector_popcount_predicate(BasicType bt) {
  return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
         (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
}

static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
  return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
         (VM_Version::supports_avx512vl() || vlen_bytes == 64);
}
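
// For instance, a leading-zero count on T_INT elements of a 32-byte vector
// needs AVX512CD plus AVX512VL; only full 64-byte (512-bit) vectors can rely
// on AVX512CD alone.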
Flag_intel_jcc_erratum    = Node::_last_flag << 1,
Flag_sets_carry_flag      = Node::_last_flag << 2,
Flag_sets_parity_flag     = Node::_last_flag << 3,
Flag_sets_zero_flag       = Node::_last_flag << 4,
Flag_sets_overflow_flag   = Node::_last_flag << 5,
Flag_sets_sign_flag       = Node::_last_flag << 6,
Flag_clears_carry_flag    = Node::_last_flag << 7,
Flag_clears_parity_flag   = Node::_last_flag << 8,
Flag_clears_zero_flag     = Node::_last_flag << 9,
Flag_clears_overflow_flag = Node::_last_flag << 10,
Flag_clears_sign_flag     = Node::_last_flag << 11,
_last_flag                = Flag_clears_sign_flag

#include "opto/addnode.hpp"
#include "c2_intelJccErratum_x86.hpp"

void PhaseOutput::pd_perform_mach_node_analysis() {
  if (VM_Version::has_intel_jcc_erratum()) {
    int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
    _buf_sizes._code += extra_padding;
  }
}

int MachNode::pd_alignment_required() const {
  if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
    // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
    return IntelJccErratum::largest_jcc_size() + 1;

int MachNode::compute_padding(int current_offset) const {
  if (flags() & Node::PD::Flag_intel_jcc_erratum) {
    Compile* C = Compile::current();
    PhaseOutput* output = C->output();
    Block* block = output->block();
    int index = output->index();
    return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) {
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  address base = __ start_a_stub(size_exception_handler());
  if (base == nullptr) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0; // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  address base = __ start_a_stub(size_deopt_handler());
  if (base == nullptr) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0; // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push "the_pc" on the stack without destroying any registers
  // as they all may be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr(), noreg);
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));

static Assembler::Width widthForType(BasicType bt) {
  if (bt == T_BYTE) {
    return Assembler::B;
  } else if (bt == T_SHORT) {
    return Assembler::W;
  } else if (bt == T_INT) {
    return Assembler::D;
  } else {
    assert(bt == T_LONG, "not a long: %s", type2name(bt));
    return Assembler::Q;
  }
}
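
// For example, widthForType(T_INT) is Assembler::D (a doubleword), letting a
// caller pick the byte/word/dword/qword form of a width-parameterized
// instruction from the element type alone.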

//=============================================================================

// Float masks come from different places depending on platform.
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif
static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
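
// These accessors are used as ExternalAddress operands in the rules below,
// e.g. the absF rule masks off the sign bit with
//   __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));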
1402
//=============================================================================
1403
bool Matcher::match_rule_supported(int opcode) {
1404
if (!has_match_rule(opcode)) {
1405
return false; // no match rule present
1407
const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
1410
case Op_StoreVectorScatter:
1417
if (!UsePopCountInstruction) {
1434
if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
1439
if (UseSSE < 4) { // only with SSE4_1 or AVX
1443
case Op_MulReductionVL:
1444
if (VM_Version::supports_avx512dq() == false) {
1448
case Op_AddReductionVL:
1449
if (UseSSE < 2) { // requires at least SSE2
1456
case Op_AddReductionVI:
1457
case Op_AndReductionV:
1458
case Op_OrReductionV:
1459
case Op_XorReductionV:
1460
if (UseSSE < 3) { // requires at least SSSE3
1464
case Op_VectorLoadShuffle:
1465
case Op_VectorRearrange:
1466
case Op_MulReductionVI:
1467
if (UseSSE < 4) { // requires at least SSE4
1471
case Op_IsInfiniteF:
1472
case Op_IsInfiniteD:
1473
if (!VM_Version::supports_avx512dq()) {
1479
case Op_VectorMaskCmp:
1480
case Op_VectorCastB2X:
1481
case Op_VectorCastS2X:
1482
case Op_VectorCastI2X:
1483
case Op_VectorCastL2X:
1484
case Op_VectorCastF2X:
1485
case Op_VectorCastD2X:
1486
case Op_VectorUCastB2X:
1487
case Op_VectorUCastS2X:
1488
case Op_VectorUCastI2X:
1489
case Op_VectorMaskCast:
1490
if (UseAVX < 1) { // enabled for AVX only
1494
case Op_PopulateIndex:
1495
if (!is_LP64 || (UseAVX < 2)) {
1500
if (UseAVX < 2) { // enabled for AVX2 only
1506
return false; // enabled for AVX3 only
1509
case Op_CompareAndSwapL:
1511
case Op_CompareAndSwapP:
1515
if (!UseSSE42Intrinsics) {
1519
case Op_StrIndexOfChar:
1520
if (!UseSSE42Intrinsics) {
1525
if (VM_Version::supports_on_spin_wait() == false) {
1533
case Op_VectorInsert:
1534
case Op_VectorLoadMask:
1535
case Op_VectorStoreMask:
1536
case Op_VectorBlend:
1546
if (UseAVX < 1) { // enabled for AVX only
1552
case Op_CacheWBPreSync:
1553
case Op_CacheWBPostSync:
1554
if (!VM_Version::supports_data_cache_line_flush()) {
1561
case Op_RoundDoubleMode:
1566
case Op_RoundDoubleModeV:
1567
if (VM_Version::supports_avx() == false) {
1568
return false; // 128bit vroundpd is not available
1571
case Op_LoadVectorGather:
1572
case Op_LoadVectorGatherMasked:
1585
case Op_MacroLogicV:
1586
if (UseAVX < 3 || !UseVectorMacroLogic) {
1591
case Op_VectorCmpMasked:
1592
case Op_VectorMaskGen:
1593
if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) {
1597
case Op_VectorMaskFirstTrue:
1598
case Op_VectorMaskLastTrue:
1599
case Op_VectorMaskTrueCount:
1600
case Op_VectorMaskToLong:
1601
if (!is_LP64 || UseAVX < 1) {
1613
if (UseAVX < 3 || !is_LP64) {
1616
if (!VM_Version::supports_avx512vl()) {
1621
case Op_AddReductionVF:
1622
case Op_AddReductionVD:
1623
case Op_MulReductionVF:
1624
case Op_MulReductionVD:
1625
if (UseSSE < 1) { // requires at least SSE
1629
case Op_MulAddVS2VI:
1638
case Op_CompressBits:
1639
if (!VM_Version::supports_bmi2() || (!is_LP64 && UseSSE < 2)) {
1644
if (!VM_Version::supports_bmi2() || (!is_LP64 && (UseSSE < 2 || !VM_Version::supports_bmi1()))) {
1659
if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
1674
// x86_32.ad has a special match rule for SqrtD.
1675
// Together with common x86 rules, this handles all UseSSE cases.
1680
if (!VM_Version::supports_float16()) {
1684
case Op_VectorCastF2HF:
1685
case Op_VectorCastHF2F:
1686
if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
1691
return true; // Match rules are supported by default.
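// Example: if one of the feature checks above fails (say UseSSE42Intrinsics is
// off for a string-intrinsic opcode), match_rule_supported() returns false and
// C2 does not generate that node; the vector-length-specific checks are then
// layered on top in match_rule_supported_vector() below.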
1694
//------------------------------------------------------------------------
1696
static inline bool is_pop_count_instr_target(BasicType bt) {
1697
return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
1698
(is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
1701
bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
1702
return match_rule_supported_vector(opcode, vlen, bt);
1705
// Identify extra cases that we might want to provide match rules for vector nodes and
1706
// other intrinsics guarded with vector length (vlen) and element type (bt).
1707
bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
1708
const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
1709
if (!match_rule_supported(opcode)) {
1712
// Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
1713
// * SSE2 supports 128bit vectors for all types;
1714
// * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
1715
// * AVX2 supports 256bit vectors for all types;
1716
// * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
1717
// * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
1718
// There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
1719
// And MaxVectorSize is taken into account as well.
1720
if (!vector_size_supported(bt, vlen)) {
1723
// Special cases which require vector length follow:
1724
// * implementation limitations
1725
// * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
1726
// * 128bit vroundpd instruction is present only in AVX1
1727
int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
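// For example, vlen == 8 elements of T_INT gives 8 * 4 * 8 = 256 bits, i.e. a
// YMM-sized vector, which is the granularity the checks below reason about.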
1731
if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
1732
return false; // 512bit vandps and vxorps are not available
1737
if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
1738
return false; // 512bit vpmullq, vandpd and vxorpd are not available
1741
case Op_RotateRightV:
1742
case Op_RotateLeftV:
1743
if (bt != T_INT && bt != T_LONG) {
1746
case Op_MacroLogicV:
1747
if (!VM_Version::supports_evex() ||
1748
((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
1753
case Op_VectorMaskGen:
1754
case Op_VectorCmpMasked:
1755
if (!is_LP64 || !VM_Version::supports_avx512bw()) {
1758
if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
1762
case Op_LoadVectorMasked:
1763
case Op_StoreVectorMasked:
1764
if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
1770
if (UseSSE < 4 && is_integral_type(bt)) {
1773
if ((bt == T_FLOAT || bt == T_DOUBLE)) {
1774
// Float/Double intrinsics are enabled for AVX family currently.
1778
if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
1783
case Op_CallLeafVector:
1784
if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
1788
case Op_AddReductionVI:
1789
if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
1793
case Op_AndReductionV:
1794
case Op_OrReductionV:
1795
case Op_XorReductionV:
1796
if (is_subword_type(bt) && (UseSSE < 4)) {
1800
if (bt == T_BYTE || bt == T_LONG) {
1806
case Op_VectorInsert:
1807
if (bt == T_LONG || bt == T_DOUBLE) {
1812
case Op_MinReductionV:
1813
case Op_MaxReductionV:
1814
if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
1816
} else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
1819
// Float/Double intrinsics enabled for AVX family.
1820
if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
1823
if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
1827
if (bt == T_BYTE || bt == T_LONG) {
1834
return false; // Implementation limitation
1835
} else if (size_in_bits < 32) {
1836
return false; // Implementation limitation
1839
case Op_VectorLoadShuffle:
1840
case Op_VectorRearrange:
1842
return false; // Implementation limitation due to how shuffle is loaded
1843
} else if (size_in_bits == 256 && UseAVX < 2) {
1844
return false; // Implementation limitation
1847
case Op_VectorLoadMask:
1848
case Op_VectorMaskCast:
1849
if (size_in_bits == 256 && UseAVX < 2) {
1850
return false; // Implementation limitation
1853
case Op_VectorStoreMask:
1855
return false; // Implementation limitation
1858
case Op_PopulateIndex:
1859
if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
1863
case Op_VectorCastB2X:
1864
case Op_VectorCastS2X:
1865
case Op_VectorCastI2X:
1866
if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
1870
case Op_VectorCastL2X:
1871
if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
1873
} else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
1877
case Op_VectorCastF2X: {
1878
// As per JLS section 5.1.3 narrowing conversion to sub-word types
1879
// happen after intermediate conversion to integer and special handling
1880
// code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
1881
int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
1882
if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
1887
case Op_VectorCastD2X:
1888
if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
1892
case Op_VectorCastF2HF:
1893
case Op_VectorCastHF2F:
1894
if (!VM_Version::supports_f16c() &&
1895
((!VM_Version::supports_evex() ||
1896
((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
1901
if (!VM_Version::supports_avx512dq()) {
1905
case Op_MulReductionVI:
1906
if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
1910
case Op_LoadVectorGatherMasked:
1911
if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
1914
if (is_subword_type(bt) &&
1916
((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
1917
(size_in_bits < 64) ||
1918
(bt == T_SHORT && !VM_Version::supports_bmi2()))) {
1922
case Op_StoreVectorScatterMasked:
1923
case Op_StoreVectorScatter:
1924
if (is_subword_type(bt)) {
1926
} else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
1930
case Op_LoadVectorGather:
1931
if (!is_subword_type(bt) && size_in_bits == 64) {
1934
if (is_subword_type(bt) && size_in_bits < 64) {
1939
if (!VM_Version::supports_evex()) {
1942
if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
1945
if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
1949
case Op_VectorMaskCmp:
1950
if (vlen < 2 || size_in_bits < 32) {
1955
if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
1961
if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
1964
if (!is_LP64 && !VM_Version::supports_avx512vl() && size_in_bits < 512) {
1967
if (size_in_bits < 128 ) {
1970
case Op_VectorLongToMask:
1971
if (UseAVX < 1 || !is_LP64) {
1974
if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
1985
case Op_PopCountVL: {
1986
if (!is_pop_count_instr_target(bt) &&
1987
(size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
1993
case Op_ReverseBytesV:
1998
case Op_CountTrailingZerosV:
1999
case Op_CountLeadingZerosV:
2005
return true; // Per default match rules are supported.
2008
bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
2009
// ADLC based match_rule_supported routine checks for the existence of pattern based
2010
// on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes
2011
// of their non-masked counterpart with mask edge being the differentiator.
2012
// This routine does a strict check on the existence of masked operation patterns
2013
// by returning a default false value for all the other opcodes apart from the
2014
// ones whose masked instruction patterns are defined in this file.
2015
if (!match_rule_supported_vector(opcode, vlen, bt)) {
2019
const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
2020
int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
2021
if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
2025
// Unary masked operations
2028
if(!VM_Version::supports_avx512bw()) {
2029
return false; // Implementation limitation
2035
// Ternary masked operations
2040
case Op_MacroLogicV:
2041
if(bt != T_INT && bt != T_LONG) {
2046
// Binary masked operations
2055
assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
2056
if (!VM_Version::supports_avx512bw()) {
2057
return false; // Implementation limitation
2062
assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
2063
if (!VM_Version::supports_avx512dq()) {
2064
return false; // Implementation limitation
2071
case Op_RotateRightV:
2072
case Op_RotateLeftV:
2073
if (bt != T_INT && bt != T_LONG) {
2074
return false; // Implementation limitation
2078
case Op_VectorLoadMask:
2079
assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
2080
if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
2106
case Op_LoadVectorMasked:
2107
case Op_StoreVectorMasked:
2108
case Op_LoadVectorGatherMasked:
2109
case Op_StoreVectorScatterMasked:
2114
if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
2115
return false; // Implementation limitation
2117
if (is_floating_point_type(bt)) {
2118
return false; // Implementation limitation
2122
case Op_VectorMaskCmp:
2123
if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
2124
return false; // Implementation limitation
2128
case Op_VectorRearrange:
2129
if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
2130
return false; // Implementation limitation
2132
if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
2133
return false; // Implementation limitation
2134
} else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
2135
return false; // Implementation limitation
2139
// Binary Logical operations
2143
if (vlen > 16 && !VM_Version::supports_avx512bw()) {
2144
return false; // Implementation limitation
2150
if (!is_pop_count_instr_target(bt)) {
2158
case Op_CountLeadingZerosV:
2159
if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
2167
bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
2171
MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
2172
assert(Matcher::is_generic_vector(generic_opnd), "not generic");
2173
bool legacy = (generic_opnd->opcode() == LEGVEC);
2174
if (!VM_Version::supports_avx512vlbwdq() && // KNL
2175
is_temp && !legacy && (ideal_reg == Op_VecZ)) {
2176
// Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
2177
return new legVecZOper();
2180
switch (ideal_reg) {
2181
case Op_VecS: return new legVecSOper();
2182
case Op_VecD: return new legVecDOper();
2183
case Op_VecX: return new legVecXOper();
2184
case Op_VecY: return new legVecYOper();
2185
case Op_VecZ: return new legVecZOper();
2188
switch (ideal_reg) {
2189
case Op_VecS: return new vecSOper();
2190
case Op_VecD: return new vecDOper();
2191
case Op_VecX: return new vecXOper();
2192
case Op_VecY: return new vecYOper();
2193
case Op_VecZ: return new vecZOper();
2196
ShouldNotReachHere();
2200
bool Matcher::is_reg2reg_move(MachNode* m) {
2201
switch (m->rule()) {
2202
case MoveVec2Leg_rule:
2203
case MoveLeg2Vec_rule:
2205
case MoveF2LEG_rule:
2207
case MoveLEG2F_rule:
2209
case MoveD2LEG_rule:
2211
case MoveLEG2D_rule:
2218
bool Matcher::is_generic_vector(MachOper* opnd) {
2219
switch (opnd->opcode()) {
2228
//------------------------------------------------------------------------
2230
const RegMask* Matcher::predicate_reg_mask(void) {
2231
return &_VECTMASK_REG_mask;
2234
const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) {
2235
return new TypeVectMask(elemTy, length);
2238
// Max vector size in bytes. 0 if not supported.
2239
int Matcher::vector_width_in_bytes(BasicType bt) {
2240
assert(is_java_primitive(bt), "only primitive type vectors");
2241
if (UseSSE < 2) return 0;
2242
// SSE2 supports 128bit vectors for all types.
2243
// AVX2 supports 256bit vectors for all types.
2244
// AVX2/EVEX supports 512bit vectors for all types.
2245
int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
2246
// AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
2247
if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
2248
size = (UseAVX > 2) ? 64 : 32;
2249
if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
2250
size = (VM_Version::supports_avx512bw()) ? 64 : 32;
2251
// Use flag to limit vector size.
2252
size = MIN2(size,(int)MaxVectorSize);
2253
// Minimum 2 values in vector (or 4 for bytes).
2257
if (size < 16) return 0;
2261
if (size < 8) return 0;
2264
if (size < 4) return 0;
2267
if (size < 4) return 0;
2270
if (size < 4) return 0;
2273
if (size < 4) return 0;
2276
ShouldNotReachHere();
2281
// Limits on vector size (number of elements) loaded into vector.
2282
int Matcher::max_vector_size(const BasicType bt) {
2283
return vector_width_in_bytes(bt)/type2aelembytes(bt);
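
// Worked example: with UseAVX == 2 and MaxVectorSize >= 32,
// vector_width_in_bytes(T_INT) is (1 << 2) * 8 = 32 bytes, so
// max_vector_size(T_INT) is 32 / 4 = 8 int lanes.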
2285
int Matcher::min_vector_size(const BasicType bt) {
2286
int max_size = max_vector_size(bt);
2287
// Min size which can be loaded into vector is 4 bytes.
2288
int size = (type2aelembytes(bt) == 1) ? 4 : 2;
2289
// Support for calling svml double64 vectors
2290
if (bt == T_DOUBLE) {
2293
return MIN2(size,max_size);
2296
int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
2297
// Limit the max vector size for auto vectorization to 256 bits (32 bytes)
2298
// by default on Cascade Lake
2299
if (VM_Version::is_default_intel_cascade_lake()) {
2300
return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
2302
return Matcher::max_vector_size(bt);
2305
int Matcher::scalable_vector_reg_size(const BasicType bt) {
2309
// Vector ideal reg corresponding to specified size in bytes
2310
uint Matcher::vector_ideal_reg(int size) {
2311
assert(MaxVectorSize >= size, "");
2313
case 4: return Op_VecS;
2314
case 8: return Op_VecD;
2315
case 16: return Op_VecX;
2316
case 32: return Op_VecY;
2317
case 64: return Op_VecZ;
2319
ShouldNotReachHere();
2323
// Check for shift by small constant as well
2324
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
2325
if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
2326
shift->in(2)->get_int() <= 3 &&
2327
// Are there other uses besides address expressions?
2328
!matcher->is_visited(shift)) {
2329
address_visited.set(shift->_idx); // Flag as address_visited
2330
mstack.push(shift->in(2), Matcher::Visit);
2331
Node *conv = shift->in(1);
2333
// Allow Matcher to match the rule which bypass
2334
// ConvI2L operation for an array index on LP64
2335
// if the index value is positive.
2336
if (conv->Opcode() == Op_ConvI2L &&
2337
conv->as_Type()->type()->is_long()->_lo >= 0 &&
2338
// Are there other uses besides address expressions?
2339
!matcher->is_visited(conv)) {
2340
address_visited.set(conv->_idx); // Flag as address_visited
2341
mstack.push(conv->in(1), Matcher::Pre_Visit);
2344
mstack.push(conv, Matcher::Pre_Visit);
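
// The net effect (illustrative): an int-indexed access like a[i] with 'i'
// known non-negative can match as  base + (ConvI2L(i) << 2) + disp  in a
// single addressing mode instead of materializing the shifted index in a
// separate register.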
2350
// This function identifies sub-graphs in which a 'load' node is
2351
// input to two different nodes, and such that it can be matched
2352
// with BMI instructions like blsi, blsr, etc.
2353
// Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
2354
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
2355
// refers to the same node.
2357
// Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
2358
// This is a temporary solution until we make DAGs expressible in ADL.
2359
template<typename ConType>
2360
class FusedPatternMatcher {
2365
static int match_next(Node* n, int next_op, int next_op_idx) {
2366
if (n->in(1) == nullptr || n->in(2) == nullptr) {
2370
if (next_op_idx == -1) { // n is commutative, try rotations
2371
if (n->in(1)->Opcode() == next_op) {
2373
} else if (n->in(2)->Opcode() == next_op) {
2377
assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
2378
if (n->in(next_op_idx)->Opcode() == next_op) {
2386
FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
2387
_op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
2389
bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
2390
int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
2391
typename ConType::NativeType con_value) {
2392
if (_op1_node->Opcode() != op1) {
2395
if (_mop_node->outcnt() > 2) {
2398
op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
2399
if (op1_op2_idx == -1) {
2402
// Memory operation must be the other edge
2403
int op1_mop_idx = (op1_op2_idx & 1) + 1;
2405
// Check that the mop node is really what we want
2406
if (_op1_node->in(op1_mop_idx) == _mop_node) {
2407
Node* op2_node = _op1_node->in(op1_op2_idx);
2408
if (op2_node->outcnt() > 1) {
2411
assert(op2_node->Opcode() == op2, "Should be");
2412
op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
2413
if (op2_con_idx == -1) {
2416
// Memory operation must be the other edge
2417
int op2_mop_idx = (op2_con_idx & 1) + 1;
2418
// Check that the memory operation is the same node
2419
if (op2_node->in(op2_mop_idx) == _mop_node) {
2420
// Now check the constant
2421
const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
2422
if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
2431
static bool is_bmi_pattern(Node* n, Node* m) {
2432
assert(UseBMI1Instructions, "sanity");
2433
if (n != nullptr && m != nullptr) {
2434
if (m->Opcode() == Op_LoadI) {
2435
FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
2436
return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
2437
bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
2438
bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
2439
} else if (m->Opcode() == Op_LoadL) {
2440
FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
2441
return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
2442
bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
2443
bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
2449
// Should the matcher clone input 'm' of node 'n'?
2450
bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
2451
// If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
2452
if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
2453
mstack.push(m, Visit);
2456
if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
2457
mstack.push(m, Visit); // m = ShiftCntV
2463
// Should the Matcher clone shifts on addressing modes, expecting them
2464
// to be subsumed into complex addressing expressions or compute them
2466
bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
2467
Node *off = m->in(AddPNode::Offset);
2468
if (off->is_Con()) {
2469
address_visited.test_set(m->_idx); // Flag as address_visited
2470
Node *adr = m->in(AddPNode::Address);
2472
// Intel can handle 2 adds in addressing mode
2473
// AtomicAdd is not an addressing expression.
2474
// Cheap to find it by looking for screwy base.
2475
if (adr->is_AddP() &&
2476
!adr->in(AddPNode::Base)->is_top() &&
2477
LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32
2478
// Are there other uses besides address expressions?
2480
address_visited.set(adr->_idx); // Flag as address_visited
2481
Node *shift = adr->in(AddPNode::Offset);
2482
if (!clone_shift(shift, this, mstack, address_visited)) {
2483
mstack.push(shift, Pre_Visit);
2485
mstack.push(adr->in(AddPNode::Address), Pre_Visit);
2486
mstack.push(adr->in(AddPNode::Base), Pre_Visit);
2488
mstack.push(adr, Pre_Visit);
2491
// Clone X+offset as it also folds into most addressing expressions
2492
mstack.push(off, Visit);
2493
mstack.push(m->in(AddPNode::Base), Pre_Visit);
2495
} else if (clone_shift(off, this, mstack, address_visited)) {
2496
address_visited.test_set(m->_idx); // Flag as address_visited
2497
mstack.push(m->in(AddPNode::Address), Pre_Visit);
2498
mstack.push(m->in(AddPNode::Base), Pre_Visit);
2504
static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
2507
return Assembler::eq;
2509
return Assembler::neq;
2512
return Assembler::le;
2515
return Assembler::nlt;
2518
return Assembler::lt;
2521
return Assembler::nle;
2522
default : ShouldNotReachHere(); return Assembler::_false;
2526
static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
2528
case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
2529
// As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
2530
case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
2531
case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
2532
case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
2533
case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
2534
case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
2535
default: ShouldNotReachHere(); return Assembler::FALSE_OS;
2539
// Helper methods for MachSpillCopyNode::implementation().
2540
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2541
int src_hi, int dst_hi, uint ireg, outputStream* st) {
2542
assert(ireg == Op_VecS || // 32bit vector
2543
((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
2544
(dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
2545
"no non-adjacent vector moves" );
2548
case Op_VecS: // copy whole register
2552
__ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
2554
if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
2555
__ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
2557
__ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
2563
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
2565
if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
2566
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
2568
__ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
2573
__ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
2576
ShouldNotReachHere();
2584
st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
2588
st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
2591
ShouldNotReachHere();
2597
void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2598
int stack_offset, int reg, uint ireg, outputStream* st) {
2603
__ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
2606
__ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
2610
__ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
2612
if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
2613
__ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
2615
__ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
2616
__ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
2622
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
2624
if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
2625
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
2627
__ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
2628
__ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
2633
__ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
2636
ShouldNotReachHere();
2641
__ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
2644
__ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
2648
__ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
2650
if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
2651
__ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
2654
__ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
2660
__ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
2662
if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
2663
__ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
2666
__ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
2671
__ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
2674
ShouldNotReachHere();
2682
st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2685
st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2688
st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2692
st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2695
ShouldNotReachHere();
2700
st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2703
st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2706
st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2710
st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2713
ShouldNotReachHere();
2721
static inline GrowableArray<jvalue>* vreplicate_imm(BasicType bt, T con, int len) {
2722
GrowableArray<jvalue>* val = new GrowableArray<jvalue>(len);
2725
case T_BYTE: ele.b = con; break;
2726
case T_SHORT: ele.s = con; break;
2727
case T_INT: ele.i = con; break;
2728
case T_LONG: ele.j = con; break;
2729
case T_FLOAT: ele.f = con; break;
2730
case T_DOUBLE: ele.d = con; break;
2731
default: ShouldNotReachHere();
2733
for (int i = 0; i < len; i++) {
2739
static inline jlong high_bit_set(BasicType bt) {
2741
case T_BYTE: return 0x8080808080808080;
2742
case T_SHORT: return 0x8000800080008000;
2743
case T_INT: return 0x8000000080000000;
2744
case T_LONG: return 0x8000000000000000;
2746
ShouldNotReachHere();
2752
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
2753
st->print("nop \t# %d bytes pad for loops and calls", _count);
2757
void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
2761
uint MachNopNode::size(PhaseRegAlloc*) const {
2766
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
2767
st->print("# breakpoint");
2771
void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
2775
uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
2776
return MachNode::size(ra_);
2783
enc_class call_epilog %{
2784
if (VerifyStackAtCalls) {
2785
// Check that stack depth is unchanged: find majik cookie on stack
2786
int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
2788
__ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
2789
__ jccb(Assembler::equal, L);
2790
// Die if stack mismatch
2798
// Operands for bound floating pointer register arguments
2800
constraint(ALLOC_IN_RC(xmm0_reg));
2803
interface(REG_INTER);
2806
//----------OPERANDS-----------------------------------------------------------
2807
// Operand definitions must precede instruction definitions for correct parsing
2808
// in the ADLC because operands constitute user defined types which are used in
2809
// instruction definitions.
2813
// Dummy generic vector class. Should be used for all vector operands.
2814
// Replaced with vec[SDXYZ] during post-selection pass.
2816
constraint(ALLOC_IN_RC(dynamic));
2824
interface(REG_INTER);
2827
// Dummy generic legacy vector class. Should be used for all legacy vector operands.
2828
// Replaced with legVec[SDXYZ] during post-selection cleanup.
2829
// Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
2830
// runtime code generation via reg_class_dynamic.
2832
constraint(ALLOC_IN_RC(dynamic));
2840
interface(REG_INTER);
2843
// Replaces vec during post-selection cleanup. See above.
2845
constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
2849
interface(REG_INTER);
2852
// Replaces legVec during post-selection cleanup. See above.
2854
constraint(ALLOC_IN_RC(vectors_reg_legacy));
2858
interface(REG_INTER);
2861
// Replaces vec during post-selection cleanup. See above.
2863
constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
2867
interface(REG_INTER);
2870
// Replaces legVec during post-selection cleanup. See above.
2872
constraint(ALLOC_IN_RC(vectord_reg_legacy));
2876
interface(REG_INTER);
2879
// Replaces vec during post-selection cleanup. See above.
2881
constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
2885
interface(REG_INTER);
2888
// Replaces legVec during post-selection cleanup. See above.
2890
constraint(ALLOC_IN_RC(vectorx_reg_legacy));
2894
interface(REG_INTER);
2897
// Replaces vec during post-selection cleanup. See above.
2899
constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
2903
interface(REG_INTER);
2906
// Replaces legVec during post-selection cleanup. See above.
2908
constraint(ALLOC_IN_RC(vectory_reg_legacy));
2912
interface(REG_INTER);
2915
// Replaces vec during post-selection cleanup. See above.
2917
constraint(ALLOC_IN_RC(vectorz_reg));
2921
interface(REG_INTER);
2924
// Replaces legVec during post-selection cleanup. See above.
2926
constraint(ALLOC_IN_RC(vectorz_reg_legacy));
2930
interface(REG_INTER);
2933
// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
2935
// ============================================================================
2937
instruct ShouldNotReachHere() %{
2939
format %{ "stop\t# ShouldNotReachHere" %}
2941
if (is_reachable()) {
2942
__ stop(_halt_reason);
2945
ins_pipe(pipe_slow);
2948
// ============================================================================
2950
instruct addF_reg(regF dst, regF src) %{
2951
predicate((UseSSE>=1) && (UseAVX == 0));
2952
match(Set dst (AddF dst src));
2954
format %{ "addss $dst, $src" %}
2957
__ addss($dst$$XMMRegister, $src$$XMMRegister);
2959
ins_pipe(pipe_slow);
2962
instruct addF_mem(regF dst, memory src) %{
2963
predicate((UseSSE>=1) && (UseAVX == 0));
2964
match(Set dst (AddF dst (LoadF src)));
2966
format %{ "addss $dst, $src" %}
2969
__ addss($dst$$XMMRegister, $src$$Address);
2971
ins_pipe(pipe_slow);
2974
instruct addF_imm(regF dst, immF con) %{
2975
predicate((UseSSE>=1) && (UseAVX == 0));
2976
match(Set dst (AddF dst con));
2977
format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2980
__ addss($dst$$XMMRegister, $constantaddress($con));
2982
ins_pipe(pipe_slow);
2985
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
2986
predicate(UseAVX > 0);
2987
match(Set dst (AddF src1 src2));
2989
format %{ "vaddss $dst, $src1, $src2" %}
2992
__ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2994
ins_pipe(pipe_slow);
2997
instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
2998
predicate(UseAVX > 0);
2999
match(Set dst (AddF src1 (LoadF src2)));
3001
format %{ "vaddss $dst, $src1, $src2" %}
3004
__ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3006
ins_pipe(pipe_slow);
3009
instruct addF_reg_imm(regF dst, regF src, immF con) %{
3010
predicate(UseAVX > 0);
3011
match(Set dst (AddF src con));
3013
format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
3016
__ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3018
ins_pipe(pipe_slow);
3021
instruct addD_reg(regD dst, regD src) %{
3022
predicate((UseSSE>=2) && (UseAVX == 0));
3023
match(Set dst (AddD dst src));
3025
format %{ "addsd $dst, $src" %}
3028
__ addsd($dst$$XMMRegister, $src$$XMMRegister);
3030
ins_pipe(pipe_slow);
3033
instruct addD_mem(regD dst, memory src) %{
3034
predicate((UseSSE>=2) && (UseAVX == 0));
3035
match(Set dst (AddD dst (LoadD src)));
3037
format %{ "addsd $dst, $src" %}
3040
__ addsd($dst$$XMMRegister, $src$$Address);
3042
ins_pipe(pipe_slow);
3045
instruct addD_imm(regD dst, immD con) %{
3046
predicate((UseSSE>=2) && (UseAVX == 0));
3047
match(Set dst (AddD dst con));
3048
format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
3051
__ addsd($dst$$XMMRegister, $constantaddress($con));
3053
ins_pipe(pipe_slow);
3056
instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
3057
predicate(UseAVX > 0);
3058
match(Set dst (AddD src1 src2));
3060
format %{ "vaddsd $dst, $src1, $src2" %}
3063
__ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3065
ins_pipe(pipe_slow);
3068
instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
3069
predicate(UseAVX > 0);
3070
match(Set dst (AddD src1 (LoadD src2)));
3072
format %{ "vaddsd $dst, $src1, $src2" %}
3075
__ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3077
ins_pipe(pipe_slow);
3080
instruct addD_reg_imm(regD dst, regD src, immD con) %{
3081
predicate(UseAVX > 0);
3082
match(Set dst (AddD src con));
3084
format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
3087
__ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3089
ins_pipe(pipe_slow);
3092
instruct subF_reg(regF dst, regF src) %{
3093
predicate((UseSSE>=1) && (UseAVX == 0));
3094
match(Set dst (SubF dst src));
3096
format %{ "subss $dst, $src" %}
3099
__ subss($dst$$XMMRegister, $src$$XMMRegister);
3101
ins_pipe(pipe_slow);
3104
instruct subF_mem(regF dst, memory src) %{
3105
predicate((UseSSE>=1) && (UseAVX == 0));
3106
match(Set dst (SubF dst (LoadF src)));
3108
format %{ "subss $dst, $src" %}
3111
__ subss($dst$$XMMRegister, $src$$Address);
3113
ins_pipe(pipe_slow);
3116
instruct subF_imm(regF dst, immF con) %{
3117
predicate((UseSSE>=1) && (UseAVX == 0));
3118
match(Set dst (SubF dst con));
3119
format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
3122
__ subss($dst$$XMMRegister, $constantaddress($con));
3124
ins_pipe(pipe_slow);
3127
instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
3128
predicate(UseAVX > 0);
3129
match(Set dst (SubF src1 src2));
3131
format %{ "vsubss $dst, $src1, $src2" %}
3134
__ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3136
ins_pipe(pipe_slow);
3139
instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
3140
predicate(UseAVX > 0);
3141
match(Set dst (SubF src1 (LoadF src2)));
3143
format %{ "vsubss $dst, $src1, $src2" %}
3146
__ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3148
ins_pipe(pipe_slow);
3151
instruct subF_reg_imm(regF dst, regF src, immF con) %{
3152
predicate(UseAVX > 0);
3153
match(Set dst (SubF src con));
3155
format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
3158
__ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3160
ins_pipe(pipe_slow);
3163
instruct subD_reg(regD dst, regD src) %{
3164
predicate((UseSSE>=2) && (UseAVX == 0));
3165
match(Set dst (SubD dst src));
3167
format %{ "subsd $dst, $src" %}
3170
__ subsd($dst$$XMMRegister, $src$$XMMRegister);
3172
ins_pipe(pipe_slow);
3175
instruct subD_mem(regD dst, memory src) %{
3176
predicate((UseSSE>=2) && (UseAVX == 0));
3177
match(Set dst (SubD dst (LoadD src)));
3179
format %{ "subsd $dst, $src" %}
3182
__ subsd($dst$$XMMRegister, $src$$Address);
3184
ins_pipe(pipe_slow);
3187
instruct subD_imm(regD dst, immD con) %{
3188
predicate((UseSSE>=2) && (UseAVX == 0));
3189
match(Set dst (SubD dst con));
3190
format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
3193
__ subsd($dst$$XMMRegister, $constantaddress($con));
3195
ins_pipe(pipe_slow);
3198
instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
3199
predicate(UseAVX > 0);
3200
match(Set dst (SubD src1 src2));
3202
format %{ "vsubsd $dst, $src1, $src2" %}
3205
__ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3207
ins_pipe(pipe_slow);
3210
instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
3211
predicate(UseAVX > 0);
3212
match(Set dst (SubD src1 (LoadD src2)));
3214
format %{ "vsubsd $dst, $src1, $src2" %}
3217
__ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3219
ins_pipe(pipe_slow);
3222
instruct subD_reg_imm(regD dst, regD src, immD con) %{
3223
predicate(UseAVX > 0);
3224
match(Set dst (SubD src con));
3226
format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
3229
__ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3231
ins_pipe(pipe_slow);
3234
instruct mulF_reg(regF dst, regF src) %{
3235
predicate((UseSSE>=1) && (UseAVX == 0));
3236
match(Set dst (MulF dst src));
3238
format %{ "mulss $dst, $src" %}
3241
__ mulss($dst$$XMMRegister, $src$$XMMRegister);
3243
ins_pipe(pipe_slow);
3246
instruct mulF_mem(regF dst, memory src) %{
3247
predicate((UseSSE>=1) && (UseAVX == 0));
3248
match(Set dst (MulF dst (LoadF src)));
3250
format %{ "mulss $dst, $src" %}
3253
__ mulss($dst$$XMMRegister, $src$$Address);
3255
ins_pipe(pipe_slow);
3258
instruct mulF_imm(regF dst, immF con) %{
3259
predicate((UseSSE>=1) && (UseAVX == 0));
3260
match(Set dst (MulF dst con));
3261
format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
3264
__ mulss($dst$$XMMRegister, $constantaddress($con));
3266
ins_pipe(pipe_slow);
3269
instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
3270
predicate(UseAVX > 0);
3271
match(Set dst (MulF src1 src2));
3273
format %{ "vmulss $dst, $src1, $src2" %}
3276
__ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3278
ins_pipe(pipe_slow);
3281
instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
3282
predicate(UseAVX > 0);
3283
match(Set dst (MulF src1 (LoadF src2)));
3285
format %{ "vmulss $dst, $src1, $src2" %}
3288
__ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3290
ins_pipe(pipe_slow);
3293
instruct mulF_reg_imm(regF dst, regF src, immF con) %{
3294
predicate(UseAVX > 0);
3295
match(Set dst (MulF src con));
3297
format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
3300
__ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3302
ins_pipe(pipe_slow);
3305
instruct mulD_reg(regD dst, regD src) %{
3306
predicate((UseSSE>=2) && (UseAVX == 0));
3307
match(Set dst (MulD dst src));
3309
format %{ "mulsd $dst, $src" %}
3312
__ mulsd($dst$$XMMRegister, $src$$XMMRegister);
3314
ins_pipe(pipe_slow);
3317
instruct mulD_mem(regD dst, memory src) %{
3318
predicate((UseSSE>=2) && (UseAVX == 0));
3319
match(Set dst (MulD dst (LoadD src)));
3321
format %{ "mulsd $dst, $src" %}
3324
__ mulsd($dst$$XMMRegister, $src$$Address);
3326
ins_pipe(pipe_slow);
3329
instruct mulD_imm(regD dst, immD con) %{
3330
predicate((UseSSE>=2) && (UseAVX == 0));
3331
match(Set dst (MulD dst con));
3332
format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
3335
__ mulsd($dst$$XMMRegister, $constantaddress($con));
3337
ins_pipe(pipe_slow);
3340
instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
3341
predicate(UseAVX > 0);
3342
match(Set dst (MulD src1 src2));
3344
format %{ "vmulsd $dst, $src1, $src2" %}
3347
__ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3349
ins_pipe(pipe_slow);
3352
instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
3353
predicate(UseAVX > 0);
3354
match(Set dst (MulD src1 (LoadD src2)));
3356
format %{ "vmulsd $dst, $src1, $src2" %}
3359
__ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3361
ins_pipe(pipe_slow);
3364
instruct mulD_reg_imm(regD dst, regD src, immD con) %{
3365
predicate(UseAVX > 0);
3366
match(Set dst (MulD src con));
3368
format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
3371
__ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3373
ins_pipe(pipe_slow);
3376
instruct divF_reg(regF dst, regF src) %{
3377
predicate((UseSSE>=1) && (UseAVX == 0));
3378
match(Set dst (DivF dst src));
3380
format %{ "divss $dst, $src" %}
3383
__ divss($dst$$XMMRegister, $src$$XMMRegister);
3385
ins_pipe(pipe_slow);
3388
instruct divF_mem(regF dst, memory src) %{
3389
predicate((UseSSE>=1) && (UseAVX == 0));
3390
match(Set dst (DivF dst (LoadF src)));
3392
format %{ "divss $dst, $src" %}
3395
__ divss($dst$$XMMRegister, $src$$Address);
3397
ins_pipe(pipe_slow);
3400
instruct divF_imm(regF dst, immF con) %{
3401
predicate((UseSSE>=1) && (UseAVX == 0));
3402
match(Set dst (DivF dst con));
3403
format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
3406
__ divss($dst$$XMMRegister, $constantaddress($con));
3408
ins_pipe(pipe_slow);
3411
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
3412
predicate(UseAVX > 0);
3413
match(Set dst (DivF src1 src2));
3415
format %{ "vdivss $dst, $src1, $src2" %}
3418
__ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3420
ins_pipe(pipe_slow);
3423
instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
3424
predicate(UseAVX > 0);
3425
match(Set dst (DivF src1 (LoadF src2)));
3427
format %{ "vdivss $dst, $src1, $src2" %}
3430
__ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3432
ins_pipe(pipe_slow);
3435
instruct divF_reg_imm(regF dst, regF src, immF con) %{
3436
predicate(UseAVX > 0);
3437
match(Set dst (DivF src con));
3439
format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
3442
__ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3444
ins_pipe(pipe_slow);
3447
instruct divD_reg(regD dst, regD src) %{
3448
predicate((UseSSE>=2) && (UseAVX == 0));
3449
match(Set dst (DivD dst src));
3451
format %{ "divsd $dst, $src" %}
3454
__ divsd($dst$$XMMRegister, $src$$XMMRegister);
3456
ins_pipe(pipe_slow);
3459
instruct divD_mem(regD dst, memory src) %{
3460
predicate((UseSSE>=2) && (UseAVX == 0));
3461
match(Set dst (DivD dst (LoadD src)));
3463
format %{ "divsd $dst, $src" %}
3466
__ divsd($dst$$XMMRegister, $src$$Address);
3468
ins_pipe(pipe_slow);
3471
instruct divD_imm(regD dst, immD con) %{
3472
predicate((UseSSE>=2) && (UseAVX == 0));
3473
match(Set dst (DivD dst con));
3474
format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
3477
__ divsd($dst$$XMMRegister, $constantaddress($con));
3479
ins_pipe(pipe_slow);
3482
instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
3483
predicate(UseAVX > 0);
3484
match(Set dst (DivD src1 src2));
3486
format %{ "vdivsd $dst, $src1, $src2" %}
3489
__ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3491
ins_pipe(pipe_slow);
3494
instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
3495
predicate(UseAVX > 0);
3496
match(Set dst (DivD src1 (LoadD src2)));
3498
format %{ "vdivsd $dst, $src1, $src2" %}
3501
__ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3503
ins_pipe(pipe_slow);
3506
instruct divD_reg_imm(regD dst, regD src, immD con) %{
3507
predicate(UseAVX > 0);
3508
match(Set dst (DivD src con));
3510
format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
3513
__ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3515
ins_pipe(pipe_slow);
3518
instruct absF_reg(regF dst) %{
3519
predicate((UseSSE>=1) && (UseAVX == 0));
3520
match(Set dst (AbsF dst));
3522
format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
3524
__ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
3526
ins_pipe(pipe_slow);
3529
instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
3530
predicate(UseAVX > 0);
3531
match(Set dst (AbsF src));
3533
format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
3535
int vlen_enc = Assembler::AVX_128bit;
3536
__ vandps($dst$$XMMRegister, $src$$XMMRegister,
3537
ExternalAddress(float_signmask()), vlen_enc);
3539
ins_pipe(pipe_slow);
3542
instruct absD_reg(regD dst) %{
3543
predicate((UseSSE>=2) && (UseAVX == 0));
3544
match(Set dst (AbsD dst));
3546
format %{ "andpd $dst, [0x7fffffffffffffff]\t"
3547
"# abs double by sign masking" %}
3549
__ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
3551
ins_pipe(pipe_slow);
3554
instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
3555
predicate(UseAVX > 0);
3556
match(Set dst (AbsD src));
3558
format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
3559
"# abs double by sign masking" %}
3561
int vlen_enc = Assembler::AVX_128bit;
3562
__ vandpd($dst$$XMMRegister, $src$$XMMRegister,
3563
ExternalAddress(double_signmask()), vlen_enc);
3565
ins_pipe(pipe_slow);
3568
instruct negF_reg(regF dst) %{
3569
predicate((UseSSE>=1) && (UseAVX == 0));
3570
match(Set dst (NegF dst));
3572
format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
3574
__ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
3576
ins_pipe(pipe_slow);
3579
instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
3580
predicate(UseAVX > 0);
3581
match(Set dst (NegF src));
3583
format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
3585
__ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
3586
ExternalAddress(float_signflip()));
3588
ins_pipe(pipe_slow);
3591
instruct negD_reg(regD dst) %{
3592
predicate((UseSSE>=2) && (UseAVX == 0));
3593
match(Set dst (NegD dst));
3595
format %{ "xorpd $dst, [0x8000000000000000]\t"
3596
"# neg double by sign flipping" %}
3598
__ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
3600
ins_pipe(pipe_slow);
3603
instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
3604
predicate(UseAVX > 0);
3605
match(Set dst (NegD src));
3607
format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
3608
"# neg double by sign flipping" %}
3610
__ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
3611
ExternalAddress(double_signflip()));
3613
ins_pipe(pipe_slow);
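// Illustrative sketch (not part of this AD file): a scalar C++ reference for the
// AbsF/AbsD/NegF/NegD rules above. Abs clears the IEEE-754 sign bit (float_signmask()/
// double_signmask() hold 0x7fffffff / 0x7fffffffffffffff per lane) and Neg flips it
// (float_signflip()/double_signflip()). Function names here are ad hoc; assumes C++20
// for std::bit_cast.
//
//   #include <bit>
//   #include <cstdint>
//
//   static float  abs_by_mask(float x)  { return std::bit_cast<float>(std::bit_cast<uint32_t>(x) & 0x7fffffffu); }
//   static double abs_by_mask(double x) { return std::bit_cast<double>(std::bit_cast<uint64_t>(x) & 0x7fffffffffffffffull); }
//   static float  neg_by_flip(float x)  { return std::bit_cast<float>(std::bit_cast<uint32_t>(x) ^ 0x80000000u); }
//   static double neg_by_flip(double x) { return std::bit_cast<double>(std::bit_cast<uint64_t>(x) ^ 0x8000000000000000ull); }
//
// Unlike computing 0.0 - x, the xor form turns +0.0 into -0.0 and does not disturb NaN
// payloads, which is the behavior the NegF/NegD ideal nodes require.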
// The sqrtss instruction needs its destination register to be pre-initialized for best performance.
// Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
instruct sqrtF_reg(regF dst) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF dst));
  format %{ "sqrtss $dst, $dst" %}
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
// The sqrtsd instruction needs its destination register to be pre-initialized for best performance.
// Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
instruct sqrtD_reg(regD dst) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD dst));
  format %{ "sqrtsd $dst, $dst" %}
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
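// Illustrative sketch (not part of this AD file): the SSE scalar-sqrt intrinsic shows why
// the two rules above keep the input pre-loaded in dst. sqrtss/sqrtsd write only the low
// lane and merge the remaining lanes from the destination's old contents, so a cold dst
// would add a false dependency on whatever last wrote that register. Assumes SSE and
// <xmmintrin.h>; the example itself is hypothetical.
//
//   #include <xmmintrin.h>
//   #include <cstdio>
//
//   int main() {
//     __m128 a = _mm_set_ps(3.0f, 2.0f, 1.0f, 4.0f); // lanes {4, 1, 2, 3}
//     __m128 r = _mm_sqrt_ss(a);                     // low lane -> 2, upper lanes carried over
//     float out[4];
//     _mm_storeu_ps(out, r);
//     std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); // 2 1 2 3
//   }
//
// Because the upper lanes are merged rather than recomputed, matching only
// (SqrtF dst)/(SqrtD dst) sidesteps the partial-update dependency.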
instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
3642
match(Set dst (ConvF2HF src));
3644
format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
3646
__ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
3648
ins_pipe( pipe_slow );
3651
instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
3652
predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
3653
effect(TEMP ktmp, TEMP rtmp);
3654
match(Set mem (StoreC mem (ConvF2HF src)));
3655
format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
3657
__ movl($rtmp$$Register, 0x1);
3658
__ kmovwl($ktmp$$KRegister, $rtmp$$Register);
3659
__ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
3661
ins_pipe( pipe_slow );
3664
instruct vconvF2HF(vec dst, vec src) %{
3665
match(Set dst (VectorCastF2HF src));
3666
format %{ "vector_conv_F2HF $dst $src" %}
3668
int vlen_enc = vector_length_encoding(this, $src);
3669
__ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
3671
ins_pipe( pipe_slow );
3674
instruct vconvF2HF_mem_reg(memory mem, vec src) %{
3675
match(Set mem (StoreVector mem (VectorCastF2HF src)));
3676
format %{ "vcvtps2ph $mem,$src" %}
3678
int vlen_enc = vector_length_encoding(this, $src);
3679
__ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
3681
ins_pipe( pipe_slow );
3684
instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
3685
match(Set dst (ConvHF2F src));
3686
format %{ "vcvtph2ps $dst,$src" %}
3688
__ flt16_to_flt($dst$$XMMRegister, $src$$Register);
3690
ins_pipe( pipe_slow );
3693
instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
3694
match(Set dst (VectorCastHF2F (LoadVector mem)));
3695
format %{ "vcvtph2ps $dst,$mem" %}
3697
int vlen_enc = vector_length_encoding(this);
3698
__ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
3700
ins_pipe( pipe_slow );
3703
instruct vconvHF2F(vec dst, vec src) %{
3704
match(Set dst (VectorCastHF2F src));
3706
format %{ "vector_conv_HF2F $dst,$src" %}
3708
int vlen_enc = vector_length_encoding(this);
3709
__ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
3711
ins_pipe( pipe_slow );
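// Illustrative sketch (not part of this AD file): the ConvF2HF/ConvHF2F rules above use the
// same hardware conversion that the F16C intrinsics expose. A hypothetical stand-alone
// round trip (compile with F16C enabled, e.g. -mf16c):
//
//   #include <immintrin.h>
//   #include <cstdint>
//   #include <cstdio>
//
//   int main() {
//     float f = 1.5f;
//     // vcvtps2ph; imm 0x04 (= _MM_FROUND_CUR_DIRECTION) rounds per MXCSR, the same
//     // immediate the vector rules above pass.
//     __m128i h = _mm_cvtps_ph(_mm_set_ss(f), _MM_FROUND_CUR_DIRECTION);
//     uint16_t bits = (uint16_t)_mm_extract_epi16(h, 0);  // 0x3e00 for 1.5f
//     __m128 back = _mm_cvtph_ps(h);                      // vcvtph2ps
//     std::printf("half bits 0x%04x, back to float %g\n", (unsigned)bits, _mm_cvtss_f32(back));
//   }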
// ---------------------------------------- VectorReinterpret ------------------------------------
3715
instruct reinterpret_mask(kReg dst) %{
3716
predicate(n->bottom_type()->isa_vectmask() &&
3717
Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
3718
match(Set dst (VectorReinterpret dst));
3720
format %{ "vector_reinterpret $dst\t!" %}
3724
ins_pipe( pipe_slow );
3727
instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
3728
predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
3729
n->bottom_type()->isa_vectmask() &&
3730
n->in(1)->bottom_type()->isa_vectmask() &&
3731
n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
3732
n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
3733
match(Set dst (VectorReinterpret src));
3735
format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
3737
int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
3738
int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
3739
assert(src_sz == dst_sz , "src and dst size mismatch");
3740
int vlen_enc = vector_length_encoding(src_sz);
3741
__ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
3742
__ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
3744
ins_pipe( pipe_slow );
3747
instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
3748
predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
3749
n->bottom_type()->isa_vectmask() &&
3750
n->in(1)->bottom_type()->isa_vectmask() &&
3751
(n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
3752
n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
3753
n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
3754
match(Set dst (VectorReinterpret src));
3756
format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
3758
int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
3759
int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
3760
assert(src_sz == dst_sz , "src and dst size mismatch");
3761
int vlen_enc = vector_length_encoding(src_sz);
3762
__ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
3763
__ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
3765
ins_pipe( pipe_slow );
3768
instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
3769
predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
3770
n->bottom_type()->isa_vectmask() &&
3771
n->in(1)->bottom_type()->isa_vectmask() &&
3772
(n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
3773
n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
3774
n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
3775
match(Set dst (VectorReinterpret src));
3777
format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
3779
int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
3780
int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
3781
assert(src_sz == dst_sz , "src and dst size mismatch");
3782
int vlen_enc = vector_length_encoding(src_sz);
3783
__ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
3784
__ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
3786
ins_pipe( pipe_slow );
3789
instruct reinterpret(vec dst) %{
3790
predicate(!n->bottom_type()->isa_vectmask() &&
3791
Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
3792
match(Set dst (VectorReinterpret dst));
3794
format %{ "vector_reinterpret $dst\t!" %}
3798
ins_pipe( pipe_slow );
3801
instruct reinterpret_expand(vec dst, vec src) %{
3802
predicate(UseAVX == 0 &&
3803
(Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
3804
match(Set dst (VectorReinterpret src));
3807
format %{ "vector_reinterpret_expand $dst,$src" %}
3809
assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
3810
assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
3812
int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
3813
if (src_vlen_in_bytes == 4) {
3814
__ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
3816
assert(src_vlen_in_bytes == 8, "");
3817
__ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
3819
__ pand($dst$$XMMRegister, $src$$XMMRegister);
3821
ins_pipe( pipe_slow );
3824
instruct vreinterpret_expand4(legVec dst, vec src) %{
3825
predicate(UseAVX > 0 &&
3826
!n->bottom_type()->isa_vectmask() &&
3827
(Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
3828
(Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
3829
match(Set dst (VectorReinterpret src));
3831
format %{ "vector_reinterpret_expand $dst,$src" %}
3833
__ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
3835
ins_pipe( pipe_slow );
3839
instruct vreinterpret_expand(legVec dst, vec src) %{
3840
predicate(UseAVX > 0 &&
3841
!n->bottom_type()->isa_vectmask() &&
3842
(Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
3843
(Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
3844
match(Set dst (VectorReinterpret src));
3846
format %{ "vector_reinterpret_expand $dst,$src\t!" %}
3848
switch (Matcher::vector_length_in_bytes(this, $src)) {
3849
case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
3850
case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
3851
case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
3852
default: ShouldNotReachHere();
3855
ins_pipe( pipe_slow );
3858
instruct reinterpret_shrink(vec dst, legVec src) %{
3859
predicate(!n->bottom_type()->isa_vectmask() &&
3860
Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
3861
match(Set dst (VectorReinterpret src));
3863
format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
3865
switch (Matcher::vector_length_in_bytes(this)) {
3866
case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
3867
case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
3868
case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
3869
case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
3870
default: ShouldNotReachHere();
3873
ins_pipe( pipe_slow );
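// Illustrative sketch (not part of this AD file): VectorReinterpret between sizes is a pure
// bit-copy. Expanding keeps the source bytes and zeroes the tail (which is what the
// mask-and / movq / movdqu sequences above achieve); shrinking keeps only the low bytes.
// A scalar C++ reference, with ad hoc names:
//
//   #include <cstring>
//   #include <cstdint>
//
//   // src_len < dst_len: copy the payload, zero-fill the rest.
//   static void reinterpret_expand(uint8_t* dst, size_t dst_len,
//                                  const uint8_t* src, size_t src_len) {
//     std::memset(dst, 0, dst_len);
//     std::memcpy(dst, src, src_len);
//   }
//
//   // src_len > dst_len: keep only the low dst_len bytes.
//   static void reinterpret_shrink(uint8_t* dst, size_t dst_len,
//                                  const uint8_t* src) {
//     std::memcpy(dst, src, dst_len);
//   }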
// ----------------------------------------------------------------------------------------------------
3879
instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
3880
match(Set dst (RoundDoubleMode src rmode));
3881
format %{ "roundsd $dst,$src" %}
3884
assert(UseSSE >= 4, "required");
3885
if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
3886
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3888
__ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
3890
ins_pipe(pipe_slow);
3893
instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
3894
match(Set dst (RoundDoubleMode con rmode));
3895
format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
3898
assert(UseSSE >= 4, "required");
3899
__ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
3901
ins_pipe(pipe_slow);
3904
instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
3905
predicate(Matcher::vector_length(n) < 8);
3906
match(Set dst (RoundDoubleModeV src rmode));
3907
format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
3909
assert(UseAVX > 0, "required");
3910
int vlen_enc = vector_length_encoding(this);
3911
__ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
3913
ins_pipe( pipe_slow );
3916
instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
3917
predicate(Matcher::vector_length(n) == 8);
3918
match(Set dst (RoundDoubleModeV src rmode));
3919
format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
3921
assert(UseAVX > 2, "required");
3922
__ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
3924
ins_pipe( pipe_slow );
3927
instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
3928
predicate(Matcher::vector_length(n) < 8);
3929
match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
3930
format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
3932
assert(UseAVX > 0, "required");
3933
int vlen_enc = vector_length_encoding(this);
3934
__ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
3936
ins_pipe( pipe_slow );
3939
instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
3940
predicate(Matcher::vector_length(n) == 8);
3941
match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
3942
format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
3944
assert(UseAVX > 2, "required");
3945
__ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
3947
ins_pipe( pipe_slow );
3951
instruct onspinwait() %{
3957
$$emit$$"pause\t! membar_onspinwait"
3962
ins_pipe(pipe_slow);
3966
instruct fmaD_reg(regD a, regD b, regD c) %{
3967
match(Set c (FmaD c (Binary a b)));
3968
format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
3971
assert(UseFMA, "Needs FMA instructions support.");
3972
__ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
3974
ins_pipe( pipe_slow );
3978
instruct fmaF_reg(regF a, regF b, regF c) %{
3979
match(Set c (FmaF c (Binary a b)));
3980
format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
3983
assert(UseFMA, "Needs FMA instructions support.");
3984
__ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
3986
ins_pipe( pipe_slow );
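// Illustrative sketch (not part of this AD file): FmaD/FmaF compute a * b + c with a single
// rounding, which std::fma also guarantees. The difference from a separately rounded
// multiply-then-add is observable; the constants below are one classic case (assumes
// IEEE-754 doubles and that the compiler does not contract a*b+c itself, e.g. build with
// -ffp-contract=off).
//
//   #include <cmath>
//   #include <cstdio>
//
//   int main() {
//     volatile double a = 134217729.0;           // 2^27 + 1
//     volatile double c = -18014398509481984.0;  // -2^54
//     double fused   = std::fma(a, a, c);        // exact: 2^28 + 1 = 268435457
//     double twostep = a * a + c;                // a*a first rounds to 2^54 + 2^28 -> 268435456
//     std::printf("%.17g vs %.17g\n", fused, twostep);
//   }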
// ====================VECTOR INSTRUCTIONS=====================================
3991
// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
3992
instruct MoveVec2Leg(legVec dst, vec src) %{
3996
ShouldNotReachHere();
3998
ins_pipe( fpu_reg_reg );
4001
instruct MoveLeg2Vec(vec dst, legVec src) %{
4005
ShouldNotReachHere();
4007
ins_pipe( fpu_reg_reg );
4010
// ============================================================================
4012
// Load vectors generic operand pattern
4013
instruct loadV(vec dst, memory mem) %{
4014
match(Set dst (LoadVector mem));
4016
format %{ "load_vector $dst,$mem" %}
4018
__ load_vector($dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
4020
ins_pipe( pipe_slow );
4023
// Store vectors generic operand pattern.
4024
instruct storeV(memory mem, vec src) %{
4025
match(Set mem (StoreVector mem src));
4027
format %{ "store_vector $mem,$src\n\t" %}
4029
switch (Matcher::vector_length_in_bytes(this, $src)) {
4030
case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
4031
case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
4032
case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
4033
case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
4034
case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
4035
default: ShouldNotReachHere();
4038
ins_pipe( pipe_slow );
4041
// ---------------------------------------- Gather ------------------------------------
4043
// Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
4045
instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
4046
predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
4047
Matcher::vector_length_in_bytes(n) <= 32);
4048
match(Set dst (LoadVectorGather mem idx));
4049
effect(TEMP dst, TEMP tmp, TEMP mask);
4050
format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
4052
int vlen_enc = vector_length_encoding(this);
4053
BasicType elem_bt = Matcher::vector_element_basic_type(this);
4054
assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
4055
__ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
4056
__ lea($tmp$$Register, $mem$$Address);
4057
__ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
4059
ins_pipe( pipe_slow );
4063
instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
4064
predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
4065
!is_subword_type(Matcher::vector_element_basic_type(n)));
4066
match(Set dst (LoadVectorGather mem idx));
4067
effect(TEMP dst, TEMP tmp, TEMP ktmp);
4068
format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %}
4070
int vlen_enc = vector_length_encoding(this);
4071
BasicType elem_bt = Matcher::vector_element_basic_type(this);
4072
__ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
4073
__ lea($tmp$$Register, $mem$$Address);
4074
__ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
4076
ins_pipe( pipe_slow );
4079
instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
4080
predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
4081
!is_subword_type(Matcher::vector_element_basic_type(n)));
4082
match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
4083
effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
4084
format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
4086
assert(UseAVX > 2, "sanity");
4087
int vlen_enc = vector_length_encoding(this);
4088
BasicType elem_bt = Matcher::vector_element_basic_type(this);
4089
assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
4090
// Note: Since the gather instruction partially updates the opmask register used
// for predication, the mask operand is first copied into a temporary.
4092
__ kmovwl($ktmp$$KRegister, $mask$$KRegister);
4093
__ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4094
__ lea($tmp$$Register, $mem$$Address);
4095
__ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
4097
ins_pipe( pipe_slow );
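// Illustrative sketch (not part of this AD file): a scalar reference for a masked gather of
// 32-bit elements, written to mirror how the hardware consumes its predicate: each completed
// lane clears its mask bit, which is exactly why the rule above copies $mask into a temporary
// opmask first. Ad hoc names, plain C++:
//
//   #include <cstdint>
//   #include <cstddef>
//
//   static void gather_masked_i32(int32_t* dst, size_t lanes,
//                                 const int32_t* base, const int32_t* index,
//                                 uint32_t* mask /* updated in place */) {
//     for (size_t i = 0; i < lanes; i++) {
//       if (*mask & (1u << i)) {
//         dst[i] = base[index[i]];
//         *mask &= ~(1u << i);   // hardware clears the bit as the lane completes
//       } else {
//         dst[i] = 0;            // inactive lanes stay at the zeroed dst value
//       }
//     }
//   }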
instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegI rtmp) %{
4101
predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
4102
match(Set dst (LoadVectorGather mem (Binary idx_base offset)));
4103
effect(TEMP tmp, TEMP rtmp);
4104
format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
4106
int vlen_enc = vector_length_encoding(this);
4107
BasicType elem_bt = Matcher::vector_element_basic_type(this);
4108
__ lea($tmp$$Register, $mem$$Address);
4109
__ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp$$Register, vlen_enc);
4111
ins_pipe( pipe_slow );
4114
instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegP idx_base_temp,
4115
vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
4116
predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
4117
match(Set dst (LoadVectorGather mem (Binary idx_base offset)));
4118
effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
4119
format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
4121
int vlen_enc = vector_length_encoding(this);
4122
int vector_len = Matcher::vector_length(this);
4123
BasicType elem_bt = Matcher::vector_element_basic_type(this);
4124
__ lea($tmp$$Register, $mem$$Address);
4125
__ movptr($idx_base_temp$$Register, $idx_base$$Register);
4126
__ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, noreg, $xtmp1$$XMMRegister,
4127
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
4129
ins_pipe( pipe_slow );
4132
instruct vgather_subwordLE8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegI rtmp, rFlagsReg cr) %{
4133
predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
4134
match(Set dst (LoadVectorGather mem (Binary idx_base offset)));
4135
effect(TEMP tmp, TEMP rtmp, KILL cr);
4136
format %{ "vector_gatherLE8_off $dst, $mem, $idx_base, $offset\t! using $tmp and $rtmp as TEMP" %}
4138
int vlen_enc = vector_length_encoding(this);
4139
BasicType elem_bt = Matcher::vector_element_basic_type(this);
4140
__ lea($tmp$$Register, $mem$$Address);
4141
__ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, $rtmp$$Register, vlen_enc);
4143
ins_pipe( pipe_slow );
4147
instruct vgather_subwordGT8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegP idx_base_temp,
4148
vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
4149
predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
4150
match(Set dst (LoadVectorGather mem (Binary idx_base offset)));
4151
effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
4152
format %{ "vector_gatherGT8_off $dst, $mem, $idx_base, $offset\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
4154
int vlen_enc = vector_length_encoding(this);
4155
int vector_len = Matcher::vector_length(this);
4156
BasicType elem_bt = Matcher::vector_element_basic_type(this);
4157
__ lea($tmp$$Register, $mem$$Address);
4158
__ movptr($idx_base_temp$$Register, $idx_base$$Register);
4159
__ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, noreg, $xtmp1$$XMMRegister,
4160
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
4162
ins_pipe( pipe_slow );
4167
instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
4168
predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
4169
match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset))));
4170
effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
4171
format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
4173
int vlen_enc = vector_length_encoding(this);
4174
BasicType elem_bt = Matcher::vector_element_basic_type(this);
4175
__ xorq($mask_idx$$Register, $mask_idx$$Register);
4176
__ lea($tmp$$Register, $mem$$Address);
4177
__ kmovql($rtmp2$$Register, $mask$$KRegister);
4178
__ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
4180
ins_pipe( pipe_slow );
4183
instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegP tmp, rRegP idx_base_temp,
4184
vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
4185
predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
4186
match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset))));
4187
effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
4188
format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
4190
int vlen_enc = vector_length_encoding(this);
4191
int vector_len = Matcher::vector_length(this);
4192
BasicType elem_bt = Matcher::vector_element_basic_type(this);
4193
__ xorq($mask_idx$$Register, $mask_idx$$Register);
4194
__ lea($tmp$$Register, $mem$$Address);
4195
__ movptr($idx_base_temp$$Register, $idx_base$$Register);
4196
__ kmovql($rtmp2$$Register, $mask$$KRegister);
4197
__ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister,
4198
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
4200
ins_pipe( pipe_slow );
4203
instruct vgather_masked_subwordLE8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
4204
predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
4205
match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset))));
4206
effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
4207
format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
4209
int vlen_enc = vector_length_encoding(this);
4210
BasicType elem_bt = Matcher::vector_element_basic_type(this);
4211
__ xorq($mask_idx$$Register, $mask_idx$$Register);
4212
__ lea($tmp$$Register, $mem$$Address);
4213
__ kmovql($rtmp2$$Register, $mask$$KRegister);
4214
__ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register,
4215
$rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
4217
ins_pipe( pipe_slow );
4220
instruct vgather_masked_subwordGT8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegP tmp, rRegP idx_base_temp,
4221
vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
4222
predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
4223
match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset))));
4224
effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
4225
format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
4227
int vlen_enc = vector_length_encoding(this);
4228
int vector_len = Matcher::vector_length(this);
4229
BasicType elem_bt = Matcher::vector_element_basic_type(this);
4230
__ xorq($mask_idx$$Register, $mask_idx$$Register);
4231
__ lea($tmp$$Register, $mem$$Address);
4232
__ movptr($idx_base_temp$$Register, $idx_base$$Register);
4233
__ kmovql($rtmp2$$Register, $mask$$KRegister);
4234
__ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
4235
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
4237
ins_pipe( pipe_slow );
4240
instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
4241
predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
4242
match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset))));
4243
effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
4244
format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
4246
int vlen_enc = vector_length_encoding(this);
4247
BasicType elem_bt = Matcher::vector_element_basic_type(this);
4248
__ lea($tmp$$Register, $mem$$Address);
4249
__ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
4250
if (elem_bt == T_SHORT) {
4251
__ movl($mask_idx$$Register, 0x55555555);
4252
__ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
4254
__ xorl($mask_idx$$Register, $mask_idx$$Register);
4255
__ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
4257
ins_pipe( pipe_slow );
4260
instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegP tmp, rRegP idx_base_temp,
4261
vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
4262
predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
4263
match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset))));
4264
effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
4265
format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
4267
int vlen_enc = vector_length_encoding(this);
4268
int vector_len = Matcher::vector_length(this);
4269
BasicType elem_bt = Matcher::vector_element_basic_type(this);
4270
__ lea($tmp$$Register, $mem$$Address);
4271
__ movptr($idx_base_temp$$Register, $idx_base$$Register);
4272
__ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
4273
if (elem_bt == T_SHORT) {
4274
__ movl($mask_idx$$Register, 0x55555555);
4275
__ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
4277
__ xorl($mask_idx$$Register, $mask_idx$$Register);
4278
__ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister,
4279
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
4281
ins_pipe( pipe_slow );
4284
instruct vgather_masked_subwordLE8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
4285
predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
4286
match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset))));
4287
effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
4288
format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
4290
int vlen_enc = vector_length_encoding(this);
4291
BasicType elem_bt = Matcher::vector_element_basic_type(this);
4292
__ lea($tmp$$Register, $mem$$Address);
4293
__ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
4294
if (elem_bt == T_SHORT) {
4295
__ movl($mask_idx$$Register, 0x55555555);
4296
__ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
4298
__ xorl($mask_idx$$Register, $mask_idx$$Register);
4299
__ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register,
4300
$rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
4302
ins_pipe( pipe_slow );
4305
instruct vgather_masked_subwordGT8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegP tmp, rRegP idx_base_temp,
4306
vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
4307
predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
4308
match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset))));
4309
effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
4310
format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
4312
int vlen_enc = vector_length_encoding(this);
4313
int vector_len = Matcher::vector_length(this);
4314
BasicType elem_bt = Matcher::vector_element_basic_type(this);
4315
__ xorl($mask_idx$$Register, $mask_idx$$Register);
4316
__ lea($tmp$$Register, $mem$$Address);
4317
__ movptr($idx_base_temp$$Register, $idx_base$$Register);
4318
__ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
4319
if (elem_bt == T_SHORT) {
4320
__ movl($mask_idx$$Register, 0x55555555);
4321
__ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
4323
__ xorl($mask_idx$$Register, $mask_idx$$Register);
4324
__ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
4325
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
4327
ins_pipe( pipe_slow );
4331
// ====================Scatter=======================================
4333
// Scatter INT, LONG, FLOAT, DOUBLE
4335
instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
4336
predicate(UseAVX > 2);
4337
match(Set mem (StoreVectorScatter mem (Binary src idx)));
4338
effect(TEMP tmp, TEMP ktmp);
4339
format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
4341
int vlen_enc = vector_length_encoding(this, $src);
4342
BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
4344
assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
4345
assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
4347
__ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
4348
__ lea($tmp$$Register, $mem$$Address);
4349
__ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
4351
ins_pipe( pipe_slow );
4354
instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
4355
match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
4356
effect(TEMP tmp, TEMP ktmp);
4357
format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
4359
int vlen_enc = vector_length_encoding(this, $src);
4360
BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
4361
assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
4362
assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
4363
// Note: Since the scatter instruction partially updates the opmask register used
// for predication, the mask operand is first copied into a temporary.
4365
__ kmovwl($ktmp$$KRegister, $mask$$KRegister);
4366
__ lea($tmp$$Register, $mem$$Address);
4367
__ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
4369
ins_pipe( pipe_slow );
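// Illustrative sketch (not part of this AD file): the matching scalar reference for a masked
// scatter of 32-bit elements; as with the gather sketch above, the predicate is consumed lane
// by lane, so the rule copies $mask to a temporary before use. Ad hoc names:
//
//   #include <cstdint>
//   #include <cstddef>
//
//   static void scatter_masked_i32(int32_t* base, const int32_t* index,
//                                  const int32_t* src, size_t lanes,
//                                  uint32_t* mask /* updated in place */) {
//     for (size_t i = 0; i < lanes; i++) {
//       if (*mask & (1u << i)) {
//         base[index[i]] = src[i];
//         *mask &= ~(1u << i);
//       }
//     }
//   }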
// ====================REPLICATE=======================================
4374
// Replicate byte scalar to be vector
4375
instruct vReplB_reg(vec dst, rRegI src) %{
4376
predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
4377
match(Set dst (Replicate src));
4378
format %{ "replicateB $dst,$src" %}
4380
uint vlen = Matcher::vector_length(this);
4382
int vlen_enc = vector_length_encoding(this);
4383
if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
4384
assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
4385
__ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
4387
__ movdl($dst$$XMMRegister, $src$$Register);
4388
__ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4391
assert(UseAVX < 2, "");
4392
__ movdl($dst$$XMMRegister, $src$$Register);
4393
__ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
4394
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
4396
assert(vlen == 16, "");
4397
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4401
ins_pipe( pipe_slow );
4404
instruct ReplB_mem(vec dst, memory mem) %{
4405
predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
4406
match(Set dst (Replicate (LoadB mem)));
4407
format %{ "replicateB $dst,$mem" %}
4409
int vlen_enc = vector_length_encoding(this);
4410
__ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
4412
ins_pipe( pipe_slow );
4415
// ====================ReplicateS=======================================
4417
instruct vReplS_reg(vec dst, rRegI src) %{
4418
predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
4419
match(Set dst (Replicate src));
4420
format %{ "replicateS $dst,$src" %}
4422
uint vlen = Matcher::vector_length(this);
4423
int vlen_enc = vector_length_encoding(this);
4425
if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
4426
assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
4427
__ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
4429
__ movdl($dst$$XMMRegister, $src$$Register);
4430
__ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4433
assert(UseAVX < 2, "");
4434
__ movdl($dst$$XMMRegister, $src$$Register);
4435
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
4437
assert(vlen == 8, "");
4438
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4442
ins_pipe( pipe_slow );
4445
instruct ReplS_mem(vec dst, memory mem) %{
4446
predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
4447
match(Set dst (Replicate (LoadS mem)));
4448
format %{ "replicateS $dst,$mem" %}
4450
int vlen_enc = vector_length_encoding(this);
4451
__ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
4453
ins_pipe( pipe_slow );
4456
// ====================ReplicateI=======================================
4458
instruct ReplI_reg(vec dst, rRegI src) %{
4459
predicate(Matcher::vector_element_basic_type(n) == T_INT);
4460
match(Set dst (Replicate src));
4461
format %{ "replicateI $dst,$src" %}
4463
uint vlen = Matcher::vector_length(this);
4464
int vlen_enc = vector_length_encoding(this);
4465
if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
4466
__ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
4467
} else if (VM_Version::supports_avx2()) {
4468
__ movdl($dst$$XMMRegister, $src$$Register);
4469
__ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4471
__ movdl($dst$$XMMRegister, $src$$Register);
4472
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
4475
ins_pipe( pipe_slow );
4478
instruct ReplI_mem(vec dst, memory mem) %{
4479
predicate(Matcher::vector_element_basic_type(n) == T_INT);
4480
match(Set dst (Replicate (LoadI mem)));
4481
format %{ "replicateI $dst,$mem" %}
4483
int vlen_enc = vector_length_encoding(this);
4484
if (VM_Version::supports_avx2()) {
4485
__ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
4486
} else if (VM_Version::supports_avx()) {
4487
__ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
4489
__ movdl($dst$$XMMRegister, $mem$$Address);
4490
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
4493
ins_pipe( pipe_slow );
4496
instruct ReplI_imm(vec dst, immI con) %{
4497
predicate(Matcher::is_non_long_integral_vector(n));
4498
match(Set dst (Replicate con));
4499
format %{ "replicateI $dst,$con" %}
4501
InternalAddress addr = $constantaddress(Matcher::vector_element_basic_type(this),
4502
vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
4503
(VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 8) /
4504
type2aelembytes(Matcher::vector_element_basic_type(this))));
4505
BasicType bt = Matcher::vector_element_basic_type(this);
4506
int vlen = Matcher::vector_length_in_bytes(this);
4507
__ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
4509
ins_pipe( pipe_slow );
4512
// Replicate scalar zero to be vector
4513
instruct ReplI_zero(vec dst, immI_0 zero) %{
4514
predicate(Matcher::is_non_long_integral_vector(n));
4515
match(Set dst (Replicate zero));
4516
format %{ "replicateI $dst,$zero" %}
4518
int vlen_enc = vector_length_encoding(this);
4519
if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
4520
__ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4522
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4525
ins_pipe( fpu_reg_reg );
4528
instruct ReplI_M1(vec dst, immI_M1 con) %{
4529
predicate(UseSSE >= 2 && Matcher::is_non_long_integral_vector(n));
4530
match(Set dst (Replicate con));
4531
format %{ "vallones $dst" %}
4533
int vector_len = vector_length_encoding(this);
4534
__ vallones($dst$$XMMRegister, vector_len);
4536
ins_pipe( pipe_slow );
4539
// ====================ReplicateL=======================================
4542
// Replicate long (8 byte) scalar to be vector
4543
instruct ReplL_reg(vec dst, rRegL src) %{
4544
predicate(Matcher::vector_element_basic_type(n) == T_LONG);
4545
match(Set dst (Replicate src));
4546
format %{ "replicateL $dst,$src" %}
4548
int vlen = Matcher::vector_length(this);
4549
int vlen_enc = vector_length_encoding(this);
4550
if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
4551
__ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
4552
} else if (VM_Version::supports_avx2()) {
4553
__ movdq($dst$$XMMRegister, $src$$Register);
4554
__ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4556
__ movdq($dst$$XMMRegister, $src$$Register);
4557
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4560
ins_pipe( pipe_slow );
4563
// Replicate long (8 byte) scalar to be vector
4564
instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{
4565
predicate(Matcher::vector_length(n) <= 4 && Matcher::vector_element_basic_type(n) == T_LONG);
4566
match(Set dst (Replicate src));
4567
effect(TEMP dst, USE src, TEMP tmp);
4568
format %{ "replicateL $dst,$src" %}
4570
uint vlen = Matcher::vector_length(this);
4572
__ movdl($dst$$XMMRegister, $src$$Register);
4573
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
4574
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
4575
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4576
} else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
4577
int vlen_enc = Assembler::AVX_256bit;
4578
__ movdl($dst$$XMMRegister, $src$$Register);
4579
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
4580
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
4581
__ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4583
__ movdl($dst$$XMMRegister, $src$$Register);
4584
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
4585
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
4586
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4587
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
4590
ins_pipe( pipe_slow );
4593
instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{
4594
predicate(Matcher::vector_length(n) == 8 && Matcher::vector_element_basic_type(n) == T_LONG);
4595
match(Set dst (Replicate src));
4596
effect(TEMP dst, USE src, TEMP tmp);
4597
format %{ "replicateL $dst,$src" %}
4599
if (VM_Version::supports_avx512vl()) {
4600
__ movdl($dst$$XMMRegister, $src$$Register);
4601
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
4602
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
4603
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4604
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
4605
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
4607
int vlen_enc = Assembler::AVX_512bit;
4608
__ movdl($dst$$XMMRegister, $src$$Register);
4609
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
4610
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
4611
__ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4614
ins_pipe( pipe_slow );
4618
instruct ReplL_mem(vec dst, memory mem) %{
4619
predicate(Matcher::vector_element_basic_type(n) == T_LONG);
4620
match(Set dst (Replicate (LoadL mem)));
4621
format %{ "replicateL $dst,$mem" %}
4623
int vlen_enc = vector_length_encoding(this);
4624
if (VM_Version::supports_avx2()) {
4625
__ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
4626
} else if (VM_Version::supports_sse3()) {
4627
__ movddup($dst$$XMMRegister, $mem$$Address);
4629
__ movq($dst$$XMMRegister, $mem$$Address);
4630
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4633
ins_pipe( pipe_slow );
4636
// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
4637
instruct ReplL_imm(vec dst, immL con) %{
4638
predicate(Matcher::vector_element_basic_type(n) == T_LONG);
4639
match(Set dst (Replicate con));
4640
format %{ "replicateL $dst,$con" %}
4642
InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, $con$$constant, 1));
4643
int vlen = Matcher::vector_length_in_bytes(this);
4644
__ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
4646
ins_pipe( pipe_slow );
4649
instruct ReplL_zero(vec dst, immL0 zero) %{
4650
predicate(Matcher::vector_element_basic_type(n) == T_LONG);
4651
match(Set dst (Replicate zero));
4652
format %{ "replicateL $dst,$zero" %}
4654
int vlen_enc = vector_length_encoding(this);
4655
if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
4656
__ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4658
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4661
ins_pipe( fpu_reg_reg );
4664
instruct ReplL_M1(vec dst, immL_M1 con) %{
4665
predicate(UseSSE >= 2 && Matcher::vector_element_basic_type(n) == T_LONG);
4666
match(Set dst (Replicate con));
4667
format %{ "vallones $dst" %}
4669
int vector_len = vector_length_encoding(this);
4670
__ vallones($dst$$XMMRegister, vector_len);
4672
ins_pipe( pipe_slow );
4675
// ====================ReplicateF=======================================
4677
instruct vReplF_reg(vec dst, vlRegF src) %{
4678
predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
4679
match(Set dst (Replicate src));
4680
format %{ "replicateF $dst,$src" %}
4682
uint vlen = Matcher::vector_length(this);
4683
int vlen_enc = vector_length_encoding(this);
4685
__ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
4686
} else if (VM_Version::supports_avx2()) {
4687
__ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
4689
assert(vlen == 8, "sanity");
4690
__ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
4691
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
4694
ins_pipe( pipe_slow );
4697
instruct ReplF_reg(vec dst, vlRegF src) %{
4698
predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
4699
match(Set dst (Replicate src));
4700
format %{ "replicateF $dst,$src" %}
4702
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
4704
ins_pipe( pipe_slow );
4707
instruct ReplF_mem(vec dst, memory mem) %{
4708
predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
4709
match(Set dst (Replicate (LoadF mem)));
4710
format %{ "replicateF $dst,$mem" %}
4712
int vlen_enc = vector_length_encoding(this);
4713
__ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
4715
ins_pipe( pipe_slow );
4718
// Replicate float scalar immediate to be vector by loading from const table.
4719
instruct ReplF_imm(vec dst, immF con) %{
4720
predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
4721
match(Set dst (Replicate con));
4722
format %{ "replicateF $dst,$con" %}
4724
InternalAddress addr = $constantaddress(T_FLOAT, vreplicate_imm(T_FLOAT, $con$$constant,
4725
VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 2));
4726
int vlen = Matcher::vector_length_in_bytes(this);
4727
__ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
4729
ins_pipe( pipe_slow );
4732
instruct ReplF_zero(vec dst, immF0 zero) %{
4733
predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
4734
match(Set dst (Replicate zero));
4735
format %{ "replicateF $dst,$zero" %}
4737
int vlen_enc = vector_length_encoding(this);
4738
if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
4739
__ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4741
__ xorps($dst$$XMMRegister, $dst$$XMMRegister);
4744
ins_pipe( fpu_reg_reg );
4747
// ====================ReplicateD=======================================
4749
// Replicate double (8 bytes) scalar to be vector
4750
instruct vReplD_reg(vec dst, vlRegD src) %{
4751
predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
4752
match(Set dst (Replicate src));
4753
format %{ "replicateD $dst,$src" %}
4755
uint vlen = Matcher::vector_length(this);
4756
int vlen_enc = vector_length_encoding(this);
4758
__ movddup($dst$$XMMRegister, $src$$XMMRegister);
4759
} else if (VM_Version::supports_avx2()) {
4760
__ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
4762
assert(vlen == 4, "sanity");
4763
__ movddup($dst$$XMMRegister, $src$$XMMRegister);
4764
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
4767
ins_pipe( pipe_slow );
4770
instruct ReplD_reg(vec dst, vlRegD src) %{
4771
predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
4772
match(Set dst (Replicate src));
4773
format %{ "replicateD $dst,$src" %}
4775
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
4777
ins_pipe( pipe_slow );
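// Illustrative sketch (not part of this AD file): pshufd/vpermilps select each 32-bit
// destination lane with a 2-bit field of the immediate. 0x00 picks dword 0 four times
// (the ReplicateF rules above), while 0x44 = 0b01'00'01'00 picks dwords 0,1,0,1, i.e. it
// duplicates the low 64-bit double (ReplD_reg above). Scalar decode, ad hoc names:
//
//   #include <cstdint>
//
//   static void pshufd_ref(uint32_t dst[4], const uint32_t src[4], uint8_t imm8) {
//     for (int i = 0; i < 4; i++) {
//       dst[i] = src[(imm8 >> (2 * i)) & 3];
//     }
//   }
//
//   // pshufd_ref(d, s, 0x00) -> {s[0], s[0], s[0], s[0]}   broadcast float
//   // pshufd_ref(d, s, 0x44) -> {s[0], s[1], s[0], s[1]}   broadcast double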
instruct ReplD_mem(vec dst, memory mem) %{
4781
predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
4782
match(Set dst (Replicate (LoadD mem)));
4783
format %{ "replicateD $dst,$mem" %}
4785
if (Matcher::vector_length(this) >= 4) {
4786
int vlen_enc = vector_length_encoding(this);
4787
__ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
4789
__ movddup($dst$$XMMRegister, $mem$$Address);
4792
ins_pipe( pipe_slow );
4795
// Replicate double (8 byte) scalar immediate to be vector by loading from const table.
4796
instruct ReplD_imm(vec dst, immD con) %{
4797
predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
4798
match(Set dst (Replicate con));
4799
format %{ "replicateD $dst,$con" %}
4801
InternalAddress addr = $constantaddress(T_DOUBLE, vreplicate_imm(T_DOUBLE, $con$$constant, 1));
4802
int vlen = Matcher::vector_length_in_bytes(this);
4803
__ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
4805
ins_pipe( pipe_slow );
4808
instruct ReplD_zero(vec dst, immD0 zero) %{
4809
predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
4810
match(Set dst (Replicate zero));
4811
format %{ "replicateD $dst,$zero" %}
4813
int vlen_enc = vector_length_encoding(this);
4814
if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
4815
__ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4817
__ xorps($dst$$XMMRegister, $dst$$XMMRegister);
4820
ins_pipe( fpu_reg_reg );
4823
// ====================VECTOR INSERT=======================================
4825
instruct insert(vec dst, rRegI val, immU8 idx) %{
4826
predicate(Matcher::vector_length_in_bytes(n) < 32);
4827
match(Set dst (VectorInsert (Binary dst val) idx));
4828
format %{ "vector_insert $dst,$val,$idx" %}
4830
assert(UseSSE >= 4, "required");
4831
assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
4833
BasicType elem_bt = Matcher::vector_element_basic_type(this);
4835
assert(is_integral_type(elem_bt), "");
4836
assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4838
__ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
4840
ins_pipe( pipe_slow );
4843
instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
4844
predicate(Matcher::vector_length_in_bytes(n) == 32);
4845
match(Set dst (VectorInsert (Binary src val) idx));
4847
format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
4849
int vlen_enc = Assembler::AVX_256bit;
4850
BasicType elem_bt = Matcher::vector_element_basic_type(this);
4851
int elem_per_lane = 16/type2aelembytes(elem_bt);
4852
int log2epr = log2(elem_per_lane);
4854
assert(is_integral_type(elem_bt), "sanity");
4855
assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4857
uint x_idx = $idx$$constant & right_n_bits(log2epr);
4858
uint y_idx = ($idx$$constant >> log2epr) & 1;
4859
__ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
4860
__ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
4861
__ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
4863
ins_pipe( pipe_slow );
4866
instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
4867
predicate(Matcher::vector_length_in_bytes(n) == 64);
4868
match(Set dst (VectorInsert (Binary src val) idx));
4870
format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
4872
assert(UseAVX > 2, "sanity");
4874
BasicType elem_bt = Matcher::vector_element_basic_type(this);
4875
int elem_per_lane = 16/type2aelembytes(elem_bt);
4876
int log2epr = log2(elem_per_lane);
4878
assert(is_integral_type(elem_bt), "");
4879
assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4881
uint x_idx = $idx$$constant & right_n_bits(log2epr);
4882
uint y_idx = ($idx$$constant >> log2epr) & 3;
4883
__ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
4884
__ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
4885
__ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
4887
ins_pipe( pipe_slow );
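// Illustrative sketch (not part of this AD file): the 256/512-bit insert rules above split a
// flat element index into a 128-bit lane number (y_idx) and a position within that lane
// (x_idx), because the insert itself is performed on an extracted 128-bit slice. Scalar
// arithmetic, ad hoc names:
//
//   #include <cstdint>
//
//   struct LanePos { uint32_t x_idx; uint32_t y_idx; };
//
//   static LanePos split_index(uint32_t idx, uint32_t elem_bytes) {
//     uint32_t elem_per_lane = 16 / elem_bytes;   // elements in one 128-bit lane
//     return { idx % elem_per_lane,               // x_idx: slot inside the lane
//              idx / elem_per_lane };             // y_idx: which 128-bit lane
//   }
//
//   // e.g. a 512-bit vector of ints (elem_bytes = 4): idx 13 -> lane 3, slot 1,
//   // matching x_idx = 13 & 0x3 and y_idx = (13 >> 2) & 3 in insert64 above.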
4891
instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(2);
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
  %}
  ins_pipe( pipe_slow );
%}

instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    int vlen = Matcher::vector_length(this);
    uint x_idx = $idx$$constant & right_n_bits(2);
    if (vlen == 8) {
      uint y_idx = ($idx$$constant >> 2) & 1;
      int vlen_enc = Assembler::AVX_256bit;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      assert(vlen == 16, "sanity");
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}
instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ movq($tmp$$Register, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    int vlen_enc = Assembler::AVX_256bit;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
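// Double elements are inserted through a general-purpose register: the value
// is moved with movq so that the integer pinsrq/vpinsrq forms can be reused,
// avoiding a separate floating-point insert path.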
// ====================REDUCTION ARITHMETIC=======================================

// =======================Int Reduction==========================================

instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Long Reduction==========================================

instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
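// Each reduce* macro-assembler routine folds the vector operand down to a
// single element and then combines that element with the scalar input (src1)
// using the same ideal opcode, so one rule per element type covers
// add/mul/and/or/xor/min/max.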
// =======================Float Reduction==========================================
5111
instruct reductionF128(regF dst, vec src, vec vtmp) %{
5112
predicate(Matcher::vector_length(n->in(2)) <= 4); // src
5113
match(Set dst (AddReductionVF dst src));
5114
match(Set dst (MulReductionVF dst src));
5115
effect(TEMP dst, TEMP vtmp);
5116
format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
5118
int opcode = this->ideal_Opcode();
5119
int vlen = Matcher::vector_length(this, $src);
5120
__ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
5122
ins_pipe( pipe_slow );
5125
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
5126
predicate(Matcher::vector_length(n->in(2)) == 8); // src
5127
match(Set dst (AddReductionVF dst src));
5128
match(Set dst (MulReductionVF dst src));
5129
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
5130
format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
5132
int opcode = this->ideal_Opcode();
5133
int vlen = Matcher::vector_length(this, $src);
5134
__ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5136
ins_pipe( pipe_slow );
5139
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
5140
predicate(Matcher::vector_length(n->in(2)) == 16); // src
5141
match(Set dst (AddReductionVF dst src));
5142
match(Set dst (MulReductionVF dst src));
5143
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
5144
format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
5146
int opcode = this->ideal_Opcode();
5147
int vlen = Matcher::vector_length(this, $src);
5148
__ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5150
ins_pipe( pipe_slow );
5153
// =======================Double Reduction==========================================
5155
instruct reduction2D(regD dst, vec src, vec vtmp) %{
5156
predicate(Matcher::vector_length(n->in(2)) == 2); // src
5157
match(Set dst (AddReductionVD dst src));
5158
match(Set dst (MulReductionVD dst src));
5159
effect(TEMP dst, TEMP vtmp);
5160
format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
5162
int opcode = this->ideal_Opcode();
5163
int vlen = Matcher::vector_length(this, $src);
5164
__ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
5166
ins_pipe( pipe_slow );
5169
instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
5170
predicate(Matcher::vector_length(n->in(2)) == 4); // src
5171
match(Set dst (AddReductionVD dst src));
5172
match(Set dst (MulReductionVD dst src));
5173
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
5174
format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
5176
int opcode = this->ideal_Opcode();
5177
int vlen = Matcher::vector_length(this, $src);
5178
__ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5180
ins_pipe( pipe_slow );
5183
instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
5184
predicate(Matcher::vector_length(n->in(2)) == 8); // src
5185
match(Set dst (AddReductionVD dst src));
5186
match(Set dst (MulReductionVD dst src));
5187
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
5188
format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
5190
int opcode = this->ideal_Opcode();
5191
int vlen = Matcher::vector_length(this, $src);
5192
__ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5194
ins_pipe( pipe_slow );
5197
// =======================Byte Reduction==========================================
5200
instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
5201
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
5202
match(Set dst (AddReductionVI src1 src2));
5203
match(Set dst (AndReductionV src1 src2));
5204
match(Set dst ( OrReductionV src1 src2));
5205
match(Set dst (XorReductionV src1 src2));
5206
match(Set dst (MinReductionV src1 src2));
5207
match(Set dst (MaxReductionV src1 src2));
5208
effect(TEMP vtmp1, TEMP vtmp2);
5209
format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
5211
int opcode = this->ideal_Opcode();
5212
int vlen = Matcher::vector_length(this, $src2);
5213
__ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5215
ins_pipe( pipe_slow );
5218
instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
5219
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
5220
match(Set dst (AddReductionVI src1 src2));
5221
match(Set dst (AndReductionV src1 src2));
5222
match(Set dst ( OrReductionV src1 src2));
5223
match(Set dst (XorReductionV src1 src2));
5224
match(Set dst (MinReductionV src1 src2));
5225
match(Set dst (MaxReductionV src1 src2));
5226
effect(TEMP vtmp1, TEMP vtmp2);
5227
format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
5229
int opcode = this->ideal_Opcode();
5230
int vlen = Matcher::vector_length(this, $src2);
5231
__ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5233
ins_pipe( pipe_slow );
5237
// =======================Short Reduction==========================================
5239
instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
5240
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
5241
match(Set dst (AddReductionVI src1 src2));
5242
match(Set dst (MulReductionVI src1 src2));
5243
match(Set dst (AndReductionV src1 src2));
5244
match(Set dst ( OrReductionV src1 src2));
5245
match(Set dst (XorReductionV src1 src2));
5246
match(Set dst (MinReductionV src1 src2));
5247
match(Set dst (MaxReductionV src1 src2));
5248
effect(TEMP vtmp1, TEMP vtmp2);
5249
format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
5251
int opcode = this->ideal_Opcode();
5252
int vlen = Matcher::vector_length(this, $src2);
5253
__ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5255
ins_pipe( pipe_slow );
5258
// =======================Mul Reduction==========================================
5260
instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
5261
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
5262
Matcher::vector_length(n->in(2)) <= 32); // src2
5263
match(Set dst (MulReductionVI src1 src2));
5264
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
5265
format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
5267
int opcode = this->ideal_Opcode();
5268
int vlen = Matcher::vector_length(this, $src2);
5269
__ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5271
ins_pipe( pipe_slow );
5274
instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
5275
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
5276
Matcher::vector_length(n->in(2)) == 64); // src2
5277
match(Set dst (MulReductionVI src1 src2));
5278
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
5279
format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
5281
int opcode = this->ideal_Opcode();
5282
int vlen = Matcher::vector_length(this, $src2);
5283
__ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5285
ins_pipe( pipe_slow );
5288
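// Float/double min/max reductions come in two flavours: rules whose scalar
// input is the identity element (+Inf for min, -Inf for max, checked via the
// bottom type of in(1)) and "_av" rules that accumulate into the destination
// register itself.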
//--------------------Min/Max Float Reduction --------------------
5289
// Float Min Reduction
5290
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp,
5291
legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{
5292
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
5293
((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
5294
(n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
5295
Matcher::vector_length(n->in(2)) == 2);
5296
match(Set dst (MinReductionV src1 src2));
5297
match(Set dst (MaxReductionV src1 src2));
5298
effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
5299
format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
5301
assert(UseAVX > 0, "sanity");
5303
int opcode = this->ideal_Opcode();
5304
int vlen = Matcher::vector_length(this, $src2);
5305
__ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
5306
$atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
5308
ins_pipe( pipe_slow );
5311
instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
5312
legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
5313
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
5314
((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
5315
(n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
5316
Matcher::vector_length(n->in(2)) >= 4);
5317
match(Set dst (MinReductionV src1 src2));
5318
match(Set dst (MaxReductionV src1 src2));
5319
effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
5320
format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
5322
assert(UseAVX > 0, "sanity");
5324
int opcode = this->ideal_Opcode();
5325
int vlen = Matcher::vector_length(this, $src2);
5326
__ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
5327
$atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
5329
ins_pipe( pipe_slow );
5332
instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp,
5333
legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{
5334
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
5335
Matcher::vector_length(n->in(2)) == 2);
5336
match(Set dst (MinReductionV dst src));
5337
match(Set dst (MaxReductionV dst src));
5338
effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
5339
format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
5341
assert(UseAVX > 0, "sanity");
5343
int opcode = this->ideal_Opcode();
5344
int vlen = Matcher::vector_length(this, $src);
5345
__ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
5346
$atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
5348
ins_pipe( pipe_slow );
5352
instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp,
5353
legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
5354
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
5355
Matcher::vector_length(n->in(2)) >= 4);
5356
match(Set dst (MinReductionV dst src));
5357
match(Set dst (MaxReductionV dst src));
5358
effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
5359
format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
5361
assert(UseAVX > 0, "sanity");
5363
int opcode = this->ideal_Opcode();
5364
int vlen = Matcher::vector_length(this, $src);
5365
__ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
5366
$atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
5368
ins_pipe( pipe_slow );
5372
//--------------------Min Double Reduction --------------------
5373
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2,
                            legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs
                            rFlagsReg cr) %{
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
5377
((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
5378
(n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
5379
Matcher::vector_length(n->in(2)) == 2);
5380
match(Set dst (MinReductionV src1 src2));
5381
match(Set dst (MaxReductionV src1 src2));
5382
effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
5383
format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
5385
assert(UseAVX > 0, "sanity");
5387
int opcode = this->ideal_Opcode();
5388
int vlen = Matcher::vector_length(this, $src2);
5389
__ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
5390
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
5392
ins_pipe( pipe_slow );
5395
instruct minmax_reductionD(legRegD dst, immD src1, legVec src2,
                           legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs
                           rFlagsReg cr) %{
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
5399
((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
5400
(n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
5401
Matcher::vector_length(n->in(2)) >= 4);
5402
match(Set dst (MinReductionV src1 src2));
5403
match(Set dst (MaxReductionV src1 src2));
5404
effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
5405
format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
5407
assert(UseAVX > 0, "sanity");
5409
int opcode = this->ideal_Opcode();
5410
int vlen = Matcher::vector_length(this, $src2);
5411
__ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
5412
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
5414
ins_pipe( pipe_slow );
5418
instruct minmax_reduction2D_av(legRegD dst, legVec src,
                               legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs
                               rFlagsReg cr) %{
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
5422
Matcher::vector_length(n->in(2)) == 2);
5423
match(Set dst (MinReductionV dst src));
5424
match(Set dst (MaxReductionV dst src));
5425
effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
5426
format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
5428
assert(UseAVX > 0, "sanity");
5430
int opcode = this->ideal_Opcode();
5431
int vlen = Matcher::vector_length(this, $src);
5432
__ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
5433
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
5435
ins_pipe( pipe_slow );
5438
instruct minmax_reductionD_av(legRegD dst, legVec src,
                              legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs
                              rFlagsReg cr) %{
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
5442
Matcher::vector_length(n->in(2)) >= 4);
5443
match(Set dst (MinReductionV dst src));
5444
match(Set dst (MaxReductionV dst src));
5445
effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
5446
format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
5448
assert(UseAVX > 0, "sanity");
5450
int opcode = this->ideal_Opcode();
5451
int vlen = Matcher::vector_length(this, $src);
5452
__ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
5453
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
5455
ins_pipe( pipe_slow );
5458
// ====================VECTOR ARITHMETIC=======================================
5460
// --------------------------------- ADD --------------------------------------
5463
instruct vaddB(vec dst, vec src) %{
5464
predicate(UseAVX == 0);
5465
match(Set dst (AddVB dst src));
5466
format %{ "paddb $dst,$src\t! add packedB" %}
5468
__ paddb($dst$$XMMRegister, $src$$XMMRegister);
5470
ins_pipe( pipe_slow );
5473
instruct vaddB_reg(vec dst, vec src1, vec src2) %{
5474
predicate(UseAVX > 0);
5475
match(Set dst (AddVB src1 src2));
5476
format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
5478
int vlen_enc = vector_length_encoding(this);
5479
__ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5481
ins_pipe( pipe_slow );
5484
instruct vaddB_mem(vec dst, vec src, memory mem) %{
5485
predicate((UseAVX > 0) &&
5486
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
5487
match(Set dst (AddVB src (LoadVector mem)));
5488
format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
5490
int vlen_enc = vector_length_encoding(this);
5491
__ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5493
ins_pipe( pipe_slow );
5496
// Shorts/Chars vector add
5497
instruct vaddS(vec dst, vec src) %{
5498
predicate(UseAVX == 0);
5499
match(Set dst (AddVS dst src));
5500
format %{ "paddw $dst,$src\t! add packedS" %}
5502
__ paddw($dst$$XMMRegister, $src$$XMMRegister);
5504
ins_pipe( pipe_slow );
5507
instruct vaddS_reg(vec dst, vec src1, vec src2) %{
5508
predicate(UseAVX > 0);
5509
match(Set dst (AddVS src1 src2));
5510
format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
5512
int vlen_enc = vector_length_encoding(this);
5513
__ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5515
ins_pipe( pipe_slow );
5518
instruct vaddS_mem(vec dst, vec src, memory mem) %{
5519
predicate((UseAVX > 0) &&
5520
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
5521
match(Set dst (AddVS src (LoadVector mem)));
5522
format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
5524
int vlen_enc = vector_length_encoding(this);
5525
__ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5527
ins_pipe( pipe_slow );
5530
// Integers vector add
5531
instruct vaddI(vec dst, vec src) %{
5532
predicate(UseAVX == 0);
5533
match(Set dst (AddVI dst src));
5534
format %{ "paddd $dst,$src\t! add packedI" %}
5536
__ paddd($dst$$XMMRegister, $src$$XMMRegister);
5538
ins_pipe( pipe_slow );
5541
instruct vaddI_reg(vec dst, vec src1, vec src2) %{
5542
predicate(UseAVX > 0);
5543
match(Set dst (AddVI src1 src2));
5544
format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
5546
int vlen_enc = vector_length_encoding(this);
5547
__ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5549
ins_pipe( pipe_slow );
5553
instruct vaddI_mem(vec dst, vec src, memory mem) %{
5554
predicate((UseAVX > 0) &&
5555
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
5556
match(Set dst (AddVI src (LoadVector mem)));
5557
format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
5559
int vlen_enc = vector_length_encoding(this);
5560
__ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5562
ins_pipe( pipe_slow );
5566
instruct vaddL(vec dst, vec src) %{
5567
predicate(UseAVX == 0);
5568
match(Set dst (AddVL dst src));
5569
format %{ "paddq $dst,$src\t! add packedL" %}
5571
__ paddq($dst$$XMMRegister, $src$$XMMRegister);
5573
ins_pipe( pipe_slow );
5576
instruct vaddL_reg(vec dst, vec src1, vec src2) %{
5577
predicate(UseAVX > 0);
5578
match(Set dst (AddVL src1 src2));
5579
format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
5581
int vlen_enc = vector_length_encoding(this);
5582
__ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5584
ins_pipe( pipe_slow );
5587
instruct vaddL_mem(vec dst, vec src, memory mem) %{
5588
predicate((UseAVX > 0) &&
5589
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
5590
match(Set dst (AddVL src (LoadVector mem)));
5591
format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
5593
int vlen_enc = vector_length_encoding(this);
5594
__ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5596
ins_pipe( pipe_slow );
5600
instruct vaddF(vec dst, vec src) %{
5601
predicate(UseAVX == 0);
5602
match(Set dst (AddVF dst src));
5603
format %{ "addps $dst,$src\t! add packedF" %}
5605
__ addps($dst$$XMMRegister, $src$$XMMRegister);
5607
ins_pipe( pipe_slow );
5610
instruct vaddF_reg(vec dst, vec src1, vec src2) %{
5611
predicate(UseAVX > 0);
5612
match(Set dst (AddVF src1 src2));
5613
format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
5615
int vlen_enc = vector_length_encoding(this);
5616
__ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5618
ins_pipe( pipe_slow );
5621
instruct vaddF_mem(vec dst, vec src, memory mem) %{
5622
predicate((UseAVX > 0) &&
5623
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
5624
match(Set dst (AddVF src (LoadVector mem)));
5625
format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
5627
int vlen_enc = vector_length_encoding(this);
5628
__ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5630
ins_pipe( pipe_slow );
5633
// Doubles vector add
5634
instruct vaddD(vec dst, vec src) %{
5635
predicate(UseAVX == 0);
5636
match(Set dst (AddVD dst src));
5637
format %{ "addpd $dst,$src\t! add packedD" %}
5639
__ addpd($dst$$XMMRegister, $src$$XMMRegister);
5641
ins_pipe( pipe_slow );
5644
instruct vaddD_reg(vec dst, vec src1, vec src2) %{
5645
predicate(UseAVX > 0);
5646
match(Set dst (AddVD src1 src2));
5647
format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
5649
int vlen_enc = vector_length_encoding(this);
5650
__ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5652
ins_pipe( pipe_slow );
5655
instruct vaddD_mem(vec dst, vec src, memory mem) %{
5656
predicate((UseAVX > 0) &&
5657
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
5658
match(Set dst (AddVD src (LoadVector mem)));
5659
format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
5661
int vlen_enc = vector_length_encoding(this);
5662
__ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5664
ins_pipe( pipe_slow );
5667
// --------------------------------- SUB --------------------------------------
5670
instruct vsubB(vec dst, vec src) %{
5671
predicate(UseAVX == 0);
5672
match(Set dst (SubVB dst src));
5673
format %{ "psubb $dst,$src\t! sub packedB" %}
5675
__ psubb($dst$$XMMRegister, $src$$XMMRegister);
5677
ins_pipe( pipe_slow );
5680
instruct vsubB_reg(vec dst, vec src1, vec src2) %{
5681
predicate(UseAVX > 0);
5682
match(Set dst (SubVB src1 src2));
5683
format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
5685
int vlen_enc = vector_length_encoding(this);
5686
__ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5688
ins_pipe( pipe_slow );
5691
instruct vsubB_mem(vec dst, vec src, memory mem) %{
5692
predicate((UseAVX > 0) &&
5693
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
5694
match(Set dst (SubVB src (LoadVector mem)));
5695
format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
5697
int vlen_enc = vector_length_encoding(this);
5698
__ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5700
ins_pipe( pipe_slow );
5703
// Shorts/Chars vector sub
5704
instruct vsubS(vec dst, vec src) %{
5705
predicate(UseAVX == 0);
5706
match(Set dst (SubVS dst src));
5707
format %{ "psubw $dst,$src\t! sub packedS" %}
5709
__ psubw($dst$$XMMRegister, $src$$XMMRegister);
5711
ins_pipe( pipe_slow );
5715
instruct vsubS_reg(vec dst, vec src1, vec src2) %{
5716
predicate(UseAVX > 0);
5717
match(Set dst (SubVS src1 src2));
5718
format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
5720
int vlen_enc = vector_length_encoding(this);
5721
__ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5723
ins_pipe( pipe_slow );
5726
instruct vsubS_mem(vec dst, vec src, memory mem) %{
5727
predicate((UseAVX > 0) &&
5728
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
5729
match(Set dst (SubVS src (LoadVector mem)));
5730
format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
5732
int vlen_enc = vector_length_encoding(this);
5733
__ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5735
ins_pipe( pipe_slow );
5738
// Integers vector sub
5739
instruct vsubI(vec dst, vec src) %{
5740
predicate(UseAVX == 0);
5741
match(Set dst (SubVI dst src));
5742
format %{ "psubd $dst,$src\t! sub packedI" %}
5744
__ psubd($dst$$XMMRegister, $src$$XMMRegister);
5746
ins_pipe( pipe_slow );
5749
instruct vsubI_reg(vec dst, vec src1, vec src2) %{
5750
predicate(UseAVX > 0);
5751
match(Set dst (SubVI src1 src2));
5752
format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
5754
int vlen_enc = vector_length_encoding(this);
5755
__ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5757
ins_pipe( pipe_slow );
5760
instruct vsubI_mem(vec dst, vec src, memory mem) %{
5761
predicate((UseAVX > 0) &&
5762
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
5763
match(Set dst (SubVI src (LoadVector mem)));
5764
format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
5766
int vlen_enc = vector_length_encoding(this);
5767
__ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5769
ins_pipe( pipe_slow );
5773
instruct vsubL(vec dst, vec src) %{
5774
predicate(UseAVX == 0);
5775
match(Set dst (SubVL dst src));
5776
format %{ "psubq $dst,$src\t! sub packedL" %}
5778
__ psubq($dst$$XMMRegister, $src$$XMMRegister);
5780
ins_pipe( pipe_slow );
5783
instruct vsubL_reg(vec dst, vec src1, vec src2) %{
5784
predicate(UseAVX > 0);
5785
match(Set dst (SubVL src1 src2));
5786
format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
5788
int vlen_enc = vector_length_encoding(this);
5789
__ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5791
ins_pipe( pipe_slow );
5795
instruct vsubL_mem(vec dst, vec src, memory mem) %{
5796
predicate((UseAVX > 0) &&
5797
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
5798
match(Set dst (SubVL src (LoadVector mem)));
5799
format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
5801
int vlen_enc = vector_length_encoding(this);
5802
__ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5804
ins_pipe( pipe_slow );
5808
instruct vsubF(vec dst, vec src) %{
5809
predicate(UseAVX == 0);
5810
match(Set dst (SubVF dst src));
5811
format %{ "subps $dst,$src\t! sub packedF" %}
5813
__ subps($dst$$XMMRegister, $src$$XMMRegister);
5815
ins_pipe( pipe_slow );
5818
instruct vsubF_reg(vec dst, vec src1, vec src2) %{
5819
predicate(UseAVX > 0);
5820
match(Set dst (SubVF src1 src2));
5821
format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
5823
int vlen_enc = vector_length_encoding(this);
5824
__ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5826
ins_pipe( pipe_slow );
5829
instruct vsubF_mem(vec dst, vec src, memory mem) %{
5830
predicate((UseAVX > 0) &&
5831
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
5832
match(Set dst (SubVF src (LoadVector mem)));
5833
format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
5835
int vlen_enc = vector_length_encoding(this);
5836
__ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5838
ins_pipe( pipe_slow );
5841
// Doubles vector sub
5842
instruct vsubD(vec dst, vec src) %{
5843
predicate(UseAVX == 0);
5844
match(Set dst (SubVD dst src));
5845
format %{ "subpd $dst,$src\t! sub packedD" %}
5847
__ subpd($dst$$XMMRegister, $src$$XMMRegister);
5849
ins_pipe( pipe_slow );
5852
instruct vsubD_reg(vec dst, vec src1, vec src2) %{
5853
predicate(UseAVX > 0);
5854
match(Set dst (SubVD src1 src2));
5855
format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
5857
int vlen_enc = vector_length_encoding(this);
5858
__ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5860
ins_pipe( pipe_slow );
5863
instruct vsubD_mem(vec dst, vec src, memory mem) %{
5864
predicate((UseAVX > 0) &&
5865
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
5866
match(Set dst (SubVD src (LoadVector mem)));
5867
format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
5869
int vlen_enc = vector_length_encoding(this);
5870
__ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5872
ins_pipe( pipe_slow );
5875
// --------------------------------- MUL --------------------------------------
5878
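// There is no byte-granular SIMD multiply, so byte vectors are multiplied in
// 16-bit lanes: odd-indexed and even-indexed bytes are widened (or shifted)
// into separate words, multiplied with pmullw, masked back to 8 bits and
// recombined.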
instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
5879
predicate(Matcher::vector_length_in_bytes(n) <= 8);
5880
match(Set dst (MulVB src1 src2));
5881
effect(TEMP dst, TEMP xtmp);
5882
format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
5884
assert(UseSSE > 3, "required");
5885
__ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
5886
__ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
5887
__ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
5888
__ psllw($dst$$XMMRegister, 8);
5889
__ psrlw($dst$$XMMRegister, 8);
5890
__ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
5892
ins_pipe( pipe_slow );
5895
instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
5896
predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
5897
match(Set dst (MulVB src1 src2));
5898
effect(TEMP dst, TEMP xtmp);
5899
format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
5901
assert(UseSSE > 3, "required");
5902
// Odd-index elements
5903
__ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
5904
__ psrlw($dst$$XMMRegister, 8);
5905
__ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
5906
__ psrlw($xtmp$$XMMRegister, 8);
5907
__ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
5908
__ psllw($dst$$XMMRegister, 8);
5909
// Even-index elements
5910
__ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
5911
__ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
5912
__ psllw($xtmp$$XMMRegister, 8);
5913
__ psrlw($xtmp$$XMMRegister, 8);
5915
__ por($dst$$XMMRegister, $xtmp$$XMMRegister);
5917
ins_pipe( pipe_slow );
5920
instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
5921
predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
5922
match(Set dst (MulVB src1 src2));
5923
effect(TEMP xtmp1, TEMP xtmp2);
5924
format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
5926
int vlen_enc = vector_length_encoding(this);
5927
// Odd-index elements
5928
__ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
5929
__ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
5930
__ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
5931
__ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
5932
// Even-index elements
5933
__ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5934
__ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
5935
__ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
5937
__ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
5939
ins_pipe( pipe_slow );
5942
// Shorts/Chars vector mul
5943
instruct vmulS(vec dst, vec src) %{
5944
predicate(UseAVX == 0);
5945
match(Set dst (MulVS dst src));
5946
format %{ "pmullw $dst,$src\t! mul packedS" %}
5948
__ pmullw($dst$$XMMRegister, $src$$XMMRegister);
5950
ins_pipe( pipe_slow );
5953
instruct vmulS_reg(vec dst, vec src1, vec src2) %{
5954
predicate(UseAVX > 0);
5955
match(Set dst (MulVS src1 src2));
5956
format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
5958
int vlen_enc = vector_length_encoding(this);
5959
__ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5961
ins_pipe( pipe_slow );
5964
instruct vmulS_mem(vec dst, vec src, memory mem) %{
5965
predicate((UseAVX > 0) &&
5966
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
5967
match(Set dst (MulVS src (LoadVector mem)));
5968
format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
5970
int vlen_enc = vector_length_encoding(this);
5971
__ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5973
ins_pipe( pipe_slow );
5976
// Integers vector mul
5977
instruct vmulI(vec dst, vec src) %{
5978
predicate(UseAVX == 0);
5979
match(Set dst (MulVI dst src));
5980
format %{ "pmulld $dst,$src\t! mul packedI" %}
5982
assert(UseSSE > 3, "required");
5983
__ pmulld($dst$$XMMRegister, $src$$XMMRegister);
5985
ins_pipe( pipe_slow );
5988
instruct vmulI_reg(vec dst, vec src1, vec src2) %{
5989
predicate(UseAVX > 0);
5990
match(Set dst (MulVI src1 src2));
5991
format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
5993
int vlen_enc = vector_length_encoding(this);
5994
__ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5996
ins_pipe( pipe_slow );
5999
instruct vmulI_mem(vec dst, vec src, memory mem) %{
6000
predicate((UseAVX > 0) &&
6001
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
6002
match(Set dst (MulVI src (LoadVector mem)));
6003
format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
6005
int vlen_enc = vector_length_encoding(this);
6006
__ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
6008
ins_pipe( pipe_slow );
6012
instruct evmulL_reg(vec dst, vec src1, vec src2) %{
6013
predicate((Matcher::vector_length_in_bytes(n) == 64 &&
6014
VM_Version::supports_avx512dq()) ||
6015
VM_Version::supports_avx512vldq());
6016
match(Set dst (MulVL src1 src2));
6017
format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
6019
assert(UseAVX > 2, "required");
6020
int vlen_enc = vector_length_encoding(this);
6021
__ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6023
ins_pipe( pipe_slow );
6026
instruct evmulL_mem(vec dst, vec src, memory mem) %{
6027
predicate((Matcher::vector_length_in_bytes(n) == 64 &&
6028
VM_Version::supports_avx512dq()) ||
6029
(Matcher::vector_length_in_bytes(n) > 8 &&
6030
VM_Version::supports_avx512vldq()));
6031
match(Set dst (MulVL src (LoadVector mem)));
6032
format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
6034
assert(UseAVX > 2, "required");
6035
int vlen_enc = vector_length_encoding(this);
6036
__ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
6038
ins_pipe( pipe_slow );
6041
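// Without AVX-512DQ there is no packed 64x64->64 multiply. Writing each lane
// as (hi * 2^32 + lo), the low 64 bits of the product are
// lo1*lo2 + ((lo1*hi2 + hi1*lo2) << 32), which the rules below assemble from
// pmulld/pmuludq, shifts and adds.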
instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
6042
predicate(UseAVX == 0);
6043
match(Set dst (MulVL src1 src2));
6044
effect(TEMP dst, TEMP xtmp);
6045
format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
6047
assert(VM_Version::supports_sse4_1(), "required");
6048
// Get the lo-hi products; only the lower 32 bits are of concern
6049
__ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
6050
__ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
6051
__ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
6052
__ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
6053
__ psllq($dst$$XMMRegister, 32);
6054
// Get the lo-lo products
6055
__ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
6056
__ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
6057
__ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
6059
ins_pipe( pipe_slow );
6062
instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
6063
predicate(UseAVX > 0 &&
6064
((Matcher::vector_length_in_bytes(n) == 64 &&
6065
!VM_Version::supports_avx512dq()) ||
6066
(Matcher::vector_length_in_bytes(n) < 64 &&
6067
!VM_Version::supports_avx512vldq())));
6068
match(Set dst (MulVL src1 src2));
6069
effect(TEMP xtmp1, TEMP xtmp2);
6070
format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
6072
int vlen_enc = vector_length_encoding(this);
6073
// Get the lo-hi products; only the lower 32 bits are of concern
6074
__ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
6075
__ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
6076
__ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
6077
__ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
6078
__ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
6079
// Get the lo-lo products
6080
__ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6081
__ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
6083
ins_pipe( pipe_slow );
6087
instruct vmulF(vec dst, vec src) %{
6088
predicate(UseAVX == 0);
6089
match(Set dst (MulVF dst src));
6090
format %{ "mulps $dst,$src\t! mul packedF" %}
6092
__ mulps($dst$$XMMRegister, $src$$XMMRegister);
6094
ins_pipe( pipe_slow );
6097
instruct vmulF_reg(vec dst, vec src1, vec src2) %{
6098
predicate(UseAVX > 0);
6099
match(Set dst (MulVF src1 src2));
6100
format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
6102
int vlen_enc = vector_length_encoding(this);
6103
__ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6105
ins_pipe( pipe_slow );
6108
instruct vmulF_mem(vec dst, vec src, memory mem) %{
6109
predicate((UseAVX > 0) &&
6110
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
6111
match(Set dst (MulVF src (LoadVector mem)));
6112
format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
6114
int vlen_enc = vector_length_encoding(this);
6115
__ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
6117
ins_pipe( pipe_slow );
6120
// Doubles vector mul
6121
instruct vmulD(vec dst, vec src) %{
6122
predicate(UseAVX == 0);
6123
match(Set dst (MulVD dst src));
6124
format %{ "mulpd $dst,$src\t! mul packedD" %}
6126
__ mulpd($dst$$XMMRegister, $src$$XMMRegister);
6128
ins_pipe( pipe_slow );
6131
instruct vmulD_reg(vec dst, vec src1, vec src2) %{
6132
predicate(UseAVX > 0);
6133
match(Set dst (MulVD src1 src2));
6134
format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
6136
int vlen_enc = vector_length_encoding(this);
6137
__ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6139
ins_pipe( pipe_slow );
6142
instruct vmulD_mem(vec dst, vec src, memory mem) %{
6143
predicate((UseAVX > 0) &&
6144
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
6145
match(Set dst (MulVD src (LoadVector mem)));
6146
format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
6148
int vlen_enc = vector_length_encoding(this);
6149
__ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
6151
ins_pipe( pipe_slow );
6154
// --------------------------------- DIV --------------------------------------
6157
instruct vdivF(vec dst, vec src) %{
6158
predicate(UseAVX == 0);
6159
match(Set dst (DivVF dst src));
6160
format %{ "divps $dst,$src\t! div packedF" %}
6162
__ divps($dst$$XMMRegister, $src$$XMMRegister);
6164
ins_pipe( pipe_slow );
6167
instruct vdivF_reg(vec dst, vec src1, vec src2) %{
6168
predicate(UseAVX > 0);
6169
match(Set dst (DivVF src1 src2));
6170
format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
6172
int vlen_enc = vector_length_encoding(this);
6173
__ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6175
ins_pipe( pipe_slow );
6178
instruct vdivF_mem(vec dst, vec src, memory mem) %{
6179
predicate((UseAVX > 0) &&
6180
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
6181
match(Set dst (DivVF src (LoadVector mem)));
6182
format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
6184
int vlen_enc = vector_length_encoding(this);
6185
__ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
6187
ins_pipe( pipe_slow );
6190
// Doubles vector div
6191
instruct vdivD(vec dst, vec src) %{
6192
predicate(UseAVX == 0);
6193
match(Set dst (DivVD dst src));
6194
format %{ "divpd $dst,$src\t! div packedD" %}
6196
__ divpd($dst$$XMMRegister, $src$$XMMRegister);
6198
ins_pipe( pipe_slow );
6201
instruct vdivD_reg(vec dst, vec src1, vec src2) %{
6202
predicate(UseAVX > 0);
6203
match(Set dst (DivVD src1 src2));
6204
format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
6206
int vlen_enc = vector_length_encoding(this);
6207
__ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6209
ins_pipe( pipe_slow );
6212
instruct vdivD_mem(vec dst, vec src, memory mem) %{
6213
predicate((UseAVX > 0) &&
6214
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
6215
match(Set dst (DivVD src (LoadVector mem)));
6216
format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
6218
int vlen_enc = vector_length_encoding(this);
6219
__ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
6221
ins_pipe( pipe_slow );
6224
// ------------------------------ MinMax ---------------------------------------
6226
// Byte, Short, Int vector Min/Max
6227
instruct minmax_reg_sse(vec dst, vec src) %{
6228
predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
          UseAVX == 0);
match(Set dst (MinV dst src));
6231
match(Set dst (MaxV dst src));
6232
format %{ "vector_minmax $dst,$src\t! " %}
6234
assert(UseSSE >= 4, "required");
6236
int opcode = this->ideal_Opcode();
6237
BasicType elem_bt = Matcher::vector_element_basic_type(this);
6238
__ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
6240
ins_pipe( pipe_slow );
6243
instruct vminmax_reg(vec dst, vec src1, vec src2) %{
6244
predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
          UseAVX > 0);
match(Set dst (MinV src1 src2));
6247
match(Set dst (MaxV src1 src2));
6248
format %{ "vector_minmax $dst,$src1,$src2\t! " %}
6250
int opcode = this->ideal_Opcode();
6251
int vlen_enc = vector_length_encoding(this);
6252
BasicType elem_bt = Matcher::vector_element_basic_type(this);
6254
__ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6256
ins_pipe( pipe_slow );
6259
// Long vector Min/Max
6260
instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
6261
predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
          UseAVX == 0);
match(Set dst (MinV dst src));
6264
match(Set dst (MaxV src dst));
6265
effect(TEMP dst, TEMP tmp);
6266
format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
6268
assert(UseSSE >= 4, "required");
6270
int opcode = this->ideal_Opcode();
6271
BasicType elem_bt = Matcher::vector_element_basic_type(this);
6272
assert(elem_bt == T_LONG, "sanity");
6274
__ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
6276
ins_pipe( pipe_slow );
6279
instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
6280
predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
6281
UseAVX > 0 && !VM_Version::supports_avx512vl());
6282
match(Set dst (MinV src1 src2));
6283
match(Set dst (MaxV src1 src2));
6285
format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
6287
int vlen_enc = vector_length_encoding(this);
6288
int opcode = this->ideal_Opcode();
6289
BasicType elem_bt = Matcher::vector_element_basic_type(this);
6290
assert(elem_bt == T_LONG, "sanity");
6292
__ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6294
ins_pipe( pipe_slow );
6297
instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
6298
predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
6299
Matcher::vector_element_basic_type(n) == T_LONG);
6300
match(Set dst (MinV src1 src2));
6301
match(Set dst (MaxV src1 src2));
6302
format %{ "vector_minmaxL $dst,$src1,src2\t! " %}
6304
assert(UseAVX > 2, "required");
6306
int vlen_enc = vector_length_encoding(this);
6307
int opcode = this->ideal_Opcode();
6308
BasicType elem_bt = Matcher::vector_element_basic_type(this);
6309
assert(elem_bt == T_LONG, "sanity");
6311
__ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6313
ins_pipe( pipe_slow );
6316
// Float/Double vector Min/Max
6317
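// vminmax_fp/evminmax_fp implement Java Math.min/max semantics rather than
// raw minps/maxps: a NaN in either input yields NaN and -0.0 compares below
// +0.0, which is why the extra blend temporaries (and the mask register on
// EVEX) are required.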
instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
6318
predicate(Matcher::vector_length_in_bytes(n) <= 32 &&
6319
is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
UseAVX > 0);
match(Set dst (MinV a b));
6322
match(Set dst (MaxV a b));
6323
effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
6324
format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
6326
assert(UseAVX > 0, "required");
6328
int opcode = this->ideal_Opcode();
6329
int vlen_enc = vector_length_encoding(this);
6330
BasicType elem_bt = Matcher::vector_element_basic_type(this);
6332
__ vminmax_fp(opcode, elem_bt,
6333
$dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
6334
$tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
6336
ins_pipe( pipe_slow );
6339
instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
6340
predicate(Matcher::vector_length_in_bytes(n) == 64 &&
6341
is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
6342
match(Set dst (MinV a b));
6343
match(Set dst (MaxV a b));
6344
effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
6345
format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
6347
assert(UseAVX > 2, "required");
6349
int opcode = this->ideal_Opcode();
6350
int vlen_enc = vector_length_encoding(this);
6351
BasicType elem_bt = Matcher::vector_element_basic_type(this);
6353
__ evminmax_fp(opcode, elem_bt,
6354
$dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
6355
$ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
6357
ins_pipe( pipe_slow );
6360
// --------------------------------- Signum/CopySign ---------------------------
6362
instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
6363
match(Set dst (SignumF dst (Binary zero one)));
6365
format %{ "signumF $dst, $dst" %}
6367
int opcode = this->ideal_Opcode();
6368
__ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
6370
ins_pipe( pipe_slow );
6373
instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
6374
match(Set dst (SignumD dst (Binary zero one)));
6376
format %{ "signumD $dst, $dst" %}
6378
int opcode = this->ideal_Opcode();
6379
__ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
6381
ins_pipe( pipe_slow );
6384
instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
6385
predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
6386
match(Set dst (SignumVF src (Binary zero one)));
6387
match(Set dst (SignumVD src (Binary zero one)));
6388
effect(TEMP dst, TEMP xtmp1);
6389
format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
6391
int opcode = this->ideal_Opcode();
6392
int vec_enc = vector_length_encoding(this);
6393
__ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
6394
$xtmp1$$XMMRegister, vec_enc);
6396
ins_pipe( pipe_slow );
6399
instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
6400
predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
6401
match(Set dst (SignumVF src (Binary zero one)));
6402
match(Set dst (SignumVD src (Binary zero one)));
6403
effect(TEMP dst, TEMP ktmp1);
6404
format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
6406
int opcode = this->ideal_Opcode();
6407
int vec_enc = vector_length_encoding(this);
6408
__ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
6409
$ktmp1$$KRegister, vec_enc);
6411
ins_pipe( pipe_slow );
6414
// ---------------------------------------
6415
// For copySign use 0xE4 as writemask for vpternlog
6416
// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
6417
// C (xmm2) is set to 0x7FFFFFFF
6418
// Wherever xmm2 is 0, we want to pick from B (sign)
6419
// Wherever xmm2 is 1, we want to pick from A (src)
6431
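//   A B C   result
//   1 1 1     1     (C==1, take A)
//   1 1 0     1     (C==0, take B)
//   1 0 1     1     (C==1, take A)
//   1 0 0     0     (C==0, take B)
//   0 1 1     0     (C==1, take A)
//   0 1 0     1     (C==0, take B)
//   0 0 1     0     (C==1, take A)
//   0 0 0     0     (C==0, take B)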
// Result going from high bit to low bit is 0x11100100 = 0xe4
6432
// ---------------------------------------
6435
instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
6436
match(Set dst (CopySignF dst src));
6437
effect(TEMP tmp1, TEMP tmp2);
6438
format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
6440
__ movl($tmp2$$Register, 0x7FFFFFFF);
6441
__ movdl($tmp1$$XMMRegister, $tmp2$$Register);
6442
__ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
6444
ins_pipe( pipe_slow );
6447
instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
6448
match(Set dst (CopySignD dst (Binary src zero)));
6450
effect(TEMP tmp1, TEMP tmp2);
6451
format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
6453
__ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
6454
__ movq($tmp1$$XMMRegister, $tmp2$$Register);
6455
__ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
6457
ins_pipe( pipe_slow );
6462
//----------------------------- CompressBits/ExpandBits ------------------------
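// pext (parallel bit extract) gathers the source bits selected by the mask into the
// low-order bits of the result, and pdep (parallel bit deposit) is its inverse,
// scattering the low-order source bits into the mask positions. A hedged scalar C++
// sketch of the 32-bit pext semantics (illustration only, not the BMI2 encoding):
//
//   uint32_t pext32(uint32_t src, uint32_t mask) {
//     uint32_t res = 0;
//     for (uint32_t bit = 1; mask != 0; mask &= mask - 1, bit <<= 1) {
//       if (src & mask & -mask) res |= bit;   // mask & -mask is the lowest set bit
//     }
//     return res;
//   }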
6464
instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
6465
predicate(n->bottom_type()->isa_int());
6466
match(Set dst (CompressBits src mask));
6467
format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
6469
__ pextl($dst$$Register, $src$$Register, $mask$$Register);
6471
ins_pipe( pipe_slow );
6474
instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
6475
predicate(n->bottom_type()->isa_int());
6476
match(Set dst (ExpandBits src mask));
6477
format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
6479
__ pdepl($dst$$Register, $src$$Register, $mask$$Register);
6481
ins_pipe( pipe_slow );
6484
instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
6485
predicate(n->bottom_type()->isa_int());
6486
match(Set dst (CompressBits src (LoadI mask)));
6487
format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
6489
__ pextl($dst$$Register, $src$$Register, $mask$$Address);
6491
ins_pipe( pipe_slow );
6494
instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
6495
predicate(n->bottom_type()->isa_int());
6496
match(Set dst (ExpandBits src (LoadI mask)));
6497
format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
6499
__ pdepl($dst$$Register, $src$$Register, $mask$$Address);
6501
ins_pipe( pipe_slow );
6504
// --------------------------------- Sqrt --------------------------------------
6506
instruct vsqrtF_reg(vec dst, vec src) %{
6507
match(Set dst (SqrtVF src));
6508
format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
6510
assert(UseAVX > 0, "required");
6511
int vlen_enc = vector_length_encoding(this);
6512
__ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
6514
ins_pipe( pipe_slow );
6517
instruct vsqrtF_mem(vec dst, memory mem) %{
6518
predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
6519
match(Set dst (SqrtVF (LoadVector mem)));
6520
format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
6522
assert(UseAVX > 0, "required");
6523
int vlen_enc = vector_length_encoding(this);
6524
__ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
6526
ins_pipe( pipe_slow );
6529
// Floating point vector sqrt
6530
instruct vsqrtD_reg(vec dst, vec src) %{
6531
match(Set dst (SqrtVD src));
6532
format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
6534
assert(UseAVX > 0, "required");
6535
int vlen_enc = vector_length_encoding(this);
6536
__ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
6538
ins_pipe( pipe_slow );
6541
instruct vsqrtD_mem(vec dst, memory mem) %{
6542
predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
6543
match(Set dst (SqrtVD (LoadVector mem)));
6544
format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
6546
assert(UseAVX > 0, "required");
6547
int vlen_enc = vector_length_encoding(this);
6548
__ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
6550
ins_pipe( pipe_slow );
6553
// ------------------------------ Shift ---------------------------------------
6555
// Left and right shift count vectors are the same on x86
// (only the low bits of the xmm register are used for the count).
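// The shift-by-register forms (psllw/psrlw/psraw and friends) take the count from
// the low 64 bits of the xmm operand, which is why a single movdl of the scalar
// count (as done below) serves every shift flavor. A hedged note on the edge case:
// counts of at least the element width yield 0 for logical shifts and all sign bits
// for arithmetic right shifts. For example, with the count 3 loaded via movdl:
//
//   psllw xmm0, xmm1   shifts each 16-bit lane left by 3
//   psrlw xmm0, xmm1   shifts each 16-bit lane right by 3 with zero fill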
6557
instruct vshiftcnt(vec dst, rRegI cnt) %{
6558
match(Set dst (LShiftCntV cnt));
6559
match(Set dst (RShiftCntV cnt));
6560
format %{ "movdl $dst,$cnt\t! load shift count" %}
6562
__ movdl($dst$$XMMRegister, $cnt$$Register);
6564
ins_pipe( pipe_slow );
6568
instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
6569
predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
6570
match(Set dst ( LShiftVB src shift));
6571
match(Set dst ( RShiftVB src shift));
6572
match(Set dst (URShiftVB src shift));
6573
effect(TEMP dst, USE src, USE shift, TEMP tmp);
6574
format %{"vector_byte_shift $dst,$src,$shift" %}
6576
assert(UseSSE > 3, "required");
6577
int opcode = this->ideal_Opcode();
6578
bool sign = (opcode != Op_URShiftVB);
6579
__ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
6580
__ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
6581
__ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
6582
__ pand($dst$$XMMRegister, $tmp$$XMMRegister);
6583
__ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
6585
ins_pipe( pipe_slow );
6588
instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
6589
predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
6591
match(Set dst ( LShiftVB src shift));
6592
match(Set dst ( RShiftVB src shift));
6593
match(Set dst (URShiftVB src shift));
6594
effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
6595
format %{"vector_byte_shift $dst,$src,$shift" %}
6597
assert(UseSSE > 3, "required");
6598
int opcode = this->ideal_Opcode();
6599
bool sign = (opcode != Op_URShiftVB);
6600
__ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
6601
__ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
6602
__ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
6603
__ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
6604
__ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
6605
__ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
6606
__ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
6607
__ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
6608
__ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
6610
ins_pipe( pipe_slow );
6613
instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
6614
predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
6616
match(Set dst ( LShiftVB src shift));
6617
match(Set dst ( RShiftVB src shift));
6618
match(Set dst (URShiftVB src shift));
6619
effect(TEMP dst, TEMP tmp);
6620
format %{"vector_byte_shift $dst,$src,$shift" %}
6622
int opcode = this->ideal_Opcode();
6623
bool sign = (opcode != Op_URShiftVB);
6624
int vlen_enc = Assembler::AVX_256bit;
6625
__ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
6626
__ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6627
__ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
6628
__ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
6629
__ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
6631
ins_pipe( pipe_slow );
6634
instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
6635
predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
6636
match(Set dst ( LShiftVB src shift));
6637
match(Set dst ( RShiftVB src shift));
6638
match(Set dst (URShiftVB src shift));
6639
effect(TEMP dst, TEMP tmp);
6640
format %{"vector_byte_shift $dst,$src,$shift" %}
6642
assert(UseAVX > 1, "required");
6643
int opcode = this->ideal_Opcode();
6644
bool sign = (opcode != Op_URShiftVB);
6645
int vlen_enc = Assembler::AVX_256bit;
6646
__ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
6647
__ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
6648
__ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
6649
__ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6650
__ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6651
__ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
6652
__ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
6653
__ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
6654
__ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
6656
ins_pipe( pipe_slow );
6659
instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
6660
predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
6661
match(Set dst ( LShiftVB src shift));
6662
match(Set dst (RShiftVB src shift));
6663
match(Set dst (URShiftVB src shift));
6664
effect(TEMP dst, TEMP tmp1, TEMP tmp2);
6665
format %{"vector_byte_shift $dst,$src,$shift" %}
6667
assert(UseAVX > 2, "required");
6668
int opcode = this->ideal_Opcode();
6669
bool sign = (opcode != Op_URShiftVB);
6670
int vlen_enc = Assembler::AVX_512bit;
6671
__ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
6672
__ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
6673
__ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
6674
__ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6675
__ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6676
__ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
6677
__ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
6678
__ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
6679
__ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
6680
__ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
6681
__ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
6682
__ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
6684
ins_pipe( pipe_slow );
6687
// A shorts vector logical right shift produces an incorrect Java result for
// negative data, because Java code converts the short value to an int with sign
// extension before shifting. Char vectors are fine, since chars are unsigned
// values.
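// A worked example of the mismatch (hedged, scalar semantics only): for the Java
// expression (short)(s >>> 1) with s == (short)-2 (0xFFFE), s is first sign-extended
// to the int 0xFFFFFFFE, the logical shift gives 0x7FFFFFFF, and narrowing back to
// short yields 0xFFFF (-1). A 16-bit lane-wise logical shift would instead produce
// 0x7FFF, so it cannot be used for shorts; char lanes are zero-extended and match.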
// Shorts/Chars vector left shift
6692
instruct vshiftS(vec dst, vec src, vec shift) %{
6693
predicate(!n->as_ShiftV()->is_var_shift());
6694
match(Set dst ( LShiftVS src shift));
6695
match(Set dst ( RShiftVS src shift));
6696
match(Set dst (URShiftVS src shift));
6697
effect(TEMP dst, USE src, USE shift);
6698
format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
6700
int opcode = this->ideal_Opcode();
6702
int vlen_enc = vector_length_encoding(this);
6703
__ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6705
int vlen = Matcher::vector_length(this);
6707
__ movflt($dst$$XMMRegister, $src$$XMMRegister);
6708
__ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6709
} else if (vlen == 4) {
6710
__ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6711
__ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6713
assert (vlen == 8, "sanity");
6714
__ movdqu($dst$$XMMRegister, $src$$XMMRegister);
6715
__ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6719
ins_pipe( pipe_slow );
6722
// Integers vector left shift
6723
instruct vshiftI(vec dst, vec src, vec shift) %{
6724
predicate(!n->as_ShiftV()->is_var_shift());
6725
match(Set dst ( LShiftVI src shift));
6726
match(Set dst ( RShiftVI src shift));
6727
match(Set dst (URShiftVI src shift));
6728
effect(TEMP dst, USE src, USE shift);
6729
format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
6731
int opcode = this->ideal_Opcode();
6733
int vlen_enc = vector_length_encoding(this);
6734
__ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6736
int vlen = Matcher::vector_length(this);
6738
__ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6739
__ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6741
assert(vlen == 4, "sanity");
6742
__ movdqu($dst$$XMMRegister, $src$$XMMRegister);
6743
__ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6747
ins_pipe( pipe_slow );
6750
// Integers vector left constant shift
6751
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
6752
match(Set dst (LShiftVI src (LShiftCntV shift)));
6753
match(Set dst (RShiftVI src (RShiftCntV shift)));
6754
match(Set dst (URShiftVI src (RShiftCntV shift)));
6755
format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
6757
int opcode = this->ideal_Opcode();
6759
int vector_len = vector_length_encoding(this);
6760
__ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
6762
int vlen = Matcher::vector_length(this);
6764
__ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6765
__ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
6767
assert(vlen == 4, "sanity");
6768
__ movdqu($dst$$XMMRegister, $src$$XMMRegister);
6769
__ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
6773
ins_pipe( pipe_slow );
6776
// Longs vector shift
6777
instruct vshiftL(vec dst, vec src, vec shift) %{
6778
predicate(!n->as_ShiftV()->is_var_shift());
6779
match(Set dst ( LShiftVL src shift));
6780
match(Set dst (URShiftVL src shift));
6781
effect(TEMP dst, USE src, USE shift);
6782
format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
6784
int opcode = this->ideal_Opcode();
6786
int vlen_enc = vector_length_encoding(this);
6787
__ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6789
assert(Matcher::vector_length(this) == 2, "");
6790
__ movdqu($dst$$XMMRegister, $src$$XMMRegister);
6791
__ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6794
ins_pipe( pipe_slow );
6797
// Longs vector constant shift
6798
instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
6799
match(Set dst (LShiftVL src (LShiftCntV shift)));
6800
match(Set dst (URShiftVL src (RShiftCntV shift)));
6801
format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
6803
int opcode = this->ideal_Opcode();
6805
int vector_len = vector_length_encoding(this);
6806
__ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
6808
assert(Matcher::vector_length(this) == 2, "");
6809
__ movdqu($dst$$XMMRegister, $src$$XMMRegister);
6810
__ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
6813
ins_pipe( pipe_slow );
6816
// -------------------ArithmeticRightShift -----------------------------------
6817
// Long vector arithmetic right shift
6818
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
6819
predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
6820
match(Set dst (RShiftVL src shift));
6821
effect(TEMP dst, TEMP tmp);
6822
format %{ "vshiftq $dst,$src,$shift" %}
6824
uint vlen = Matcher::vector_length(this);
6826
assert(UseSSE >= 2, "required");
6827
__ movdqu($dst$$XMMRegister, $src$$XMMRegister);
6828
__ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
6829
__ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
6830
__ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
6831
__ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
6832
__ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
6834
assert(vlen == 4, "sanity");
6835
assert(UseAVX > 1, "required");
6836
int vlen_enc = Assembler::AVX_256bit;
6837
__ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6838
__ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
6839
__ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6840
__ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
6841
__ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
6844
ins_pipe( pipe_slow );
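// SSE2/AVX2 have no 64-bit arithmetic right shift, so the instruct above builds it
// from a logical shift of both the value and the sign-bit mask. A hedged scalar C++
// sketch of the identity it relies on (illustration only):
//
//   int64_t sra64(int64_t x, int n) {              // 0 <= n < 64
//     uint64_t m = 0x8000000000000000ULL >> n;     // sign bit, logically shifted
//     uint64_t l = (uint64_t)x >> n;               // logical shift of the value
//     return (int64_t)((l ^ m) - m);               // xor/sub restores the sign bits
//   }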
6847
instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
6848
predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
6849
match(Set dst (RShiftVL src shift));
6850
format %{ "vshiftq $dst,$src,$shift" %}
6852
int vlen_enc = vector_length_encoding(this);
6853
__ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6855
ins_pipe( pipe_slow );
6858
// ------------------- Variable Shift -----------------------------
6859
// Byte variable shift
6860
instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
6861
predicate(Matcher::vector_length(n) <= 8 &&
6862
n->as_ShiftV()->is_var_shift() &&
6863
!VM_Version::supports_avx512bw());
6864
match(Set dst ( LShiftVB src shift));
6865
match(Set dst ( RShiftVB src shift));
6866
match(Set dst (URShiftVB src shift));
6867
effect(TEMP dst, TEMP vtmp);
6868
format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
6870
assert(UseAVX >= 2, "required");
6872
int opcode = this->ideal_Opcode();
6873
int vlen_enc = Assembler::AVX_128bit;
6874
__ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
6875
__ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
6877
ins_pipe( pipe_slow );
6880
instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
6881
predicate(Matcher::vector_length(n) == 16 &&
6882
n->as_ShiftV()->is_var_shift() &&
6883
!VM_Version::supports_avx512bw());
6884
match(Set dst ( LShiftVB src shift));
6885
match(Set dst ( RShiftVB src shift));
6886
match(Set dst (URShiftVB src shift));
6887
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
6888
format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
6890
assert(UseAVX >= 2, "required");
6892
int opcode = this->ideal_Opcode();
6893
int vlen_enc = Assembler::AVX_128bit;
6894
// Shift lower half and get word result in dst
6895
__ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
6897
// Shift upper half and get word result in vtmp1
6898
__ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
6899
__ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
6900
__ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
6902
// Merge and down convert the two word results to byte in dst
6903
__ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
6905
ins_pipe( pipe_slow );
6908
instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
6909
predicate(Matcher::vector_length(n) == 32 &&
6910
n->as_ShiftV()->is_var_shift() &&
6911
!VM_Version::supports_avx512bw());
6912
match(Set dst ( LShiftVB src shift));
6913
match(Set dst ( RShiftVB src shift));
6914
match(Set dst (URShiftVB src shift));
6915
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
6916
format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
6918
assert(UseAVX >= 2, "required");
6920
int opcode = this->ideal_Opcode();
6921
int vlen_enc = Assembler::AVX_128bit;
6922
// Process lower 128 bits and get result in dst
6923
__ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
6924
__ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
6925
__ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
6926
__ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
6927
__ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
6929
// Process higher 128 bits and get result in vtmp3
6930
__ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
6931
__ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
6932
__ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
6933
__ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
6934
__ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
6935
__ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
6936
__ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
6938
// Merge the two results in dst
6939
__ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
6941
ins_pipe( pipe_slow );
6944
instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
6945
predicate(Matcher::vector_length(n) <= 32 &&
6946
n->as_ShiftV()->is_var_shift() &&
6947
VM_Version::supports_avx512bw());
6948
match(Set dst ( LShiftVB src shift));
6949
match(Set dst ( RShiftVB src shift));
6950
match(Set dst (URShiftVB src shift));
6951
effect(TEMP dst, TEMP vtmp);
6952
format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
6954
assert(UseAVX > 2, "required");
6956
int opcode = this->ideal_Opcode();
6957
int vlen_enc = vector_length_encoding(this);
6958
__ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
6960
ins_pipe( pipe_slow );
6963
instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
6964
predicate(Matcher::vector_length(n) == 64 &&
6965
n->as_ShiftV()->is_var_shift() &&
6966
VM_Version::supports_avx512bw());
6967
match(Set dst ( LShiftVB src shift));
6968
match(Set dst ( RShiftVB src shift));
6969
match(Set dst (URShiftVB src shift));
6970
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
6971
format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
6973
assert(UseAVX > 2, "required");
6975
int opcode = this->ideal_Opcode();
6976
int vlen_enc = Assembler::AVX_256bit;
6977
__ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
6978
__ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
6979
__ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
6980
__ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
6981
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
6983
ins_pipe( pipe_slow );
6986
// Short variable shift
6987
instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
6988
predicate(Matcher::vector_length(n) <= 8 &&
6989
n->as_ShiftV()->is_var_shift() &&
6990
!VM_Version::supports_avx512bw());
6991
match(Set dst ( LShiftVS src shift));
6992
match(Set dst ( RShiftVS src shift));
6993
match(Set dst (URShiftVS src shift));
6994
effect(TEMP dst, TEMP vtmp);
6995
format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
6997
assert(UseAVX >= 2, "required");
6999
int opcode = this->ideal_Opcode();
7000
bool sign = (opcode != Op_URShiftVS);
7001
int vlen_enc = Assembler::AVX_256bit;
7002
__ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
7003
__ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
7004
__ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
7005
__ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
7006
__ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
7007
__ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
7009
ins_pipe( pipe_slow );
7012
instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
7013
predicate(Matcher::vector_length(n) == 16 &&
7014
n->as_ShiftV()->is_var_shift() &&
7015
!VM_Version::supports_avx512bw());
7016
match(Set dst ( LShiftVS src shift));
7017
match(Set dst ( RShiftVS src shift));
7018
match(Set dst (URShiftVS src shift));
7019
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
7020
format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
7022
assert(UseAVX >= 2, "required");
7024
int opcode = this->ideal_Opcode();
7025
bool sign = (opcode != Op_URShiftVS);
7026
int vlen_enc = Assembler::AVX_256bit;
7027
// Shift lower half, with result in vtmp2 using vtmp1 as TEMP
7028
__ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
7029
__ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
7030
__ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
7031
__ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
7033
// Shift upper half, with result in dst using vtmp1 as TEMP
7034
__ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
7035
__ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
7036
__ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7037
__ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
7038
__ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
7039
__ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
7041
// Merge lower and upper half result into dst
7042
__ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7043
__ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
7045
ins_pipe( pipe_slow );
7048
instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
7049
predicate(n->as_ShiftV()->is_var_shift() &&
7050
VM_Version::supports_avx512bw());
7051
match(Set dst ( LShiftVS src shift));
7052
match(Set dst ( RShiftVS src shift));
7053
match(Set dst (URShiftVS src shift));
7054
format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
7056
assert(UseAVX > 2, "required");
7058
int opcode = this->ideal_Opcode();
7059
int vlen_enc = vector_length_encoding(this);
7060
if (!VM_Version::supports_avx512vl()) {
7061
vlen_enc = Assembler::AVX_512bit;
7063
__ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
7065
ins_pipe( pipe_slow );
7068
//Integer variable shift
7069
instruct vshiftI_var(vec dst, vec src, vec shift) %{
7070
predicate(n->as_ShiftV()->is_var_shift());
7071
match(Set dst ( LShiftVI src shift));
7072
match(Set dst ( RShiftVI src shift));
7073
match(Set dst (URShiftVI src shift));
7074
format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
7076
assert(UseAVX >= 2, "required");
7078
int opcode = this->ideal_Opcode();
7079
int vlen_enc = vector_length_encoding(this);
7080
__ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
7082
ins_pipe( pipe_slow );
7085
//Long variable shift
7086
instruct vshiftL_var(vec dst, vec src, vec shift) %{
7087
predicate(n->as_ShiftV()->is_var_shift());
7088
match(Set dst ( LShiftVL src shift));
7089
match(Set dst (URShiftVL src shift));
7090
format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
7092
assert(UseAVX >= 2, "required");
7094
int opcode = this->ideal_Opcode();
7095
int vlen_enc = vector_length_encoding(this);
7096
__ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
7098
ins_pipe( pipe_slow );
7101
//Long variable right shift arithmetic
7102
instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
7103
predicate(Matcher::vector_length(n) <= 4 &&
7104
n->as_ShiftV()->is_var_shift() &&
7106
match(Set dst (RShiftVL src shift));
7107
effect(TEMP dst, TEMP vtmp);
7108
format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
7110
int opcode = this->ideal_Opcode();
7111
int vlen_enc = vector_length_encoding(this);
7112
__ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
7113
$vtmp$$XMMRegister);
7115
ins_pipe( pipe_slow );
7118
instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
7119
predicate(n->as_ShiftV()->is_var_shift() &&
7121
match(Set dst (RShiftVL src shift));
7122
format %{ "vector_varfshift_long $dst,$src,$shift\t!" %}
7124
int opcode = this->ideal_Opcode();
7125
int vlen_enc = vector_length_encoding(this);
7126
__ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
7128
ins_pipe( pipe_slow );
7131
// --------------------------------- AND --------------------------------------
7133
instruct vand(vec dst, vec src) %{
7134
predicate(UseAVX == 0);
7135
match(Set dst (AndV dst src));
7136
format %{ "pand $dst,$src\t! and vectors" %}
7138
__ pand($dst$$XMMRegister, $src$$XMMRegister);
7140
ins_pipe( pipe_slow );
7143
instruct vand_reg(vec dst, vec src1, vec src2) %{
7144
predicate(UseAVX > 0);
7145
match(Set dst (AndV src1 src2));
7146
format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
7148
int vlen_enc = vector_length_encoding(this);
7149
__ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
7151
ins_pipe( pipe_slow );
7154
instruct vand_mem(vec dst, vec src, memory mem) %{
7155
predicate((UseAVX > 0) &&
7156
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
7157
match(Set dst (AndV src (LoadVector mem)));
7158
format %{ "vpand $dst,$src,$mem\t! and vectors" %}
7160
int vlen_enc = vector_length_encoding(this);
7161
__ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
7163
ins_pipe( pipe_slow );
7166
// --------------------------------- OR ---------------------------------------
7168
instruct vor(vec dst, vec src) %{
7169
predicate(UseAVX == 0);
7170
match(Set dst (OrV dst src));
7171
format %{ "por $dst,$src\t! or vectors" %}
7173
__ por($dst$$XMMRegister, $src$$XMMRegister);
7175
ins_pipe( pipe_slow );
7178
instruct vor_reg(vec dst, vec src1, vec src2) %{
7179
predicate(UseAVX > 0);
7180
match(Set dst (OrV src1 src2));
7181
format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
7183
int vlen_enc = vector_length_encoding(this);
7184
__ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
7186
ins_pipe( pipe_slow );
7189
instruct vor_mem(vec dst, vec src, memory mem) %{
7190
predicate((UseAVX > 0) &&
7191
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
7192
match(Set dst (OrV src (LoadVector mem)));
7193
format %{ "vpor $dst,$src,$mem\t! or vectors" %}
7195
int vlen_enc = vector_length_encoding(this);
7196
__ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
7198
ins_pipe( pipe_slow );
7201
// --------------------------------- XOR --------------------------------------
7203
instruct vxor(vec dst, vec src) %{
7204
predicate(UseAVX == 0);
7205
match(Set dst (XorV dst src));
7206
format %{ "pxor $dst,$src\t! xor vectors" %}
7208
__ pxor($dst$$XMMRegister, $src$$XMMRegister);
7210
ins_pipe( pipe_slow );
7213
instruct vxor_reg(vec dst, vec src1, vec src2) %{
7214
predicate(UseAVX > 0);
7215
match(Set dst (XorV src1 src2));
7216
format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
7218
int vlen_enc = vector_length_encoding(this);
7219
__ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
7221
ins_pipe( pipe_slow );
7224
instruct vxor_mem(vec dst, vec src, memory mem) %{
7225
predicate((UseAVX > 0) &&
7226
(Matcher::vector_length_in_bytes(n->in(1)) > 8));
7227
match(Set dst (XorV src (LoadVector mem)));
7228
format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
7230
int vlen_enc = vector_length_encoding(this);
7231
__ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
7233
ins_pipe( pipe_slow );
7236
// --------------------------------- VectorCast --------------------------------------
7238
instruct vcastBtoX(vec dst, vec src) %{
7239
predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
7240
match(Set dst (VectorCastB2X src));
7241
format %{ "vector_cast_b2x $dst,$src\t!" %}
7243
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7244
int vlen_enc = vector_length_encoding(this);
7245
__ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7247
ins_pipe( pipe_slow );
7250
instruct vcastBtoD(legVec dst, legVec src) %{
7251
predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
7252
match(Set dst (VectorCastB2X src));
7253
format %{ "vector_cast_b2x $dst,$src\t!" %}
7255
int vlen_enc = vector_length_encoding(this);
7256
__ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7258
ins_pipe( pipe_slow );
7261
instruct castStoX(vec dst, vec src) %{
7262
predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
7263
Matcher::vector_length(n->in(1)) <= 8 && // src
7264
Matcher::vector_element_basic_type(n) == T_BYTE);
7265
match(Set dst (VectorCastS2X src));
7266
format %{ "vector_cast_s2x $dst,$src" %}
7268
assert(UseAVX > 0, "required");
7270
__ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
7271
__ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
7273
ins_pipe( pipe_slow );
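// packuswb packs 16-bit lanes to 8-bit lanes with unsigned saturation, so the cast
// above first masks each short to its low byte, turning the saturating pack into a
// plain truncation. A hedged worked example: without the mask, the short 0x0180
// (384) would saturate to 0xFF, whereas (byte)384 is 0x80; after anding with 0x00FF
// the lane holds 0x0080 and packs to the correct 0x80.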
7276
instruct vcastStoX(vec dst, vec src, vec vtmp) %{
7277
predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
7278
Matcher::vector_length(n->in(1)) == 16 && // src
7279
Matcher::vector_element_basic_type(n) == T_BYTE);
7280
effect(TEMP dst, TEMP vtmp);
7281
match(Set dst (VectorCastS2X src));
7282
format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
7284
assert(UseAVX > 0, "required");
7286
int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
7287
__ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
7288
__ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
7289
__ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
7291
ins_pipe( pipe_slow );
7294
instruct vcastStoX_evex(vec dst, vec src) %{
7295
predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
7296
(Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
7297
match(Set dst (VectorCastS2X src));
7298
format %{ "vector_cast_s2x $dst,$src\t!" %}
7300
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7301
int src_vlen_enc = vector_length_encoding(this, $src);
7302
int vlen_enc = vector_length_encoding(this);
7303
switch (to_elem_bt) {
7305
if (!VM_Version::supports_avx512vl()) {
7306
vlen_enc = Assembler::AVX_512bit;
7308
__ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
7311
__ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7314
__ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7315
__ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7318
__ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7321
int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
7322
__ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
7323
__ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7327
ShouldNotReachHere();
7330
ins_pipe( pipe_slow );
7333
instruct castItoX(vec dst, vec src) %{
7334
predicate(UseAVX <= 2 &&
7335
(Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
7336
(Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
7337
match(Set dst (VectorCastI2X src));
7338
format %{ "vector_cast_i2x $dst,$src" %}
7340
assert(UseAVX > 0, "required");
7342
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7343
int vlen_enc = vector_length_encoding(this, $src);
7345
if (to_elem_bt == T_BYTE) {
7346
__ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
7347
__ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7348
__ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7350
assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
7351
__ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
7352
__ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7355
ins_pipe( pipe_slow );
7358
instruct vcastItoX(vec dst, vec src, vec vtmp) %{
7359
predicate(UseAVX <= 2 &&
7360
(Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
7361
(Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
7362
match(Set dst (VectorCastI2X src));
7363
format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
7364
effect(TEMP dst, TEMP vtmp);
7366
assert(UseAVX > 0, "required");
7368
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7369
int vlen_enc = vector_length_encoding(this, $src);
7371
if (to_elem_bt == T_BYTE) {
7372
__ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
7373
__ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
7374
__ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7375
__ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
7377
assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
7378
__ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
7379
__ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
7380
__ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7383
ins_pipe( pipe_slow );
7386
instruct vcastItoX_evex(vec dst, vec src) %{
7387
predicate(UseAVX > 2 ||
7388
(Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
7389
match(Set dst (VectorCastI2X src));
7390
format %{ "vector_cast_i2x $dst,$src\t!" %}
7392
assert(UseAVX > 0, "required");
7394
BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
7395
int src_vlen_enc = vector_length_encoding(this, $src);
7396
int dst_vlen_enc = vector_length_encoding(this);
7397
switch (dst_elem_bt) {
7399
if (!VM_Version::supports_avx512vl()) {
7400
src_vlen_enc = Assembler::AVX_512bit;
7402
__ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
7405
if (!VM_Version::supports_avx512vl()) {
7406
src_vlen_enc = Assembler::AVX_512bit;
7408
__ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
7411
__ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
7414
__ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
7417
__ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
7420
ShouldNotReachHere();
7423
ins_pipe( pipe_slow );
7426
instruct vcastLtoBS(vec dst, vec src) %{
7427
predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
7429
match(Set dst (VectorCastL2X src));
7430
format %{ "vector_cast_l2x $dst,$src" %}
7432
assert(UseAVX > 0, "required");
7434
int vlen = Matcher::vector_length_in_bytes(this, $src);
7435
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7436
AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
7437
: ExternalAddress(vector_int_to_short_mask());
7439
__ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
7440
__ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
7441
__ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
7443
assert(vlen <= 32, "required");
7444
__ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
7445
__ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
7446
__ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
7447
__ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
7449
if (to_elem_bt == T_BYTE) {
7450
__ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
7453
ins_pipe( pipe_slow );
7456
instruct vcastLtoX_evex(vec dst, vec src) %{
7457
predicate(UseAVX > 2 ||
7458
(Matcher::vector_element_basic_type(n) == T_INT ||
7459
Matcher::vector_element_basic_type(n) == T_FLOAT ||
7460
Matcher::vector_element_basic_type(n) == T_DOUBLE));
7461
match(Set dst (VectorCastL2X src));
7462
format %{ "vector_cast_l2x $dst,$src\t!" %}
7464
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7465
int vlen = Matcher::vector_length_in_bytes(this, $src);
7466
int vlen_enc = vector_length_encoding(this, $src);
7467
switch (to_elem_bt) {
7469
if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
7470
vlen_enc = Assembler::AVX_512bit;
7472
__ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7475
if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
7476
vlen_enc = Assembler::AVX_512bit;
7478
__ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7482
if ($dst$$XMMRegister != $src$$XMMRegister) {
7483
__ movflt($dst$$XMMRegister, $src$$XMMRegister);
7485
} else if (vlen == 16) {
7486
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
7487
} else if (vlen == 32) {
7489
if (!VM_Version::supports_avx512vl()) {
7490
vlen_enc = Assembler::AVX_512bit;
7492
__ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7494
__ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
7495
__ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
7497
} else { // vlen == 64
7498
__ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7502
assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
7503
__ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7506
assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
7507
__ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7510
default: assert(false, "%s", type2name(to_elem_bt));
7513
ins_pipe( pipe_slow );
7516
instruct vcastFtoD_reg(vec dst, vec src) %{
7517
predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
7518
match(Set dst (VectorCastF2X src));
7519
format %{ "vector_cast_f2d $dst,$src\t!" %}
7521
int vlen_enc = vector_length_encoding(this);
7522
__ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7524
ins_pipe( pipe_slow );
7528
instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
7529
predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
7530
type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4);
7531
match(Set dst (VectorCastF2X src));
7532
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
7533
format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
7535
int vlen_enc = vector_length_encoding(this, $src);
7536
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7537
// JDK-8292878 removed the need for an explicit scratch register when loading
// addresses wider than 32 bits in register-indirect addressing mode, since stub
// constants live in the code cache and ReservedCodeCacheSize is currently capped
// at 2G. Targets are free to raise that limit, but a code cache larger than 2G is
// unrealistic in practice; on the flip side, with the given cap we save a temporary
// register allocation, which in the limiting case can prevent spilling in blocks
// with high register pressure.
7544
__ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7545
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
7546
ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
7548
ins_pipe( pipe_slow );
7551
instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
7552
predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
7553
is_integral_type(Matcher::vector_element_basic_type(n)));
7554
match(Set dst (VectorCastF2X src));
7555
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
7556
format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
7558
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7559
if (to_elem_bt == T_LONG) {
7560
int vlen_enc = vector_length_encoding(this);
7561
__ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7562
$xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
7563
ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
7565
int vlen_enc = vector_length_encoding(this, $src);
7566
__ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7567
$xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
7568
ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
7571
ins_pipe( pipe_slow );
7574
instruct vcastDtoF_reg(vec dst, vec src) %{
7575
predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
7576
match(Set dst (VectorCastD2X src));
7577
format %{ "vector_cast_d2x $dst,$src\t!" %}
7579
int vlen_enc = vector_length_encoding(this, $src);
7580
__ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7582
ins_pipe( pipe_slow );
7585
instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
7586
predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
7587
is_integral_type(Matcher::vector_element_basic_type(n)));
7588
match(Set dst (VectorCastD2X src));
7589
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
7590
format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
7592
int vlen_enc = vector_length_encoding(this, $src);
7593
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7594
__ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7595
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
7596
ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
7598
ins_pipe( pipe_slow );
7601
instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
7602
predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
7603
is_integral_type(Matcher::vector_element_basic_type(n)));
7604
match(Set dst (VectorCastD2X src));
7605
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
7606
format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
7608
int vlen_enc = vector_length_encoding(this, $src);
7609
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7610
AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
7611
ExternalAddress(vector_float_signflip());
7612
__ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7613
$xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
7615
ins_pipe( pipe_slow );
7618
instruct vucast(vec dst, vec src) %{
7619
match(Set dst (VectorUCastB2X src));
7620
match(Set dst (VectorUCastS2X src));
7621
match(Set dst (VectorUCastI2X src));
7622
format %{ "vector_ucast $dst,$src\t!" %}
7624
assert(UseAVX > 0, "required");
7626
BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
7627
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7628
int vlen_enc = vector_length_encoding(this);
7629
__ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
7631
ins_pipe( pipe_slow );
7635
instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
7636
predicate(!VM_Version::supports_avx512vl() &&
7637
Matcher::vector_length_in_bytes(n) < 64 &&
7638
Matcher::vector_element_basic_type(n) == T_INT);
7639
match(Set dst (RoundVF src));
7640
effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
7641
format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
7643
int vlen_enc = vector_length_encoding(this);
7644
InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
7645
__ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
7646
ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
7647
$tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
7649
ins_pipe( pipe_slow );
7652
instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
7653
predicate((VM_Version::supports_avx512vl() ||
7654
Matcher::vector_length_in_bytes(n) == 64) &&
7655
Matcher::vector_element_basic_type(n) == T_INT);
7656
match(Set dst (RoundVF src));
7657
effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
7658
format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
7660
int vlen_enc = vector_length_encoding(this);
7661
InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
7662
__ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
7663
ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
7664
$tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
7666
ins_pipe( pipe_slow );
7669
instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
7670
predicate(Matcher::vector_element_basic_type(n) == T_LONG);
7671
match(Set dst (RoundVD src));
7672
effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
7673
format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
7675
int vlen_enc = vector_length_encoding(this);
7676
InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
7677
__ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
7678
ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
7679
$tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
7681
ins_pipe( pipe_slow );
7686
// --------------------------------- VectorMaskCmp --------------------------------------
7688
instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
7689
predicate(n->bottom_type()->isa_vectmask() == nullptr &&
7690
Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
7691
Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
7692
is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
7693
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7694
format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
7696
int vlen_enc = vector_length_encoding(this, $src1);
7697
Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
7698
if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
7699
__ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7701
__ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7704
ins_pipe( pipe_slow );
7707
instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
7708
predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
7709
n->bottom_type()->isa_vectmask() == nullptr &&
7710
is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
7711
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7713
format %{ "vector_compare $dst,$src1,$src2,$cond" %}
7715
int vlen_enc = Assembler::AVX_512bit;
7716
Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
7717
KRegister mask = k0; // The comparison itself is not being masked.
7718
if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
7719
__ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7720
__ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
7722
__ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7723
__ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
7726
ins_pipe( pipe_slow );
7729
instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
7730
predicate(n->bottom_type()->isa_vectmask() &&
7731
is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
7732
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7733
format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
7735
assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
7736
int vlen_enc = vector_length_encoding(this, $src1);
7737
Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
7738
KRegister mask = k0; // The comparison itself is not being masked.
7739
if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
7740
__ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7742
__ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7745
ins_pipe( pipe_slow );
7748
instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
predicate(n->bottom_type()->isa_vectmask() == nullptr &&
!Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
(n->in(2)->get_int() == BoolTest::eq ||
n->in(2)->get_int() == BoolTest::lt ||
n->in(2)->get_int() == BoolTest::gt)); // cond
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
int vlen_enc = vector_length_encoding(this, $src1);
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
__ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
ins_pipe( pipe_slow );
instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
predicate(n->bottom_type()->isa_vectmask() == nullptr &&
!Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
(n->in(2)->get_int() == BoolTest::ne ||
n->in(2)->get_int() == BoolTest::le ||
n->in(2)->get_int() == BoolTest::ge)); // cond
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
effect(TEMP dst, TEMP xtmp);
format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
int vlen_enc = vector_length_encoding(this, $src1);
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
__ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
ins_pipe( pipe_slow );
instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
predicate(n->bottom_type()->isa_vectmask() == nullptr &&
Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
effect(TEMP dst, TEMP xtmp);
format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
int vlen_enc = vector_length_encoding(this, $src1);
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
if (vlen_enc == Assembler::AVX_128bit) {
__ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
__ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
__ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
__ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
__ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
ins_pipe( pipe_slow );
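// Editor's note: the unsigned compare above is implemented by biasing both
// operands -- XOR-ing each lane with its most-significant ("sign") bit -- so
// that an ordinary signed compare yields the unsigned result. A minimal scalar
// sketch of the same trick (illustrative C++ only, not part of this file):
//
//   #include <cstdint>
//   bool unsigned_lt_via_signed(uint32_t a, uint32_t b) {
//     int32_t sa = (int32_t)(a ^ 0x80000000u);  // flip sign bit
//     int32_t sb = (int32_t)(b ^ 0x80000000u);
//     return sa < sb;                           // equals (a < b) as unsigned
//   }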
7816
instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
predicate((n->bottom_type()->isa_vectmask() == nullptr &&
Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
format %{ "vector_compare $dst,$src1,$src2,$cond" %}
assert(UseAVX > 2, "required");
int vlen_enc = vector_length_encoding(this, $src1);
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
KRegister mask = k0; // The comparison itself is not being masked.
BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
switch (src1_elem_bt) {
__ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
__ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
__ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
__ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
default: assert(false, "%s", type2name(src1_elem_bt));
ins_pipe( pipe_slow );
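// Editor's note: for 512-bit operands the compare first produces a k-mask (one
// bit per lane) and then expands it back into a lane-wise all-ones/all-zeros
// vector with a masked move of an all-bits-set constant. A rough scalar sketch
// of that expansion, assuming the move zero-masks unset lanes (illustrative
// only; 'lanes', 'kmask' and 'dst' are placeholder names):
//
//   for (int i = 0; i < lanes; i++) {
//     dst[i] = ((kmask >> i) & 1) ? -1 : 0;   // -1 == all bits set in the lane
//   }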
7851
instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
predicate(n->bottom_type()->isa_vectmask() &&
is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
assert(UseAVX > 2, "required");
assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
int vlen_enc = vector_length_encoding(this, $src1);
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
switch (src1_elem_bt) {
__ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
__ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
__ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
__ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
default: assert(false, "%s", type2name(src1_elem_bt));
ins_pipe( pipe_slow );
7891
instruct extractI(rRegI dst, legVec src, immU8 idx) %{
predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
match(Set dst (ExtractI src idx));
match(Set dst (ExtractS src idx));
match(Set dst (ExtractB src idx));
format %{ "extractI $dst,$src,$idx\t!" %}
assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
__ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
ins_pipe( pipe_slow );
instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
match(Set dst (ExtractI src idx));
match(Set dst (ExtractS src idx));
match(Set dst (ExtractB src idx));
format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
__ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
ins_pipe( pipe_slow );
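// Editor's note: for 256/512-bit sources the element is fetched in two steps --
// get_lane() first copies the 128-bit lane holding the element into $vtmp, then
// get_elem() picks the element out of that lane. A rough sketch of the index
// arithmetic involved (illustrative only; elem_size is the element size in
// bytes and is a placeholder name):
//
//   int elems_per_lane = 16 / elem_size;        // elements per 128-bit lane
//   int lane           = idx / elems_per_lane;  // lane extracted by get_lane()
//   int idx_in_lane    = idx % elems_per_lane;  // offset used by get_elem()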
7929
instruct extractL(rRegL dst, legVec src, immU8 idx) %{
predicate(Matcher::vector_length(n->in(1)) <= 2); // src
match(Set dst (ExtractL src idx));
format %{ "extractL $dst,$src,$idx\t!" %}
assert(UseSSE >= 4, "required");
assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
__ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
ins_pipe( pipe_slow );
instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
predicate(Matcher::vector_length(n->in(1)) == 4 || // src
Matcher::vector_length(n->in(1)) == 8); // src
match(Set dst (ExtractL src idx));
format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
__ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
ins_pipe( pipe_slow );
instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
predicate(Matcher::vector_length(n->in(1)) <= 4);
match(Set dst (ExtractF src idx));
effect(TEMP dst, TEMP vtmp);
format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
__ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
ins_pipe( pipe_slow );
instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
Matcher::vector_length(n->in(1)/*src*/) == 16);
match(Set dst (ExtractF src idx));
format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
__ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
ins_pipe( pipe_slow );
instruct extractD(legRegD dst, legVec src, immU8 idx) %{
predicate(Matcher::vector_length(n->in(1)) == 2); // src
match(Set dst (ExtractD src idx));
format %{ "extractD $dst,$src,$idx\t!" %}
assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
__ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
ins_pipe( pipe_slow );
instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
predicate(Matcher::vector_length(n->in(1)) == 4 || // src
Matcher::vector_length(n->in(1)) == 8); // src
match(Set dst (ExtractD src idx));
format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
__ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
ins_pipe( pipe_slow );
8013
// --------------------------------- Vector Blend --------------------------------------
instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
predicate(UseAVX == 0);
match(Set dst (VectorBlend (Binary dst src) mask));
format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
assert(UseSSE >= 4, "required");
if ($mask$$XMMRegister != $tmp$$XMMRegister) {
__ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
__ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
ins_pipe( pipe_slow );
instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
Matcher::vector_length_in_bytes(n) <= 32 &&
is_integral_type(Matcher::vector_element_basic_type(n)));
match(Set dst (VectorBlend (Binary src1 src2) mask));
format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
int vlen_enc = vector_length_encoding(this);
__ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
ins_pipe( pipe_slow );
instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
Matcher::vector_length_in_bytes(n) <= 32 &&
!is_integral_type(Matcher::vector_element_basic_type(n)));
match(Set dst (VectorBlend (Binary src1 src2) mask));
format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
int vlen_enc = vector_length_encoding(this);
__ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
ins_pipe( pipe_slow );
instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
predicate(UseAVX > 0 && EnableX86ECoreOpts &&
n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
Matcher::vector_length_in_bytes(n) <= 32);
match(Set dst (VectorBlend (Binary src1 src2) mask));
format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
effect(TEMP vtmp, TEMP dst);
int vlen_enc = vector_length_encoding(this);
__ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
__ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
__ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
ins_pipe( pipe_slow );
instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
predicate(Matcher::vector_length_in_bytes(n) == 64 &&
n->in(2)->bottom_type()->isa_vectmask() == nullptr);
match(Set dst (VectorBlend (Binary src1 src2) mask));
format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
int vlen_enc = Assembler::AVX_512bit;
BasicType elem_bt = Matcher::vector_element_basic_type(this);
__ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
__ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
ins_pipe( pipe_slow );
instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
predicate(n->in(2)->bottom_type()->isa_vectmask() &&
(!is_subword_type(Matcher::vector_element_basic_type(n)) ||
VM_Version::supports_avx512bw()));
match(Set dst (VectorBlend (Binary src1 src2) mask));
format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
int vlen_enc = vector_length_encoding(this);
BasicType elem_bt = Matcher::vector_element_basic_type(this);
__ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
ins_pipe( pipe_slow );
8105
// --------------------------------- ABS --------------------------------------
8107
instruct vabsB_reg(vec dst, vec src) %{
8108
match(Set dst (AbsVB src));
8109
format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
8111
uint vlen = Matcher::vector_length(this);
8113
__ pabsb($dst$$XMMRegister, $src$$XMMRegister);
8115
int vlen_enc = vector_length_encoding(this);
8116
__ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8119
ins_pipe( pipe_slow );
8122
instruct vabsS_reg(vec dst, vec src) %{
8123
match(Set dst (AbsVS src));
8124
format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
8126
uint vlen = Matcher::vector_length(this);
8128
__ pabsw($dst$$XMMRegister, $src$$XMMRegister);
8130
int vlen_enc = vector_length_encoding(this);
8131
__ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8134
ins_pipe( pipe_slow );
8137
instruct vabsI_reg(vec dst, vec src) %{
8138
match(Set dst (AbsVI src));
8139
format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
8141
uint vlen = Matcher::vector_length(this);
8143
__ pabsd($dst$$XMMRegister, $src$$XMMRegister);
8145
int vlen_enc = vector_length_encoding(this);
8146
__ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8149
ins_pipe( pipe_slow );
8152
instruct vabsL_reg(vec dst, vec src) %{
8153
match(Set dst (AbsVL src));
8154
format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
8156
assert(UseAVX > 2, "required");
8157
int vlen_enc = vector_length_encoding(this);
8158
if (!VM_Version::supports_avx512vl()) {
8159
vlen_enc = Assembler::AVX_512bit;
8161
__ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8163
ins_pipe( pipe_slow );
8166
// --------------------------------- ABSNEG --------------------------------------
8168
instruct vabsnegF(vec dst, vec src) %{
8169
predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
8170
match(Set dst (AbsVF src));
8171
match(Set dst (NegVF src));
8172
format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
8175
int opcode = this->ideal_Opcode();
8176
int vlen = Matcher::vector_length(this);
8178
__ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
8180
assert(vlen == 8 || vlen == 16, "required");
8181
int vlen_enc = vector_length_encoding(this);
8182
__ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8185
ins_pipe( pipe_slow );
8188
instruct vabsneg4F(vec dst) %{
8189
predicate(Matcher::vector_length(n) == 4);
8190
match(Set dst (AbsVF dst));
8191
match(Set dst (NegVF dst));
8192
format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
8195
int opcode = this->ideal_Opcode();
8196
__ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
8198
ins_pipe( pipe_slow );
8201
instruct vabsnegD(vec dst, vec src) %{
8202
match(Set dst (AbsVD src));
8203
match(Set dst (NegVD src));
8204
format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
8206
int opcode = this->ideal_Opcode();
8207
uint vlen = Matcher::vector_length(this);
8209
assert(UseSSE >= 2, "required");
8210
__ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
8212
int vlen_enc = vector_length_encoding(this);
8213
__ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8216
ins_pipe( pipe_slow );
8219
//------------------------------------- VectorTest --------------------------------------------
8222
instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
8223
predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
8224
match(Set cr (VectorTest src1 src2));
8226
format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
8228
BasicType bt = Matcher::vector_element_basic_type(this, $src1);
8229
int vlen = Matcher::vector_length_in_bytes(this, $src1);
8230
__ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
8232
ins_pipe( pipe_slow );
8235
instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
8236
predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
8237
match(Set cr (VectorTest src1 src2));
8238
format %{ "vptest_ge16 $src1, $src2\n\t" %}
8240
BasicType bt = Matcher::vector_element_basic_type(this, $src1);
8241
int vlen = Matcher::vector_length_in_bytes(this, $src1);
8242
__ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
8244
ins_pipe( pipe_slow );
8247
instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
predicate((Matcher::vector_length(n->in(1)) < 8 ||
(Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
match(Set cr (VectorTest src1 src2));
format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
uint masklen = Matcher::vector_length(this, $src1);
__ kmovwl($tmp$$Register, $src1$$KRegister);
__ andl($tmp$$Register, (1 << masklen) - 1);
__ cmpl($tmp$$Register, (1 << masklen) - 1);
ins_pipe( pipe_slow );
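// Editor's note: the "all true" test copies the k-mask into a GPR, keeps only
// the low masklen bits, and compares against a value with all of those bits
// set; the resulting flags tell the matcher whether every lane was true. A
// minimal scalar sketch (illustrative only; masklen <= 8 here):
//
//   bool all_true(unsigned kbits, int masklen) {
//     unsigned m = (1u << masklen) - 1;
//     return (kbits & m) == m;
//   }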
8263
instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
8264
predicate((Matcher::vector_length(n->in(1)) < 8 ||
8265
(Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
8266
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
8267
match(Set cr (VectorTest src1 src2));
8269
format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
8271
uint masklen = Matcher::vector_length(this, $src1);
8272
__ kmovwl($tmp$$Register, $src1$$KRegister);
8273
__ andl($tmp$$Register, (1 << masklen) - 1);
8275
ins_pipe( pipe_slow );
8278
instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
8279
predicate(Matcher::vector_length(n->in(1)) >= 16 ||
8280
(Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
8281
match(Set cr (VectorTest src1 src2));
8282
format %{ "ktest_ge8 $src1, $src2\n\t" %}
8284
uint masklen = Matcher::vector_length(this, $src1);
8285
__ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
8287
ins_pipe( pipe_slow );
8291
//------------------------------------- LoadMask --------------------------------------------
8293
instruct loadMask(legVec dst, legVec src) %{
8294
predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
8295
match(Set dst (VectorLoadMask src));
8297
format %{ "vector_loadmask_byte $dst, $src\n\t" %}
8299
int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
8300
BasicType elem_bt = Matcher::vector_element_basic_type(this);
8301
__ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
8303
ins_pipe( pipe_slow );
8306
instruct loadMask64(kReg dst, vec src, vec xtmp) %{
8307
predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
8308
match(Set dst (VectorLoadMask src));
8310
format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
8312
__ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
8313
true, Assembler::AVX_512bit);
8315
ins_pipe( pipe_slow );
8318
instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
8319
predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
8320
match(Set dst (VectorLoadMask src));
8322
format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
8324
int vlen_enc = vector_length_encoding(in(1));
8325
__ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
8328
ins_pipe( pipe_slow );
8331
//------------------------------------- StoreMask --------------------------------------------
8333
instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
8334
predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
8335
match(Set dst (VectorStoreMask src size));
8336
format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8338
int vlen = Matcher::vector_length(this);
8339
if (vlen <= 16 && UseAVX <= 2) {
8340
assert(UseSSE >= 3, "required");
8341
__ pabsb($dst$$XMMRegister, $src$$XMMRegister);
8343
assert(UseAVX > 0, "required");
8344
int src_vlen_enc = vector_length_encoding(this, $src);
8345
__ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
8348
ins_pipe( pipe_slow );
8351
instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
match(Set dst (VectorStoreMask src size));
effect(TEMP_DEF dst, TEMP xtmp);
format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
int vlen_enc = Assembler::AVX_128bit;
int vlen = Matcher::vector_length(this);
assert(UseSSE >= 3, "required");
__ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
__ pabsw($dst$$XMMRegister, $src$$XMMRegister);
__ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
assert(UseAVX > 0, "required");
__ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
__ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
__ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
ins_pipe( pipe_slow );
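// Editor's note: VectorStoreMask narrows each boolean lane (0 or -1) to a
// single byte holding 0 or 1; for short lanes this is an abs (-1 -> 1) combined
// with a pack down to bytes. A rough scalar sketch of the intended result
// (illustrative only; 'in', 'out' and 'lanes' are placeholder names):
//
//   for (int i = 0; i < lanes; i++) {
//     out[i] = in[i] ? 1 : 0;   // in[i] is a 16-bit 0/-1 lane, out[i] a byte
//   }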
8374
instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
8375
predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
8376
match(Set dst (VectorStoreMask src size));
8377
format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8378
effect(TEMP_DEF dst, TEMP xtmp);
8380
int vlen_enc = Assembler::AVX_128bit;
8381
int vlen = Matcher::vector_length(this);
8383
assert(UseSSE >= 3, "required");
8384
__ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
8385
__ pabsd($dst$$XMMRegister, $src$$XMMRegister);
8386
__ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
8387
__ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
8389
assert(UseAVX > 0, "required");
8390
__ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
8391
__ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
8392
__ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
8393
__ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
8394
__ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
8397
ins_pipe( pipe_slow );
8400
instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
8401
predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
8402
match(Set dst (VectorStoreMask src size));
8403
effect(TEMP_DEF dst, TEMP xtmp);
8404
format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8406
assert(UseSSE >= 3, "required");
8407
__ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
8408
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
8409
__ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
8410
__ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
8411
__ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
8413
ins_pipe( pipe_slow );
8416
instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
8417
predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
8418
match(Set dst (VectorStoreMask src size));
8419
format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
8420
effect(TEMP_DEF dst, TEMP vtmp);
8422
int vlen_enc = Assembler::AVX_128bit;
8423
__ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
8424
__ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
8425
__ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
8426
__ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8427
__ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8428
__ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8429
__ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
8431
ins_pipe( pipe_slow );
8434
instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
8435
predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
8436
match(Set dst (VectorStoreMask src size));
8437
format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8439
int src_vlen_enc = vector_length_encoding(this, $src);
8440
int dst_vlen_enc = vector_length_encoding(this);
8441
if (!VM_Version::supports_avx512vl()) {
8442
src_vlen_enc = Assembler::AVX_512bit;
8444
__ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
8445
__ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
8447
ins_pipe( pipe_slow );
8450
instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
8451
predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
8452
match(Set dst (VectorStoreMask src size));
8453
format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8455
int src_vlen_enc = vector_length_encoding(this, $src);
8456
int dst_vlen_enc = vector_length_encoding(this);
8457
if (!VM_Version::supports_avx512vl()) {
8458
src_vlen_enc = Assembler::AVX_512bit;
8460
__ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
8461
__ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
8463
ins_pipe( pipe_slow );
8466
instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
8467
predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
8468
match(Set dst (VectorStoreMask mask size));
8469
effect(TEMP_DEF dst);
8470
format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
8472
assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
8473
__ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
8474
false, Assembler::AVX_512bit, noreg);
8475
__ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
8477
ins_pipe( pipe_slow );
8480
instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
8481
predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
8482
match(Set dst (VectorStoreMask mask size));
8483
effect(TEMP_DEF dst);
8484
format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
8486
int dst_vlen_enc = vector_length_encoding(this);
8487
__ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
8488
__ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
8490
ins_pipe( pipe_slow );
8493
instruct vmaskcast_evex(kReg dst) %{
8494
match(Set dst (VectorMaskCast dst));
8496
format %{ "vector_mask_cast $dst" %}
8503
instruct vmaskcast(vec dst) %{
8504
predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
8505
match(Set dst (VectorMaskCast dst));
8507
format %{ "vector_mask_cast $dst" %}
8514
instruct vmaskcast_avx(vec dst, vec src) %{
8515
predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
8516
match(Set dst (VectorMaskCast src));
8517
format %{ "vector_mask_cast $dst, $src" %}
8519
int vlen = Matcher::vector_length(this);
8520
BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
8521
BasicType dst_bt = Matcher::vector_element_basic_type(this);
8522
__ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
8524
ins_pipe(pipe_slow);
8527
//-------------------------------- Load Iota Indices ----------------------------------
8529
instruct loadIotaIndices(vec dst, immI_0 src) %{
8530
match(Set dst (VectorLoadConst src));
8531
format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
8533
int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
8534
BasicType bt = Matcher::vector_element_basic_type(this);
8535
__ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
8537
ins_pipe( pipe_slow );
8541
instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
match(Set dst (PopulateIndex src1 src2));
effect(TEMP dst, TEMP vtmp);
format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
assert($src2$$constant == 1, "required");
int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
int vlen_enc = vector_length_encoding(this);
BasicType elem_bt = Matcher::vector_element_basic_type(this);
__ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
__ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
__ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
ins_pipe( pipe_slow );
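// Editor's note: PopulateIndex is "broadcast the start value, load the iota
// constant (0,1,2,...), add". A minimal scalar sketch of what the three
// instructions above compute (illustrative only; the stride $src2 is
// constrained to 1 by the assert):
//
//   for (int i = 0; i < lanes; i++) {
//     dst[i] = src1 + i;
//   }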
8557
instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
8558
match(Set dst (PopulateIndex src1 src2));
8559
effect(TEMP dst, TEMP vtmp);
8560
format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
8562
assert($src2$$constant == 1, "required");
8563
int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
8564
int vlen_enc = vector_length_encoding(this);
8565
BasicType elem_bt = Matcher::vector_element_basic_type(this);
8566
__ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
8567
__ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
8568
__ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8570
ins_pipe( pipe_slow );
8573
//-------------------------------- Rearrange ----------------------------------
8575
// LoadShuffle/Rearrange for Byte
8577
instruct loadShuffleB(vec dst) %{
8578
predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
8579
match(Set dst (VectorLoadShuffle dst));
8580
format %{ "vector_load_shuffle $dst, $dst" %}
8584
ins_pipe( pipe_slow );
8587
instruct rearrangeB(vec dst, vec shuffle) %{
8588
predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
8589
Matcher::vector_length(n) < 32);
8590
match(Set dst (VectorRearrange dst shuffle));
8591
format %{ "vector_rearrange $dst, $shuffle, $dst" %}
8593
assert(UseSSE >= 4, "required");
8594
__ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
8596
ins_pipe( pipe_slow );
8599
instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
match(Set dst (VectorRearrange src shuffle));
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
assert(UseAVX >= 2, "required");
// Swap src into vtmp1
__ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
// Shuffle swapped src to get entries from other 128 bit lane
__ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
// Shuffle original src to get entries from self 128 bit lane
__ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
// Create a blend mask by setting high bits for entries coming from other lane in shuffle
__ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
// Perform the blend
__ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
ins_pipe( pipe_slow );
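// Editor's note: vpshufb only shuffles within each 128-bit lane, so the 256-bit
// byte rearrange above uses two in-lane shuffles (one of the source as-is, one
// of the lane-swapped source) and blends them, selecting per byte whichever
// copy actually holds the requested element. The net effect is a true
// cross-lane shuffle; a rough scalar sketch (illustrative only):
//
//   for (int i = 0; i < 32; i++) {
//     dst[i] = src[shuffle[i] & 31];
//   }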
8622
instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
8623
predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
8624
Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
8625
match(Set dst (VectorRearrange src shuffle));
8626
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
8627
format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
8629
int vlen_enc = vector_length_encoding(this);
8630
__ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
8631
$xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
8632
$rtmp$$Register, $ktmp$$KRegister, vlen_enc);
8634
ins_pipe( pipe_slow );
8637
instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
8638
predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
8639
Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
8640
match(Set dst (VectorRearrange src shuffle));
8641
format %{ "vector_rearrange $dst, $shuffle, $src" %}
8643
int vlen_enc = vector_length_encoding(this);
8644
__ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
8646
ins_pipe( pipe_slow );
8649
// LoadShuffle/Rearrange for Short
8651
instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
Matcher::vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS
match(Set dst (VectorLoadShuffle src));
effect(TEMP dst, TEMP vtmp);
format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
// Create a byte shuffle mask from the short shuffle mask;
// only a byte shuffle instruction is available on these platforms.
int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
assert(vlen_in_bytes <= 16, "required");
// Multiply each shuffle by two to get byte index
__ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister);
__ psllw($vtmp$$XMMRegister, 1);
// Duplicate to create 2 copies of byte index
__ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
__ psllw($dst$$XMMRegister, 8);
__ por($dst$$XMMRegister, $vtmp$$XMMRegister);
// Add one to get alternate byte index
__ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
__ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
int vlen_enc = vector_length_encoding(this);
// Multiply each shuffle by two to get byte index
__ vpmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
__ vpsllw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc);
// Duplicate to create 2 copies of byte index
__ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
__ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
// Add one to get alternate byte index
__ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
ins_pipe( pipe_slow );
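// Editor's note: without AVX512BW there is no 16-bit element shuffle, so each
// short shuffle index s is expanded into the byte-index pair (2*s, 2*s + 1)
// that pshufb can consume. A minimal scalar sketch of the expansion performed
// above (illustrative only; array names are placeholders):
//
//   for (int i = 0; i < n_shorts; i++) {
//     byte_idx[2*i]     = 2 * short_idx[i];
//     byte_idx[2*i + 1] = 2 * short_idx[i] + 1;
//   }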
8693
instruct rearrangeS(vec dst, vec shuffle) %{
8694
predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
8695
Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
8696
match(Set dst (VectorRearrange dst shuffle));
8697
format %{ "vector_rearrange $dst, $shuffle, $dst" %}
8699
assert(UseSSE >= 4, "required");
8700
__ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
8702
ins_pipe( pipe_slow );
8705
instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
8706
predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
8707
Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
8708
match(Set dst (VectorRearrange src shuffle));
8709
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
8710
format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
8712
assert(UseAVX >= 2, "required");
8713
// Swap src into vtmp1
8714
__ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
8715
// Shuffle swapped src to get entries from other 128 bit lane
8716
__ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
8717
// Shuffle original src to get entries from self 128 bit lane
8718
__ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
8719
// Create a blend mask by setting high bits for entries coming from other lane in shuffle
8720
__ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
8721
// Perform the blend
8722
__ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
8724
ins_pipe( pipe_slow );
8727
instruct loadShuffleS_evex(vec dst, vec src) %{
8728
predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
8729
VM_Version::supports_avx512bw());
8730
match(Set dst (VectorLoadShuffle src));
8731
format %{ "vector_load_shuffle $dst, $src" %}
8733
int vlen_enc = vector_length_encoding(this);
8734
if (!VM_Version::supports_avx512vl()) {
8735
vlen_enc = Assembler::AVX_512bit;
8737
__ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8739
ins_pipe( pipe_slow );
8742
instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
8743
predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
8744
VM_Version::supports_avx512bw());
8745
match(Set dst (VectorRearrange src shuffle));
8746
format %{ "vector_rearrange $dst, $shuffle, $src" %}
8748
int vlen_enc = vector_length_encoding(this);
8749
if (!VM_Version::supports_avx512vl()) {
8750
vlen_enc = Assembler::AVX_512bit;
8752
__ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
8754
ins_pipe( pipe_slow );
8757
// LoadShuffle/Rearrange for Integer and Float
8759
instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
8760
predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
8761
Matcher::vector_length(n) == 4 && UseAVX == 0);
8762
match(Set dst (VectorLoadShuffle src));
8763
effect(TEMP dst, TEMP vtmp);
8764
format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
8766
assert(UseSSE >= 4, "required");
8768
// Create a byte shuffle mask from int shuffle mask
8769
// only byte shuffle instruction available on these platforms
8771
// Duplicate and multiply each shuffle by 4
8772
__ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister);
8773
__ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
8774
__ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
8775
__ psllw($vtmp$$XMMRegister, 2);
8777
// Duplicate again to create 4 copies of byte index
8778
__ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
8779
__ psllw($dst$$XMMRegister, 8);
8780
__ por($vtmp$$XMMRegister, $dst$$XMMRegister);
8782
// Add 3,2,1,0 to get alternate byte index
8783
__ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
8784
__ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
8786
ins_pipe( pipe_slow );
8789
instruct rearrangeI(vec dst, vec shuffle) %{
8790
predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
8792
match(Set dst (VectorRearrange dst shuffle));
8793
format %{ "vector_rearrange $dst, $shuffle, $dst" %}
8795
assert(UseSSE >= 4, "required");
8796
__ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
8798
ins_pipe( pipe_slow );
8801
instruct loadShuffleI_avx(vec dst, vec src) %{
8802
predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
8804
match(Set dst (VectorLoadShuffle src));
8805
format %{ "vector_load_shuffle $dst, $src" %}
8807
int vlen_enc = vector_length_encoding(this);
8808
__ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8810
ins_pipe( pipe_slow );
8813
instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
8814
predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
8816
match(Set dst (VectorRearrange src shuffle));
8817
format %{ "vector_rearrange $dst, $shuffle, $src" %}
8819
int vlen_enc = vector_length_encoding(this);
8820
BasicType bt = Matcher::vector_element_basic_type(this);
8821
__ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
8823
ins_pipe( pipe_slow );
8826
// LoadShuffle/Rearrange for Long and Double
8828
instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
8829
predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
8830
Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
8831
match(Set dst (VectorLoadShuffle src));
8832
effect(TEMP dst, TEMP vtmp);
8833
format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
8835
assert(UseAVX >= 2, "required");
8837
int vlen_enc = vector_length_encoding(this);
8838
// Create a double word shuffle mask from long shuffle mask
8839
// only double word shuffle instruction available on these platforms
8841
// Multiply each shuffle by two to get double word index
8842
__ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
8843
__ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc);
8845
// Duplicate each double word shuffle
8846
__ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
8847
__ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8849
// Add one to get alternate double word index
8850
__ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
8852
ins_pipe( pipe_slow );
8855
instruct rearrangeL(vec dst, vec src, vec shuffle) %{
8856
predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
8857
Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
8858
match(Set dst (VectorRearrange src shuffle));
8859
format %{ "vector_rearrange $dst, $shuffle, $src" %}
8861
assert(UseAVX >= 2, "required");
8863
int vlen_enc = vector_length_encoding(this);
8864
__ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
8866
ins_pipe( pipe_slow );
8869
instruct loadShuffleL_evex(vec dst, vec src) %{
8870
predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
8871
(Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
8872
match(Set dst (VectorLoadShuffle src));
8873
format %{ "vector_load_shuffle $dst, $src" %}
8875
assert(UseAVX > 2, "required");
8877
int vlen_enc = vector_length_encoding(this);
8878
__ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8880
ins_pipe( pipe_slow );
8883
instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
8884
predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
8885
(Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
8886
match(Set dst (VectorRearrange src shuffle));
8887
format %{ "vector_rearrange $dst, $shuffle, $src" %}
8889
assert(UseAVX > 2, "required");
8891
int vlen_enc = vector_length_encoding(this);
8892
if (vlen_enc == Assembler::AVX_128bit) {
8893
vlen_enc = Assembler::AVX_256bit;
8895
__ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
8897
ins_pipe( pipe_slow );
8900
// --------------------------------- FMA --------------------------------------
8903
instruct vfmaF_reg(vec a, vec b, vec c) %{
8904
match(Set c (FmaVF c (Binary a b)));
8905
format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
8908
assert(UseFMA, "not enabled");
8909
int vlen_enc = vector_length_encoding(this);
8910
__ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
8912
ins_pipe( pipe_slow );
8915
instruct vfmaF_mem(vec a, memory b, vec c) %{
8916
predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
8917
match(Set c (FmaVF c (Binary a (LoadVector b))));
8918
format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
8921
assert(UseFMA, "not enabled");
8922
int vlen_enc = vector_length_encoding(this);
8923
__ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
8925
ins_pipe( pipe_slow );
8928
instruct vfmaD_reg(vec a, vec b, vec c) %{
8929
match(Set c (FmaVD c (Binary a b)));
8930
format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
8933
assert(UseFMA, "not enabled");
8934
int vlen_enc = vector_length_encoding(this);
8935
__ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
8937
ins_pipe( pipe_slow );
8940
instruct vfmaD_mem(vec a, memory b, vec c) %{
8941
predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
8942
match(Set c (FmaVD c (Binary a (LoadVector b))));
8943
format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
8946
assert(UseFMA, "not enabled");
8947
int vlen_enc = vector_length_encoding(this);
8948
__ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
8950
ins_pipe( pipe_slow );
8953
// --------------------------------- Vector Multiply Add --------------------------------------
8955
instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
8956
predicate(UseAVX == 0);
8957
match(Set dst (MulAddVS2VI dst src1));
8958
format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
8960
__ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
8962
ins_pipe( pipe_slow );
8965
instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
8966
predicate(UseAVX > 0);
8967
match(Set dst (MulAddVS2VI src1 src2));
8968
format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
8970
int vlen_enc = vector_length_encoding(this);
8971
__ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
8973
ins_pipe( pipe_slow );
8976
// --------------------------------- Vector Multiply Add Add ----------------------------------
8978
instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
8979
predicate(VM_Version::supports_avx512_vnni());
8980
match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
8981
format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
8983
assert(UseAVX > 2, "required");
8984
int vlen_enc = vector_length_encoding(this);
8985
__ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
8987
ins_pipe( pipe_slow );
8991
// --------------------------------- PopCount --------------------------------------
8993
instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
8994
predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
8995
match(Set dst (PopCountVI src));
8996
match(Set dst (PopCountVL src));
8997
format %{ "vector_popcount_integral $dst, $src" %}
8999
int opcode = this->ideal_Opcode();
9000
int vlen_enc = vector_length_encoding(this, $src);
9001
BasicType bt = Matcher::vector_element_basic_type(this, $src);
9002
__ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
9004
ins_pipe( pipe_slow );
9007
instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
9008
predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
9009
match(Set dst (PopCountVI src mask));
9010
match(Set dst (PopCountVL src mask));
9011
format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
9013
int vlen_enc = vector_length_encoding(this, $src);
9014
BasicType bt = Matcher::vector_element_basic_type(this, $src);
9015
__ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
9016
__ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
9018
ins_pipe( pipe_slow );
9021
instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
9022
predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
9023
match(Set dst (PopCountVI src));
9024
match(Set dst (PopCountVL src));
9025
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
9026
format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
9028
int opcode = this->ideal_Opcode();
9029
int vlen_enc = vector_length_encoding(this, $src);
9030
BasicType bt = Matcher::vector_element_basic_type(this, $src);
9031
__ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9032
$xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
9034
ins_pipe( pipe_slow );
9037
// --------------------------------- Vector Trailing Zeros Count --------------------------------------
9039
instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
9040
predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
9041
Matcher::vector_length_in_bytes(n->in(1))));
9042
match(Set dst (CountTrailingZerosV src));
9043
effect(TEMP dst, TEMP xtmp, TEMP rtmp);
9045
format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
9047
int vlen_enc = vector_length_encoding(this, $src);
9048
BasicType bt = Matcher::vector_element_basic_type(this, $src);
9049
__ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
9050
xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
9052
ins_pipe( pipe_slow );
9055
instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
9056
predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
9057
VM_Version::supports_avx512cd() &&
9058
(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
9059
match(Set dst (CountTrailingZerosV src));
9060
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
9062
format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
9064
int vlen_enc = vector_length_encoding(this, $src);
9065
BasicType bt = Matcher::vector_element_basic_type(this, $src);
9066
__ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9067
$xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
9069
ins_pipe( pipe_slow );
9072
instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
9073
predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
9074
match(Set dst (CountTrailingZerosV src));
9075
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
9077
format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
9079
int vlen_enc = vector_length_encoding(this, $src);
9080
BasicType bt = Matcher::vector_element_basic_type(this, $src);
9081
__ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9082
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
9083
$ktmp$$KRegister, $rtmp$$Register, vlen_enc);
9085
ins_pipe( pipe_slow );
9088
instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
9089
predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
9090
match(Set dst (CountTrailingZerosV src));
9091
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
9092
format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
9094
int vlen_enc = vector_length_encoding(this, $src);
9095
BasicType bt = Matcher::vector_element_basic_type(this, $src);
9096
__ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9097
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
9099
ins_pipe( pipe_slow );
9103
// --------------------------------- Bitwise Ternary Logic ----------------------------------
9105
instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
9106
match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
9108
format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
9110
int vector_len = vector_length_encoding(this);
9111
__ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
9113
ins_pipe( pipe_slow );
9116
instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
9117
predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
9118
match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
9120
format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
9122
int vector_len = vector_length_encoding(this);
9123
__ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
9125
ins_pipe( pipe_slow );
9128
// --------------------------------- Rotation Operations ----------------------------------
9129
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
9130
match(Set dst (RotateLeftV src shift));
9131
match(Set dst (RotateRightV src shift));
9132
format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
9134
int opcode = this->ideal_Opcode();
9135
int vector_len = vector_length_encoding(this);
9136
BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
9137
__ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
9139
ins_pipe( pipe_slow );
9142
instruct vprorate(vec dst, vec src, vec shift) %{
9143
match(Set dst (RotateLeftV src shift));
9144
match(Set dst (RotateRightV src shift));
9145
format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
9147
int opcode = this->ideal_Opcode();
9148
int vector_len = vector_length_encoding(this);
9149
BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
9150
__ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9152
ins_pipe( pipe_slow );
9155
// ---------------------------------- Masked Operations ------------------------------------
9156
instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
9157
predicate(!n->in(3)->bottom_type()->isa_vectmask());
9158
match(Set dst (LoadVectorMasked mem mask));
9159
format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
9161
BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
9162
int vlen_enc = vector_length_encoding(this);
9163
__ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
9165
ins_pipe( pipe_slow );
9169
instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
9170
predicate(n->in(3)->bottom_type()->isa_vectmask());
9171
match(Set dst (LoadVectorMasked mem mask));
9172
format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
9174
BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
9175
int vector_len = vector_length_encoding(this);
9176
__ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
9178
ins_pipe( pipe_slow );
9181
instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
9182
predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
9183
match(Set mem (StoreVectorMasked mem (Binary src mask)));
9184
format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
9186
const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
9187
int vlen_enc = vector_length_encoding(src_node);
9188
BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
9189
__ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
9191
ins_pipe( pipe_slow );
9194
instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
9195
predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
9196
match(Set mem (StoreVectorMasked mem (Binary src mask)));
9197
format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
9199
const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
9200
BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
9201
int vlen_enc = vector_length_encoding(src_node);
9202
__ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
9204
ins_pipe( pipe_slow );
9208
instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
9209
match(Set addr (VerifyVectorAlignment addr mask));
9211
format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
9214
// check if masked bits of addr are zero
9215
__ testq($addr$$Register, $mask$$constant);
9216
__ jccb(Assembler::equal, Lskip);
9217
__ stop("verify_vector_alignment found a misaligned vector memory access");
9220
ins_pipe(pipe_slow);
9223
instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
int vlen_enc = vector_length_encoding(this, $src1);
BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
__ knotql($ktmp2$$KRegister, $mask$$KRegister);
__ mov64($dst$$Register, -1L);
__ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
__ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
__ jccb(Assembler::carrySet, DONE);
__ kmovql($dst$$Register, $ktmp1$$KRegister);
__ notq($dst$$Register);
__ tzcntq($dst$$Register, $dst$$Register);
ins_pipe( pipe_slow );
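// Editor's note: VectorCmpMasked yields -1 when every lane selected by $mask
// compares equal, otherwise the index of the first mismatching lane (the mask
// here is typically a prefix mask produced by VectorMaskGen). A rough scalar
// sketch, assuming a 64-bit mask and a placeholder count_trailing_zeros()
// helper (illustrative only):
//
//   uint64_t eq = equal_bits & mask_bits;          // ktmp1: equal, selected lanes
//   if ((eq | ~mask_bits) == ~0ULL) return -1;     // kortest carry: all selected lanes equal
//   return (long)count_trailing_zeros(~eq);        // first lane that did not match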
9249
instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
9250
match(Set dst (VectorMaskGen len));
9251
effect(TEMP temp, KILL cr);
9252
format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
9254
__ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
9256
ins_pipe( pipe_slow );
9259
instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
match(Set dst (VectorMaskGen len));
format %{ "vector_mask_gen $len \t! vector mask generator" %}
__ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant)));
__ kmovql($dst$$KRegister, $temp$$Register);
ins_pipe( pipe_slow );
9270
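
// The VectorMask query rules below (VectorMaskToLong, VectorMaskTrueCount,
// VectorMaskFirstTrue/LastTrue) come in three flavours: an "evex" form that reads a kReg
// predicate directly, and "bool"/"avx" forms that operate on a boolean-vector encoding of the
// mask. At the Java level these back VectorMask.toLong(), trueCount(), firstTrue() and
// lastTrue() (illustrative mapping, not stated in this file).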

instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP dst, KILL cr);
  format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $dst$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskToLong mask));
  format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
  format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Compress/Expand Operations ---------------------------
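// Note (illustrative, not taken from the original sources): CompressV/ExpandV gather the lanes
// selected by a mask into contiguous lanes (or scatter them back out), and CompressM compresses
// the mask itself. On AVX-512 targets these map directly to vpcompress/vpexpand; on AVX2 they
// are emulated with a permutation-table lookup, which is why the AVX rule below needs the extra
// perm/xtmp/rtmp/rscratch temporaries.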

instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
                                   $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "");
    int mask_len = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
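// Note (illustrative, not taken from the original sources): ReverseV reverses the bit order
// within each lane and ReverseBytesV reverses the byte order within each lane. With GFNI the
// bit reversal is a single affine transform against the 0x8040201008040201 matrix constant
// loaded below; without GFNI it falls back to shift/mask sequences using the listed temps.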

instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, 0x8040201008040201L, 1));
    __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
                               $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  format %{ "vector_reverse_byte $dst, $src" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Vector Count Leading Zeros -----------------------------------
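// Note (illustrative, not taken from the original sources): CountLeadingZerosV computes a
// per-lane leading-zero count. The EVEX rules can use vplzcntd/vplzcntq (AVX512CD) directly for
// int/long lanes, while the subword and AVX-only rules synthesize the count from conversions
// and table lookups, which is why they need the extra xtmp/ktmp/rtmp operands.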

instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                       xnoreg, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
                                       xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Vector Masked Operations ------------------------------------
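// Note (illustrative, not taken from the original sources): each rule below matches a lanewise
// node whose extra input is a kReg predicate and emits the corresponding AVX-512 instruction
// with the kReg as write-mask. Most rules request merge semantics (the boolean flag passed to
// evmasked_op), so lanes with a clear mask bit keep the previous contents of $dst. At the Java
// level this corresponds to the masked forms of the Vector API lanewise operations, e.g.
// something like a.add(b, m).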

instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    switch (src1_elem_bt) {
      case T_BYTE: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}
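
// Summary comment (added for readability, not taken from the original sources): the rules
// below handle mask housekeeping. MaskAll broadcasts a scalar condition into a kReg,
// XorVMask against MaskAll(-1) implements mask negation via knot, VectorLongToMask rebuilds
// a mask from a long value, and AndVMask/OrVMask/XorVMask are the lanewise mask logicals.
// See the individual predicates for the exact CPU feature requirements.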

instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, xnoreg, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    assert(mask_len <= 32, "invalid mask length");
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_vectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
    uint masklen = Matcher::vector_length(this);
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
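
// Note (illustrative, not taken from the original sources): IsInfiniteF/IsInfiniteD are
// implemented with the AVX-512DQ vfpclass instructions below; the 0x18 immediate selects the
// "+infinity" and "-infinity" classes, and the resulting one-bit mask is moved to a GPR to
// produce the 0/1 answer.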

instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);