pytorch

Форк
0
/
xnnpack.buck.bzl 
2692 строки · 79.5 Кб
1
load("//tools/build_defs:fb_xplat_cxx_library.bzl", "fb_xplat_cxx_library")
2
load("//tools/build_defs:fbsource_utils.bzl", "is_arvr_mode")
3
load("//tools/build_defs:glob_defs.bzl", "subdir_glob")
4
load("//tools/build_defs:platform_defs.bzl", "ANDROID", "APPLE", "APPLETVOS", "CXX", "IOS", "MACOSX", "WINDOWS")
5
load(
6
    ":xnnpack_src_defs.bzl",
7
    "LOGGING_SRCS",
8
    "OPERATOR_SRCS",
9
    "SUBGRAPH_SRCS",
10
    "TABLE_SRCS",
11
    "XNNPACK_SRCS",
12
)
13
load(
14
    ":xnnpack_wrapper_defs.bzl",
15
    "AARCH32_ASM_MICROKERNEL_SRCS",
16
    "AARCH64_ASM_MICROKERNEL_SRCS",
17
    "PROD_ARMSIMD32_MICROKERNEL_SRCS",
18
    "PROD_AVX2_MICROKERNEL_SRCS",
19
    "PROD_AVX512F_MICROKERNEL_SRCS",
20
    "PROD_AVX512SKX_MICROKERNEL_SRCS",
21
    "PROD_AVX512VBMI_MICROKERNEL_SRCS",
22
    "PROD_AVX512VNNI_MICROKERNEL_SRCS",
23
    "PROD_AVX512VNNIGFNI_MICROKERNEL_SRCS",
24
    "PROD_AVXVNNI_MICROKERNEL_SRCS",
25
    "PROD_AVX_MICROKERNEL_SRCS",
26
    "PROD_F16C_MICROKERNEL_SRCS",
27
    "PROD_FMA3_MICROKERNEL_SRCS",
28
    "PROD_FP16ARITH_MICROKERNEL_SRCS",
29
    "PROD_NEONDOTFP16ARITH_AARCH64_MICROKERNEL_SRCS",
30
    "PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS",
31
    "PROD_NEONDOT_AARCH64_MICROKERNEL_SRCS",
32
    "PROD_NEONDOT_MICROKERNEL_SRCS",
33
    "PROD_NEONFMA_MICROKERNEL_SRCS",
34
    "PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS",
35
    "PROD_NEONFP16ARITH_MICROKERNEL_SRCS",
36
    "PROD_NEONFP16_MICROKERNEL_SRCS",
37
    "PROD_NEONI8MM_MICROKERNEL_SRCS",
38
    "PROD_NEONV8_MICROKERNEL_SRCS",
39
    "PROD_NEON_AARCH64_MICROKERNEL_SRCS",
40
    "PROD_NEON_MICROKERNEL_SRCS",
41
    "PROD_SCALAR_MICROKERNEL_SRCS",
42
    "PROD_SSE2_MICROKERNEL_SRCS",
43
    "PROD_SSE41_MICROKERNEL_SRCS",
44
    "PROD_SSE_MICROKERNEL_SRCS",
45
    "PROD_SSSE3_MICROKERNEL_SRCS",
46
    "PROD_XOP_MICROKERNEL_SRCS",
47
)
48

49
# This defines XNNPACK targets for both fbsource BUCK and OSS BUCK
50
# Note that file paths are relative to the BUCK file that calls this macro, not to this bzl file.
51
# So for fbsource build it points to xplat/third-party/XNNPACK/XNNPACK,
52
# and for OSS it points to pytorch/third_party/XNNPACK
53
def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = False):
54
    WINDOWS_FLAGS = [
55
        "/D__x86_64__",
56
        "/EHsc",
57
        "/wd4090",  # 'function': different 'const' qualifiers
58
        "/wd4146",  # unary minus operator applied to unsigned type, result still unsigned
59
    ] + ([
60
        "/D__AVX512F__",  # needed to avoid linkage errors
61
        "-mavx2",
62
        "/D__builtin_clz=__lzcnt",  # Intrinsics are spelled differently in MSVC
63
        "/Drestrict=",  # MSVC doesn't understand [restrict XNN_NUM_ELEMENTS(N)] syntax
64
    ] if XNNPACK_WINDOWS_AVX512F_ENABLED else [])
65

66
    WINDOWS_CLANG_COMPILER_FLAGS = [
67
        "-Wno-error",
68
        "-Wno-error=undef",
69
        "-Wno-error=incompatible-pointer-types",
70
        "-Wno-error=incompatible-pointer-types-discards-qualifiers",
71
    ]
72

73
    fb_xplat_cxx_library(
74
        name = "interface",
75
        header_namespace = "",
76
        exported_headers = {
77
            "xnnpack.h": "XNNPACK/include/xnnpack.h",
78
        },
79
        apple_sdks = (IOS, MACOSX, APPLETVOS),
80
        labels = labels,
81
        preprocessor_flags = [
82
            "-DXNN_LOG_LEVEL=0",
83
        ],
84
        visibility = ["PUBLIC"],
85
        exported_deps = [
86
            # Dependency only on pthreadpool interface
87
            third_party("pthreadpool_header"),
88
        ],
89
    )
90

91
    fb_xplat_cxx_library(
92
        name = "subgraph",
93
        srcs = SUBGRAPH_SRCS,
94
        headers = subdir_glob([
95
            ("XNNPACK/src", "**/*.h"),
96
        ]),
97
        header_namespace = "",
98
        apple_sdks = (IOS, MACOSX, APPLETVOS),
99
        compiler_flags = [
100
            "-O2",
101
        ],
102
        fbobjc_preprocessor_flags = [
103
            "-DXNN_PRIVATE=",
104
            "-DXNN_INTERNAL=",
105
        ],
106
        labels = labels,
107
        preferred_linkage = "static",
108
        preprocessor_flags = [
109
            "-DXNN_LOG_LEVEL=0",
110
            "-DXNN_ENABLE_SPARSE=0",
111
            "-DXNN_ENABLE_MEMOPT",
112
        ],
113
        visibility = ["PUBLIC"],
114
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
115
        windows_compiler_flags_override = WINDOWS_FLAGS,
116
        deps = [
117
            ":interface",
118
            third_party("FP16"),
119
            third_party("FXdiv"),
120
            third_party("clog"),
121
        ],
122
    )
123

124
    fb_xplat_cxx_library(
125
        name = "tables",
126
        srcs = TABLE_SRCS,
127
        headers = subdir_glob([
128
            ("XNNPACK/src", "**/*.h"),
129
        ]),
130
        header_namespace = "",
131
        apple_sdks = (IOS, MACOSX, APPLETVOS),
132
        compiler_flags = [
133
            "-O2",
134
        ],
135
        fbobjc_preprocessor_flags = [
136
            "-DXNN_PRIVATE=",
137
            "-DXNN_INTERNAL=",
138
        ],
139
        labels = labels,
140
        preferred_linkage = "static",
141
        preprocessor_flags = [
142
            "-DXNN_LOG_LEVEL=0",
143
        ],
144
        visibility = ["PUBLIC"],
145
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
146
        windows_compiler_flags_override = WINDOWS_FLAGS,
147
        deps = [
148
            ":interface",
149
            third_party("FP16"),
150
            third_party("FXdiv"),
151
            third_party("clog"),
152
        ],
153
    )
154

155
    fb_xplat_cxx_library(
156
        name = "ukernels_scalar",
157
        srcs = PROD_SCALAR_MICROKERNEL_SRCS,
158
        headers = subdir_glob([
159
            ("XNNPACK/src", "**/*.c"),
160
            ("XNNPACK/src", "**/*.h"),
161
        ]),
162
        header_namespace = "",
163
        apple_sdks = (IOS, MACOSX, APPLETVOS),
164
        compiler_flags = [
165
            "-O2",
166
            "-fno-fast-math",
167
            "-fno-math-errno",
168
            "-ffp-contract=off",
169
        ],
170
        fbobjc_preprocessor_flags = [
171
            "-DXNN_PRIVATE=",
172
            "-DXNN_INTERNAL=",
173
        ],
174
        labels = labels,
175
        preferred_linkage = "static",
176
        preprocessor_flags = [
177
            "-DXNN_LOG_LEVEL=0",
178
        ],
179
        visibility = ["PUBLIC"],
180
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
181
        windows_compiler_flags_override = WINDOWS_FLAGS,
182
        deps = [
183
            ":interface",
184
            third_party("FP16"),
185
            third_party("FXdiv"),
186
        ],
187
    )
188

189
    fb_xplat_cxx_library(
190
        name = "ukernels_sse",
191
        srcs = PROD_SSE_MICROKERNEL_SRCS if is_arvr_mode() else [],
192
        headers = subdir_glob([
193
            ("XNNPACK/src", "**/*.c"),
194
            ("XNNPACK/src", "**/*.h"),
195
        ]),
196
        header_namespace = "",
197
        apple_sdks = (IOS, MACOSX, APPLETVOS),
198
        compiler_flags = [
199
            "-O2",
200
        ],
201
        fbobjc_preprocessor_flags = [
202
            "-DXNN_PRIVATE=",
203
            "-DXNN_INTERNAL=",
204
        ],
205
        labels = labels,
206
        platform_compiler_flags = [
207
            (
208
                "x86",
209
                [
210
                    "-msse",
211
                ],
212
            ),
213
        ],
214
        platform_srcs = ([
215
            (
216
                "x86|x86_64|platform009|platform010",
217
                PROD_SSE_MICROKERNEL_SRCS,
218
            ),
219
        ] if not is_arvr_mode() else []),
220
        preferred_linkage = "static",
221
        preprocessor_flags = [
222
            "-DXNN_LOG_LEVEL=0",
223
        ],
224
        visibility = ["PUBLIC"],
225
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-msse"],
226
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-msse"],
227
        deps = [
228
            ":interface",
229
        ],
230
    )
231

232
    fb_xplat_cxx_library(
233
        name = "ukernels_sse_ovr_win32",
234
        headers = subdir_glob([
235
            ("XNNPACK/src", "**/*.c"),
236
            ("XNNPACK/src", "**/*.h"),
237
        ]),
238
        header_namespace = "",
239
        apple_sdks = (IOS, MACOSX, APPLETVOS),
240
        compiler_flags = [
241
            "-O2",
242
        ],
243
        fbobjc_preprocessor_flags = [
244
            "-DXNN_PRIVATE=",
245
            "-DXNN_INTERNAL=",
246
        ],
247
        labels = labels,
248
        platform_compiler_flags = [
249
            (
250
                "x86",
251
                [
252
                    "-msse",
253
                ],
254
            ),
255
        ],
256
        preferred_linkage = "static",
257
        preprocessor_flags = [
258
            "-DXNN_LOG_LEVEL=0",
259
        ],
260
        visibility = ["PUBLIC"],
261
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-msse"],
262
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-msse"],
263
        windows_srcs = PROD_SSE_MICROKERNEL_SRCS,
264
        deps = [
265
            ":interface",
266
        ],
267
    )
268

269
    fb_xplat_cxx_library(
270
        name = "ukernels_sse2",
271
        srcs = PROD_SSE2_MICROKERNEL_SRCS if is_arvr_mode() else [],
272
        headers = subdir_glob([
273
            ("XNNPACK/src", "**/*.c"),
274
            ("XNNPACK/src", "**/*.h"),
275
        ]),
276
        header_namespace = "",
277
        apple_sdks = (IOS, MACOSX, APPLETVOS),
278
        compiler_flags = [
279
            "-O2",
280
        ],
281
        fbobjc_preprocessor_flags = [
282
            "-DXNN_PRIVATE=",
283
            "-DXNN_INTERNAL=",
284
        ],
285
        labels = labels,
286
        platform_compiler_flags = [
287
            (
288
                "x86",
289
                [
290
                    "-msse2",
291
                ],
292
            ),
293
        ],
294
        platform_srcs = ([
295
            (
296
                "x86|x86_64|platform009|platform010",
297
                PROD_SSE2_MICROKERNEL_SRCS,
298
            ),
299
        ] if not is_arvr_mode() else []),
300
        preferred_linkage = "static",
301
        preprocessor_flags = [
302
            "-DXNN_LOG_LEVEL=0",
303
        ],
304
        visibility = ["PUBLIC"],
305
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-msse2"],
306
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-msse2"],
307
        deps = [
308
            ":interface",
309
            third_party("FP16"),
310
        ],
311
    )
312

313
    fb_xplat_cxx_library(
314
        name = "ukernels_sse2_ovr_win32",
315
        headers = subdir_glob([
316
            ("XNNPACK/src", "**/*.c"),
317
            ("XNNPACK/src", "**/*.h"),
318
        ]),
319
        header_namespace = "",
320
        apple_sdks = (IOS, MACOSX, APPLETVOS),
321
        compiler_flags = [
322
            "-O2",
323
        ],
324
        fbobjc_preprocessor_flags = [
325
            "-DXNN_PRIVATE=",
326
            "-DXNN_INTERNAL=",
327
        ],
328
        labels = labels,
329
        platform_compiler_flags = [
330
            (
331
                "x86",
332
                [
333
                    "-msse2",
334
                ],
335
            ),
336
        ],
337
        preferred_linkage = "static",
338
        preprocessor_flags = [
339
            "-DXNN_LOG_LEVEL=0",
340
        ],
341
        visibility = ["PUBLIC"],
342
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-msse2"],
343
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-msse2"],
344
        windows_srcs = PROD_SSE2_MICROKERNEL_SRCS,
345
        deps = [
346
            ":interface",
347
            third_party("FP16"),
348
        ],
349
    )
350

351
    fb_xplat_cxx_library(
352
        name = "ukernels_ssse3",
353
        srcs = PROD_SSSE3_MICROKERNEL_SRCS if is_arvr_mode() else [],
354
        headers = subdir_glob([
355
            ("XNNPACK/src", "**/*.c"),
356
            ("XNNPACK/src", "**/*.h"),
357
        ]),
358
        header_namespace = "",
359
        apple_sdks = (IOS, MACOSX, APPLETVOS),
360
        compiler_flags = [
361
            "-O2",
362
        ],
363
        fbobjc_preprocessor_flags = [
364
            "-DXNN_PRIVATE=",
365
            "-DXNN_INTERNAL=",
366
        ],
367
        labels = labels,
368
        platform_compiler_flags = [
369
            (
370
                "x86",
371
                [
372
                    "-mssse3",
373
                ],
374
            ),
375
        ],
376
        platform_srcs = ([
377
            (
378
                "x86|x86_64|platform009|platform010",
379
                PROD_SSSE3_MICROKERNEL_SRCS,
380
            ),
381
        ] if not is_arvr_mode() else []),
382
        preferred_linkage = "static",
383
        preprocessor_flags = [
384
            "-DXNN_LOG_LEVEL=0",
385
        ],
386
        visibility = ["PUBLIC"],
387
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mssse3"],
388
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-mssse3"],
389
        deps = [
390
            ":interface",
391
            third_party("FP16"),
392
        ],
393
    )
394

395
    fb_xplat_cxx_library(
396
        name = "ukernels_ssse3_ovr_win32",
397
        headers = subdir_glob([
398
            ("XNNPACK/src", "**/*.c"),
399
            ("XNNPACK/src", "**/*.h"),
400
        ]),
401
        header_namespace = "",
402
        apple_sdks = (IOS, MACOSX, APPLETVOS),
403
        compiler_flags = [
404
            "-O2",
405
        ],
406
        fbobjc_preprocessor_flags = [
407
            "-DXNN_PRIVATE=",
408
            "-DXNN_INTERNAL=",
409
        ],
410
        labels = labels,
411
        platform_compiler_flags = [
412
            (
413
                "x86",
414
                [
415
                    "-mssse3",
416
                ],
417
            ),
418
        ],
419
        preferred_linkage = "static",
420
        preprocessor_flags = [
421
            "-DXNN_LOG_LEVEL=0",
422
        ],
423
        visibility = ["PUBLIC"],
424
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mssse3"],
425
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-mssse3"],
426
        windows_srcs = PROD_SSSE3_MICROKERNEL_SRCS,
427
        deps = [
428
            ":interface",
429
            third_party("FP16"),
430
        ],
431
    )
432

433
    fb_xplat_cxx_library(
434
        name = "ukernels_sse41",
435
        srcs = PROD_SSE41_MICROKERNEL_SRCS if is_arvr_mode() else [],
436
        headers = subdir_glob([
437
            ("XNNPACK/src", "**/*.c"),
438
            ("XNNPACK/src", "**/*.h"),
439
        ]),
440
        header_namespace = "",
441
        apple_sdks = (IOS, MACOSX, APPLETVOS),
442
        compiler_flags = [
443
            "-O2",
444
        ],
445
        fbobjc_preprocessor_flags = [
446
            "-DXNN_PRIVATE=",
447
            "-DXNN_INTERNAL=",
448
        ],
449
        labels = labels,
450
        platform_compiler_flags = [
451
            (
452
                "x86",
453
                [
454
                    "-msse4.1",
455
                ],
456
            ),
457
        ],
458
        platform_srcs = ([
459
            (
460
                "x86|x86_64|platform009|platform010",
461
                PROD_SSE41_MICROKERNEL_SRCS,
462
            ),
463
        ] if not is_arvr_mode() else []),
464
        preferred_linkage = "static",
465
        preprocessor_flags = [
466
            "-DXNN_LOG_LEVEL=0",
467
        ],
468
        visibility = ["PUBLIC"],
469
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-msse4.1"],
470
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-msse4.1"],
471
        deps = [
472
            ":interface",
473
            third_party("FP16"),
474
        ],
475
    )
476

477
    fb_xplat_cxx_library(
478
        name = "ukernels_sse41_ovr_win32",
479
        headers = subdir_glob([
480
            ("XNNPACK/src", "**/*.c"),
481
            ("XNNPACK/src", "**/*.h"),
482
        ]),
483
        header_namespace = "",
484
        apple_sdks = (IOS, MACOSX, APPLETVOS),
485
        compiler_flags = [
486
            "-O2",
487
        ],
488
        fbobjc_preprocessor_flags = [
489
            "-DXNN_PRIVATE=",
490
            "-DXNN_INTERNAL=",
491
        ],
492
        labels = labels,
493
        platform_compiler_flags = [
494
            (
495
                "x86",
496
                [
497
                    "-msse4.1",
498
                ],
499
            ),
500
        ],
501
        preferred_linkage = "static",
502
        preprocessor_flags = [
503
            "-DXNN_LOG_LEVEL=0",
504
        ],
505
        visibility = ["PUBLIC"],
506
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-msse4.1"],
507
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-msse4.1"],
508
        windows_srcs = PROD_SSE41_MICROKERNEL_SRCS,
509
        deps = [
510
            ":interface",
511
            third_party("FP16"),
512
        ],
513
    )
514

515
    fb_xplat_cxx_library(
516
        name = "ukernels_avx",
517
        srcs = PROD_AVX_MICROKERNEL_SRCS if is_arvr_mode() else [],
518
        headers = subdir_glob([
519
            ("XNNPACK/src", "**/*.h"),
520
            ("XNNPACK/src", "**/*.c"),
521
        ]),
522
        header_namespace = "",
523
        apple_sdks = (IOS, MACOSX, APPLETVOS),
524
        compiler_flags = [
525
            "-O2",
526
        ] + select({
527
            "DEFAULT": [],
528
            "ovr_config//cpu:x86_32": [
529
                "-mavx",
530
            ],
531
            "ovr_config//cpu:x86_64": [
532
                "-mavx",
533
            ],
534
        }),
535
        fbobjc_preprocessor_flags = [
536
            "-DXNN_PRIVATE=",
537
            "-DXNN_INTERNAL=",
538
        ],
539
        labels = labels,
540
        platform_compiler_flags = [
541
            (
542
                "x86|x86_64|platform009|platform010",
543
                [
544
                    "-mavx",
545
                ],
546
            ),
547
        ],
548
        platform_srcs = ([
549
            (
550
                "x86|x86_64|platform009|platform010",
551
                PROD_AVX_MICROKERNEL_SRCS,
552
            ),
553
        ] if not is_arvr_mode() else []),
554
        preferred_linkage = "static",
555
        preprocessor_flags = [
556
            "-DXNN_LOG_LEVEL=0",
557
        ],
558
        visibility = ["PUBLIC"],
559
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
560
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
561
        deps = [
562
            ":interface",
563
        ],
564
    )
565

566
    fb_xplat_cxx_library(
567
        name = "ukernels_avx_ovr_win32",
568
        headers = subdir_glob([
569
            ("XNNPACK/src", "**/*.h"),
570
            ("XNNPACK/src", "**/*.c"),
571
        ]),
572
        header_namespace = "",
573
        apple_sdks = (IOS, MACOSX, APPLETVOS),
574
        compiler_flags = [
575
            "-O2",
576
            "-mavx",
577
        ],
578
        fbobjc_preprocessor_flags = [
579
            "-DXNN_PRIVATE=",
580
            "-DXNN_INTERNAL=",
581
        ],
582
        labels = labels,
583
        platform_compiler_flags = [
584
            (
585
                "x86",
586
                [
587
                    "-mavx",
588
                ],
589
            ),
590
        ],
591
        preferred_linkage = "static",
592
        preprocessor_flags = [
593
            "-DXNN_LOG_LEVEL=0",
594
        ],
595
        visibility = ["PUBLIC"],
596
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
597
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
598
        windows_srcs = PROD_AVX_MICROKERNEL_SRCS,
599
        deps = [
600
            ":interface",
601
        ],
602
    )
603

604
    fb_xplat_cxx_library(
605
        name = "ukernels_avx512vnnigfni",
606
        srcs = PROD_AVX512VNNIGFNI_MICROKERNEL_SRCS if is_arvr_mode() else [],
607
        headers = subdir_glob([
608
            ("XNNPACK/src", "**/*.h"),
609
            ("XNNPACK/src", "**/*.c"),
610
        ]),
611
        header_namespace = "",
612
        apple_sdks = (IOS, MACOSX, APPLETVOS),
613
        compiler_flags = [
614
            "-O2",
615
        ] + select({
616
            "DEFAULT": [],
617
            "ovr_config//cpu:x86_32": [
618
                "-mavx",
619
                "-mgfni",
620
                "-mavx512vl",
621
                "-mavx512vnni",
622
                "-mavx512bw",
623
                "-mavx512dq",
624
            ],
625
            "ovr_config//cpu:x86_64": [
626
                "-mavx",
627
                "-mgfni",
628
                "-mavx512vl",
629
                "-mavx512vnni",
630
                "-mavx512bw",
631
                "-mavx512dq",
632
            ],
633
        }),
634
        fbobjc_preprocessor_flags = [
635
            "-DXNN_PRIVATE=",
636
            "-DXNN_INTERNAL=",
637
        ],
638
        labels = labels,
639
        platform_compiler_flags = [
640
            (
641
                "x86|x86_64|platform009|platform010",
642
                [
643
                    "-mavx512f",
644
                    "-mavx512cd",
645
                    "-mavx512bw",
646
                    "-mavx512dq",
647
                    "-mavx512vl",
648
                    "-mavx512vnni",
649
                    "-mgfni",
650
                ],
651
            ),
652
        ],
653
        platform_srcs = ([
654
            (
655
                "x86|x86_64|platform009|platform010",
656
                PROD_AVX512VNNIGFNI_MICROKERNEL_SRCS,
657
            ),
658
        ] if not is_arvr_mode() else []),
659
        preferred_linkage = "static",
660
        preprocessor_flags = [
661
            "-DXNN_LOG_LEVEL=0",
662
        ],
663
        visibility = ["PUBLIC"],
664
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
665
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
666
        deps = [
667
            ":interface",
668
        ],
669
    )
670

671
    fb_xplat_cxx_library(
672
        name = "ukernels_avx512vnnigfni_ovr_win32",
673
        headers = subdir_glob([
674
            ("XNNPACK/src", "**/*.h"),
675
            ("XNNPACK/src", "**/*.c"),
676
        ]),
677
        header_namespace = "",
678
        apple_sdks = (IOS, MACOSX, APPLETVOS),
679
        compiler_flags = [
680
            "-O2",
681
        ],
682
        fbobjc_preprocessor_flags = [
683
            "-DXNN_PRIVATE=",
684
            "-DXNN_INTERNAL=",
685
        ],
686
        labels = labels,
687
        platform_compiler_flags = [
688
            (
689
                "x86|x86_64|platform009|platform010",
690
                [
691
                    "-mavx512f",
692
                    "-mavx512cd",
693
                    "-mavx512bw",
694
                    "-mavx512dq",
695
                    "-mavx512vl",
696
                    "-mavx512vnni",
697
                    "-mgfni",
698
                ],
699
            ),
700
        ],
701
        preferred_linkage = "static",
702
        preprocessor_flags = [
703
            "-DXNN_LOG_LEVEL=0",
704
        ],
705
        visibility = ["PUBLIC"],
706
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
707
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
708
        windows_srcs = PROD_AVX512VNNIGFNI_MICROKERNEL_SRCS,
709
        deps = [
710
            ":interface",
711
        ],
712
    )
713

714
    fb_xplat_cxx_library(
715
        name = "ukernels_avx512vnni",
716
        srcs = PROD_AVX512VNNI_MICROKERNEL_SRCS if is_arvr_mode() else [],
717
        headers = subdir_glob([
718
            ("XNNPACK/src", "**/*.h"),
719
            ("XNNPACK/src", "**/*.c"),
720
        ]),
721
        header_namespace = "",
722
        apple_sdks = (IOS, MACOSX, APPLETVOS),
723
        compiler_flags = [
724
            "-O2",
725
        ] + select({
726
            "DEFAULT": [],
727
            "ovr_config//cpu:x86_32": [
728
                "-mavx",
729
            ],
730
            "ovr_config//cpu:x86_64": [
731
                "-mavx",
732
            ],
733
        }),
734
        fbobjc_preprocessor_flags = [
735
            "-DXNN_PRIVATE=",
736
            "-DXNN_INTERNAL=",
737
        ],
738
        labels = labels,
739
        platform_compiler_flags = [
740
            (
741
                "x86|x86_64|platform009|platform010",
742
                [
743
                    "-mavx512f",
744
                    "-mavx512cd",
745
                    "-mavx512bw",
746
                    "-mavx512dq",
747
                    "-mavx512vl",
748
                    "-mavx512vnni",
749
                ],
750
            ),
751
        ],
752
        platform_srcs = ([
753
            (
754
                "x86|x86_64|platform009|platform010",
755
                PROD_AVX512VNNI_MICROKERNEL_SRCS,
756
            ),
757
        ] if not is_arvr_mode() else []),
758
        preferred_linkage = "static",
759
        preprocessor_flags = [
760
            "-DXNN_LOG_LEVEL=0",
761
        ],
762
        exported_preprocessor_flags = [
763
            "-DXNN_ENABLE_AVX512VNNI"
764
        ],
765
        visibility = ["PUBLIC"],
766
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
767
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
768
        deps = [
769
            ":interface",
770
        ],
771
    )
772

773
    fb_xplat_cxx_library(
774
        name = "ukernels_avx512vnni_ovr_win32",
775
        headers = subdir_glob([
776
            ("XNNPACK/src", "**/*.h"),
777
            ("XNNPACK/src", "**/*.c"),
778
        ]),
779
        header_namespace = "",
780
        apple_sdks = (IOS, MACOSX, APPLETVOS),
781
        compiler_flags = [
782
            "-O2",
783
        ],
784
        fbobjc_preprocessor_flags = [
785
            "-DXNN_PRIVATE=",
786
            "-DXNN_INTERNAL=",
787
        ],
788
        labels = labels,
789
        platform_compiler_flags = [
790
            (
791
                "x86|x86_64|platform009|platform010",
792
                [
793
                    "-mavx512f",
794
                    "-mavx512cd",
795
                    "-mavx512bw",
796
                    "-mavx512dq",
797
                    "-mavx512vl",
798
                    "-mavx512vnni",
799
                ],
800
            ),
801
        ],
802
        preferred_linkage = "static",
803
        preprocessor_flags = [
804
            "-DXNN_LOG_LEVEL=0",
805
        ],
806
        exported_preprocessor_flags = [
807
            "-DXNN_ENABLE_AVX512VNNI"
808
        ],
809
        visibility = ["PUBLIC"],
810
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
811
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
812
        windows_srcs = PROD_AVX512VNNI_MICROKERNEL_SRCS,
813
        deps = [
814
            ":interface",
815
        ],
816
    )
817

818
    fb_xplat_cxx_library(
819
        name = "ukernels_avxvnni",
820
        srcs = PROD_AVXVNNI_MICROKERNEL_SRCS if is_arvr_mode() else [],
821
        headers = subdir_glob([
822
            ("XNNPACK/src", "**/*.h"),
823
            ("XNNPACK/src", "**/*.c"),
824
        ]),
825
        header_namespace = "",
826
        apple_sdks = (IOS, MACOSX, APPLETVOS),
827
        compiler_flags = [
828
            "-O2",
829
            "-mavxvnni",
830
            "-mf16c",
831
            "-mfma",
832
        ],
833
        fbobjc_preprocessor_flags = [
834
            "-DXNN_PRIVATE=",
835
            "-DXNN_INTERNAL=",
836
        ],
837
        labels = labels,
838
        platform_compiler_flags = [
839
            (
840
                "x86|x86_64|platform009|platform010",
841
                [
842
                    "-mavx2",
843
                    "-mavxvnni",
844
                    "-mf16c",
845
                    "-mfma",
846
                ],
847
            ),
848
        ],
849
        platform_srcs = ([
850
            (
851
                "x86|x86_64|platform009|platform010",
852
                PROD_AVXVNNI_MICROKERNEL_SRCS,
853
            ),
854
        ] if not is_arvr_mode() else []),
855
        preferred_linkage = "static",
856
        preprocessor_flags = [
857
            "-DXNN_LOG_LEVEL=0",
858
        ],
859
        visibility = ["PUBLIC"],
860
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
861
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
862
        deps = [
863
            ":interface",
864
        ],
865
    )
866

867
    fb_xplat_cxx_library(
868
        name = "ukernels_avxvnni_ovr_win32",
869
        headers = subdir_glob([
870
            ("XNNPACK/src", "**/*.h"),
871
            ("XNNPACK/src", "**/*.c"),
872
        ]),
873
        header_namespace = "",
874
        apple_sdks = (IOS, MACOSX, APPLETVOS),
875
        compiler_flags = [
876
            "-O2",
877
        ],
878
        fbobjc_preprocessor_flags = [
879
            "-DXNN_PRIVATE=",
880
            "-DXNN_INTERNAL=",
881
        ],
882
        labels = labels,
883
        platform_compiler_flags = [
884
            (
885
                "x86|x86_64|platform009|platform010",
886
                [
887
                    "-mavx2",
888
                    "-mavxvnni",
889
                ],
890
            ),
891
        ],
892
        preferred_linkage = "static",
893
        preprocessor_flags = [
894
            "-DXNN_LOG_LEVEL=0",
895
        ],
896
        visibility = ["PUBLIC"],
897
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
898
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
899
        windows_srcs = PROD_AVXVNNI_MICROKERNEL_SRCS,
900
        deps = [
901
            ":interface",
902
        ],
903
    )
904

905
    # F16C microkernels (PROD_F16C_MICROKERNEL_SRCS).
    # When is_arvr_mode() is true the sources go straight into `srcs`;
    # otherwise they are attached through `platform_srcs` for platforms
    # matching x86/x86_64/platform009/platform010. "-mf16c" is supplied both
    # by the ovr_config x86 selects and by the matching platform regex.
    fb_xplat_cxx_library(
        name = "ukernels_f16c",
        srcs = PROD_F16C_MICROKERNEL_SRCS if is_arvr_mode() else [],
        # Both .h and .c files under XNNPACK/src are exported as headers.
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.h"),
            ("XNNPACK/src", "**/*.c"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:x86_32": [
                "-mf16c",
            ],
            "ovr_config//cpu:x86_64": [
                "-mf16c",
            ],
        }),
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "x86|x86_64|platform009|platform010",
                [
                    "-mf16c",
                ],
            ),
        ],
        platform_srcs = ([
            (
                "x86|x86_64|platform009|platform010",
                PROD_F16C_MICROKERNEL_SRCS,
            ),
        ] if not is_arvr_mode() else []),
        platforms = (APPLE, ANDROID, CXX, WINDOWS),
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mf16c"],
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-mf16c"],
        deps = [
            ":interface",
        ],
    )
956

957
    # Windows variant of the F16C microkernel target: it declares no `srcs`
    # or `platform_srcs`; PROD_F16C_MICROKERNEL_SRCS is supplied only through
    # `windows_srcs`, and "-mf16c" is passed unconditionally.
    fb_xplat_cxx_library(
        name = "ukernels_f16c_ovr_win32",
        # Both .h and .c files under XNNPACK/src are exported as headers.
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.h"),
            ("XNNPACK/src", "**/*.c"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
            "-mf16c",
        ],
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "x86",
                [
                    "-mf16c",
                ],
            ),
        ],
        platforms = (APPLE, ANDROID, CXX, WINDOWS),
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mf16c"],
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-mf16c"],
        windows_srcs = PROD_F16C_MICROKERNEL_SRCS,
        deps = [
            ":interface",
        ],
    )
995

996
    # XOP microkernels (PROD_XOP_MICROKERNEL_SRCS).
    # When is_arvr_mode() is true the sources go straight into `srcs`;
    # otherwise they are attached through `platform_srcs` for platforms
    # matching x86/x86_64/platform009/platform010. "-mxop" is supplied both
    # by the ovr_config x86 selects and by the matching platform regex.
    fb_xplat_cxx_library(
        name = "ukernels_xop",
        srcs = PROD_XOP_MICROKERNEL_SRCS if is_arvr_mode() else [],
        # Both .h and .c files under XNNPACK/src are exported as headers.
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.h"),
            ("XNNPACK/src", "**/*.c"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:x86_32": [
                "-mxop",
            ],
            "ovr_config//cpu:x86_64": [
                "-mxop",
            ],
        }),
        platform_compiler_flags = [
            (
                "x86|x86_64|platform009|platform010",
                [
                    "-mxop",
                ],
            ),
        ],
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        # Defines `restrict` to nothing on platforms matching windows-x86_64.
        platform_preprocessor_flags = [
            (
                "windows-x86_64",
                [
                    "-Drestrict=",
                ],
            ),
        ],
        platform_srcs = ([
            (
                "x86|x86_64|platform009|platform010",
                PROD_XOP_MICROKERNEL_SRCS,
            ),
        ] if not is_arvr_mode() else []),
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mxop"],
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-mxop"],
        deps = [
            ":interface",
        ],
    )
1054

1055
    # Windows variant of the XOP microkernel target: it declares no `srcs`
    # or `platform_srcs`; PROD_XOP_MICROKERNEL_SRCS is supplied only through
    # `windows_srcs`, and "-mxop" is passed unconditionally.
    fb_xplat_cxx_library(
        name = "ukernels_xop_ovr_win32",
        # Both .h and .c files under XNNPACK/src are exported as headers.
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.h"),
            ("XNNPACK/src", "**/*.c"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
            "-mxop",
        ],
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        # Defines `restrict` to nothing on platforms matching windows-x86_64.
        platform_preprocessor_flags = [
            (
                "windows-x86_64",
                [
                    "-Drestrict=",
                ],
            ),
        ],
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mxop"],
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-mxop"],
        windows_srcs = PROD_XOP_MICROKERNEL_SRCS,
        deps = [
            ":interface",
        ],
    )
1092

1093
    # FMA3 microkernels (PROD_FMA3_MICROKERNEL_SRCS), built with "-mfma" and
    # "-mf16c".
    # When is_arvr_mode() is true the sources go straight into `srcs`;
    # otherwise they are attached through `platform_srcs` for platforms
    # matching x86/x86_64/platform009/platform010.
    fb_xplat_cxx_library(
        name = "ukernels_fma3",
        srcs = PROD_FMA3_MICROKERNEL_SRCS if is_arvr_mode() else [],
        # Both .h and .c files under XNNPACK/src are exported as headers.
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.h"),
            ("XNNPACK/src", "**/*.c"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:x86_32": [
                "-mfma",
                "-mf16c",
            ],
            "ovr_config//cpu:x86_64": [
                "-mfma",
                "-mf16c",
            ],
        }),
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        # NOTE(review): this flag regex does not list platform009/platform010,
        # while the platform_srcs regex below does. Confirm those platforms
        # get the FMA/F16C flags from somewhere else.
        platform_compiler_flags = [
            (
                "(i[3-6]86|x86|x86_64|AMD64)",
                [
                    "-mfma",
                    "-mf16c",
                ],
            ),
        ],
        platform_srcs = ([
            (
                "x86|x86_64|platform009|platform010",
                PROD_FMA3_MICROKERNEL_SRCS,
            ),
        ] if not is_arvr_mode() else []),
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + [
            "-mfma",
            "-mf16c",
        ],
        windows_compiler_flags_override = WINDOWS_FLAGS + [
            "-mfma",
            "-mf16c",
        ],
        deps = [
            ":interface",
        ],
    )
1152

1153
    # Windows variant of the FMA3 microkernel target: it declares no `srcs`
    # or `platform_srcs`; PROD_FMA3_MICROKERNEL_SRCS is supplied only through
    # `windows_srcs`, and "-mfma"/"-mf16c" are passed unconditionally.
    fb_xplat_cxx_library(
        name = "ukernels_fma3_ovr_win32",
        # Both .h and .c files under XNNPACK/src are exported as headers.
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.h"),
            ("XNNPACK/src", "**/*.c"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
            "-mfma",
            "-mf16c",
        ],
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "^(i[3-6]86|x86|x86_64|AMD64)$",
                [
                    "-mfma",
                    "-mf16c",
                ],
            ),
        ],
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + [
            "-mfma",
            "-mf16c",
        ],
        windows_compiler_flags_override = WINDOWS_FLAGS + [
            "-mfma",
            "-mf16c",
        ],
        windows_srcs = PROD_FMA3_MICROKERNEL_SRCS,
        deps = [
            ":interface",
        ],
    )
1198

1199
    # AVX2 microkernels (PROD_AVX2_MICROKERNEL_SRCS), built with "-mavx2",
    # "-mfma" and "-mf16c".
    # When is_arvr_mode() is true the sources go straight into `srcs`;
    # otherwise they are attached through `platform_srcs` for platforms
    # matching x86/x86_64/platform009/platform010.
    fb_xplat_cxx_library(
        name = "ukernels_avx2",
        srcs = PROD_AVX2_MICROKERNEL_SRCS if is_arvr_mode() else [],
        # Both .c and .h files under XNNPACK/src are exported as headers.
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.c"),
            ("XNNPACK/src", "**/*.h"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:x86_32": [
                "-mavx2",
                "-mfma",
                "-mf16c",
            ],
            "ovr_config//cpu:x86_64": [
                "-mavx2",
                "-mfma",
                "-mf16c",
            ],
        }),
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "x86|x86_64|platform009|platform010",
                [
                    "-mavx2",
                    "-mfma",
                    "-mf16c",
                ],
            ),
        ],
        platform_srcs = ([
            (
                "x86|x86_64|platform009|platform010",
                PROD_AVX2_MICROKERNEL_SRCS,
            ),
        ] if not is_arvr_mode() else []),
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + [
            "-mavx2",
            "-mfma",
            "-mf16c",
        ],
        windows_compiler_flags_override = WINDOWS_FLAGS + [
            "-mavx2",
            "-mfma",
            "-mf16c",
        ],
        deps = [
            ":interface",
        ],
    )
1263

1264
    # Windows variant of the AVX2 microkernel target: it declares no `srcs`
    # or `platform_srcs`; PROD_AVX2_MICROKERNEL_SRCS is supplied only through
    # `windows_srcs`, and the AVX2/FMA/F16C flags are passed unconditionally.
    fb_xplat_cxx_library(
        name = "ukernels_avx2_ovr_win32",
        # Both .c and .h files under XNNPACK/src are exported as headers.
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.c"),
            ("XNNPACK/src", "**/*.h"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
            "-mavx2",
            "-mfma",
            "-mf16c",
        ],
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "x86",
                [
                    "-mavx2",
                    "-mfma",
                    "-mf16c",
                ],
            ),
        ],
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + [
            "-mavx2",
            "-mfma",
            "-mf16c",
        ],
        # Unlike the clang override above, this one also defines __AVX2__
        # explicitly via "/D".
        windows_compiler_flags_override = WINDOWS_FLAGS + [
            "/D__AVX2__",
            "-mavx2",
            "-mfma",
            "-mf16c",
        ],
        windows_srcs = PROD_AVX2_MICROKERNEL_SRCS,
        deps = [
            ":interface",
        ],
    )
1314

1315
    # AVX512F microkernels (PROD_AVX512F_MICROKERNEL_SRCS), built with
    # "-mavx512f".
    # When is_arvr_mode() is true the sources go straight into `srcs`;
    # otherwise they are attached through `platform_srcs` for platforms
    # matching x86/x86_64/platform009/platform010.
    fb_xplat_cxx_library(
        name = "ukernels_avx512",
        srcs = PROD_AVX512F_MICROKERNEL_SRCS if is_arvr_mode() else [],
        # Both .c and .h files under XNNPACK/src are exported as headers.
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.c"),
            ("XNNPACK/src", "**/*.h"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:x86_32": [
                "-mavx512f",
            ],
            "ovr_config//cpu:x86_64": [
                "-mavx512f",
            ],
        }),
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "x86|x86_64|platform009|platform010",
                [
                    "-mavx512f",
                ],
            ),
        ],
        platform_srcs = ([
            (
                "x86|x86_64|platform009|platform010",
                PROD_AVX512F_MICROKERNEL_SRCS,
            ),
        ] if not is_arvr_mode() else []),
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx512f"],
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx512f"],
        deps = [
            ":interface",
        ],
    )
1365

1366
    # AVX512-VBMI microkernels (PROD_AVX512VBMI_MICROKERNEL_SRCS), built with
    # the F/CD/BW/DQ/VL/VBMI AVX-512 flags.
    # When is_arvr_mode() is true the sources go straight into `srcs`;
    # otherwise they are attached through `platform_srcs` for platforms
    # matching x86/x86_64/platform009/platform010.
    fb_xplat_cxx_library(
        name = "ukernels_avx512vbmi",
        srcs = PROD_AVX512VBMI_MICROKERNEL_SRCS if is_arvr_mode() else [],
        # Both .c and .h files under XNNPACK/src are exported as headers.
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.c"),
            ("XNNPACK/src", "**/*.h"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:x86_32": [
                "-mavx512f",
                "-mavx512cd",
                "-mavx512bw",
                "-mavx512dq",
                "-mavx512vl",
                "-mavx512vbmi",
            ],
            "ovr_config//cpu:x86_64": [
                "-mavx512f",
                "-mavx512cd",
                "-mavx512bw",
                "-mavx512dq",
                "-mavx512vl",
                "-mavx512vbmi",
            ],
        }),
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        # NOTE(review): this flag regex does not list platform009/platform010,
        # while the platform_srcs regex below does. Confirm those platforms
        # get the AVX-512 flags from somewhere else.
        platform_compiler_flags = [
            (
                "(i[3-6]86|x86|x86_64|AMD64)",
                [
                    "-mavx512f",
                    "-mavx512cd",
                    "-mavx512bw",
                    "-mavx512dq",
                    "-mavx512vl",
                    "-mavx512vbmi",
                ],
            ),
        ],
        platform_srcs = ([
            (
                "x86|x86_64|platform009|platform010",
                PROD_AVX512VBMI_MICROKERNEL_SRCS,
            ),
        ] if not is_arvr_mode() else []),
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + [
            "-mavx512f",
            "-mavx512cd",
            "-mavx512bw",
            "-mavx512dq",
            "-mavx512vl",
            "-mavx512vbmi",
        ],
        windows_compiler_flags_override = WINDOWS_FLAGS + [
            "-mavx512f",
            "-mavx512cd",
            "-mavx512bw",
            "-mavx512dq",
            "-mavx512vl",
            "-mavx512vbmi",
        ],
        deps = [
            ":interface",
        ],
    )
1445

1446
    # Windows variant of the AVX512F microkernel target: it declares no
    # `srcs` or `platform_srcs`; PROD_AVX512F_MICROKERNEL_SRCS is supplied
    # only through `windows_srcs`, and "-mavx512f" is passed unconditionally.
    fb_xplat_cxx_library(
        name = "ukernels_avx512_ovr_win32",
        # Both .c and .h files under XNNPACK/src are exported as headers.
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.c"),
            ("XNNPACK/src", "**/*.h"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
            "-mavx512f",
        ],
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "x86",
                [
                    "-mavx512f",
                ],
            ),
        ],
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx512f"],
        windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx512f"],
        windows_srcs = PROD_AVX512F_MICROKERNEL_SRCS,
        deps = [
            ":interface",
        ],
    )
1483

1484
    # AVX512-SKX microkernels (PROD_AVX512SKX_MICROKERNEL_SRCS), built with
    # the F/CD/BW/DQ/VL AVX-512 flags.
    # When is_arvr_mode() is true the sources go straight into `srcs`;
    # otherwise they are attached through `platform_srcs` for platforms
    # matching x86/x86_64/platform009/platform010.
    fb_xplat_cxx_library(
        name = "ukernels_avx512skx",
        srcs = PROD_AVX512SKX_MICROKERNEL_SRCS if is_arvr_mode() else [],
        # Both .c and .h files under XNNPACK/src are exported as headers.
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.c"),
            ("XNNPACK/src", "**/*.h"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:x86_32": [
                "-mavx512f",
                "-mavx512cd",
                "-mavx512bw",
                "-mavx512dq",
                "-mavx512vl",
            ],
            "ovr_config//cpu:x86_64": [
                "-mavx512f",
                "-mavx512cd",
                "-mavx512bw",
                "-mavx512dq",
                "-mavx512vl",
            ],
        }),
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        # NOTE(review): this flag regex does not list platform009/platform010,
        # while the platform_srcs regex below does. Confirm those platforms
        # get the AVX-512 flags from somewhere else.
        platform_compiler_flags = [
            (
                "(i[3-6]86|x86|x86_64|AMD64)",
                [
                    "-mavx512f",
                    "-mavx512cd",
                    "-mavx512bw",
                    "-mavx512dq",
                    "-mavx512vl",
                ],
            ),
        ],
        platform_srcs = ([
            (
                "x86|x86_64|platform009|platform010",
                PROD_AVX512SKX_MICROKERNEL_SRCS,
            ),
        ] if not is_arvr_mode() else []),
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + [
            "-mavx512f",
            "-mavx512cd",
            "-mavx512bw",
            "-mavx512dq",
            "-mavx512vl",
        ],
        windows_compiler_flags_override = WINDOWS_FLAGS + [
            "-mavx512f",
            "-mavx512cd",
            "-mavx512bw",
            "-mavx512dq",
            "-mavx512vl",
        ],
        deps = [
            ":interface",
        ],
    )
1559

1560
    # Windows variant of the AVX512-SKX microkernel target: it declares no
    # `srcs` or `platform_srcs`; PROD_AVX512SKX_MICROKERNEL_SRCS is supplied
    # only through `windows_srcs`, and the F/CD/BW/DQ/VL AVX-512 flags are
    # passed unconditionally.
    fb_xplat_cxx_library(
        name = "ukernels_avx512skx_ovr_win32",
        # Both .c and .h files under XNNPACK/src are exported as headers.
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.c"),
            ("XNNPACK/src", "**/*.h"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
            "-mavx512f",
            "-mavx512cd",
            "-mavx512bw",
            "-mavx512dq",
            "-mavx512vl",
        ],
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "^(i[3-6]86|x86|x86_64|AMD64)$",
                [
                    "-mavx512f",
                    "-mavx512cd",
                    "-mavx512bw",
                    "-mavx512dq",
                    "-mavx512vl",
                ],
            ),
        ],
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + [
            "-mavx512f",
            "-mavx512cd",
            "-mavx512bw",
            "-mavx512dq",
            "-mavx512vl",
        ],
        # Unlike the clang override above, this one also defines
        # __AVX512BW__ explicitly via "/D".
        windows_compiler_flags_override = WINDOWS_FLAGS + [
            "-mavx512f",
            "-mavx512cd",
            "-mavx512bw",
            "-mavx512dq",
            "-mavx512vl",
            "/D__AVX512BW__",
        ],
        windows_srcs = PROD_AVX512SKX_MICROKERNEL_SRCS,
        deps = [
            ":interface",
        ],
    )
1618

1619
    # ARM SIMD32 microkernels (PROD_ARMSIMD32_MICROKERNEL_SRCS).
    # The sources are listed unconditionally in `srcs`; the ARMv6/VFP flags
    # are added only for platforms matching arm32/aarch32/armv7.
    fb_xplat_cxx_library(
        name = "ukernels_armsimd32",
        srcs = PROD_ARMSIMD32_MICROKERNEL_SRCS,
        # Both .c and .h files under XNNPACK/src are exported as headers.
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.c"),
            ("XNNPACK/src", "**/*.h"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
            "-fno-fast-math",
            "-fno-math-errno",
        ],
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "(arm32|aarch32|armv7)",
                [
                    "-marm",
                    "-march=armv6",
                    "-mfpu=vfp",
                    "-munaligned-access",
                ],
            ),
        ],
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
        windows_compiler_flags_override = WINDOWS_FLAGS,
        deps = [
            ":interface",
            third_party("FP16"),
        ],
    )
1661

1662
    # 32-bit ARM NEON microkernels (PROD_NEON_MICROKERNEL_SRCS).
    # When is_arvr_mode() is true the sources are selected for
    # ovr_config//cpu:arm32 only; otherwise they are attached through
    # `platform_srcs` for platforms matching aarch32/arm32/armv7. The same
    # ARMv7-A/NEON flags are supplied by the select and the platform regex.
    fb_xplat_cxx_library(
        name = "ukernels_neon",
        srcs = select({
            "DEFAULT": [],
            "ovr_config//cpu:arm32": PROD_NEON_MICROKERNEL_SRCS,
        }) if is_arvr_mode() else [],
        # Both .c and .h files under XNNPACK/src are exported as headers.
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.c"),
            ("XNNPACK/src", "**/*.h"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:arm32": [
                "-marm",
                "-march=armv7-a",
                "-mfpu=neon",
            ],
        }),
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "(aarch32|arm32|armv7)",
                [
                    "-marm",
                    "-march=armv7-a",
                    "-mfpu=neon",
                ],
            ),
        ],
        platform_srcs = [
            (
                "(aarch32|arm32|armv7)",
                PROD_NEON_MICROKERNEL_SRCS,
            ),
        ] if not is_arvr_mode() else [],
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
        windows_compiler_flags_override = WINDOWS_FLAGS,
        deps = [
            ":interface",
            third_party("FP16"),
        ],
    )
1717

1718
    # AArch64 NEON microkernels: PROD_NEON_MICROKERNEL_SRCS plus the first
    # entry of PROD_NEON_AARCH64_MICROKERNEL_SRCS.
    # When is_arvr_mode() is true the sources are selected for
    # ovr_config//cpu:arm64 only; otherwise they are attached through
    # `platform_srcs` for platforms matching aarch64/arm64. No ISA-specific
    # compiler flags are set on this target.
    fb_xplat_cxx_library(
        name = "ukernels_neon_aarch64",
        srcs = select({
            "DEFAULT": [],
            "ovr_config//cpu:arm64": PROD_NEON_MICROKERNEL_SRCS + [PROD_NEON_AARCH64_MICROKERNEL_SRCS[0]],
        }) if is_arvr_mode() else [],
        # Both .c and .h files under XNNPACK/src are exported as headers.
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.c"),
            ("XNNPACK/src", "**/*.h"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ],
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        platform_srcs = [
            (
                "(aarch64|arm64)",
                PROD_NEON_MICROKERNEL_SRCS + [PROD_NEON_AARCH64_MICROKERNEL_SRCS[0]],
            ),
        ] if not is_arvr_mode() else [],
        labels = labels,
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
        windows_compiler_flags_override = WINDOWS_FLAGS,
        deps = [
            ":interface",
            third_party("FP16"),
        ],
    )
1756

1757
    # 32-bit ARM NEON-FMA microkernels (PROD_NEONFMA_MICROKERNEL_SRCS).
    # When is_arvr_mode() is true the sources are selected for
    # ovr_config//cpu:arm32 only; otherwise they are attached through
    # `platform_srcs` for platforms matching aarch32/arm32/armv7. The same
    # ARMv7-A/neon-vfpv4 flags are supplied by the select and the platform
    # regex.
    fb_xplat_cxx_library(
        name = "ukernels_neon_fma",
        srcs = select({
            "DEFAULT": [],
            "ovr_config//cpu:arm32": PROD_NEONFMA_MICROKERNEL_SRCS,
        }) if is_arvr_mode() else [],
        # Both .c and .h files under XNNPACK/src are exported as headers.
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.c"),
            ("XNNPACK/src", "**/*.h"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:arm32": [
                "-marm",
                "-march=armv7-a",
                "-mfpu=neon-vfpv4",
            ],
        }),
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            # Extra CPU/tuning flags for the platform named exactly
            # "iphoneos-armv7".
            (
                "^iphoneos-armv7$",
                [
                    "-mcpu=cyclone",
                    "-mtune=generic",
                ],
            ),
            (
                "(aarch32|arm32|armv7)",
                [
                    "-marm",
                    "-march=armv7-a",
                    "-mfpu=neon-vfpv4",
                ],
            ),
        ],
        platform_srcs = [
            (
                "(aarch32|arm32|armv7)",
                PROD_NEONFMA_MICROKERNEL_SRCS,
            ),
        ] if not is_arvr_mode() else [],
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
        windows_compiler_flags_override = WINDOWS_FLAGS,
        deps = [
            ":interface",
            third_party("FP16"),
        ],
    )
1819

1820
    # AArch64 NEON-FMA microkernels: PROD_NEONFMA_MICROKERNEL_SRCS plus the
    # second entry of PROD_NEON_AARCH64_MICROKERNEL_SRCS.
    # When is_arvr_mode() is true the sources are selected for
    # ovr_config//cpu:arm64 only; otherwise they are attached through
    # `platform_srcs` for platforms whose name ends in arm64/aarch64. No
    # ISA-specific compiler flags are set on this target.
    fb_xplat_cxx_library(
        name = "ukernels_neonfma_aarch64",
        srcs = select({
            "DEFAULT": [],
            "ovr_config//cpu:arm64": PROD_NEONFMA_MICROKERNEL_SRCS + [PROD_NEON_AARCH64_MICROKERNEL_SRCS[1]],
        }) if is_arvr_mode() else [],
        # Both .h and .c files under XNNPACK/src are exported as headers.
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.h"),
            ("XNNPACK/src", "**/*.c"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ],
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_srcs = [
            (
                "(arm64|aarch64)$",
                PROD_NEONFMA_MICROKERNEL_SRCS + [PROD_NEON_AARCH64_MICROKERNEL_SRCS[1]],
            ),
        ] if not is_arvr_mode() else [],
        platforms = (APPLE, ANDROID, CXX, WINDOWS),
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
        windows_compiler_flags_override = WINDOWS_FLAGS,
        deps = [
            ":interface",
            third_party("FP16"),
        ],
    )
1859

1860
    # FP16 arithmetic microkernels (PROD_FP16ARITH_MICROKERNEL_SRCS).
    # The sources are listed unconditionally in `srcs`; the ARMv8.2-A+fp16
    # flags are added for ovr_config//cpu:arm32 and for platforms matching
    # aarch32/arm32/armv7.
    fb_xplat_cxx_library(
        name = "ukernels_fp16arith",
        srcs = PROD_FP16ARITH_MICROKERNEL_SRCS,
        # Both .c and .h files under XNNPACK/src are exported as headers.
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.c"),
            ("XNNPACK/src", "**/*.h"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
            "-Wno-error=missing-braces",  # required since the SGX toolchain does not have this by default
            "-fno-fast-math",
            "-fno-math-errno",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:arm32": [
                "-marm",
                "-march=armv8.2-a+fp16",
                # GCC emits wrong directives for assembler with -mfpu=fp-armv8
                "-mfpu=neon-fp-armv8",
                # For vsqrth_f16 polyfill using sqrtf
                "-fno-math-errno",
                # For vminh_f16/vmaxh_f16 polyfills using compare + select
                "-ffinite-math-only",
            ],
        }),
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "(aarch32|arm32|armv7)",
                [
                    "-marm",
                    "-march=armv8.2-a+fp16",
                    # GCC emits wrong directives for assembler with -mfpu=fp-armv8
                    "-mfpu=neon-fp-armv8",
                    # For vsqrth_f16 polyfill using sqrtf
                    "-fno-math-errno",
                    # For vminh_f16/vmaxh_f16 polyfills using compare + select
                    "-ffinite-math-only",
                ],
            ),
        ],
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
        windows_compiler_flags_override = WINDOWS_FLAGS,
        deps = [
            ":interface",
        ],
    )
1918

1919
    # Static library of the NEON-FP16 microkernels
    # (PROD_NEONFP16_MICROKERNEL_SRCS).
    #
    # On 32-bit ARM the sources are compiled with `-march=armv7-a
    # -mfpu=neon-fp16`; the same flags are given both via select() and via the
    # platform regex.
    fb_xplat_cxx_library(
        name = "ukernels_neon_fp16",
        srcs = PROD_NEONFP16_MICROKERNEL_SRCS,
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.c"),
            ("XNNPACK/src", "**/*.h"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:arm32": [
                "-marm",
                "-march=armv7-a",
                "-mfpu=neon-fp16",
            ],
        }),
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "(aarch32|arm32|armv7)",
                [
                    "-marm",
                    "-march=armv7-a",
                    "-mfpu=neon-fp16",
                ],
            ),
        ],
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
        windows_compiler_flags_override = WINDOWS_FLAGS,
        deps = [
            ":interface",
        ],
    )
1964

1965
    # Static library of the NEON-V8 microkernels (PROD_NEONV8_MICROKERNEL_SRCS).
    #
    # Architecture flags are chosen per platform: arm64 gets `-march=armv8-a`,
    # `android-armv7` additionally selects the ARMv8 NEON FPU with the softfp
    # ABI, and `iphoneos-armv7` uses `-mcpu=cyclone -mtune=generic`.
    fb_xplat_cxx_library(
        name = "ukernels_neon_v8",
        srcs = PROD_NEONV8_MICROKERNEL_SRCS,
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.c"),
            ("XNNPACK/src", "**/*.h"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:arm64": ["-march=armv8-a"],
        }),
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "(aarch64|arm64)",
                [
                    "-march=armv8-a",
                ],
            ),
            (
                "^android-armv7$",
                [
                    "-march=armv8-a",
                    "-mfpu=neon-fp-armv8",
                    "-mfloat-abi=softfp",
                ],
            ),
            (
                "^iphoneos-armv7$",
                [
                    "-mcpu=cyclone",
                    "-mtune=generic",
                ],
            ),
        ],
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
        windows_compiler_flags_override = WINDOWS_FLAGS,
        deps = [
            ":interface",
            third_party("FP16"),
        ],
    )
2020

2021
    # Static library of the NEON dot-product microkernels for 32-bit ARM
    # (PROD_NEONDOT_MICROKERNEL_SRCS).
    #
    # Source selection depends on is_arvr_mode(): in arvr mode `srcs` is a
    # select() on `ovr_config//cpu:arm32` and `platform_srcs` is empty;
    # otherwise `srcs` is empty and `platform_srcs` matches the
    # `(aarch32|arm32|armv7)` platform regex. Other architectures therefore
    # compile no sources for this target.
    fb_xplat_cxx_library(
        name = "ukernels_neon_dot",
        srcs = select({
            "DEFAULT": [],
            "ovr_config//cpu:arm32": PROD_NEONDOT_MICROKERNEL_SRCS,
        }) if is_arvr_mode() else [],
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.c"),
            ("XNNPACK/src", "**/*.h"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:arm32": [
                "-march=armv8.2-a+dotprod",
                "-mfpu=neon-fp-armv8",
                "-mfloat-abi=softfp",
            ],
        }),
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "(aarch32|arm32|armv7)",
                [
                    "-march=armv8.2-a+dotprod",
                    "-mfpu=neon-fp-armv8",
                    "-mfloat-abi=softfp",
                ],
            ),
        ],
        platform_srcs = [
            (
                "(aarch32|arm32|armv7)",
                PROD_NEONDOT_MICROKERNEL_SRCS,
            ),
        ] if not is_arvr_mode() else [],
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
        windows_compiler_flags_override = WINDOWS_FLAGS,
        deps = [
            ":interface",
            third_party("FP16"),
        ],
    )
2076

2077
    # Static library of the NEON dot-product microkernels for 64-bit ARM:
    # the generic NEONDOT sources plus the AArch64-only ones.
    #
    # Source selection depends on is_arvr_mode(): select() on
    # `ovr_config//cpu:arm64` in arvr mode, the `(aarch64|arm64)` platform
    # regex otherwise.
    fb_xplat_cxx_library(
        name = "ukernels_neon_dot_aarch64",
        srcs = select({
            "DEFAULT": [],
            "ovr_config//cpu:arm64": PROD_NEONDOT_MICROKERNEL_SRCS + PROD_NEONDOT_AARCH64_MICROKERNEL_SRCS,
        }) if is_arvr_mode() else [],
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.c"),
            ("XNNPACK/src", "**/*.h"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:arm64": ["-march=armv8.2-a+dotprod"],
        }),
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "(aarch64|arm64)",
                [
                    "-march=armv8.2-a+dotprod",
                ],
            ),
        ],
        platform_srcs = [
            (
                "(aarch64|arm64)",
                PROD_NEONDOT_MICROKERNEL_SRCS + PROD_NEONDOT_AARCH64_MICROKERNEL_SRCS,
            ),
        ] if not is_arvr_mode() else [],
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
        windows_compiler_flags_override = WINDOWS_FLAGS,
        deps = [
            ":interface",
            third_party("FP16"),
        ],
    )
2126

2127
    # Static library of the NEON dot-product + FP16-arithmetic microkernels for
    # 32-bit ARM (PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS).
    #
    # Source selection depends on is_arvr_mode(): select() on
    # `ovr_config//cpu:arm32` in arvr mode, the `(aarch32|arm32|armv7)`
    # platform regex otherwise. Keyword arguments are listed in the same order
    # as the sibling microkernel targets; the order has no effect on the rule.
    fb_xplat_cxx_library(
        name = "ukernels_neon_dot_fp16arith",
        srcs = select({
            "DEFAULT": [],
            "ovr_config//cpu:arm32": PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS,
        }) if is_arvr_mode() else [],
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.c"),
            ("XNNPACK/src", "**/*.h"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:arm32": [
                "-marm",
                "-march=armv8.2-a+dotprod+fp16",
                "-mfpu=neon-fp-armv8",
            ],
        }),
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "(aarch32|arm32|armv7)",
                [
                    "-marm",
                    "-march=armv8.2-a+dotprod+fp16",
                    "-mfpu=neon-fp-armv8",
                ],
            ),
        ],
        platform_srcs = [
            (
                "(aarch32|arm32|armv7)",
                PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS,
            ),
        ] if not is_arvr_mode() else [],
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
        windows_compiler_flags_override = WINDOWS_FLAGS,
        deps = [
            ":interface",
            third_party("FP16"),
        ],
    )
2182

2183
    # Static library of the NEON dot-product + FP16-arithmetic microkernels for
    # 64-bit ARM: the generic NEONDOTFP16ARITH sources plus the AArch64-only
    # ones.
    #
    # Source selection depends on is_arvr_mode(): select() on
    # `ovr_config//cpu:arm64` in arvr mode, the `(aarch64|arm64)` platform
    # regex otherwise. Keyword arguments are listed in the same order as the
    # sibling microkernel targets; the order has no effect on the rule.
    fb_xplat_cxx_library(
        name = "ukernels_neon_dot_fp16arith_aarch64",
        srcs = select({
            "DEFAULT": [],
            "ovr_config//cpu:arm64": PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS + PROD_NEONDOTFP16ARITH_AARCH64_MICROKERNEL_SRCS,
        }) if is_arvr_mode() else [],
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.c"),
            ("XNNPACK/src", "**/*.h"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:arm64": [
                "-march=armv8.2-a+dotprod+fp16",
            ],
        }),
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "(aarch64|arm64)",
                [
                    "-march=armv8.2-a+dotprod+fp16",
                ],
            ),
        ],
        platform_srcs = [
            (
                "(aarch64|arm64)",
                PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS + PROD_NEONDOTFP16ARITH_AARCH64_MICROKERNEL_SRCS,
            ),
        ] if not is_arvr_mode() else [],
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
        windows_compiler_flags_override = WINDOWS_FLAGS,
        deps = [
            ":interface",
            third_party("FP16"),
        ],
    )
2234

2235
    # Static library of the NEON FP16-arithmetic microkernels for 32-bit ARM
    # (PROD_NEONFP16ARITH_MICROKERNEL_SRCS).
    #
    # Source selection depends on is_arvr_mode(): select() on
    # `ovr_config//cpu:arm32` in arvr mode, the `(aarch32|arm32|armv7)`
    # platform regex otherwise.
    fb_xplat_cxx_library(
        name = "ukernels_neon_fp16arith",
        srcs = select({
            "DEFAULT": [],
            "ovr_config//cpu:arm32": PROD_NEONFP16ARITH_MICROKERNEL_SRCS,
        }) if is_arvr_mode() else [],
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.c"),
            ("XNNPACK/src", "**/*.h"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:arm32": [
                "-marm",
                "-march=armv8.2-a+fp16",
                "-mfpu=neon-fp-armv8",
            ],
        }),
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "(aarch32|arm32|armv7)",
                [
                    "-marm",
                    "-march=armv8.2-a+fp16",
                    "-mfpu=neon-fp-armv8",
                ],
            ),
        ],
        platform_srcs = [
            (
                "(aarch32|arm32|armv7)",
                PROD_NEONFP16ARITH_MICROKERNEL_SRCS,
            ),
        ] if not is_arvr_mode() else [],
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
        windows_compiler_flags_override = WINDOWS_FLAGS,
        deps = [
            ":interface",
            third_party("FP16"),
        ],
    )
2290

2291
    # Static library of the NEON FP16-arithmetic microkernels for 64-bit ARM:
    # the generic NEONFP16ARITH sources plus the AArch64-only ones.
    #
    # Source selection depends on is_arvr_mode(): select() on
    # `ovr_config//cpu:arm64` in arvr mode, the `(aarch64|arm64)` platform
    # regex otherwise.
    fb_xplat_cxx_library(
        name = "ukernels_neon_fp16arith_aarch64",
        srcs = select({
            "DEFAULT": [],
            "ovr_config//cpu:arm64": PROD_NEONFP16ARITH_MICROKERNEL_SRCS + PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS,
        }) if is_arvr_mode() else [],
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.c"),
            ("XNNPACK/src", "**/*.h"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:arm64": ["-march=armv8.2-a+fp16"],
        }),
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "(aarch64|arm64)",
                [
                    "-march=armv8.2-a+fp16",
                ],
            ),
        ],
        platform_srcs = [
            (
                "(aarch64|arm64)",
                PROD_NEONFP16ARITH_MICROKERNEL_SRCS + PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS,
            ),
        ] if not is_arvr_mode() else [],
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
        windows_compiler_flags_override = WINDOWS_FLAGS,
        deps = [
            ":interface",
            third_party("FP16"),
        ],
    )
2340

2341
    # Static library of the NEON I8MM microkernels
    # (PROD_NEONI8MM_MICROKERNEL_SRCS).
    #
    # Unlike the dot-product targets, the sources are listed unconditionally;
    # only the `-march=armv8.2-a+i8mm+fp16` flag set varies between 32-bit and
    # 64-bit ARM. Note that the 32-bit platform regex is anchored with `$`
    # while the 64-bit one is not.
    fb_xplat_cxx_library(
        name = "ukernels_neonfma_i8mm",
        srcs = PROD_NEONI8MM_MICROKERNEL_SRCS,
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.h"),
            ("XNNPACK/src", "**/*.c"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:arm32": [
                "-marm",
                "-march=armv8.2-a+i8mm+fp16",
                "-mfpu=neon-fp-armv8",
            ],
            "ovr_config//cpu:arm64": [
                "-march=armv8.2-a+i8mm+fp16",
            ],
        }),
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "(aarch32|arm32|armv7)$",
                [
                    "-marm",
                    "-march=armv8.2-a+i8mm+fp16",
                    "-mfpu=neon-fp-armv8",
                ],
            ),
            (
                "(arm64|aarch64)",
                [
                    "-march=armv8.2-a+i8mm+fp16",
                ],
            ),
        ],
        platforms = (APPLE, ANDROID, CXX, WINDOWS),
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
        windows_compiler_flags_override = WINDOWS_FLAGS,
        deps = [
            ":interface",
            third_party("FP16"),
        ],
    )
2397

2398
    # Static library of the hand-written AArch32 assembly microkernels
    # (AARCH32_ASM_MICROKERNEL_SRCS).
    #
    # Only `xnnpack/assembly.h` and the `.S` files are exposed as headers.
    # On 32-bit ARM the sources are built with dot-product and FP16 enabled.
    fb_xplat_cxx_library(
        name = "ukernels_asm_aarch32",
        srcs = AARCH32_ASM_MICROKERNEL_SRCS,
        headers = subdir_glob([
            ("XNNPACK/src", "xnnpack/assembly.h"),
            ("XNNPACK/src", "**/*.S"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:arm32": [
                "-marm",
                "-march=armv8.2-a+dotprod+fp16",
                "-mfpu=neon-fp-armv8",
            ],
        }),
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "(aarch32|arm32|armv7)",
                [
                    "-marm",
                    "-march=armv8.2-a+dotprod+fp16",
                    "-mfpu=neon-fp-armv8",
                ],
            ),
        ],
        platforms = (APPLE, ANDROID, CXX, WINDOWS),
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
        windows_compiler_flags_override = WINDOWS_FLAGS,
        deps = [
            ":interface",
            third_party("FP16"),
        ],
    )
2445

2446
    # Static library of the hand-written AArch64 assembly microkernels
    # (AARCH64_ASM_MICROKERNEL_SRCS).
    #
    # Only `xnnpack/assembly.h` and the `.S` files are exposed as headers.
    # On 64-bit ARM the sources are built with FP16 and dot-product enabled.
    fb_xplat_cxx_library(
        name = "ukernels_asm_aarch64",
        srcs = AARCH64_ASM_MICROKERNEL_SRCS,
        headers = subdir_glob([
            ("XNNPACK/src", "xnnpack/assembly.h"),
            ("XNNPACK/src", "**/*.S"),
        ]),
        header_namespace = "",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        compiler_flags = [
            "-O2",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:arm64": [
                "-march=armv8.2-a+fp16+dotprod",
            ],
        }),
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        labels = labels,
        platform_compiler_flags = [
            (
                "(aarch64|arm64)",
                [
                    "-march=armv8.2-a+fp16+dotprod",
                ],
            ),
        ],
        preferred_linkage = "static",
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
        windows_compiler_flags_override = WINDOWS_FLAGS,
        deps = [
            ":interface",
            third_party("FP16"),
        ],
    )
2488

2489
    # Source-less aggregation target that pulls in the ARM microkernel
    # libraries used for arm64 builds. `prod_ukernels` selects it for
    # `ovr_config//runtime:arm64-linux-ubuntu-neon`.
    fb_xplat_cxx_library(
        name = "arm64_lib",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        labels = labels,
        preferred_linkage = "static",
        visibility = ["PUBLIC"],
        deps = [
            ":ukernels_asm_aarch64",
            ":ukernels_neon",
            ":ukernels_neon_aarch64",
            ":ukernels_neon_dot_fp16arith",
            ":ukernels_neon_dot_fp16arith_aarch64",
            ":ukernels_neon_dot",
            ":ukernels_neon_dot_aarch64",
            ":ukernels_neon_fma",
            ":ukernels_neon_fp16",
            ":ukernels_neon_fp16arith",
            ":ukernels_neon_fp16arith_aarch64",
            ":ukernels_neon_v8",
            ":ukernels_neonfma_aarch64",
            ":ukernels_neonfma_i8mm",
        ],
    )
2512

2513
    # Source-less aggregation target that pulls in the x86 / x86-64
    # microkernel libraries. The AVX-VNNI library is deliberately left out
    # (see the note at the end of `deps`).
    fb_xplat_cxx_library(
        name = "x86_and_x86_64_lib",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        labels = labels,
        preferred_linkage = "static",
        visibility = ["PUBLIC"],
        deps = [
            ":ukernels_avx",
            ":ukernels_avx2",
            ":ukernels_avx512",
            ":ukernels_avx512skx",
            ":ukernels_f16c",
            ":ukernels_fma3",
            ":ukernels_sse",
            ":ukernels_sse2",
            ":ukernels_sse41",
            ":ukernels_ssse3",
            ":ukernels_xop",
            ":ukernels_avx512vbmi",
            ":ukernels_avx512vnni",
            ":ukernels_avx512vnnigfni",
            # ":ukernels_avxvnni" Excluding avxvnni microkernels because they fail on older compilers
        ],
    )
2537

2538
    # Source-less aggregation target with the `_ovr_win32` variants of the
    # x86 / x86-64 microkernel libraries. The AVX512-VNNI, AVX512-VNNI-GFNI and
    # AVX-VNNI variants are disabled below, and dependents are compiled with
    # `-DXNN_ENABLE_AVX512VNNIGFNI=0` via `exported_preprocessor_flags`.
    # NOTE(review): `:ukernels_avx512vbmi` is the only dependency without the
    # `_ovr_win32` suffix - confirm whether that is intentional.
    fb_xplat_cxx_library(
        name = "x86_and_x86_64_lib_ovr_win32",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        labels = labels,
        preferred_linkage = "static",
        visibility = ["PUBLIC"],
        deps = [
            ":ukernels_avx2_ovr_win32",
            ":ukernels_avx512_ovr_win32",
            ":ukernels_avx512skx_ovr_win32",
            ":ukernels_avx_ovr_win32",
            ":ukernels_f16c_ovr_win32",
            ":ukernels_fma3_ovr_win32",
            ":ukernels_sse2_ovr_win32",
            ":ukernels_sse41_ovr_win32",
            ":ukernels_sse_ovr_win32",
            ":ukernels_ssse3_ovr_win32",
            ":ukernels_xop_ovr_win32",
            ":ukernels_avx512vbmi",
            # ":ukernels_avx512vnni_ovr_win32", # Build crashes on Windows Clang 17.0.3, re-enable when fixed (T199959765)
            # ":ukernels_avx512vnnigfni_ovr_win32",
            # ":ukernels_avxvnni_ovr_win32" Excluding avxvnni microkernels because they fail on older compilers
        ],
        exported_preprocessor_flags = [
            "-DXNN_ENABLE_AVX512VNNIGFNI=0",
        ],
    )
2565

2566
    # Source-less aggregation target that pulls in every ARM microkernel
    # library, 32-bit and 64-bit alike. It is part of the "DEFAULT" branch of
    # the select() in `prod_ukernels`.
    fb_xplat_cxx_library(
        name = "arm_lib",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        labels = labels,
        preferred_linkage = "static",
        visibility = ["PUBLIC"],
        deps = [
            ":ukernels_armsimd32",
            ":ukernels_asm_aarch32",
            ":ukernels_asm_aarch64",
            ":ukernels_neon",
            ":ukernels_neon_aarch64",
            ":ukernels_neon_dot",
            ":ukernels_neon_dot_aarch64",
            ":ukernels_neon_dot_fp16arith",
            ":ukernels_neon_dot_fp16arith_aarch64",
            ":ukernels_neon_fma",
            ":ukernels_neon_fp16",
            ":ukernels_neon_fp16arith",
            ":ukernels_neon_fp16arith_aarch64",
            ":ukernels_neon_v8",
            ":ukernels_neonfma_aarch64",
            ":ukernels_neonfma_i8mm",
            ":ukernels_fp16arith",
        ],
    )
2592

2593
    # Source-less aggregation target with the subset of ARM microkernel
    # libraries listed for ARMv7: AArch32 assembly, NEON, NEON dot-product,
    # NEON FMA and NEON V8.
    fb_xplat_cxx_library(
        name = "armv7_lib",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        labels = labels,
        preferred_linkage = "static",
        visibility = ["PUBLIC"],
        deps = [
            ":ukernels_asm_aarch32",
            ":ukernels_neon",
            ":ukernels_neon_dot",
            ":ukernels_neon_fma",
            ":ukernels_neon_v8",
        ],
    )
2607

2608
    # Source-less aggregation target for the production microkernels.
    #
    # The scalar kernels are always included. The architecture-specific
    # libraries are chosen with select():
    #   * DEFAULT: both the ARM and the x86 aggregation targets;
    #   * Windows: the `_ovr_win32` x86 target when
    #     XNNPACK_WINDOWS_AVX512F_ENABLED is truthy, otherwise the same list as
    #     DEFAULT;
    #   * `arm64-linux-ubuntu-neon` and `platform010` runtimes: only the
    #     matching architecture's aggregation target.
    fb_xplat_cxx_library(
        name = "prod_ukernels",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        labels = labels,
        preferred_linkage = "static",
        visibility = ["PUBLIC"],
        deps = [
            ":ukernels_scalar",
        ] + select({
            "DEFAULT": [
                ":arm_lib",
                ":x86_and_x86_64_lib",
            ],
            "ovr_config//os:windows": [":x86_and_x86_64_lib_ovr_win32"] if XNNPACK_WINDOWS_AVX512F_ENABLED else [
                ":arm_lib",
                ":x86_and_x86_64_lib",
            ],
            # doesn't cover iphonesimulator-x86_64
            "ovr_config//runtime:arm64-linux-ubuntu-neon": [":arm64_lib"],
            "ovr_config//runtime:platform010": [":x86_and_x86_64_lib"],
        }),
    )
2630

2631
    # Top-level XNNPACK library target.
    #
    # Compiles the core runtime sources (XNNPACK_SRCS, LOGGING_SRCS,
    # OPERATOR_SRCS plus the files listed explicitly below), exports
    # `xnnpack.h`, and depends on the subgraph, tables and production
    # microkernel targets together with cpuinfo and pthreadpool.
    #
    # The Windows flag overrides are applied in full only when
    # XNNPACK_WINDOWS_AVX512F_ENABLED is truthy; otherwise the clang override
    # falls back to WINDOWS_FLAGS and the non-clang override is empty.
    fb_xplat_cxx_library(
        name = "XNNPACK",
        apple_sdks = (IOS, MACOSX, APPLETVOS),
        labels = labels,
        deps = [
            ":subgraph",
            ":tables",
            ":prod_ukernels",
            third_party("cpuinfo"),
            third_party("pthreadpool"),
        ],
        exported_headers = {
            "xnnpack.h": "XNNPACK/include/xnnpack.h",
        },
        fbobjc_preprocessor_flags = [
            "-DXNN_PRIVATE=",
            "-DXNN_INTERNAL=",
        ],
        header_namespace = "",
        headers = subdir_glob([
            ("XNNPACK/src", "**/*.h"),
            ("XNNPACK/include", "**/*.h"),
        ]),
        platforms = (APPLE, ANDROID, CXX, WINDOWS),
        preferred_linkage = "static",
        # Feature switches passed to the XNNPACK sources as preprocessor
        # defines.
        preprocessor_flags = [
            "-DXNN_LOG_LEVEL=0",
            "-DXNN_NO_Q8_OPERATORS",
            "-DXNN_NO_F16_OPERATORS",
            "-DXNN_NO_NCHW_OPERATORS",
            "-DXNN_NO_QU8_OPERATORS",
            "-DXNN_NO_U8_OPERATORS",
            "-DXNN_NO_X32_OPERATORS",
            "-DXNN_NO_X8_OPERATORS",
            "-DXNN_ENABLE_MEMOPT",
            "-DXNN_ENABLE_SPARSE=0",
            "-DXNN_ENABLE_ASSEMBLY",
            "-DXNN_ENABLE_GEMM_M_SPECIALIZATION",
            "-DXNN_ENABLE_ARM_DOTPROD",
            "-DXNN_ENABLE_CPUINFO",
            "-DXNN_ENABLE_ARM_I8MM=1",
            "-DXNN_ENABLE_ARM_FP16_VECTOR=1",
            "-DXNN_ENABLE_AVXVNNI=0",
        ],
        srcs = XNNPACK_SRCS + LOGGING_SRCS + OPERATOR_SRCS + [
            "XNNPACK/src/configs/hardware-config.c",
            "XNNPACK/src/microkernel-utils.c",
            "XNNPACK/src/operator-run.c",
            "XNNPACK/src/packing.c",
            "XNNPACK/src/cache.c",
            "XNNPACK/src/indirection.c",
            "XNNPACK/src/operator-utils.c",
            "XNNPACK/src/normalization.c",
            "XNNPACK/src/allocator.c",
            "XNNPACK/src/memory.c",
            "XNNPACK/src/mutex.c",
            "XNNPACK/src/microparams-init.c",
        ],
        visibility = ["PUBLIC"],
        windows_clang_compiler_flags_override = (WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS) if XNNPACK_WINDOWS_AVX512F_ENABLED else WINDOWS_FLAGS,
        windows_compiler_flags_override = WINDOWS_FLAGS if XNNPACK_WINDOWS_AVX512F_ENABLED else [],
    )
2693

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.