# pytorch
# 2602 lines · 76.7 KB
1load("//tools/build_defs:fb_xplat_cxx_library.bzl", "fb_xplat_cxx_library")
2load("//tools/build_defs:fbsource_utils.bzl", "is_arvr_mode")
3load("//tools/build_defs:glob_defs.bzl", "subdir_glob")
4load("//tools/build_defs:platform_defs.bzl", "ANDROID", "APPLE", "APPLETVOS", "CXX", "IOS", "MACOSX", "WINDOWS")
5load(
6":xnnpack_src_defs.bzl",
7"JIT_SRCS",
8"LOGGING_SRCS",
9"OPERATOR_SRCS",
10"SUBGRAPH_SRCS",
11"TABLE_SRCS",
12"XNNPACK_SRCS",
13)
14load(
15":xnnpack_wrapper_defs.bzl",
16"AARCH32_ASM_MICROKERNEL_SRCS",
17"AARCH64_ASM_MICROKERNEL_SRCS",
18"PROD_ARMSIMD32_MICROKERNEL_SRCS",
19"PROD_AVX2_MICROKERNEL_SRCS",
20"PROD_AVX512F_MICROKERNEL_SRCS",
21"PROD_AVX512SKX_MICROKERNEL_SRCS",
22"PROD_AVX512VBMI_MICROKERNEL_SRCS",
23"PROD_AVX512VNNI_MICROKERNEL_SRCS",
24"PROD_AVXVNNI_MICROKERNEL_SRCS",
25"PROD_AVX_MICROKERNEL_SRCS",
26"PROD_F16C_MICROKERNEL_SRCS",
27"PROD_FMA3_MICROKERNEL_SRCS",
28"PROD_FP16ARITH_MICROKERNEL_SRCS",
29"PROD_NEONDOTFP16ARITH_AARCH64_MICROKERNEL_SRCS",
30"PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS",
31"PROD_NEONDOT_AARCH64_MICROKERNEL_SRCS",
32"PROD_NEONDOT_MICROKERNEL_SRCS",
33"PROD_NEONFMA_MICROKERNEL_SRCS",
34"PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS",
35"PROD_NEONFP16ARITH_MICROKERNEL_SRCS",
36"PROD_NEONFP16_MICROKERNEL_SRCS",
37"PROD_NEONI8MM_MICROKERNEL_SRCS",
38"PROD_NEONV8_MICROKERNEL_SRCS",
39"PROD_NEON_AARCH64_MICROKERNEL_SRCS",
40"PROD_NEON_MICROKERNEL_SRCS",
41"PROD_SCALAR_MICROKERNEL_SRCS",
42"PROD_SSE2_MICROKERNEL_SRCS",
43"PROD_SSE41_MICROKERNEL_SRCS",
44"PROD_SSE_MICROKERNEL_SRCS",
45"PROD_SSSE3_MICROKERNEL_SRCS",
46"PROD_XOP_MICROKERNEL_SRCS",
47)
48
# This defines XNNPACK targets for both fbsource BUCK and OSS BUCK.
# Note that file paths are relative to the BUCK file this is called from, not to this bzl file.
# So for the fbsource build they point to xplat/third-party/XNNPACK/XNNPACK,
# and for OSS they point to pytorch/third_party/XNNPACK.
53def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = False):
54WINDOWS_FLAGS = [
55"/D__x86_64__",
56"/EHsc",
57"/wd4090", # 'function': different 'const' qualifiers
58"/wd4146", # unary minus operator applied to unsigned type, result still unsigned
59] + ([
60"/D__AVX512F__", # needed to avoid linkage errors
61"-mavx2",
62"/D__builtin_clz=__lzcnt", # Intrinsics are spelled differently in MSVC
63"/Drestrict=", # MSVC doesn't understand [restrict XNN_NUM_ELEMENTS(N)] syntax
64] if XNNPACK_WINDOWS_AVX512F_ENABLED else [])
65
66WINDOWS_CLANG_COMPILER_FLAGS = [
67"-Wno-error",
68"-Wno-error=undef",
69"-Wno-error=incompatible-pointer-types",
70"-Wno-error=incompatible-pointer-types-discards-qualifiers",
71]
72
73fb_xplat_cxx_library(
74name = "interface",
75header_namespace = "",
76exported_headers = {
77"xnnpack.h": "XNNPACK/include/xnnpack.h",
78},
79apple_sdks = (IOS, MACOSX, APPLETVOS),
80labels = labels,
81preprocessor_flags = [
82"-DXNN_LOG_LEVEL=0",
83],
84visibility = ["PUBLIC"],
85exported_deps = [
86# Dependency only on pthreadpool interface
87third_party("pthreadpool_header"),
88],
89)
90
91fb_xplat_cxx_library(
92name = "subgraph",
93srcs = SUBGRAPH_SRCS,
94headers = subdir_glob([
95("XNNPACK/src", "**/*.h"),
96]),
97header_namespace = "",
98apple_sdks = (IOS, MACOSX, APPLETVOS),
99compiler_flags = [
100"-O2",
101],
102fbobjc_preprocessor_flags = [
103"-DXNN_PRIVATE=",
104"-DXNN_INTERNAL=",
105],
106labels = labels,
107preferred_linkage = "static",
108preprocessor_flags = [
109"-DXNN_LOG_LEVEL=0",
110"-DXNN_ENABLE_JIT=0",
111"-DXNN_ENABLE_SPARSE=0",
112"-DXNN_ENABLE_MEMOPT",
113],
114visibility = ["PUBLIC"],
115windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
116windows_compiler_flags_override = WINDOWS_FLAGS,
117deps = [
118":interface",
119third_party("FP16"),
120third_party("FXdiv"),
121third_party("clog"),
122],
123)
124
125fb_xplat_cxx_library(
126name = "tables",
127srcs = TABLE_SRCS,
128headers = subdir_glob([
129("XNNPACK/src", "**/*.h"),
130]),
131header_namespace = "",
132apple_sdks = (IOS, MACOSX, APPLETVOS),
133compiler_flags = [
134"-O2",
135],
136fbobjc_preprocessor_flags = [
137"-DXNN_PRIVATE=",
138"-DXNN_INTERNAL=",
139],
140labels = labels,
141preferred_linkage = "static",
142preprocessor_flags = [
143"-DXNN_LOG_LEVEL=0",
144],
145visibility = ["PUBLIC"],
146windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
147windows_compiler_flags_override = WINDOWS_FLAGS,
148deps = [
149":interface",
150third_party("FP16"),
151third_party("FXdiv"),
152third_party("clog"),
153],
154)
155
156fb_xplat_cxx_library(
157name = "jit_memory",
158# srcs have to include HOT_SRCS to be able to build on ARVR
159srcs = JIT_SRCS,
160headers = subdir_glob([
161("XNNPACK/src", "**/*.h"),
162]),
163header_namespace = "",
164apple_sdks = (IOS, MACOSX, APPLETVOS),
165compiler_flags = [
166"-Oz",
167],
168fbobjc_preprocessor_flags = [
169"-DXNN_PRIVATE=",
170"-DXNN_INTERNAL=",
171],
172labels = labels,
173platforms = (APPLE, ANDROID, CXX, WINDOWS),
174preferred_linkage = "static",
175preprocessor_flags = [
176"-DXNN_LOG_LEVEL=0",
177],
178visibility = ["PUBLIC"],
179windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
180windows_compiler_flags_override = WINDOWS_FLAGS,
181deps = [
182":interface",
183third_party("clog"),
184],
185)
186
187fb_xplat_cxx_library(
188name = "ukernels_scalar",
189srcs = PROD_SCALAR_MICROKERNEL_SRCS,
190headers = subdir_glob([
191("XNNPACK/src", "**/*.c"),
192("XNNPACK/src", "**/*.h"),
193]),
194header_namespace = "",
195apple_sdks = (IOS, MACOSX, APPLETVOS),
196compiler_flags = [
197"-O2",
198"-fno-fast-math",
199"-fno-math-errno",
200"-ffp-contract=off",
201],
202fbobjc_preprocessor_flags = [
203"-DXNN_PRIVATE=",
204"-DXNN_INTERNAL=",
205],
206labels = labels,
207preferred_linkage = "static",
208preprocessor_flags = [
209"-DXNN_LOG_LEVEL=0",
210],
211visibility = ["PUBLIC"],
212windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
213windows_compiler_flags_override = WINDOWS_FLAGS,
214deps = [
215":interface",
216third_party("FP16"),
217third_party("FXdiv"),
218],
219)
220
221fb_xplat_cxx_library(
222name = "ukernels_sse",
223srcs = PROD_SSE_MICROKERNEL_SRCS if is_arvr_mode() else [],
224headers = subdir_glob([
225("XNNPACK/src", "**/*.c"),
226("XNNPACK/src", "**/*.h"),
227]),
228header_namespace = "",
229apple_sdks = (IOS, MACOSX, APPLETVOS),
230compiler_flags = [
231"-O2",
232],
233fbobjc_preprocessor_flags = [
234"-DXNN_PRIVATE=",
235"-DXNN_INTERNAL=",
236],
237labels = labels,
238platform_compiler_flags = [
239(
240"x86",
241[
242"-msse",
243],
244),
245],
246platform_srcs = ([
247(
248"x86|x86_64|platform009|platform010",
249PROD_SSE_MICROKERNEL_SRCS,
250),
251] if not is_arvr_mode() else []),
252preferred_linkage = "static",
253preprocessor_flags = [
254"-DXNN_LOG_LEVEL=0",
255],
256visibility = ["PUBLIC"],
257windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-msse"],
258windows_compiler_flags_override = WINDOWS_FLAGS + ["-msse"],
259deps = [
260":interface",
261],
262)
263
264fb_xplat_cxx_library(
265name = "ukernels_sse_ovr_win32",
266headers = subdir_glob([
267("XNNPACK/src", "**/*.c"),
268("XNNPACK/src", "**/*.h"),
269]),
270header_namespace = "",
271apple_sdks = (IOS, MACOSX, APPLETVOS),
272compiler_flags = [
273"-O2",
274],
275fbobjc_preprocessor_flags = [
276"-DXNN_PRIVATE=",
277"-DXNN_INTERNAL=",
278],
279labels = labels,
280platform_compiler_flags = [
281(
282"x86",
283[
284"-msse",
285],
286),
287],
288preferred_linkage = "static",
289preprocessor_flags = [
290"-DXNN_LOG_LEVEL=0",
291],
292visibility = ["PUBLIC"],
293windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-msse"],
294windows_compiler_flags_override = WINDOWS_FLAGS + ["-msse"],
295windows_srcs = PROD_SSE_MICROKERNEL_SRCS,
296deps = [
297":interface",
298],
299)
300
301fb_xplat_cxx_library(
302name = "ukernels_sse2",
303srcs = PROD_SSE2_MICROKERNEL_SRCS if is_arvr_mode() else [],
304headers = subdir_glob([
305("XNNPACK/src", "**/*.c"),
306("XNNPACK/src", "**/*.h"),
307]),
308header_namespace = "",
309apple_sdks = (IOS, MACOSX, APPLETVOS),
310compiler_flags = [
311"-O2",
312],
313fbobjc_preprocessor_flags = [
314"-DXNN_PRIVATE=",
315"-DXNN_INTERNAL=",
316],
317labels = labels,
318platform_compiler_flags = [
319(
320"x86",
321[
322"-msse2",
323],
324),
325],
326platform_srcs = ([
327(
328"x86|x86_64|platform009|platform010",
329PROD_SSE2_MICROKERNEL_SRCS,
330),
331] if not is_arvr_mode() else []),
332preferred_linkage = "static",
333preprocessor_flags = [
334"-DXNN_LOG_LEVEL=0",
335],
336visibility = ["PUBLIC"],
337windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-msse2"],
338windows_compiler_flags_override = WINDOWS_FLAGS + ["-msse2"],
339deps = [
340":interface",
341third_party("FP16"),
342],
343)
344
345fb_xplat_cxx_library(
346name = "ukernels_sse2_ovr_win32",
347headers = subdir_glob([
348("XNNPACK/src", "**/*.c"),
349("XNNPACK/src", "**/*.h"),
350]),
351header_namespace = "",
352apple_sdks = (IOS, MACOSX, APPLETVOS),
353compiler_flags = [
354"-O2",
355],
356fbobjc_preprocessor_flags = [
357"-DXNN_PRIVATE=",
358"-DXNN_INTERNAL=",
359],
360labels = labels,
361platform_compiler_flags = [
362(
363"x86",
364[
365"-msse2",
366],
367),
368],
369preferred_linkage = "static",
370preprocessor_flags = [
371"-DXNN_LOG_LEVEL=0",
372],
373visibility = ["PUBLIC"],
374windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-msse2"],
375windows_compiler_flags_override = WINDOWS_FLAGS + ["-msse2"],
376windows_srcs = PROD_SSE2_MICROKERNEL_SRCS,
377deps = [
378":interface",
379third_party("FP16"),
380],
381)
382
383fb_xplat_cxx_library(
384name = "ukernels_ssse3",
385srcs = PROD_SSSE3_MICROKERNEL_SRCS if is_arvr_mode() else [],
386headers = subdir_glob([
387("XNNPACK/src", "**/*.c"),
388("XNNPACK/src", "**/*.h"),
389]),
390header_namespace = "",
391apple_sdks = (IOS, MACOSX, APPLETVOS),
392compiler_flags = [
393"-O2",
394],
395fbobjc_preprocessor_flags = [
396"-DXNN_PRIVATE=",
397"-DXNN_INTERNAL=",
398],
399labels = labels,
400platform_compiler_flags = [
401(
402"x86",
403[
404"-mssse3",
405],
406),
407],
408platform_srcs = ([
409(
410"x86|x86_64|platform009|platform010",
411PROD_SSSE3_MICROKERNEL_SRCS,
412),
413] if not is_arvr_mode() else []),
414preferred_linkage = "static",
415preprocessor_flags = [
416"-DXNN_LOG_LEVEL=0",
417],
418visibility = ["PUBLIC"],
419windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mssse3"],
420windows_compiler_flags_override = WINDOWS_FLAGS + ["-mssse3"],
421deps = [
422":interface",
423third_party("FP16"),
424],
425)
426
427fb_xplat_cxx_library(
428name = "ukernels_ssse3_ovr_win32",
429headers = subdir_glob([
430("XNNPACK/src", "**/*.c"),
431("XNNPACK/src", "**/*.h"),
432]),
433header_namespace = "",
434apple_sdks = (IOS, MACOSX, APPLETVOS),
435compiler_flags = [
436"-O2",
437],
438fbobjc_preprocessor_flags = [
439"-DXNN_PRIVATE=",
440"-DXNN_INTERNAL=",
441],
442labels = labels,
443platform_compiler_flags = [
444(
445"x86",
446[
447"-mssse3",
448],
449),
450],
451preferred_linkage = "static",
452preprocessor_flags = [
453"-DXNN_LOG_LEVEL=0",
454],
455visibility = ["PUBLIC"],
456windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mssse3"],
457windows_compiler_flags_override = WINDOWS_FLAGS + ["-mssse3"],
458windows_srcs = PROD_SSSE3_MICROKERNEL_SRCS,
459deps = [
460":interface",
461third_party("FP16"),
462],
463)
464
465fb_xplat_cxx_library(
466name = "ukernels_sse41",
467srcs = PROD_SSE41_MICROKERNEL_SRCS if is_arvr_mode() else [],
468headers = subdir_glob([
469("XNNPACK/src", "**/*.c"),
470("XNNPACK/src", "**/*.h"),
471]),
472header_namespace = "",
473apple_sdks = (IOS, MACOSX, APPLETVOS),
474compiler_flags = [
475"-O2",
476],
477fbobjc_preprocessor_flags = [
478"-DXNN_PRIVATE=",
479"-DXNN_INTERNAL=",
480],
481labels = labels,
482platform_compiler_flags = [
483(
484"x86",
485[
486"-msse4.1",
487],
488),
489],
490platform_srcs = ([
491(
492"x86|x86_64|platform009|platform010",
493PROD_SSE41_MICROKERNEL_SRCS,
494),
495] if not is_arvr_mode() else []),
496preferred_linkage = "static",
497preprocessor_flags = [
498"-DXNN_LOG_LEVEL=0",
499],
500visibility = ["PUBLIC"],
501windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-msse4.1"],
502windows_compiler_flags_override = WINDOWS_FLAGS + ["-msse4.1"],
503deps = [
504":interface",
505third_party("FP16"),
506],
507)
508
509fb_xplat_cxx_library(
510name = "ukernels_sse41_ovr_win32",
511headers = subdir_glob([
512("XNNPACK/src", "**/*.c"),
513("XNNPACK/src", "**/*.h"),
514]),
515header_namespace = "",
516apple_sdks = (IOS, MACOSX, APPLETVOS),
517compiler_flags = [
518"-O2",
519],
520fbobjc_preprocessor_flags = [
521"-DXNN_PRIVATE=",
522"-DXNN_INTERNAL=",
523],
524labels = labels,
525platform_compiler_flags = [
526(
527"x86",
528[
529"-msse4.1",
530],
531),
532],
533preferred_linkage = "static",
534preprocessor_flags = [
535"-DXNN_LOG_LEVEL=0",
536],
537visibility = ["PUBLIC"],
538windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-msse4.1"],
539windows_compiler_flags_override = WINDOWS_FLAGS + ["-msse4.1"],
540windows_srcs = PROD_SSE41_MICROKERNEL_SRCS,
541deps = [
542":interface",
543third_party("FP16"),
544],
545)
546
547fb_xplat_cxx_library(
548name = "ukernels_avx",
549srcs = PROD_AVX_MICROKERNEL_SRCS if is_arvr_mode() else [],
550headers = subdir_glob([
551("XNNPACK/src", "**/*.h"),
552("XNNPACK/src", "**/*.c"),
553]),
554header_namespace = "",
555apple_sdks = (IOS, MACOSX, APPLETVOS),
556compiler_flags = [
557"-O2",
558] + select({
559"DEFAULT": [],
560"ovr_config//cpu:x86_32": [
561"-mavx",
562],
563"ovr_config//cpu:x86_64": [
564"-mavx",
565],
566}),
567fbobjc_preprocessor_flags = [
568"-DXNN_PRIVATE=",
569"-DXNN_INTERNAL=",
570],
571labels = labels,
572platform_compiler_flags = [
573(
574"x86|x86_64|platform009|platform010",
575[
576"-mavx",
577],
578),
579],
580platform_srcs = ([
581(
582"x86|x86_64|platform009|platform010",
583PROD_AVX_MICROKERNEL_SRCS,
584),
585] if not is_arvr_mode() else []),
586preferred_linkage = "static",
587preprocessor_flags = [
588"-DXNN_LOG_LEVEL=0",
589],
590visibility = ["PUBLIC"],
591windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
592windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
593deps = [
594":interface",
595],
596)
597
598fb_xplat_cxx_library(
599name = "ukernels_avx_ovr_win32",
600headers = subdir_glob([
601("XNNPACK/src", "**/*.h"),
602("XNNPACK/src", "**/*.c"),
603]),
604header_namespace = "",
605apple_sdks = (IOS, MACOSX, APPLETVOS),
606compiler_flags = [
607"-O2",
608"-mavx",
609],
610fbobjc_preprocessor_flags = [
611"-DXNN_PRIVATE=",
612"-DXNN_INTERNAL=",
613],
614labels = labels,
615platform_compiler_flags = [
616(
617"x86",
618[
619"-mavx",
620],
621),
622],
623preferred_linkage = "static",
624preprocessor_flags = [
625"-DXNN_LOG_LEVEL=0",
626],
627visibility = ["PUBLIC"],
628windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
629windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
630windows_srcs = PROD_AVX_MICROKERNEL_SRCS,
631deps = [
632":interface",
633],
634)
635
636fb_xplat_cxx_library(
637name = "ukernels_avx512vnni",
638srcs = PROD_AVX512VNNI_MICROKERNEL_SRCS if is_arvr_mode() else [],
639headers = subdir_glob([
640("XNNPACK/src", "**/*.h"),
641("XNNPACK/src", "**/*.c"),
642]),
643header_namespace = "",
644apple_sdks = (IOS, MACOSX, APPLETVOS),
645compiler_flags = [
646"-O2",
647] + select({
648"DEFAULT": [],
649"ovr_config//cpu:x86_32": [
650"-mavx",
651],
652"ovr_config//cpu:x86_64": [
653"-mavx",
654],
655}),
656fbobjc_preprocessor_flags = [
657"-DXNN_PRIVATE=",
658"-DXNN_INTERNAL=",
659],
660labels = labels,
661platform_compiler_flags = [
662(
663"x86|x86_64|platform009|platform010",
664[
665"-mavx512f",
666"-mavx512cd",
667"-mavx512bw",
668"-mavx512dq",
669"-mavx512vl",
670"-mavx512vnni",
671],
672),
673],
674platform_srcs = ([
675(
676"x86|x86_64|platform009|platform010",
677PROD_AVX512VNNI_MICROKERNEL_SRCS,
678),
679] if not is_arvr_mode() else []),
680preferred_linkage = "static",
681preprocessor_flags = [
682"-DXNN_LOG_LEVEL=0",
683],
684visibility = ["PUBLIC"],
685windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
686windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
687deps = [
688":interface",
689],
690)
691
692fb_xplat_cxx_library(
693name = "ukernels_avx512vnni_ovr_win32",
694headers = subdir_glob([
695("XNNPACK/src", "**/*.h"),
696("XNNPACK/src", "**/*.c"),
697]),
698header_namespace = "",
699apple_sdks = (IOS, MACOSX, APPLETVOS),
700compiler_flags = [
701"-O2",
702],
703fbobjc_preprocessor_flags = [
704"-DXNN_PRIVATE=",
705"-DXNN_INTERNAL=",
706],
707labels = labels,
708platform_compiler_flags = [
709(
710"x86|x86_64|platform009|platform010",
711[
712"-mavx512f",
713"-mavx512cd",
714"-mavx512bw",
715"-mavx512dq",
716"-mavx512vl",
717"-mavx512vnni",
718],
719),
720],
721preferred_linkage = "static",
722preprocessor_flags = [
723"-DXNN_LOG_LEVEL=0",
724],
725visibility = ["PUBLIC"],
726windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
727windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
728windows_srcs = PROD_AVX512VNNI_MICROKERNEL_SRCS,
729deps = [
730":interface",
731],
732)
733
734fb_xplat_cxx_library(
735name = "ukernels_avxvnni",
736srcs = PROD_AVXVNNI_MICROKERNEL_SRCS if is_arvr_mode() else [],
737headers = subdir_glob([
738("XNNPACK/src", "**/*.h"),
739("XNNPACK/src", "**/*.c"),
740]),
741header_namespace = "",
742apple_sdks = (IOS, MACOSX, APPLETVOS),
743compiler_flags = [
744"-O2",
745],
746fbobjc_preprocessor_flags = [
747"-DXNN_PRIVATE=",
748"-DXNN_INTERNAL=",
749],
750labels = labels,
751platform_compiler_flags = [
752(
753"x86|x86_64|platform009|platform010",
754[
755"-mavx2",
756"-mavxvnni",
757],
758),
759],
760platform_srcs = ([
761(
762"x86|x86_64|platform009|platform010",
763PROD_AVXVNNI_MICROKERNEL_SRCS,
764),
765] if not is_arvr_mode() else []),
766preferred_linkage = "static",
767preprocessor_flags = [
768"-DXNN_LOG_LEVEL=0",
769],
770visibility = ["PUBLIC"],
771windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
772windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
773deps = [
774":interface",
775],
776)
777
778fb_xplat_cxx_library(
779name = "ukernels_avxvnni_ovr_win32",
780headers = subdir_glob([
781("XNNPACK/src", "**/*.h"),
782("XNNPACK/src", "**/*.c"),
783]),
784header_namespace = "",
785apple_sdks = (IOS, MACOSX, APPLETVOS),
786compiler_flags = [
787"-O2",
788],
789fbobjc_preprocessor_flags = [
790"-DXNN_PRIVATE=",
791"-DXNN_INTERNAL=",
792],
793labels = labels,
794platform_compiler_flags = [
795(
796"x86|x86_64|platform009|platform010",
797[
798"-mavx2",
799"-mavxvnni",
800],
801),
802],
803preferred_linkage = "static",
804preprocessor_flags = [
805"-DXNN_LOG_LEVEL=0",
806],
807visibility = ["PUBLIC"],
808windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
809windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
810windows_srcs = PROD_AVXVNNI_MICROKERNEL_SRCS,
811deps = [
812":interface",
813],
814)
815
816fb_xplat_cxx_library(
817name = "ukernels_f16c",
818srcs = PROD_F16C_MICROKERNEL_SRCS if is_arvr_mode() else [],
819headers = subdir_glob([
820("XNNPACK/src", "**/*.h"),
821("XNNPACK/src", "**/*.c"),
822]),
823header_namespace = "",
824apple_sdks = (IOS, MACOSX, APPLETVOS),
825compiler_flags = [
826"-O2",
827] + select({
828"DEFAULT": [],
829"ovr_config//cpu:x86_32": [
830"-mf16c",
831],
832"ovr_config//cpu:x86_64": [
833"-mf16c",
834],
835}),
836fbobjc_preprocessor_flags = [
837"-DXNN_PRIVATE=",
838"-DXNN_INTERNAL=",
839],
840labels = labels,
841platform_compiler_flags = [
842(
843"x86|x86_64|platform009|platform010",
844[
845"-mf16c",
846],
847),
848],
849platform_srcs = ([
850(
851"x86|x86_64|platform009|platform010",
852PROD_F16C_MICROKERNEL_SRCS,
853),
854] if not is_arvr_mode() else []),
855platforms = (APPLE, ANDROID, CXX, WINDOWS),
856preferred_linkage = "static",
857preprocessor_flags = [
858"-DXNN_LOG_LEVEL=0",
859],
860visibility = ["PUBLIC"],
861windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mf16c"],
862windows_compiler_flags_override = WINDOWS_FLAGS + ["-mf16c"],
863deps = [
864":interface",
865],
866)
867
868fb_xplat_cxx_library(
869name = "ukernels_f16c_ovr_win32",
870headers = subdir_glob([
871("XNNPACK/src", "**/*.h"),
872("XNNPACK/src", "**/*.c"),
873]),
874header_namespace = "",
875apple_sdks = (IOS, MACOSX, APPLETVOS),
876compiler_flags = [
877"-O2",
878"-mf16c",
879],
880fbobjc_preprocessor_flags = [
881"-DXNN_PRIVATE=",
882"-DXNN_INTERNAL=",
883],
884labels = labels,
885platform_compiler_flags = [
886(
887"x86",
888[
889"-mf16c",
890],
891),
892],
893platforms = (APPLE, ANDROID, CXX, WINDOWS),
894preferred_linkage = "static",
895preprocessor_flags = [
896"-DXNN_LOG_LEVEL=0",
897],
898visibility = ["PUBLIC"],
899windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mf16c"],
900windows_compiler_flags_override = WINDOWS_FLAGS + ["-mf16c"],
901windows_srcs = PROD_F16C_MICROKERNEL_SRCS,
902deps = [
903":interface",
904],
905)
906
907fb_xplat_cxx_library(
908name = "ukernels_xop",
909srcs = PROD_XOP_MICROKERNEL_SRCS if is_arvr_mode() else [],
910headers = subdir_glob([
911("XNNPACK/src", "**/*.h"),
912("XNNPACK/src", "**/*.c"),
913]),
914header_namespace = "",
915apple_sdks = (IOS, MACOSX, APPLETVOS),
916compiler_flags = [
917"-O2",
918] + select({
919"DEFAULT": [],
920"ovr_config//cpu:x86_32": [
921"-mxop",
922],
923"ovr_config//cpu:x86_64": [
924"-mxop",
925],
926}),
927platform_compiler_flags = [
928(
929"x86|x86_64|platform009|platform010",
930[
931"-mxop",
932],
933),
934],
935fbobjc_preprocessor_flags = [
936"-DXNN_PRIVATE=",
937"-DXNN_INTERNAL=",
938],
939labels = labels,
940platform_preprocessor_flags = [
941(
942"windows-x86_64",
943[
944"-Drestrict=",
945],
946),
947],
948platform_srcs = ([
949(
950"x86|x86_64|platform009|platform010",
951PROD_XOP_MICROKERNEL_SRCS,
952),
953] if not is_arvr_mode() else []),
954preferred_linkage = "static",
955preprocessor_flags = [
956"-DXNN_LOG_LEVEL=0",
957],
958visibility = ["PUBLIC"],
959windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mxop"],
960windows_compiler_flags_override = WINDOWS_FLAGS + ["-mxop"],
961deps = [
962":interface",
963],
964)
965
966fb_xplat_cxx_library(
967name = "ukernels_xop_ovr_win32",
968headers = subdir_glob([
969("XNNPACK/src", "**/*.h"),
970("XNNPACK/src", "**/*.c"),
971]),
972header_namespace = "",
973apple_sdks = (IOS, MACOSX, APPLETVOS),
974compiler_flags = [
975"-O2",
976"-mxop",
977],
978fbobjc_preprocessor_flags = [
979"-DXNN_PRIVATE=",
980"-DXNN_INTERNAL=",
981],
982labels = labels,
983platform_preprocessor_flags = [
984(
985"windows-x86_64",
986[
987"-Drestrict=",
988],
989),
990],
991preferred_linkage = "static",
992preprocessor_flags = [
993"-DXNN_LOG_LEVEL=0",
994],
995visibility = ["PUBLIC"],
996windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mxop"],
997windows_compiler_flags_override = WINDOWS_FLAGS + ["-mxop"],
998windows_srcs = PROD_XOP_MICROKERNEL_SRCS,
999deps = [
1000":interface",
1001],
1002)
1003
1004fb_xplat_cxx_library(
1005name = "ukernels_fma3",
1006srcs = PROD_FMA3_MICROKERNEL_SRCS if is_arvr_mode() else [],
1007headers = subdir_glob([
1008("XNNPACK/src", "**/*.h"),
1009("XNNPACK/src", "**/*.c"),
1010]),
1011header_namespace = "",
1012apple_sdks = (IOS, MACOSX, APPLETVOS),
1013compiler_flags = [
1014"-O2",
1015] + select({
1016"DEFAULT": [],
1017"ovr_config//cpu:x86_32": [
1018"-mfma",
1019"-mf16c",
1020],
1021"ovr_config//cpu:x86_64": [
1022"-mfma",
1023"-mf16c",
1024],
1025}),
1026fbobjc_preprocessor_flags = [
1027"-DXNN_PRIVATE=",
1028"-DXNN_INTERNAL=",
1029],
1030labels = labels,
1031platform_compiler_flags = [
1032(
1033"(i[3-6]86|x86|x86_64|AMD64)",
1034[
1035"-mfma",
1036"-mf16c",
1037],
1038),
1039],
1040platform_srcs = ([
1041(
1042"x86|x86_64|platform009|platform010",
1043PROD_FMA3_MICROKERNEL_SRCS,
1044),
1045] if not is_arvr_mode() else []),
1046preferred_linkage = "static",
1047preprocessor_flags = [
1048"-DXNN_LOG_LEVEL=0",
1049],
1050visibility = ["PUBLIC"],
1051windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + [
1052"-mfma",
1053"-mf16c",
1054],
1055windows_compiler_flags_override = WINDOWS_FLAGS + [
1056"-mfma",
1057"-mf16c",
1058],
1059deps = [
1060":interface",
1061],
1062)
1063
1064fb_xplat_cxx_library(
1065name = "ukernels_fma3_ovr_win32",
1066headers = subdir_glob([
1067("XNNPACK/src", "**/*.h"),
1068("XNNPACK/src", "**/*.c"),
1069]),
1070header_namespace = "",
1071apple_sdks = (IOS, MACOSX, APPLETVOS),
1072compiler_flags = [
1073"-O2",
1074"-mfma",
1075"-mf16c",
1076],
1077fbobjc_preprocessor_flags = [
1078"-DXNN_PRIVATE=",
1079"-DXNN_INTERNAL=",
1080],
1081labels = labels,
1082platform_compiler_flags = [
1083(
1084"^(i[3-6]86|x86|x86_64|AMD64)$",
1085[
1086"-mfma",
1087"-mf16c",
1088],
1089),
1090],
1091preferred_linkage = "static",
1092preprocessor_flags = [
1093"-DXNN_LOG_LEVEL=0",
1094],
1095visibility = ["PUBLIC"],
1096windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + [
1097"-mfma",
1098"-mf16c",
1099],
1100windows_compiler_flags_override = WINDOWS_FLAGS + [
1101"-mfma",
1102"-mf16c",
1103],
1104windows_srcs = PROD_FMA3_MICROKERNEL_SRCS,
1105deps = [
1106":interface",
1107],
1108)
1109
1110fb_xplat_cxx_library(
1111name = "ukernels_avx2",
1112srcs = PROD_AVX2_MICROKERNEL_SRCS if is_arvr_mode() else [],
1113headers = subdir_glob([
1114("XNNPACK/src", "**/*.c"),
1115("XNNPACK/src", "**/*.h"),
1116]),
1117header_namespace = "",
1118apple_sdks = (IOS, MACOSX, APPLETVOS),
1119compiler_flags = [
1120"-O2",
1121] + select({
1122"DEFAULT": [],
1123"ovr_config//cpu:x86_32": [
1124"-mavx2",
1125"-mfma",
1126"-mf16c",
1127],
1128"ovr_config//cpu:x86_64": [
1129"-mavx2",
1130"-mfma",
1131"-mf16c",
1132],
1133}),
1134fbobjc_preprocessor_flags = [
1135"-DXNN_PRIVATE=",
1136"-DXNN_INTERNAL=",
1137],
1138labels = labels,
1139platform_compiler_flags = [
1140(
1141"x86|x86_64|platform009|platform010",
1142[
1143"-mavx2",
1144"-mfma",
1145"-mf16c",
1146],
1147),
1148],
1149platform_srcs = ([
1150(
1151"x86|x86_64|platform009|platform010",
1152PROD_AVX2_MICROKERNEL_SRCS,
1153),
1154] if not is_arvr_mode() else []),
1155preferred_linkage = "static",
1156preprocessor_flags = [
1157"-DXNN_LOG_LEVEL=0",
1158],
1159visibility = ["PUBLIC"],
1160windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + [
1161"-mavx2",
1162"-mfma",
1163"-mf16c",
1164],
1165windows_compiler_flags_override = WINDOWS_FLAGS + [
1166"-mavx2",
1167"-mfma",
1168"-mf16c",
1169],
1170deps = [
1171":interface",
1172],
1173)
1174
1175fb_xplat_cxx_library(
1176name = "ukernels_avx2_ovr_win32",
1177headers = subdir_glob([
1178("XNNPACK/src", "**/*.c"),
1179("XNNPACK/src", "**/*.h"),
1180]),
1181header_namespace = "",
1182apple_sdks = (IOS, MACOSX, APPLETVOS),
1183compiler_flags = [
1184"-O2",
1185"-mavx2",
1186"-mfma",
1187"-mf16c",
1188],
1189fbobjc_preprocessor_flags = [
1190"-DXNN_PRIVATE=",
1191"-DXNN_INTERNAL=",
1192],
1193labels = labels,
1194platform_compiler_flags = [
1195(
1196"x86",
1197[
1198"-mavx2",
1199"-mfma",
1200"-mf16c",
1201],
1202),
1203],
1204preferred_linkage = "static",
1205preprocessor_flags = [
1206"-DXNN_LOG_LEVEL=0",
1207],
1208visibility = ["PUBLIC"],
1209windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + [
1210"-mavx2",
1211"-mfma",
1212"-mf16c",
1213],
1214windows_compiler_flags_override = WINDOWS_FLAGS + [
1215"-mavx2",
1216"-mfma",
1217"-mf16c",
1218],
1219windows_srcs = PROD_AVX2_MICROKERNEL_SRCS,
1220deps = [
1221":interface",
1222],
1223)
1224
1225fb_xplat_cxx_library(
1226name = "ukernels_avx512",
1227srcs = PROD_AVX512F_MICROKERNEL_SRCS if is_arvr_mode() else [],
1228headers = subdir_glob([
1229("XNNPACK/src", "**/*.c"),
1230("XNNPACK/src", "**/*.h"),
1231]),
1232header_namespace = "",
1233apple_sdks = (IOS, MACOSX, APPLETVOS),
1234compiler_flags = [
1235"-O2",
1236] + select({
1237"DEFAULT": [],
1238"ovr_config//cpu:x86_32": [
1239"-mavx512f",
1240],
1241"ovr_config//cpu:x86_64": [
1242"-mavx512f",
1243],
1244}),
1245fbobjc_preprocessor_flags = [
1246"-DXNN_PRIVATE=",
1247"-DXNN_INTERNAL=",
1248],
1249labels = labels,
1250platform_compiler_flags = [
1251(
1252"x86|x86_64|platform009|platform010",
1253[
1254"-mavx512f",
1255],
1256),
1257],
1258platform_srcs = ([
1259(
1260"x86|x86_64|platform009|platform010",
1261PROD_AVX512F_MICROKERNEL_SRCS,
1262),
1263] if not is_arvr_mode() else []),
1264preferred_linkage = "static",
1265preprocessor_flags = [
1266"-DXNN_LOG_LEVEL=0",
1267],
1268visibility = ["PUBLIC"],
1269windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx512f"],
1270windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx512f"],
1271deps = [
1272":interface",
1273],
1274)
1275
1276fb_xplat_cxx_library(
1277name = "ukernels_avx512vbmi",
1278srcs = PROD_AVX512VBMI_MICROKERNEL_SRCS if is_arvr_mode() else [],
1279headers = subdir_glob([
1280("XNNPACK/src", "**/*.c"),
1281("XNNPACK/src", "**/*.h"),
1282]),
1283header_namespace = "",
1284apple_sdks = (IOS, MACOSX, APPLETVOS),
1285compiler_flags = [
1286"-O2",
1287] + select({
1288"DEFAULT": [],
1289"ovr_config//cpu:x86_32": [
1290"-mavx512f",
1291"-mavx512cd",
1292"-mavx512bw",
1293"-mavx512dq",
1294"-mavx512vl",
1295"-mavx512vbmi",
1296],
1297"ovr_config//cpu:x86_64": [
1298"-mavx512f",
1299"-mavx512cd",
1300"-mavx512bw",
1301"-mavx512dq",
1302"-mavx512vl",
1303"-mavx512vbmi",
1304],
1305}),
1306fbobjc_preprocessor_flags = [
1307"-DXNN_PRIVATE=",
1308"-DXNN_INTERNAL=",
1309],
1310labels = labels,
1311platform_compiler_flags = [
1312(
1313"(i[3-6]86|x86|x86_64|AMD64)",
1314[
1315"-mavx512f",
1316"-mavx512cd",
1317"-mavx512bw",
1318"-mavx512dq",
1319"-mavx512vl",
1320"-mavx512vbmi",
1321],
1322),
1323],
1324platform_srcs = ([
1325(
1326"x86|x86_64|platform009|platform010",
1327PROD_AVX512VBMI_MICROKERNEL_SRCS,
1328),
1329] if not is_arvr_mode() else []),
1330preferred_linkage = "static",
1331preprocessor_flags = [
1332"-DXNN_LOG_LEVEL=0",
1333],
1334visibility = ["PUBLIC"],
1335windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + [
1336"-mavx512f",
1337"-mavx512cd",
1338"-mavx512bw",
1339"-mavx512dq",
1340"-mavx512vl",
1341"-mavx512vbmi",
1342],
1343windows_compiler_flags_override = WINDOWS_FLAGS + [
1344"-mavx512f",
1345"-mavx512cd",
1346"-mavx512bw",
1347"-mavx512dq",
1348"-mavx512vl",
1349"-mavx512vbmi",
1350],
1351deps = [
1352":interface",
1353],
1354)
1355
# AVX512F production microkernels for the Windows override build: the sources
# are attached only through `windows_srcs`.
avx512f_win32_flags = ["-mavx512f"]
fb_xplat_cxx_library(
    name = "ukernels_avx512_ovr_win32",
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + avx512f_win32_flags,
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    platform_compiler_flags = [
        (
            "x86",
            avx512f_win32_flags,
        ),
    ],
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + avx512f_win32_flags,
    windows_compiler_flags_override = WINDOWS_FLAGS + avx512f_win32_flags,
    windows_srcs = PROD_AVX512F_MICROKERNEL_SRCS,
    deps = [
        ":interface",
    ],
)
1393
# AVX512-SKX production microkernels. When is_arvr_mode() is true the sources
# go in `srcs`; otherwise they are attached per platform via `platform_srcs`.
# The same ISA flag set is used for every x86 configuration below.
avx512skx_flags = [
    "-mavx512f",
    "-mavx512cd",
    "-mavx512bw",
    "-mavx512dq",
    "-mavx512vl",
]
fb_xplat_cxx_library(
    name = "ukernels_avx512skx",
    srcs = PROD_AVX512SKX_MICROKERNEL_SRCS if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
    ] + select({
        "DEFAULT": [],
        "ovr_config//cpu:x86_32": avx512skx_flags,
        "ovr_config//cpu:x86_64": avx512skx_flags,
    }),
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    platform_compiler_flags = [
        (
            "(i[3-6]86|x86|x86_64|AMD64)",
            avx512skx_flags,
        ),
    ],
    platform_srcs = [
        (
            "x86|x86_64|platform009|platform010",
            PROD_AVX512SKX_MICROKERNEL_SRCS,
        ),
    ] if not is_arvr_mode() else [],
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + avx512skx_flags,
    windows_compiler_flags_override = WINDOWS_FLAGS + avx512skx_flags,
    deps = [
        ":interface",
    ],
)
1468
# AVX512-SKX production microkernels for the Windows override build: the
# sources are attached only through `windows_srcs`.
avx512skx_win32_flags = [
    "-mavx512f",
    "-mavx512cd",
    "-mavx512bw",
    "-mavx512dq",
    "-mavx512vl",
]
fb_xplat_cxx_library(
    name = "ukernels_avx512skx_ovr_win32",
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + avx512skx_win32_flags,
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    platform_compiler_flags = [
        (
            "^(i[3-6]86|x86|x86_64|AMD64)$",
            avx512skx_win32_flags,
        ),
    ],
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + avx512skx_win32_flags,
    windows_compiler_flags_override = WINDOWS_FLAGS + avx512skx_win32_flags,
    windows_srcs = PROD_AVX512SKX_MICROKERNEL_SRCS,
    deps = [
        ":interface",
    ],
)
1526
# ARM SIMD32 production microkernels. The ARMv6/VFP flags are applied only on
# platforms matching the 32-bit ARM regex.
fb_xplat_cxx_library(
    name = "ukernels_armsimd32",
    srcs = PROD_ARMSIMD32_MICROKERNEL_SRCS,
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
        "-fno-fast-math",
        "-fno-math-errno",
    ],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    platform_compiler_flags = [
        (
            "(arm32|aarch32|armv7)",
            [
                "-marm",
                "-march=armv6",
                "-mfpu=vfp",
                "-munaligned-access",
            ],
        ),
    ],
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
1569
# NEON production microkernels for 32-bit ARM. When is_arvr_mode() is true the
# sources are selected for ovr_config//cpu:arm32; otherwise they are attached
# to platforms matching the 32-bit ARM regex.
neon_arm32_platform = "(aarch32|arm32|armv7)"
neon_arm32_flags = [
    "-marm",
    "-march=armv7-a",
    "-mfpu=neon",
]
fb_xplat_cxx_library(
    name = "ukernels_neon",
    srcs = select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": PROD_NEON_MICROKERNEL_SRCS,
    }) if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
    ] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": neon_arm32_flags,
    }),
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    platform_compiler_flags = [
        (
            neon_arm32_platform,
            neon_arm32_flags,
        ),
    ],
    platform_srcs = [
        (
            neon_arm32_platform,
            PROD_NEON_MICROKERNEL_SRCS,
        ),
    ] if not is_arvr_mode() else [],
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
1625
# NEON production microkernels for 64-bit ARM: the generic NEON sources plus
# one AArch64-specific source.
# NOTE(review): the `[0]` index depends on the ordering of
# PROD_NEON_AARCH64_MICROKERNEL_SRCS in xnnpack_wrapper_defs.bzl — confirm it
# still points at the intended file whenever that list is regenerated.
neon_aarch64_srcs = PROD_NEON_MICROKERNEL_SRCS + [PROD_NEON_AARCH64_MICROKERNEL_SRCS[0]]
fb_xplat_cxx_library(
    name = "ukernels_neon_aarch64",
    srcs = select({
        "DEFAULT": [],
        "ovr_config//cpu:arm64": neon_aarch64_srcs,
    }) if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
    ],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    platform_srcs = [
        (
            "(aarch64|arm64)",
            neon_aarch64_srcs,
        ),
    ] if not is_arvr_mode() else [],
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
1664
# NEON-FMA production microkernels for 32-bit ARM. When is_arvr_mode() is true
# the sources are selected for ovr_config//cpu:arm32; otherwise they are
# attached to platforms matching the 32-bit ARM regex.
neon_fma_arm32_platform = "(aarch32|arm32|armv7)"
neon_fma_arm32_flags = [
    "-marm",
    "-march=armv7-a",
    "-mfpu=neon-vfpv4",
]
fb_xplat_cxx_library(
    name = "ukernels_neon_fma",
    srcs = select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": PROD_NEONFMA_MICROKERNEL_SRCS,
    }) if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
    ] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": neon_fma_arm32_flags,
    }),
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    platform_compiler_flags = [
        (
            "^iphoneos-armv7$",
            [
                "-mcpu=cyclone",
                "-mtune=generic",
            ],
        ),
        (
            neon_fma_arm32_platform,
            neon_fma_arm32_flags,
        ),
    ],
    platform_srcs = [
        (
            neon_fma_arm32_platform,
            PROD_NEONFMA_MICROKERNEL_SRCS,
        ),
    ] if not is_arvr_mode() else [],
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
1727
# NEON-FMA production microkernels for 64-bit ARM: the generic NEON-FMA
# sources plus one AArch64-specific source.
# NOTE(review): the `[1]` index depends on the ordering of
# PROD_NEON_AARCH64_MICROKERNEL_SRCS in xnnpack_wrapper_defs.bzl — confirm it
# still points at the intended file whenever that list is regenerated.
neonfma_aarch64_srcs = PROD_NEONFMA_MICROKERNEL_SRCS + [PROD_NEON_AARCH64_MICROKERNEL_SRCS[1]]
fb_xplat_cxx_library(
    name = "ukernels_neonfma_aarch64",
    srcs = select({
        "DEFAULT": [],
        "ovr_config//cpu:arm64": neonfma_aarch64_srcs,
    }) if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.h"),
        ("XNNPACK/src", "**/*.c"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
    ],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    platform_srcs = [
        (
            "(arm64|aarch64)$",
            neonfma_aarch64_srcs,
        ),
    ] if not is_arvr_mode() else [],
    platforms = (APPLE, ANDROID, CXX, WINDOWS),
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
1767
# FP16-arithmetic production microkernels. The ARMv8.2 fp16 flag set is
# applied both for ovr_config//cpu:arm32 and for platforms matching the
# 32-bit ARM regex.
fp16arith_arm32_flags = [
    "-marm",
    "-march=armv8.2-a+fp16",
    # GCC emits wrong directives for assembler with -mfpu=fp-armv8
    "-mfpu=neon-fp-armv8",
    # For vsqrth_f16 polyfill using sqrtf
    "-fno-math-errno",
    # For vminh_f16/vmaxh_f16 polyfills using compare + select
    "-ffinite-math-only",
]
fb_xplat_cxx_library(
    name = "ukernels_fp16arith",
    srcs = PROD_FP16ARITH_MICROKERNEL_SRCS,
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
        "-Wno-error=missing-braces",  # required since the SGX toolchain does not have this by default
        "-fno-fast-math",
        "-fno-math-errno",
    ] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": fp16arith_arm32_flags,
    }),
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    platform_compiler_flags = [
        (
            "(aarch32|arm32|armv7)",
            fp16arith_arm32_flags,
        ),
    ],
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    deps = [
        ":interface",
    ],
)
1826
# NEON-FP16 production microkernels. The neon-fp16 flag set is applied both
# for ovr_config//cpu:arm32 and for platforms matching the 32-bit ARM regex.
neon_fp16_arm32_flags = [
    "-marm",
    "-march=armv7-a",
    "-mfpu=neon-fp16",
]
fb_xplat_cxx_library(
    name = "ukernels_neon_fp16",
    srcs = PROD_NEONFP16_MICROKERNEL_SRCS,
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
    ] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": neon_fp16_arm32_flags,
    }),
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    platform_compiler_flags = [
        (
            "(aarch32|arm32|armv7)",
            neon_fp16_arm32_flags,
        ),
    ],
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    deps = [
        ":interface",
    ],
)
1872
# NEON-V8 production microkernels. Each platform regex below carries its own
# flag list; the three lists are different from one another.
fb_xplat_cxx_library(
    name = "ukernels_neon_v8",
    srcs = PROD_NEONV8_MICROKERNEL_SRCS,
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
    ] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm64": ["-march=armv8-a"],
    }),
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    platform_compiler_flags = [
        (
            "(aarch64|arm64)",
            [
                "-march=armv8-a",
            ],
        ),
        (
            "^android-armv7$",
            [
                "-march=armv8-a",
                "-mfpu=neon-fp-armv8",
                "-mfloat-abi=softfp",
            ],
        ),
        (
            "^iphoneos-armv7$",
            [
                "-mcpu=cyclone",
                "-mtune=generic",
            ],
        ),
    ],
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
1928
# NEON dot-product production microkernels for 32-bit ARM. When is_arvr_mode()
# is true the sources are selected for ovr_config//cpu:arm32; otherwise they
# are attached to platforms matching the 32-bit ARM regex.
neon_dot_arm32_platform = "(aarch32|arm32|armv7)"
neon_dot_arm32_flags = [
    "-march=armv8.2-a+dotprod",
    "-mfpu=neon-fp-armv8",
    "-mfloat-abi=softfp",
]
fb_xplat_cxx_library(
    name = "ukernels_neon_dot",
    srcs = select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": PROD_NEONDOT_MICROKERNEL_SRCS,
    }) if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
    ] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": neon_dot_arm32_flags,
    }),
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    platform_compiler_flags = [
        (
            neon_dot_arm32_platform,
            neon_dot_arm32_flags,
        ),
    ],
    platform_srcs = [
        (
            neon_dot_arm32_platform,
            PROD_NEONDOT_MICROKERNEL_SRCS,
        ),
    ] if not is_arvr_mode() else [],
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
1984
# NEON dot-product production microkernels for 64-bit ARM: the generic
# dot-product sources plus the AArch64-specific ones.
neon_dot_aarch64_platform = "(aarch64|arm64)"
neon_dot_aarch64_flags = ["-march=armv8.2-a+dotprod"]
neon_dot_aarch64_srcs = PROD_NEONDOT_MICROKERNEL_SRCS + PROD_NEONDOT_AARCH64_MICROKERNEL_SRCS
fb_xplat_cxx_library(
    name = "ukernels_neon_dot_aarch64",
    srcs = select({
        "DEFAULT": [],
        "ovr_config//cpu:arm64": neon_dot_aarch64_srcs,
    }) if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
    ] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm64": neon_dot_aarch64_flags,
    }),
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    platform_compiler_flags = [
        (
            neon_dot_aarch64_platform,
            neon_dot_aarch64_flags,
        ),
    ],
    platform_srcs = [
        (
            neon_dot_aarch64_platform,
            neon_dot_aarch64_srcs,
        ),
    ] if not is_arvr_mode() else [],
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
2034
# NEON dot-product + FP16-arithmetic production microkernels for 32-bit ARM.
# When is_arvr_mode() is true the sources are selected for
# ovr_config//cpu:arm32; otherwise they are attached to platforms matching the
# 32-bit ARM regex.
neon_dot_fp16arith_arm32_platform = "(aarch32|arm32|armv7)"
neon_dot_fp16arith_arm32_flags = [
    "-marm",
    "-march=armv8.2-a+dotprod+fp16",
    "-mfpu=neon-fp-armv8",
]
fb_xplat_cxx_library(
    name = "ukernels_neon_dot_fp16arith",
    srcs = select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS,
    }) if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
    ] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": neon_dot_fp16arith_arm32_flags,
    }),
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    platform_compiler_flags = [
        (
            neon_dot_fp16arith_arm32_platform,
            neon_dot_fp16arith_arm32_flags,
        ),
    ],
    platform_srcs = [
        (
            neon_dot_fp16arith_arm32_platform,
            PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS,
        ),
    ] if not is_arvr_mode() else [],
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
2090
# NEON dot-product + FP16-arithmetic production microkernels for 64-bit ARM:
# the generic sources plus the AArch64-specific ones.
neon_dot_fp16arith_aarch64_platform = "(aarch64|arm64)"
neon_dot_fp16arith_aarch64_flags = ["-march=armv8.2-a+dotprod+fp16"]
neon_dot_fp16arith_aarch64_srcs = PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS + PROD_NEONDOTFP16ARITH_AARCH64_MICROKERNEL_SRCS
fb_xplat_cxx_library(
    name = "ukernels_neon_dot_fp16arith_aarch64",
    srcs = select({
        "DEFAULT": [],
        "ovr_config//cpu:arm64": neon_dot_fp16arith_aarch64_srcs,
    }) if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
    ] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm64": neon_dot_fp16arith_aarch64_flags,
    }),
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    platform_compiler_flags = [
        (
            neon_dot_fp16arith_aarch64_platform,
            neon_dot_fp16arith_aarch64_flags,
        ),
    ],
    platform_srcs = [
        (
            neon_dot_fp16arith_aarch64_platform,
            neon_dot_fp16arith_aarch64_srcs,
        ),
    ] if not is_arvr_mode() else [],
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
2142
# NEON FP16-arithmetic production microkernels for 32-bit ARM. When
# is_arvr_mode() is true the sources are selected for ovr_config//cpu:arm32;
# otherwise they are attached to platforms matching the 32-bit ARM regex.
neon_fp16arith_arm32_platform = "(aarch32|arm32|armv7)"
neon_fp16arith_arm32_flags = [
    "-marm",
    "-march=armv8.2-a+fp16",
    "-mfpu=neon-fp-armv8",
]
fb_xplat_cxx_library(
    name = "ukernels_neon_fp16arith",
    srcs = select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": PROD_NEONFP16ARITH_MICROKERNEL_SRCS,
    }) if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
    ] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": neon_fp16arith_arm32_flags,
    }),
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    platform_compiler_flags = [
        (
            neon_fp16arith_arm32_platform,
            neon_fp16arith_arm32_flags,
        ),
    ],
    platform_srcs = [
        (
            neon_fp16arith_arm32_platform,
            PROD_NEONFP16ARITH_MICROKERNEL_SRCS,
        ),
    ] if not is_arvr_mode() else [],
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
2198
# NEON FP16-arithmetic production microkernels for 64-bit ARM: the generic
# sources plus the AArch64-specific ones.
neon_fp16arith_aarch64_platform = "(aarch64|arm64)"
neon_fp16arith_aarch64_flags = ["-march=armv8.2-a+fp16"]
neon_fp16arith_aarch64_srcs = PROD_NEONFP16ARITH_MICROKERNEL_SRCS + PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS
fb_xplat_cxx_library(
    name = "ukernels_neon_fp16arith_aarch64",
    srcs = select({
        "DEFAULT": [],
        "ovr_config//cpu:arm64": neon_fp16arith_aarch64_srcs,
    }) if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
    ] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm64": neon_fp16arith_aarch64_flags,
    }),
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    platform_compiler_flags = [
        (
            neon_fp16arith_aarch64_platform,
            neon_fp16arith_aarch64_flags,
        ),
    ],
    platform_srcs = [
        (
            neon_fp16arith_aarch64_platform,
            neon_fp16arith_aarch64_srcs,
        ),
    ] if not is_arvr_mode() else [],
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
2248
# NEON I8MM production microkernels. Separate flag sets are supplied for
# 32-bit and 64-bit ARM, through both `select` and `platform_compiler_flags`.
neon_i8mm_arm32_flags = [
    "-marm",
    "-march=armv8.2-a+i8mm+fp16",
    "-mfpu=neon-fp-armv8",
]
neon_i8mm_arm64_flags = [
    "-march=armv8.2-a+i8mm+fp16",
]
fb_xplat_cxx_library(
    name = "ukernels_neonfma_i8mm",
    srcs = PROD_NEONI8MM_MICROKERNEL_SRCS,
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.h"),
        ("XNNPACK/src", "**/*.c"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
    ] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": neon_i8mm_arm32_flags,
        "ovr_config//cpu:arm64": neon_i8mm_arm64_flags,
    }),
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    platform_compiler_flags = [
        (
            "(aarch32|arm32|armv7)$",
            neon_i8mm_arm32_flags,
        ),
        (
            "(arm64|aarch64)",
            neon_i8mm_arm64_flags,
        ),
    ],
    platforms = (APPLE, ANDROID, CXX, WINDOWS),
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
2305
# AArch32 assembly microkernels. Headers are limited to xnnpack/assembly.h
# plus the `.S` files themselves.
asm_aarch32_flags = [
    "-marm",
    "-march=armv8.2-a+dotprod+fp16",
    "-mfpu=neon-fp-armv8",
]
fb_xplat_cxx_library(
    name = "ukernels_asm_aarch32",
    srcs = AARCH32_ASM_MICROKERNEL_SRCS,
    headers = subdir_glob([
        ("XNNPACK/src", "xnnpack/assembly.h"),
        ("XNNPACK/src", "**/*.S"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
    ] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": asm_aarch32_flags,
    }),
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    platform_compiler_flags = [
        (
            "(aarch32|arm32|armv7)",
            asm_aarch32_flags,
        ),
    ],
    platforms = (APPLE, ANDROID, CXX, WINDOWS),
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    deps = [
        ":interface",
        ":jit_memory",
        third_party("FP16"),
    ],
)
2354
# AArch64 assembly microkernels. Headers are limited to xnnpack/assembly.h
# plus the `.S` files themselves.
asm_aarch64_flags = [
    "-march=armv8.2-a+fp16+dotprod",
]
fb_xplat_cxx_library(
    name = "ukernels_asm_aarch64",
    srcs = AARCH64_ASM_MICROKERNEL_SRCS,
    headers = subdir_glob([
        ("XNNPACK/src", "xnnpack/assembly.h"),
        ("XNNPACK/src", "**/*.S"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
    ] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm64": asm_aarch64_flags,
    }),
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    platform_compiler_flags = [
        (
            "(aarch64|arm64)",
            asm_aarch64_flags,
        ),
    ],
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    deps = [
        ":interface",
        ":jit_memory",
        third_party("FP16"),
    ],
)
2398
# Source-less aggregate target: depends on the microkernel libraries listed
# below (referenced by `prod_ukernels` for arm64-linux-ubuntu-neon).
fb_xplat_cxx_library(
    name = "arm64_lib",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":jit_memory",
        ":ukernels_asm_aarch64",
        ":ukernels_neon",
        ":ukernels_neon_aarch64",
        ":ukernels_neon_dot_fp16arith",
        ":ukernels_neon_dot_fp16arith_aarch64",
        ":ukernels_neon_dot",
        ":ukernels_neon_dot_aarch64",
        ":ukernels_neon_fma",
        ":ukernels_neon_fp16",
        ":ukernels_neon_fp16arith",
        ":ukernels_neon_fp16arith_aarch64",
        ":ukernels_neon_v8",
        ":ukernels_neonfma_aarch64",
        ":ukernels_neonfma_i8mm",
    ],
)
2423
# Source-less aggregate target: depends on the x86 / x86-64 microkernel
# libraries listed below.
fb_xplat_cxx_library(
    name = "x86_and_x86_64_lib",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":ukernels_avx",
        ":ukernels_avx2",
        ":ukernels_avx512",
        ":ukernels_avx512skx",
        ":ukernels_f16c",
        ":ukernels_fma3",
        ":ukernels_sse",
        ":ukernels_sse2",
        ":ukernels_sse41",
        ":ukernels_ssse3",
        ":ukernels_xop",
        ":ukernels_avx512vbmi",
        ":ukernels_avx512vnni",
        # ":ukernels_avxvnni" Excluding avxvnni microkernels because they fail on older compilers
    ],
)
2447
# Source-less aggregate target: depends on the `_ovr_win32` variants of the
# x86 / x86-64 microkernel libraries.
fb_xplat_cxx_library(
    name = "x86_and_x86_64_lib_ovr_win32",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":ukernels_avx2_ovr_win32",
        ":ukernels_avx512_ovr_win32",
        ":ukernels_avx512skx_ovr_win32",
        ":ukernels_avx_ovr_win32",
        ":ukernels_f16c_ovr_win32",
        ":ukernels_fma3_ovr_win32",
        ":ukernels_sse2_ovr_win32",
        ":ukernels_sse41_ovr_win32",
        ":ukernels_sse_ovr_win32",
        ":ukernels_ssse3_ovr_win32",
        ":ukernels_xop_ovr_win32",
        # NOTE(review): this is the only dependency here without the
        # `_ovr_win32` suffix — confirm whether the plain target is intended
        # or a `_ovr_win32` variant should be added and used instead.
        ":ukernels_avx512vbmi",
        ":ukernels_avx512vnni_ovr_win32",
        # ":ukernels_avxvnni_ovr_win32" Excluding avxvnni microkernels because they fail on older compilers
    ],
)
2471
# Source-less aggregate target: depends on both the 32-bit and the 64-bit ARM
# microkernel libraries listed below.
fb_xplat_cxx_library(
    name = "arm_lib",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":jit_memory",
        ":ukernels_armsimd32",
        ":ukernels_asm_aarch32",
        ":ukernels_asm_aarch64",
        ":ukernels_neon",
        ":ukernels_neon_aarch64",
        ":ukernels_neon_dot",
        ":ukernels_neon_dot_aarch64",
        ":ukernels_neon_dot_fp16arith",
        ":ukernels_neon_dot_fp16arith_aarch64",
        ":ukernels_neon_fma",
        ":ukernels_neon_fp16",
        ":ukernels_neon_fp16arith",
        ":ukernels_neon_fp16arith_aarch64",
        ":ukernels_neon_v8",
        ":ukernels_neonfma_aarch64",
        ":ukernels_neonfma_i8mm",
        ":ukernels_fp16arith",
    ],
)
2499
# Source-less aggregate target: depends on the 32-bit ARM microkernel
# libraries listed below.
fb_xplat_cxx_library(
    name = "armv7_lib",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":jit_memory",
        ":ukernels_asm_aarch32",
        ":ukernels_neon",
        ":ukernels_neon_dot",
        ":ukernels_neon_fma",
        ":ukernels_neon_v8",
    ],
)
2515
# Aggregate of all production microkernels: always `:ukernels_scalar`, plus an
# architecture bundle chosen by `select`.
prod_default_ukernel_libs = [
    ":arm_lib",
    ":x86_and_x86_64_lib",
]

# On Windows the `_ovr_win32` bundle is used only when the caller passed
# XNNPACK_WINDOWS_AVX512F_ENABLED; otherwise Windows gets the default bundle.
prod_windows_ukernel_libs = [":x86_and_x86_64_lib_ovr_win32"] if XNNPACK_WINDOWS_AVX512F_ENABLED else prod_default_ukernel_libs
fb_xplat_cxx_library(
    name = "prod_ukernels",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":ukernels_scalar",
    ] + select({
        "DEFAULT": prod_default_ukernel_libs,
        "ovr_config//os:windows": prod_windows_ukernel_libs,
        # doesn't cover iphonesimulator-x86_64
        "ovr_config//runtime:arm64-linux-ubuntu-neon": [":arm64_lib"],
        "ovr_config//runtime:platform010": [":x86_and_x86_64_lib"],
    }),
)
2538
# Top-level XNNPACK library: exports xnnpack.h, compiles the core, logging and
# operator sources, and depends on the subgraph, tables and production
# microkernel targets plus cpuinfo and pthreadpool.
fb_xplat_cxx_library(
    name = "XNNPACK",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    labels = labels,
    deps = [
        ":subgraph",
        ":tables",
        ":prod_ukernels",
        third_party("cpuinfo"),
        third_party("pthreadpool"),
    ],
    exported_headers = {
        "xnnpack.h": "XNNPACK/include/xnnpack.h",
    },
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    header_namespace = "",
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.h"),
        ("XNNPACK/include", "**/*.h"),
    ]),
    platforms = (APPLE, ANDROID, CXX, WINDOWS),
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
        "-DXNN_NO_Q8_OPERATORS",
        "-DXNN_NO_F16_OPERATORS",
        "-DXNN_NO_NCHW_OPERATORS",
        "-DXNN_NO_QU8_OPERATORS",
        "-DXNN_NO_U8_OPERATORS",
        "-DXNN_NO_X32_OPERATORS",
        "-DXNN_NO_X8_OPERATORS",
        "-DXNN_ENABLE_MEMOPT",
        "-DXNN_ENABLE_SPARSE=0",
        "-DXNN_ENABLE_JIT=0",
        "-DXNN_ENABLE_ASSEMBLY",
        "-DXNN_ENABLE_GEMM_M_SPECIALIZATION",
        "-DXNN_ENABLE_ARM_DOTPROD",
        "-DXNN_ENABLE_CPUINFO",
        "-DXNN_ENABLE_ARM_I8MM=1",
        "-DXNN_ENABLE_ARM_FP16_VECTOR=1",
        "-DXNN_ENABLE_AVXVNNI=0",
    ],
    srcs = XNNPACK_SRCS + LOGGING_SRCS + OPERATOR_SRCS + [
        "XNNPACK/src/configs/hardware-config.c",
        "XNNPACK/src/microkernel-utils.c",
        "XNNPACK/src/operator-run.c",
        "XNNPACK/src/packing.c",
        "XNNPACK/src/cache.c",
        "XNNPACK/src/indirection.c",
        "XNNPACK/src/operator-utils.c",
        "XNNPACK/src/normalization.c",
        "XNNPACK/src/allocator.c",
        "XNNPACK/src/memory.c",
        "XNNPACK/src/mutex.c",
        "XNNPACK/src/microparams-init.c",
        "XNNPACK/src/operators/post-operation.c",
    ],
    visibility = ["PUBLIC"],
    # The Windows flag overrides depend on whether the caller passed
    # XNNPACK_WINDOWS_AVX512F_ENABLED.
    windows_clang_compiler_flags_override = (WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS) if XNNPACK_WINDOWS_AVX512F_ENABLED else WINDOWS_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS if XNNPACK_WINDOWS_AVX512F_ENABLED else [],
)
2603