pytorch
2692 строки · 79.5 Кб
1load("//tools/build_defs:fb_xplat_cxx_library.bzl", "fb_xplat_cxx_library")
2load("//tools/build_defs:fbsource_utils.bzl", "is_arvr_mode")
3load("//tools/build_defs:glob_defs.bzl", "subdir_glob")
4load("//tools/build_defs:platform_defs.bzl", "ANDROID", "APPLE", "APPLETVOS", "CXX", "IOS", "MACOSX", "WINDOWS")
5load(
6":xnnpack_src_defs.bzl",
7"LOGGING_SRCS",
8"OPERATOR_SRCS",
9"SUBGRAPH_SRCS",
10"TABLE_SRCS",
11"XNNPACK_SRCS",
12)
13load(
14":xnnpack_wrapper_defs.bzl",
15"AARCH32_ASM_MICROKERNEL_SRCS",
16"AARCH64_ASM_MICROKERNEL_SRCS",
17"PROD_ARMSIMD32_MICROKERNEL_SRCS",
18"PROD_AVX2_MICROKERNEL_SRCS",
19"PROD_AVX512F_MICROKERNEL_SRCS",
20"PROD_AVX512SKX_MICROKERNEL_SRCS",
21"PROD_AVX512VBMI_MICROKERNEL_SRCS",
22"PROD_AVX512VNNI_MICROKERNEL_SRCS",
23"PROD_AVX512VNNIGFNI_MICROKERNEL_SRCS",
24"PROD_AVXVNNI_MICROKERNEL_SRCS",
25"PROD_AVX_MICROKERNEL_SRCS",
26"PROD_F16C_MICROKERNEL_SRCS",
27"PROD_FMA3_MICROKERNEL_SRCS",
28"PROD_FP16ARITH_MICROKERNEL_SRCS",
29"PROD_NEONDOTFP16ARITH_AARCH64_MICROKERNEL_SRCS",
30"PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS",
31"PROD_NEONDOT_AARCH64_MICROKERNEL_SRCS",
32"PROD_NEONDOT_MICROKERNEL_SRCS",
33"PROD_NEONFMA_MICROKERNEL_SRCS",
34"PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS",
35"PROD_NEONFP16ARITH_MICROKERNEL_SRCS",
36"PROD_NEONFP16_MICROKERNEL_SRCS",
37"PROD_NEONI8MM_MICROKERNEL_SRCS",
38"PROD_NEONV8_MICROKERNEL_SRCS",
39"PROD_NEON_AARCH64_MICROKERNEL_SRCS",
40"PROD_NEON_MICROKERNEL_SRCS",
41"PROD_SCALAR_MICROKERNEL_SRCS",
42"PROD_SSE2_MICROKERNEL_SRCS",
43"PROD_SSE41_MICROKERNEL_SRCS",
44"PROD_SSE_MICROKERNEL_SRCS",
45"PROD_SSSE3_MICROKERNEL_SRCS",
46"PROD_XOP_MICROKERNEL_SRCS",
47)
48
# This defines XNNPACK targets for both the fbsource BUCK and the OSS BUCK builds.
# Note that file paths are relative to the BUCK file this macro is called from, not to this bzl file.
# So for the fbsource build they point to xplat/third-party/XNNPACK/XNNPACK,
# and for OSS they point to pytorch/third_party/XNNPACK.
53def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = False):
# Flags passed as the Windows compiler-flag override by the targets below.
base_windows_flags = [
    "/D__x86_64__",
    "/EHsc",
    "/wd4090",  # 'function': different 'const' qualifiers
    "/wd4146",  # unary minus operator applied to unsigned type, result still unsigned
]

# Extra flags appended only when the caller sets XNNPACK_WINDOWS_AVX512F_ENABLED.
avx512f_windows_flags = [
    "/D__AVX512F__",  # needed to avoid linkage errors
    "-mavx2",
    "/D__builtin_clz=__lzcnt",  # Intrinsics are spelled differently in MSVC
    "/Drestrict=",  # MSVC doesn't understand [restrict XNN_NUM_ELEMENTS(N)] syntax
]

WINDOWS_FLAGS = base_windows_flags + (
    avx512f_windows_flags if XNNPACK_WINDOWS_AVX512F_ENABLED else []
)

# Additional flags appended to WINDOWS_FLAGS for the clang-on-Windows override.
WINDOWS_CLANG_COMPILER_FLAGS = [
    "-Wno-error",
    "-Wno-error=undef",
    "-Wno-error=incompatible-pointer-types",
    "-Wno-error=incompatible-pointer-types-discards-qualifiers",
]
72
# `interface`: exports the public xnnpack.h header; declares no sources.
fb_xplat_cxx_library(
    name = "interface",
    header_namespace = "",
    exported_headers = {"xnnpack.h": "XNNPACK/include/xnnpack.h"},
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    labels = labels,
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    # Dependency only on pthreadpool interface
    exported_deps = [third_party("pthreadpool_header")],
)
90
# `subgraph`: static library built from SUBGRAPH_SRCS.
fb_xplat_cxx_library(
    name = "subgraph",
    srcs = SUBGRAPH_SRCS,
    headers = subdir_glob([("XNNPACK/src", "**/*.h")]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"],
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    preferred_linkage = "static",
    preprocessor_flags = [
        "-DXNN_LOG_LEVEL=0",
        "-DXNN_ENABLE_SPARSE=0",
        "-DXNN_ENABLE_MEMOPT",
    ],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    deps = [
        ":interface",
        third_party("FP16"),
        third_party("FXdiv"),
        third_party("clog"),
    ],
)
123
# `tables`: static library built from TABLE_SRCS.
fb_xplat_cxx_library(
    name = "tables",
    srcs = TABLE_SRCS,
    headers = subdir_glob([("XNNPACK/src", "**/*.h")]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"],
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    deps = [
        ":interface",
        third_party("FP16"),
        third_party("FXdiv"),
        third_party("clog"),
    ],
)
154
# `ukernels_scalar`: static library built from PROD_SCALAR_MICROKERNEL_SRCS.
# Both .c and .h files under XNNPACK/src are exposed as headers.
fb_xplat_cxx_library(
    name = "ukernels_scalar",
    srcs = PROD_SCALAR_MICROKERNEL_SRCS,
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
        "-fno-fast-math",
        "-fno-math-errno",
        "-ffp-contract=off",
    ],
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    deps = [
        ":interface",
        third_party("FP16"),
        third_party("FXdiv"),
    ],
)
188
# `ukernels_sse`: static library built from PROD_SSE_MICROKERNEL_SRCS.
# In arvr mode the sources are listed in `srcs`; otherwise they are attached
# through `platform_srcs` for platforms matching the regex below.
fb_xplat_cxx_library(
    name = "ukernels_sse",
    srcs = PROD_SSE_MICROKERNEL_SRCS if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"],
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [("x86", ["-msse"])],
    platform_srcs = [] if is_arvr_mode() else [
        ("x86|x86_64|platform009|platform010", PROD_SSE_MICROKERNEL_SRCS),
    ],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-msse"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-msse"],
    deps = [":interface"],
)
231
# `ukernels_sse_ovr_win32`: PROD_SSE_MICROKERNEL_SRCS supplied only through
# `windows_srcs`; no `srcs` or `platform_srcs` are declared.
fb_xplat_cxx_library(
    name = "ukernels_sse_ovr_win32",
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"],
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [("x86", ["-msse"])],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-msse"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-msse"],
    windows_srcs = PROD_SSE_MICROKERNEL_SRCS,
    deps = [":interface"],
)
268
# `ukernels_sse2`: static library built from PROD_SSE2_MICROKERNEL_SRCS.
# In arvr mode the sources are listed in `srcs`; otherwise they are attached
# through `platform_srcs` for platforms matching the regex below.
fb_xplat_cxx_library(
    name = "ukernels_sse2",
    srcs = PROD_SSE2_MICROKERNEL_SRCS if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"],
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [("x86", ["-msse2"])],
    platform_srcs = [] if is_arvr_mode() else [
        ("x86|x86_64|platform009|platform010", PROD_SSE2_MICROKERNEL_SRCS),
    ],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-msse2"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-msse2"],
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
312
# `ukernels_sse2_ovr_win32`: PROD_SSE2_MICROKERNEL_SRCS supplied only through
# `windows_srcs`; no `srcs` or `platform_srcs` are declared.
fb_xplat_cxx_library(
    name = "ukernels_sse2_ovr_win32",
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"],
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [("x86", ["-msse2"])],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-msse2"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-msse2"],
    windows_srcs = PROD_SSE2_MICROKERNEL_SRCS,
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
350
# `ukernels_ssse3`: static library built from PROD_SSSE3_MICROKERNEL_SRCS.
# In arvr mode the sources are listed in `srcs`; otherwise they are attached
# through `platform_srcs` for platforms matching the regex below.
fb_xplat_cxx_library(
    name = "ukernels_ssse3",
    srcs = PROD_SSSE3_MICROKERNEL_SRCS if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"],
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [("x86", ["-mssse3"])],
    platform_srcs = [] if is_arvr_mode() else [
        ("x86|x86_64|platform009|platform010", PROD_SSSE3_MICROKERNEL_SRCS),
    ],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mssse3"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-mssse3"],
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
394
# `ukernels_ssse3_ovr_win32`: PROD_SSSE3_MICROKERNEL_SRCS supplied only through
# `windows_srcs`; no `srcs` or `platform_srcs` are declared.
fb_xplat_cxx_library(
    name = "ukernels_ssse3_ovr_win32",
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"],
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [("x86", ["-mssse3"])],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mssse3"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-mssse3"],
    windows_srcs = PROD_SSSE3_MICROKERNEL_SRCS,
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
432
# `ukernels_sse41`: static library built from PROD_SSE41_MICROKERNEL_SRCS.
# In arvr mode the sources are listed in `srcs`; otherwise they are attached
# through `platform_srcs` for platforms matching the regex below.
fb_xplat_cxx_library(
    name = "ukernels_sse41",
    srcs = PROD_SSE41_MICROKERNEL_SRCS if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"],
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [("x86", ["-msse4.1"])],
    platform_srcs = [] if is_arvr_mode() else [
        ("x86|x86_64|platform009|platform010", PROD_SSE41_MICROKERNEL_SRCS),
    ],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-msse4.1"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-msse4.1"],
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
476
# `ukernels_sse41_ovr_win32`: PROD_SSE41_MICROKERNEL_SRCS supplied only through
# `windows_srcs`; no `srcs` or `platform_srcs` are declared.
fb_xplat_cxx_library(
    name = "ukernels_sse41_ovr_win32",
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"],
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [("x86", ["-msse4.1"])],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-msse4.1"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-msse4.1"],
    windows_srcs = PROD_SSE41_MICROKERNEL_SRCS,
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
514
# `ukernels_avx`: static library built from PROD_AVX_MICROKERNEL_SRCS.
# "-mavx" is added through select() for the ovr_config x86_32/x86_64 CPUs and
# through `platform_compiler_flags` for platforms matching the regex below.
fb_xplat_cxx_library(
    name = "ukernels_avx",
    srcs = PROD_AVX_MICROKERNEL_SRCS if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.h"),
        ("XNNPACK/src", "**/*.c"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:x86_32": ["-mavx"],
        "ovr_config//cpu:x86_64": ["-mavx"],
    }),
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [
        ("x86|x86_64|platform009|platform010", ["-mavx"]),
    ],
    platform_srcs = [] if is_arvr_mode() else [
        ("x86|x86_64|platform009|platform010", PROD_AVX_MICROKERNEL_SRCS),
    ],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
    deps = [":interface"],
)
565
# `ukernels_avx_ovr_win32`: PROD_AVX_MICROKERNEL_SRCS supplied only through
# `windows_srcs`; "-mavx" is passed unconditionally in `compiler_flags`.
fb_xplat_cxx_library(
    name = "ukernels_avx_ovr_win32",
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.h"),
        ("XNNPACK/src", "**/*.c"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2", "-mavx"],
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [("x86", ["-mavx"])],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
    windows_srcs = PROD_AVX_MICROKERNEL_SRCS,
    deps = [":interface"],
)
603
# `ukernels_avx512vnnigfni`: static library built from
# PROD_AVX512VNNIGFNI_MICROKERNEL_SRCS.
# NOTE(review): the select() flag list and the `platform_compiler_flags` list
# differ (the former has "-mavx" and lacks "-mavx512f"/"-mavx512cd"); both are
# reproduced as-is. The Windows overrides only add "-mavx".
fb_xplat_cxx_library(
    name = "ukernels_avx512vnnigfni",
    srcs = PROD_AVX512VNNIGFNI_MICROKERNEL_SRCS if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.h"),
        ("XNNPACK/src", "**/*.c"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:x86_32": [
            "-mavx",
            "-mgfni",
            "-mavx512vl",
            "-mavx512vnni",
            "-mavx512bw",
            "-mavx512dq",
        ],
        "ovr_config//cpu:x86_64": [
            "-mavx",
            "-mgfni",
            "-mavx512vl",
            "-mavx512vnni",
            "-mavx512bw",
            "-mavx512dq",
        ],
    }),
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [
        (
            "x86|x86_64|platform009|platform010",
            [
                "-mavx512f",
                "-mavx512cd",
                "-mavx512bw",
                "-mavx512dq",
                "-mavx512vl",
                "-mavx512vnni",
                "-mgfni",
            ],
        ),
    ],
    platform_srcs = [] if is_arvr_mode() else [
        ("x86|x86_64|platform009|platform010", PROD_AVX512VNNIGFNI_MICROKERNEL_SRCS),
    ],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
    deps = [":interface"],
)
670
# `ukernels_avx512vnnigfni_ovr_win32`: PROD_AVX512VNNIGFNI_MICROKERNEL_SRCS
# supplied only through `windows_srcs`.
fb_xplat_cxx_library(
    name = "ukernels_avx512vnnigfni_ovr_win32",
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.h"),
        ("XNNPACK/src", "**/*.c"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"],
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [
        (
            "x86|x86_64|platform009|platform010",
            [
                "-mavx512f",
                "-mavx512cd",
                "-mavx512bw",
                "-mavx512dq",
                "-mavx512vl",
                "-mavx512vnni",
                "-mgfni",
            ],
        ),
    ],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
    windows_srcs = PROD_AVX512VNNIGFNI_MICROKERNEL_SRCS,
    deps = [":interface"],
)
713
# `ukernels_avx512vnni`: static library built from
# PROD_AVX512VNNI_MICROKERNEL_SRCS. Exports -DXNN_ENABLE_AVX512VNNI to
# dependents via `exported_preprocessor_flags`.
fb_xplat_cxx_library(
    name = "ukernels_avx512vnni",
    srcs = PROD_AVX512VNNI_MICROKERNEL_SRCS if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.h"),
        ("XNNPACK/src", "**/*.c"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:x86_32": ["-mavx"],
        "ovr_config//cpu:x86_64": ["-mavx"],
    }),
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [
        (
            "x86|x86_64|platform009|platform010",
            [
                "-mavx512f",
                "-mavx512cd",
                "-mavx512bw",
                "-mavx512dq",
                "-mavx512vl",
                "-mavx512vnni",
            ],
        ),
    ],
    platform_srcs = [] if is_arvr_mode() else [
        ("x86|x86_64|platform009|platform010", PROD_AVX512VNNI_MICROKERNEL_SRCS),
    ],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    exported_preprocessor_flags = ["-DXNN_ENABLE_AVX512VNNI"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
    deps = [":interface"],
)
772
# `ukernels_avx512vnni_ovr_win32`: PROD_AVX512VNNI_MICROKERNEL_SRCS supplied
# only through `windows_srcs`. Exports -DXNN_ENABLE_AVX512VNNI to dependents.
fb_xplat_cxx_library(
    name = "ukernels_avx512vnni_ovr_win32",
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.h"),
        ("XNNPACK/src", "**/*.c"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"],
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [
        (
            "x86|x86_64|platform009|platform010",
            [
                "-mavx512f",
                "-mavx512cd",
                "-mavx512bw",
                "-mavx512dq",
                "-mavx512vl",
                "-mavx512vnni",
            ],
        ),
    ],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    exported_preprocessor_flags = ["-DXNN_ENABLE_AVX512VNNI"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
    windows_srcs = PROD_AVX512VNNI_MICROKERNEL_SRCS,
    deps = [":interface"],
)
817
# `ukernels_avxvnni`: static library built from PROD_AVXVNNI_MICROKERNEL_SRCS.
# Unlike the sibling x86 targets, the ISA flags in `compiler_flags` are passed
# unconditionally rather than through select().
fb_xplat_cxx_library(
    name = "ukernels_avxvnni",
    srcs = PROD_AVXVNNI_MICROKERNEL_SRCS if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.h"),
        ("XNNPACK/src", "**/*.c"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2", "-mavxvnni", "-mf16c", "-mfma"],
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [
        (
            "x86|x86_64|platform009|platform010",
            ["-mavx2", "-mavxvnni", "-mf16c", "-mfma"],
        ),
    ],
    platform_srcs = [] if is_arvr_mode() else [
        ("x86|x86_64|platform009|platform010", PROD_AVXVNNI_MICROKERNEL_SRCS),
    ],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
    deps = [":interface"],
)
866
# `ukernels_avxvnni_ovr_win32`: PROD_AVXVNNI_MICROKERNEL_SRCS supplied only
# through `windows_srcs`.
fb_xplat_cxx_library(
    name = "ukernels_avxvnni_ovr_win32",
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.h"),
        ("XNNPACK/src", "**/*.c"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"],
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [
        ("x86|x86_64|platform009|platform010", ["-mavx2", "-mavxvnni"]),
    ],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
    windows_srcs = PROD_AVXVNNI_MICROKERNEL_SRCS,
    deps = [":interface"],
)
904
# `ukernels_f16c`: static library built from PROD_F16C_MICROKERNEL_SRCS.
# This target also sets `platforms` explicitly.
fb_xplat_cxx_library(
    name = "ukernels_f16c",
    srcs = PROD_F16C_MICROKERNEL_SRCS if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.h"),
        ("XNNPACK/src", "**/*.c"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:x86_32": ["-mf16c"],
        "ovr_config//cpu:x86_64": ["-mf16c"],
    }),
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [
        ("x86|x86_64|platform009|platform010", ["-mf16c"]),
    ],
    platform_srcs = [] if is_arvr_mode() else [
        ("x86|x86_64|platform009|platform010", PROD_F16C_MICROKERNEL_SRCS),
    ],
    platforms = (APPLE, ANDROID, CXX, WINDOWS),
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mf16c"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-mf16c"],
    deps = [":interface"],
)
956
# `ukernels_f16c_ovr_win32`: PROD_F16C_MICROKERNEL_SRCS supplied only through
# `windows_srcs`; "-mf16c" is passed unconditionally in `compiler_flags`.
fb_xplat_cxx_library(
    name = "ukernels_f16c_ovr_win32",
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.h"),
        ("XNNPACK/src", "**/*.c"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2", "-mf16c"],
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [("x86", ["-mf16c"])],
    platforms = (APPLE, ANDROID, CXX, WINDOWS),
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mf16c"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-mf16c"],
    windows_srcs = PROD_F16C_MICROKERNEL_SRCS,
    deps = [":interface"],
)
995
# `ukernels_xop`: static library built from PROD_XOP_MICROKERNEL_SRCS.
# Also defines `restrict` to nothing for the "windows-x86_64" platform.
fb_xplat_cxx_library(
    name = "ukernels_xop",
    srcs = PROD_XOP_MICROKERNEL_SRCS if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.h"),
        ("XNNPACK/src", "**/*.c"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:x86_32": ["-mxop"],
        "ovr_config//cpu:x86_64": ["-mxop"],
    }),
    platform_compiler_flags = [
        ("x86|x86_64|platform009|platform010", ["-mxop"]),
    ],
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_preprocessor_flags = [
        ("windows-x86_64", ["-Drestrict="]),
    ],
    platform_srcs = [] if is_arvr_mode() else [
        ("x86|x86_64|platform009|platform010", PROD_XOP_MICROKERNEL_SRCS),
    ],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mxop"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-mxop"],
    deps = [":interface"],
)
1054
# `ukernels_xop_ovr_win32`: PROD_XOP_MICROKERNEL_SRCS supplied only through
# `windows_srcs`; "-mxop" is passed unconditionally in `compiler_flags`.
fb_xplat_cxx_library(
    name = "ukernels_xop_ovr_win32",
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.h"),
        ("XNNPACK/src", "**/*.c"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2", "-mxop"],
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_preprocessor_flags = [
        ("windows-x86_64", ["-Drestrict="]),
    ],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mxop"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-mxop"],
    windows_srcs = PROD_XOP_MICROKERNEL_SRCS,
    deps = [":interface"],
)
1092
# `ukernels_fma3`: static library built from PROD_FMA3_MICROKERNEL_SRCS.
# The same two ISA flags are used in every flag attribute, so they are named once.
fma3_isa_flags = ["-mfma", "-mf16c"]

fb_xplat_cxx_library(
    name = "ukernels_fma3",
    srcs = PROD_FMA3_MICROKERNEL_SRCS if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.h"),
        ("XNNPACK/src", "**/*.c"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:x86_32": fma3_isa_flags,
        "ovr_config//cpu:x86_64": fma3_isa_flags,
    }),
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    # NOTE(review): this regex is unanchored, while ukernels_fma3_ovr_win32
    # uses the anchored form "^(...)$" - kept exactly as in the original.
    platform_compiler_flags = [
        ("(i[3-6]86|x86|x86_64|AMD64)", fma3_isa_flags),
    ],
    platform_srcs = [] if is_arvr_mode() else [
        ("x86|x86_64|platform009|platform010", PROD_FMA3_MICROKERNEL_SRCS),
    ],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + fma3_isa_flags,
    windows_compiler_flags_override = WINDOWS_FLAGS + fma3_isa_flags,
    deps = [":interface"],
)
1152
# `ukernels_fma3_ovr_win32`: PROD_FMA3_MICROKERNEL_SRCS supplied only through
# `windows_srcs`. The two ISA flags are shared by every flag attribute.
fma3_win32_isa_flags = ["-mfma", "-mf16c"]

fb_xplat_cxx_library(
    name = "ukernels_fma3_ovr_win32",
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.h"),
        ("XNNPACK/src", "**/*.c"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + fma3_win32_isa_flags,
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [
        ("^(i[3-6]86|x86|x86_64|AMD64)$", fma3_win32_isa_flags),
    ],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + fma3_win32_isa_flags,
    windows_compiler_flags_override = WINDOWS_FLAGS + fma3_win32_isa_flags,
    windows_srcs = PROD_FMA3_MICROKERNEL_SRCS,
    deps = [":interface"],
)
1198
# `ukernels_avx2`: static library built from PROD_AVX2_MICROKERNEL_SRCS.
# The same three ISA flags are used in every flag attribute, so they are named once.
avx2_isa_flags = ["-mavx2", "-mfma", "-mf16c"]

fb_xplat_cxx_library(
    name = "ukernels_avx2",
    srcs = PROD_AVX2_MICROKERNEL_SRCS if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:x86_32": avx2_isa_flags,
        "ovr_config//cpu:x86_64": avx2_isa_flags,
    }),
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [
        ("x86|x86_64|platform009|platform010", avx2_isa_flags),
    ],
    platform_srcs = [] if is_arvr_mode() else [
        ("x86|x86_64|platform009|platform010", PROD_AVX2_MICROKERNEL_SRCS),
    ],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + avx2_isa_flags,
    windows_compiler_flags_override = WINDOWS_FLAGS + avx2_isa_flags,
    deps = [":interface"],
)
1263
# `ukernels_avx2_ovr_win32`: PROD_AVX2_MICROKERNEL_SRCS supplied only through
# `windows_srcs`. The non-clang Windows override additionally defines __AVX2__.
avx2_win32_isa_flags = ["-mavx2", "-mfma", "-mf16c"]

fb_xplat_cxx_library(
    name = "ukernels_avx2_ovr_win32",
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + avx2_win32_isa_flags,
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [("x86", avx2_win32_isa_flags)],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + avx2_win32_isa_flags,
    windows_compiler_flags_override = WINDOWS_FLAGS + ["/D__AVX2__"] + avx2_win32_isa_flags,
    windows_srcs = PROD_AVX2_MICROKERNEL_SRCS,
    deps = [":interface"],
)
1314
# `ukernels_avx512`: static library built from PROD_AVX512F_MICROKERNEL_SRCS.
# In arvr mode the sources are listed in `srcs`; otherwise they are attached
# through `platform_srcs` for platforms matching the regex below.
fb_xplat_cxx_library(
    name = "ukernels_avx512",
    srcs = PROD_AVX512F_MICROKERNEL_SRCS if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:x86_32": ["-mavx512f"],
        "ovr_config//cpu:x86_64": ["-mavx512f"],
    }),
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [
        ("x86|x86_64|platform009|platform010", ["-mavx512f"]),
    ],
    platform_srcs = [] if is_arvr_mode() else [
        ("x86|x86_64|platform009|platform010", PROD_AVX512F_MICROKERNEL_SRCS),
    ],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx512f"],
    windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx512f"],
    deps = [":interface"],
)
1365
# `ukernels_avx512vbmi`: static library built from
# PROD_AVX512VBMI_MICROKERNEL_SRCS. The same six ISA flags are used in every
# flag attribute, so they are named once.
avx512vbmi_isa_flags = [
    "-mavx512f",
    "-mavx512cd",
    "-mavx512bw",
    "-mavx512dq",
    "-mavx512vl",
    "-mavx512vbmi",
]

fb_xplat_cxx_library(
    name = "ukernels_avx512vbmi",
    srcs = PROD_AVX512VBMI_MICROKERNEL_SRCS if is_arvr_mode() else [],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:x86_32": avx512vbmi_isa_flags,
        "ovr_config//cpu:x86_64": avx512vbmi_isa_flags,
    }),
    fbobjc_preprocessor_flags = ["-DXNN_PRIVATE=", "-DXNN_INTERNAL="],
    labels = labels,
    platform_compiler_flags = [
        ("(i[3-6]86|x86|x86_64|AMD64)", avx512vbmi_isa_flags),
    ],
    platform_srcs = [] if is_arvr_mode() else [
        ("x86|x86_64|platform009|platform010", PROD_AVX512VBMI_MICROKERNEL_SRCS),
    ],
    preferred_linkage = "static",
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    visibility = ["PUBLIC"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + avx512vbmi_isa_flags,
    windows_compiler_flags_override = WINDOWS_FLAGS + avx512vbmi_isa_flags,
    deps = [":interface"],
)
1445
# Windows variant of the AVX512F microkernel library: the sources are supplied
# only through `windows_srcs`, and -mavx512f is part of the base compiler flags.
avx512f_win32_flags = ["-mavx512f"]

fb_xplat_cxx_library(
    name = "ukernels_avx512_ovr_win32",
    windows_srcs = PROD_AVX512F_MICROKERNEL_SRCS,
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + avx512f_win32_flags,
    platform_compiler_flags = [
        ("x86", avx512f_win32_flags),
    ],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + avx512f_win32_flags,
    windows_compiler_flags_override = WINDOWS_FLAGS + avx512f_win32_flags,
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [":interface"],
)
1483
# AVX512-SKX microkernels (F + CD + BW + DQ + VL flag set).
# When is_arvr_mode() is true the sources are listed directly in `srcs`;
# otherwise they are attached through `platform_srcs`.
avx512skx_arch_flags = [
    "-mavx512f",
    "-mavx512cd",
    "-mavx512bw",
    "-mavx512dq",
    "-mavx512vl",
]

fb_xplat_cxx_library(
    name = "ukernels_avx512skx",
    srcs = PROD_AVX512SKX_MICROKERNEL_SRCS if is_arvr_mode() else [],
    platform_srcs = ([] if is_arvr_mode() else [
        (
            "x86|x86_64|platform009|platform010",
            PROD_AVX512SKX_MICROKERNEL_SRCS,
        ),
    ]),
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:x86_32": avx512skx_arch_flags,
        "ovr_config//cpu:x86_64": avx512skx_arch_flags,
    }),
    platform_compiler_flags = [
        (
            # platform009/platform010 are listed here as well so that every
            # platform receiving the sources via `platform_srcs` above also
            # receives the AVX512 flags (same pairing as `ukernels_avx512`).
            "(i[3-6]86|x86|x86_64|AMD64|platform009|platform010)",
            avx512skx_arch_flags,
        ),
    ],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + avx512skx_arch_flags,
    windows_compiler_flags_override = WINDOWS_FLAGS + avx512skx_arch_flags,
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [":interface"],
)
1559
# Windows variant of the AVX512-SKX microkernel library: the sources are
# supplied only through `windows_srcs`, and the AVX512 flags are part of the
# base compiler flags.
avx512skx_win32_flags = [
    "-mavx512f",
    "-mavx512cd",
    "-mavx512bw",
    "-mavx512dq",
    "-mavx512vl",
]

fb_xplat_cxx_library(
    name = "ukernels_avx512skx_ovr_win32",
    windows_srcs = PROD_AVX512SKX_MICROKERNEL_SRCS,
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + avx512skx_win32_flags,
    platform_compiler_flags = [
        ("^(i[3-6]86|x86|x86_64|AMD64)$", avx512skx_win32_flags),
    ],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + avx512skx_win32_flags,
    # The non-clang Windows override additionally defines __AVX512BW__.
    windows_compiler_flags_override = WINDOWS_FLAGS + avx512skx_win32_flags + ["/D__AVX512BW__"],
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [":interface"],
)
1618
# ARM SIMD32 microkernels. Sources are compiled unconditionally; the ARMv6/VFP
# flags are only added on platforms whose name matches the 32-bit ARM regex.
fb_xplat_cxx_library(
    name = "ukernels_armsimd32",
    srcs = PROD_ARMSIMD32_MICROKERNEL_SRCS,
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
        "-fno-fast-math",
        "-fno-math-errno",
    ],
    platform_compiler_flags = [
        (
            "(arm32|aarch32|armv7)",
            [
                "-marm",
                "-march=armv6",
                "-mfpu=vfp",
                "-munaligned-access",
            ],
        ),
    ],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
1661
# NEON microkernels for 32-bit ARM.
# When is_arvr_mode() is true the sources are chosen with a `select` on the
# arm32 CPU constraint; otherwise they are attached via a `platform_srcs` regex.
neon_arm32_flags = [
    "-marm",
    "-march=armv7-a",
    "-mfpu=neon",
]

fb_xplat_cxx_library(
    name = "ukernels_neon",
    srcs = select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": PROD_NEON_MICROKERNEL_SRCS,
    }) if is_arvr_mode() else [],
    platform_srcs = [] if is_arvr_mode() else [
        ("(aarch32|arm32|armv7)", PROD_NEON_MICROKERNEL_SRCS),
    ],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": neon_arm32_flags,
    }),
    platform_compiler_flags = [
        ("(aarch32|arm32|armv7)", neon_arm32_flags),
    ],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
1717
# NEON microkernels for 64-bit ARM: the generic NEON sources plus the first
# entry of PROD_NEON_AARCH64_MICROKERNEL_SRCS.
# NOTE(review): the `[0]` index depends on the ordering of that list — confirm
# against xnnpack_wrapper_defs.bzl before reordering it.
neon_aarch64_srcs = PROD_NEON_MICROKERNEL_SRCS + [PROD_NEON_AARCH64_MICROKERNEL_SRCS[0]]

fb_xplat_cxx_library(
    name = "ukernels_neon_aarch64",
    # ARVR builds select on the arm64 CPU constraint; other builds use the
    # platform regex below.
    srcs = select({
        "DEFAULT": [],
        "ovr_config//cpu:arm64": neon_aarch64_srcs,
    }) if is_arvr_mode() else [],
    platform_srcs = [] if is_arvr_mode() else [
        ("(aarch64|arm64)", neon_aarch64_srcs),
    ],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
1756
# NEON-FMA microkernels for 32-bit ARM.
# When is_arvr_mode() is true the sources are chosen with a `select` on the
# arm32 CPU constraint; otherwise they are attached via a `platform_srcs` regex.
neonfma_arm32_flags = [
    "-marm",
    "-march=armv7-a",
    "-mfpu=neon-vfpv4",
]

fb_xplat_cxx_library(
    name = "ukernels_neon_fma",
    srcs = select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": PROD_NEONFMA_MICROKERNEL_SRCS,
    }) if is_arvr_mode() else [],
    platform_srcs = [] if is_arvr_mode() else [
        ("(aarch32|arm32|armv7)", PROD_NEONFMA_MICROKERNEL_SRCS),
    ],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": neonfma_arm32_flags,
    }),
    platform_compiler_flags = [
        # Extra tuning flags for the exact "iphoneos-armv7" platform name.
        (
            "^iphoneos-armv7$",
            [
                "-mcpu=cyclone",
                "-mtune=generic",
            ],
        ),
        ("(aarch32|arm32|armv7)", neonfma_arm32_flags),
    ],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
1819
# NEON-FMA microkernels for 64-bit ARM: the generic NEON-FMA sources plus the
# second entry of PROD_NEON_AARCH64_MICROKERNEL_SRCS.
# NOTE(review): the `[1]` index depends on the ordering of that list — confirm
# against xnnpack_wrapper_defs.bzl before reordering it.
neonfma_aarch64_srcs = PROD_NEONFMA_MICROKERNEL_SRCS + [PROD_NEON_AARCH64_MICROKERNEL_SRCS[1]]

fb_xplat_cxx_library(
    name = "ukernels_neonfma_aarch64",
    # ARVR builds select on the arm64 CPU constraint; other builds use the
    # platform regex below.
    srcs = select({
        "DEFAULT": [],
        "ovr_config//cpu:arm64": neonfma_aarch64_srcs,
    }) if is_arvr_mode() else [],
    platform_srcs = [] if is_arvr_mode() else [
        ("(arm64|aarch64)$", neonfma_aarch64_srcs),
    ],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.h"),
        ("XNNPACK/src", "**/*.c"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    platforms = (APPLE, ANDROID, CXX, WINDOWS),
    compiler_flags = ["-O2"],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
1859
# FP16-arithmetic microkernels. Sources are compiled unconditionally; the
# ARMv8.2 FP16 flags are only added for 32-bit ARM.
# NOTE(review): "-fno-math-errno" appears both in the base flags and in the
# arm32-specific list; the duplication is kept as-is here.
fp16arith_arm32_flags = [
    "-marm",
    "-march=armv8.2-a+fp16",
    # GCC emits wrong directives for assembler with -mfpu=fp-armv8
    "-mfpu=neon-fp-armv8",
    # For vsqrth_f16 polyfill using sqrtf
    "-fno-math-errno",
    # For vminh_f16/vmaxh_f16 polyfills using compare + select
    "-ffinite-math-only",
]

fb_xplat_cxx_library(
    name = "ukernels_fp16arith",
    srcs = PROD_FP16ARITH_MICROKERNEL_SRCS,
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = [
        "-O2",
        # Required since the SGX toolchain does not have this by default.
        "-Wno-error=missing-braces",
        "-fno-fast-math",
        "-fno-math-errno",
    ] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": fp16arith_arm32_flags,
    }),
    platform_compiler_flags = [
        ("(aarch32|arm32|armv7)", fp16arith_arm32_flags),
    ],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [":interface"],
)
1918
# NEON-FP16 microkernels. Sources are compiled unconditionally; the
# ARMv7-A/neon-fp16 flags are only added for 32-bit ARM.
neonfp16_arm32_flags = [
    "-marm",
    "-march=armv7-a",
    "-mfpu=neon-fp16",
]

fb_xplat_cxx_library(
    name = "ukernels_neon_fp16",
    srcs = PROD_NEONFP16_MICROKERNEL_SRCS,
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": neonfp16_arm32_flags,
    }),
    platform_compiler_flags = [
        ("(aarch32|arm32|armv7)", neonfp16_arm32_flags),
    ],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [":interface"],
)
1964
# NEON-v8 microkernels. Sources are compiled unconditionally; architecture
# flags are chosen per CPU constraint / platform name.
neonv8_arm64_flags = ["-march=armv8-a"]

fb_xplat_cxx_library(
    name = "ukernels_neon_v8",
    srcs = PROD_NEONV8_MICROKERNEL_SRCS,
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm64": neonv8_arm64_flags,
    }),
    platform_compiler_flags = [
        ("(aarch64|arm64)", neonv8_arm64_flags),
        # Exact-name match: Android armv7 builds get the ARMv8 FPU with softfp.
        (
            "^android-armv7$",
            [
                "-march=armv8-a",
                "-mfpu=neon-fp-armv8",
                "-mfloat-abi=softfp",
            ],
        ),
        # Exact-name match: extra tuning flags for "iphoneos-armv7".
        (
            "^iphoneos-armv7$",
            [
                "-mcpu=cyclone",
                "-mtune=generic",
            ],
        ),
    ],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
2020
# NEON dot-product microkernels for 32-bit ARM.
# When is_arvr_mode() is true the sources are chosen with a `select` on the
# arm32 CPU constraint; otherwise they are attached via a `platform_srcs` regex.
neondot_arm32_flags = [
    "-march=armv8.2-a+dotprod",
    "-mfpu=neon-fp-armv8",
    "-mfloat-abi=softfp",
]

fb_xplat_cxx_library(
    name = "ukernels_neon_dot",
    srcs = select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": PROD_NEONDOT_MICROKERNEL_SRCS,
    }) if is_arvr_mode() else [],
    platform_srcs = [] if is_arvr_mode() else [
        ("(aarch32|arm32|armv7)", PROD_NEONDOT_MICROKERNEL_SRCS),
    ],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": neondot_arm32_flags,
    }),
    platform_compiler_flags = [
        ("(aarch32|arm32|armv7)", neondot_arm32_flags),
    ],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
2076
# NEON dot-product microkernels for 64-bit ARM: the generic dot-product sources
# plus the AArch64-specific ones.
neondot_aarch64_srcs = PROD_NEONDOT_MICROKERNEL_SRCS + PROD_NEONDOT_AARCH64_MICROKERNEL_SRCS
neondot_aarch64_flags = ["-march=armv8.2-a+dotprod"]

fb_xplat_cxx_library(
    name = "ukernels_neon_dot_aarch64",
    # ARVR builds select on the arm64 CPU constraint; other builds use the
    # platform regex below.
    srcs = select({
        "DEFAULT": [],
        "ovr_config//cpu:arm64": neondot_aarch64_srcs,
    }) if is_arvr_mode() else [],
    platform_srcs = [] if is_arvr_mode() else [
        ("(aarch64|arm64)", neondot_aarch64_srcs),
    ],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm64": neondot_aarch64_flags,
    }),
    platform_compiler_flags = [
        ("(aarch64|arm64)", neondot_aarch64_flags),
    ],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
2126
# NEON dot-product + FP16-arithmetic microkernels for 32-bit ARM.
# When is_arvr_mode() is true the sources are chosen with a `select` on the
# arm32 CPU constraint; otherwise they are attached via a `platform_srcs` regex.
neondotfp16_arm32_flags = [
    "-marm",
    "-march=armv8.2-a+dotprod+fp16",
    "-mfpu=neon-fp-armv8",
]

fb_xplat_cxx_library(
    name = "ukernels_neon_dot_fp16arith",
    srcs = select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS,
    }) if is_arvr_mode() else [],
    platform_srcs = [] if is_arvr_mode() else [
        ("(aarch32|arm32|armv7)", PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS),
    ],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": neondotfp16_arm32_flags,
    }),
    platform_compiler_flags = [
        ("(aarch32|arm32|armv7)", neondotfp16_arm32_flags),
    ],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
2182
# NEON dot-product + FP16-arithmetic microkernels for 64-bit ARM: the generic
# sources plus the AArch64-specific ones.
neondotfp16_aarch64_srcs = PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS + PROD_NEONDOTFP16ARITH_AARCH64_MICROKERNEL_SRCS
neondotfp16_aarch64_flags = ["-march=armv8.2-a+dotprod+fp16"]

fb_xplat_cxx_library(
    name = "ukernels_neon_dot_fp16arith_aarch64",
    # ARVR builds select on the arm64 CPU constraint; other builds use the
    # platform regex below.
    srcs = select({
        "DEFAULT": [],
        "ovr_config//cpu:arm64": neondotfp16_aarch64_srcs,
    }) if is_arvr_mode() else [],
    platform_srcs = [] if is_arvr_mode() else [
        ("(aarch64|arm64)", neondotfp16_aarch64_srcs),
    ],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm64": neondotfp16_aarch64_flags,
    }),
    platform_compiler_flags = [
        ("(aarch64|arm64)", neondotfp16_aarch64_flags),
    ],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
2234
# NEON FP16-arithmetic microkernels for 32-bit ARM.
# When is_arvr_mode() is true the sources are chosen with a `select` on the
# arm32 CPU constraint; otherwise they are attached via a `platform_srcs` regex.
neonfp16arith_arm32_flags = [
    "-marm",
    "-march=armv8.2-a+fp16",
    "-mfpu=neon-fp-armv8",
]

fb_xplat_cxx_library(
    name = "ukernels_neon_fp16arith",
    srcs = select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": PROD_NEONFP16ARITH_MICROKERNEL_SRCS,
    }) if is_arvr_mode() else [],
    platform_srcs = [] if is_arvr_mode() else [
        ("(aarch32|arm32|armv7)", PROD_NEONFP16ARITH_MICROKERNEL_SRCS),
    ],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": neonfp16arith_arm32_flags,
    }),
    platform_compiler_flags = [
        ("(aarch32|arm32|armv7)", neonfp16arith_arm32_flags),
    ],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
2290
# NEON FP16-arithmetic microkernels for 64-bit ARM: the generic sources plus
# the AArch64-specific ones.
neonfp16arith_aarch64_srcs = PROD_NEONFP16ARITH_MICROKERNEL_SRCS + PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS
neonfp16arith_aarch64_flags = ["-march=armv8.2-a+fp16"]

fb_xplat_cxx_library(
    name = "ukernels_neon_fp16arith_aarch64",
    # ARVR builds select on the arm64 CPU constraint; other builds use the
    # platform regex below.
    srcs = select({
        "DEFAULT": [],
        "ovr_config//cpu:arm64": neonfp16arith_aarch64_srcs,
    }) if is_arvr_mode() else [],
    platform_srcs = [] if is_arvr_mode() else [
        ("(aarch64|arm64)", neonfp16arith_aarch64_srcs),
    ],
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.c"),
        ("XNNPACK/src", "**/*.h"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm64": neonfp16arith_aarch64_flags,
    }),
    platform_compiler_flags = [
        ("(aarch64|arm64)", neonfp16arith_aarch64_flags),
    ],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
2340
# NEON I8MM microkernels. Sources are compiled unconditionally; the
# ARMv8.2 i8mm+fp16 flags are chosen separately for 32-bit and 64-bit ARM.
i8mm_arm32_flags = [
    "-marm",
    "-march=armv8.2-a+i8mm+fp16",
    "-mfpu=neon-fp-armv8",
]
i8mm_arm64_flags = ["-march=armv8.2-a+i8mm+fp16"]

fb_xplat_cxx_library(
    name = "ukernels_neonfma_i8mm",
    srcs = PROD_NEONI8MM_MICROKERNEL_SRCS,
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.h"),
        ("XNNPACK/src", "**/*.c"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    platforms = (APPLE, ANDROID, CXX, WINDOWS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": i8mm_arm32_flags,
        "ovr_config//cpu:arm64": i8mm_arm64_flags,
    }),
    platform_compiler_flags = [
        ("(aarch32|arm32|armv7)$", i8mm_arm32_flags),
        ("(arm64|aarch64)", i8mm_arm64_flags),
    ],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
2397
# Hand-written AArch32 assembly microkernels. The header glob exposes
# xnnpack/assembly.h and every .S file under XNNPACK/src.
asm_aarch32_flags = [
    "-marm",
    "-march=armv8.2-a+dotprod+fp16",
    "-mfpu=neon-fp-armv8",
]

fb_xplat_cxx_library(
    name = "ukernels_asm_aarch32",
    srcs = AARCH32_ASM_MICROKERNEL_SRCS,
    headers = subdir_glob([
        ("XNNPACK/src", "xnnpack/assembly.h"),
        ("XNNPACK/src", "**/*.S"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    platforms = (APPLE, ANDROID, CXX, WINDOWS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm32": asm_aarch32_flags,
    }),
    platform_compiler_flags = [
        ("(aarch32|arm32|armv7)", asm_aarch32_flags),
    ],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
2445
# Hand-written AArch64 assembly microkernels. The header glob exposes
# xnnpack/assembly.h and every .S file under XNNPACK/src.
asm_aarch64_flags = ["-march=armv8.2-a+fp16+dotprod"]

fb_xplat_cxx_library(
    name = "ukernels_asm_aarch64",
    srcs = AARCH64_ASM_MICROKERNEL_SRCS,
    headers = subdir_glob([
        ("XNNPACK/src", "xnnpack/assembly.h"),
        ("XNNPACK/src", "**/*.S"),
    ]),
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    compiler_flags = ["-O2"] + select({
        "DEFAULT": [],
        "ovr_config//cpu:arm64": asm_aarch64_flags,
    }),
    platform_compiler_flags = [
        ("(aarch64|arm64)", asm_aarch64_flags),
    ],
    windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS,
    preprocessor_flags = ["-DXNN_LOG_LEVEL=0"],
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":interface",
        third_party("FP16"),
    ],
)
2488
# Source-less aggregate target that pulls in the microkernel libraries listed
# below. Dependency order is kept exactly as originally declared.
arm64_lib_deps = [
    ":ukernels_asm_aarch64",
    ":ukernels_neon",
    ":ukernels_neon_aarch64",
    ":ukernels_neon_dot_fp16arith",
    ":ukernels_neon_dot_fp16arith_aarch64",
    ":ukernels_neon_dot",
    ":ukernels_neon_dot_aarch64",
    ":ukernels_neon_fma",
    ":ukernels_neon_fp16",
    ":ukernels_neon_fp16arith",
    ":ukernels_neon_fp16arith_aarch64",
    ":ukernels_neon_v8",
    ":ukernels_neonfma_aarch64",
    ":ukernels_neonfma_i8mm",
]

fb_xplat_cxx_library(
    name = "arm64_lib",
    deps = arm64_lib_deps,
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    labels = labels,
)
2512
# Source-less aggregate target that pulls in the x86/x86_64 microkernel
# libraries listed below. Dependency order is kept exactly as originally
# declared.
x86_lib_deps = [
    ":ukernels_avx",
    ":ukernels_avx2",
    ":ukernels_avx512",
    ":ukernels_avx512skx",
    ":ukernels_f16c",
    ":ukernels_fma3",
    ":ukernels_sse",
    ":ukernels_sse2",
    ":ukernels_sse41",
    ":ukernels_ssse3",
    ":ukernels_xop",
    ":ukernels_avx512vbmi",
    ":ukernels_avx512vnni",
    ":ukernels_avx512vnnigfni",
    # ":ukernels_avxvnni" is intentionally left out: the avxvnni microkernels
    # fail on older compilers.
]

fb_xplat_cxx_library(
    name = "x86_and_x86_64_lib",
    deps = x86_lib_deps,
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    labels = labels,
)
2537
# Source-less aggregate target that pulls in the `_ovr_win32` x86 microkernel
# libraries (plus `ukernels_avx512vbmi`) and exports
# -DXNN_ENABLE_AVX512VNNIGFNI=0 to dependents.
x86_win32_lib_deps = [
    ":ukernels_avx2_ovr_win32",
    ":ukernels_avx512_ovr_win32",
    ":ukernels_avx512skx_ovr_win32",
    ":ukernels_avx_ovr_win32",
    ":ukernels_f16c_ovr_win32",
    ":ukernels_fma3_ovr_win32",
    ":ukernels_sse2_ovr_win32",
    ":ukernels_sse41_ovr_win32",
    ":ukernels_sse_ovr_win32",
    ":ukernels_ssse3_ovr_win32",
    ":ukernels_xop_ovr_win32",
    ":ukernels_avx512vbmi",
    # Disabled: ":ukernels_avx512vnni_ovr_win32" — build crashes on Windows
    # Clang 17.0.3, re-enable when fixed (T199959765).
    # Disabled: ":ukernels_avx512vnnigfni_ovr_win32".
    # Disabled: ":ukernels_avxvnni_ovr_win32" — the avxvnni microkernels fail
    # on older compilers.
]

fb_xplat_cxx_library(
    name = "x86_and_x86_64_lib_ovr_win32",
    deps = x86_win32_lib_deps,
    exported_preprocessor_flags = [
        "-DXNN_ENABLE_AVX512VNNIGFNI=0",
    ],
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    labels = labels,
)
2565
# Source-less aggregate target that pulls in every ARM microkernel library
# listed below (32-bit and 64-bit). Dependency order is kept exactly as
# originally declared.
arm_lib_deps = [
    ":ukernels_armsimd32",
    ":ukernels_asm_aarch32",
    ":ukernels_asm_aarch64",
    ":ukernels_neon",
    ":ukernels_neon_aarch64",
    ":ukernels_neon_dot",
    ":ukernels_neon_dot_aarch64",
    ":ukernels_neon_dot_fp16arith",
    ":ukernels_neon_dot_fp16arith_aarch64",
    ":ukernels_neon_fma",
    ":ukernels_neon_fp16",
    ":ukernels_neon_fp16arith",
    ":ukernels_neon_fp16arith_aarch64",
    ":ukernels_neon_v8",
    ":ukernels_neonfma_aarch64",
    ":ukernels_neonfma_i8mm",
    ":ukernels_fp16arith",
]

fb_xplat_cxx_library(
    name = "arm_lib",
    deps = arm_lib_deps,
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    labels = labels,
)
2592
# Source-less aggregate target that pulls in the microkernel libraries listed
# below. Dependency order is kept exactly as originally declared.
armv7_lib_deps = [
    ":ukernels_asm_aarch32",
    ":ukernels_neon",
    ":ukernels_neon_dot",
    ":ukernels_neon_fma",
    ":ukernels_neon_v8",
]

fb_xplat_cxx_library(
    name = "armv7_lib",
    deps = armv7_lib_deps,
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    labels = labels,
)
2607
# Aggregate of all production microkernels: the scalar kernels are always
# included, and the architecture-specific aggregate is chosen by `select`.
default_ukernel_libs = [
    ":arm_lib",
    ":x86_and_x86_64_lib",
]

# On Windows the `_ovr_win32` aggregate is used only when the
# XNNPACK_WINDOWS_AVX512F_ENABLED argument is set; otherwise Windows gets the
# same libraries as the DEFAULT branch.
if XNNPACK_WINDOWS_AVX512F_ENABLED:
    windows_ukernel_libs = [":x86_and_x86_64_lib_ovr_win32"]
else:
    windows_ukernel_libs = default_ukernel_libs

fb_xplat_cxx_library(
    name = "prod_ukernels",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [":ukernels_scalar"] + select({
        "DEFAULT": default_ukernel_libs,
        "ovr_config//os:windows": windows_ukernel_libs,
        # doesn't cover iphonesimulator-x86_64
        "ovr_config//runtime:arm64-linux-ubuntu-neon": [":arm64_lib"],
        "ovr_config//runtime:platform010": [":x86_and_x86_64_lib"],
    }),
)
2630
# Top-level XNNPACK library: core/operator/logging sources plus the runtime
# support files listed below, linked against the subgraph, tables and
# production microkernel targets. Exposes XNNPACK/include/xnnpack.h as
# "xnnpack.h".
xnnpack_runtime_srcs = [
    "XNNPACK/src/configs/hardware-config.c",
    "XNNPACK/src/microkernel-utils.c",
    "XNNPACK/src/operator-run.c",
    "XNNPACK/src/packing.c",
    "XNNPACK/src/cache.c",
    "XNNPACK/src/indirection.c",
    "XNNPACK/src/operator-utils.c",
    "XNNPACK/src/normalization.c",
    "XNNPACK/src/allocator.c",
    "XNNPACK/src/memory.c",
    "XNNPACK/src/mutex.c",
    "XNNPACK/src/microparams-init.c",
]

# Feature switches passed to every translation unit of this target.
xnnpack_feature_defines = [
    "-DXNN_LOG_LEVEL=0",
    "-DXNN_NO_Q8_OPERATORS",
    "-DXNN_NO_F16_OPERATORS",
    "-DXNN_NO_NCHW_OPERATORS",
    "-DXNN_NO_QU8_OPERATORS",
    "-DXNN_NO_U8_OPERATORS",
    "-DXNN_NO_X32_OPERATORS",
    "-DXNN_NO_X8_OPERATORS",
    "-DXNN_ENABLE_MEMOPT",
    "-DXNN_ENABLE_SPARSE=0",
    "-DXNN_ENABLE_ASSEMBLY",
    "-DXNN_ENABLE_GEMM_M_SPECIALIZATION",
    "-DXNN_ENABLE_ARM_DOTPROD",
    "-DXNN_ENABLE_CPUINFO",
    "-DXNN_ENABLE_ARM_I8MM=1",
    "-DXNN_ENABLE_ARM_FP16_VECTOR=1",
    "-DXNN_ENABLE_AVXVNNI=0",
]

fb_xplat_cxx_library(
    name = "XNNPACK",
    srcs = XNNPACK_SRCS + LOGGING_SRCS + OPERATOR_SRCS + xnnpack_runtime_srcs,
    headers = subdir_glob([
        ("XNNPACK/src", "**/*.h"),
        ("XNNPACK/include", "**/*.h"),
    ]),
    exported_headers = {
        "xnnpack.h": "XNNPACK/include/xnnpack.h",
    },
    header_namespace = "",
    apple_sdks = (IOS, MACOSX, APPLETVOS),
    platforms = (APPLE, ANDROID, CXX, WINDOWS),
    preprocessor_flags = xnnpack_feature_defines,
    fbobjc_preprocessor_flags = [
        "-DXNN_PRIVATE=",
        "-DXNN_INTERNAL=",
    ],
    # The Windows flag overrides depend on the XNNPACK_WINDOWS_AVX512F_ENABLED
    # argument of the enclosing macro.
    windows_clang_compiler_flags_override = (WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS) if XNNPACK_WINDOWS_AVX512F_ENABLED else WINDOWS_FLAGS,
    windows_compiler_flags_override = WINDOWS_FLAGS if XNNPACK_WINDOWS_AVX512F_ENABLED else [],
    labels = labels,
    preferred_linkage = "static",
    visibility = ["PUBLIC"],
    deps = [
        ":subgraph",
        ":tables",
        ":prod_ukernels",
        third_party("cpuinfo"),
        third_party("pthreadpool"),
    ],
)
2693