opencv
1332 строки · 41.8 Кб
/*
 * Copyright (C) 2010 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* ChangeLog for this library:
 *
 * NDK r10e?: Add MIPS MSA feature.
 *
 * NDK r10: Support for 64-bit CPUs (Intel, ARM & MIPS).
 *
 * NDK r8d: Add android_setCpu().
 *
 * NDK r8c: Add new ARM CPU features: VFPv2, VFP_D32, VFP_FP16,
 *          VFP_FMA, NEON_FMA, IDIV_ARM, IDIV_THUMB2 and iWMMXt.
 *
 *          Rewrite the code to parse /proc/self/auxv instead of
 *          the "Features" field in /proc/cpuinfo.
 *
 *          Dynamically allocate the buffer that holds the content
 *          of /proc/cpuinfo to deal with newer hardware.
 *
 * NDK r7c: Fix CPU count computation. The old method only reported the
 *          number of _active_ CPUs when the library was initialized,
 *          which could be less than the real total.
 *
 * NDK r5: Handle buggy kernels which report a CPU Architecture number of 7
 *         for an ARMv6 CPU (see below).
 *
 *         Handle kernels that only report 'neon', and not 'vfpv3'
 *         (VFPv3 is mandated by the ARM architecture if Neon is implemented)
 *
 *         Handle kernels that only report 'vfpv3d16', and not 'vfpv3'
 *
 *         Fix x86 compilation. Report ANDROID_CPU_FAMILY_X86 in
 *         android_getCpuFamily().
 *
 * NDK r4: Initial release
 */

#include "cpu-features.h"

#include <dlfcn.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/system_properties.h>
#include <unistd.h>

75static pthread_once_t g_once;76static int g_inited;77static AndroidCpuFamily g_cpuFamily;78static uint64_t g_cpuFeatures;79static int g_cpuCount;80
81#ifdef __arm__82static uint32_t g_cpuIdArm;83#endif84
85static const int android_cpufeatures_debug = 0;86
87#define D(...) \88do { \89if (android_cpufeatures_debug) { \90printf(__VA_ARGS__); fflush(stdout); \91} \92} while (0)93
/* Execute the CPUID instruction for leaf 'func' with sub-leaf (ECX) 0 and
 * store the resulting EAX, EBX, ECX and EDX into values[0..3].
 *
 * EBX/RBX is saved into a scratch register before CPUID and swapped back
 * afterwards, so the asm never lists it as an operand by name (it may be
 * reserved by the compiler when building PIC code). No push/pop is used,
 * so the stack below the stack pointer is left untouched.
 *
 * ECX is explicitly zeroed on input: leaves that take a sub-leaf index
 * (such as leaf 7) would otherwise be queried with whatever value the
 * register happened to hold.
 */
#ifdef __i386__
static __inline__ void x86_cpuid(int func, int values[4])
{
    int a, b, c, d;
    __asm__ __volatile__ (
        "mov %%ebx, %1\n\t"     /* save the caller's EBX in the scratch reg */
        "cpuid\n\t"
        "xchg %%ebx, %1\n\t"    /* restore EBX, scratch reg gets CPUID's EBX */
        : "=a" (a), "=&r" (b), "=c" (c), "=d" (d)
        : "a" (func), "c" (0)
    );
    values[0] = a;
    values[1] = b;
    values[2] = c;
    values[3] = d;
}
#elif defined(__x86_64__)
static __inline__ void x86_cpuid(int func, int values[4])
{
    uint32_t a, c, d;
    uint64_t b;     /* 64-bit so that %1 names a full-width register */
    __asm__ __volatile__ (
        "mov %%rbx, %1\n\t"     /* save the caller's RBX in the scratch reg */
        "cpuid\n\t"
        "xchg %%rbx, %1\n\t"    /* restore RBX, scratch reg gets CPUID's EBX */
        : "=a" (a), "=&r" (b), "=c" (c), "=d" (d)
        : "a" (func), "c" (0)
    );
    values[0] = (int)a;
    values[1] = (int)(uint32_t)b;
    values[2] = (int)c;
    values[3] = (int)d;
}
#endif

/* Compute the size of a file by reading it until end-of-file. This is
 * needed because files under /proc do not always return a valid size when
 * using fseek(0, SEEK_END) + ftell(). Nor can they be mmap()-ed.
 *
 * Returns -1 if the file cannot be opened. If a read error other than
 * EINTR occurs, the number of bytes counted so far is returned.
 */
static int
get_file_size(const char* pathname)
{
    char scratch[256];
    int  total = 0;
    int  done = 0;

    int fd = open(pathname, O_RDONLY);
    if (fd < 0) {
        D("Can't open %s: %s\n", pathname, strerror(errno));
        return -1;
    }

    while (!done) {
        int nread = read(fd, scratch, sizeof scratch);
        if (nread > 0) {
            total += nread;
        } else if (nread == 0) {
            /* End of file. */
            done = 1;
        } else if (errno != EINTR) {
            /* Real error: give up; an interrupted read is simply retried. */
            D("Error while reading %s: %s\n", pathname, strerror(errno));
            done = 1;
        }
    }
    close(fd);
    return total;
}

/* Read the content of a file into a user-provided buffer.
 *
 * Reads at most 'buffsize' bytes and does *not* zero-terminate the data.
 * Returns the number of bytes read, or -1 if the file could not be opened
 * or if a read error happened before any byte was read.
 */
static int
read_file(const char* pathname, char* buffer, size_t buffsize)
{
    int fd = open(pathname, O_RDONLY);
    if (fd < 0) {
        D("Could not open %s: %s\n", pathname, strerror(errno));
        return -1;
    }

    int total = 0;
    while (total < (int)buffsize) {
        int nread = read(fd, buffer + total, buffsize - total);
        if (nread == 0)
            break;              /* end of file */
        if (nread > 0) {
            total += nread;
            continue;
        }
        if (errno == EINTR)
            continue;           /* interrupted, try again */

        D("Error while reading from %s: %s\n", pathname, strerror(errno));
        if (total == 0)
            total = -1;         /* nothing was read at all: report failure */
        break;
    }
    close(fd);
    return total;
}

#ifdef __arm__
/* Extract the content of the first occurrence of a given field in
 * the content of /proc/cpuinfo and return it as a heap-allocated
 * string that must be freed by the caller.
 *
 * 'buffer' holds 'buflen' bytes and is NOT required to be zero-terminated;
 * no byte at or after buffer + buflen is ever read.
 *
 * Return NULL if the field is not found, is not followed by ": ", or if
 * the allocation fails.
 */
static char*
extract_cpuinfo_field(const char* buffer, int buflen, const char* field)
{
    int  fieldlen = strlen(field);
    const char* bufend = buffer + buflen;
    char* result = NULL;
    int len;
    const char *p, *q;

    /* Look for the first field occurrence, and ensure it starts the line. */
    p = buffer;
    for (;;) {
        p = memmem(p, bufend-p, field, fieldlen);
        if (p == NULL)
            goto EXIT;

        if (p == buffer || p[-1] == '\n')
            break;

        p += fieldlen;
    }

    /* Skip to the first colon followed by a space. The colon may be the
     * very last byte of the buffer, so check the bound before peeking at
     * the character that follows it. */
    p += fieldlen;
    p = memchr(p, ':', bufend-p);
    if (p == NULL || p + 1 >= bufend || p[1] != ' ')
        goto EXIT;

    /* Find the end of the line */
    p += 2;
    q = memchr(p, '\n', bufend-p);
    if (q == NULL)
        q = bufend;

    /* Copy the line into a heap-allocated buffer */
    len = q-p;
    result = malloc(len+1);
    if (result == NULL)
        goto EXIT;

    memcpy(result, p, len);
    result[len] = '\0';

EXIT:
    return result;
}

/* Checks that a space- or tab-separated list of items contains one given
 * 'item'. 'list' must be zero-terminated, or NULL.
 * Returns 1 if found, 0 otherwise (including when 'list' is NULL).
 */
static int
has_list_item(const char* list, const char* item)
{
    const char*  p = list;
    int itemlen = strlen(item);

    if (list == NULL)
        return 0;

    while (*p) {
        const char*  q;

        /* skip spaces */
        while (*p == ' ' || *p == '\t')
            p++;

        /* find end of current list item */
        q = p;
        while (*q && *q != ' ' && *q != '\t')
            q++;

        if (itemlen == q-p && !memcmp(p, item, itemlen))
            return 1;

        /* skip to next item */
        p = q;
    }
    return 0;
}
#endif /* __arm__ */

/* Parse an unsigned number written in 'base' (at most 16), starting from
 * 'input' but not going further than 'limit'. Return the value into
 * '*result'. Digits above 9 may be given as 'a'-'f' or 'A'-'F'.
 *
 * NOTE: Does not skip over leading spaces, or deal with sign characters.
 * NOTE: Ignores overflows. The value is accumulated with unsigned
 *       arithmetic so that an overflowing input wraps around instead of
 *       triggering undefined behavior.
 *
 * The function returns NULL in case of error (no digit could be parsed,
 * in which case '*result' is left untouched), or the new position after
 * the number in case of success (which will always be <= 'limit').
 */
static const char*
parse_number(const char* input, const char* limit, int base, int* result)
{
    const char* p = input;
    unsigned val = 0;
    while (p < limit) {
        int d = (*p - '0');
        if ((unsigned)d >= 10U) {
            /* Not a decimal digit: try lower-case, then upper-case hex. */
            d = (*p - 'a');
            if ((unsigned)d >= 6U)
                d = (*p - 'A');
            if ((unsigned)d >= 6U)
                break;
            d += 10;
        }
        if (d >= base)
            break;
        val = val * (unsigned)base + (unsigned)d;
        p++;
    }
    if (p == input)
        return NULL;

    *result = (int)val;
    return p;
}

/* Parse a base-10 number in [input, limit); see parse_number() for the
 * return value convention. */
static const char*
parse_decimal(const char* input, const char* limit, int* result)
{
    enum { kDecimalBase = 10 };
    return parse_number(input, limit, kDecimalBase, result);
}

#ifdef __arm__
/* Parse a base-16 number in [input, limit); see parse_number() for the
 * return value convention. */
static const char*
parse_hexadecimal(const char* input, const char* limit, int* result)
{
    enum { kHexBase = 16 };
    return parse_number(input, limit, kHexBase, result);
}
#endif /* __arm__ */

/* This small data type is used to represent a CPU list / mask, as read
 * from sysfs on Linux. See http://www.kernel.org/doc/Documentation/cputopology.txt
 *
 * For now, we don't expect more than 32 cores on mobile devices, so keep
 * everything simple: bit N of 'mask' is set when CPU N is in the list.
 */
typedef struct {
    uint32_t mask;
} CpuList;

/* Make 'list' empty. */
static __inline__ void
cpulist_init(CpuList* list) {
    list->mask = 0;
}

/* Replace 'list1' with the intersection of 'list1' and 'list2'. */
static __inline__ void
cpulist_and(CpuList* list1, CpuList* list2) {
    list1->mask = list1->mask & list2->mask;
}

/* Add CPU 'index' to 'list'. Indices outside of 0..31 are ignored. */
static __inline__ void
cpulist_set(CpuList* list, int index) {
    if ((unsigned)index >= 32)
        return;

    list->mask |= (uint32_t)(1U << index);
}

/* Return the number of CPUs in 'list'. */
static __inline__ int
cpulist_count(CpuList* list) {
    return __builtin_popcount(list->mask);
}

373/* Parse a textual list of cpus and store the result inside a CpuList object.
374* Input format is the following:
375* - comma-separated list of items (no spaces)
376* - each item is either a single decimal number (cpu index), or a range made
377* of two numbers separated by a single dash (-). Ranges are inclusive.
378*
379* Examples: 0
380* 2,4-127,128-143
381* 0-1
382*/
383static void384cpulist_parse(CpuList* list, const char* line, int line_len)385{
386const char* p = line;387const char* end = p + line_len;388const char* q;389
390/* NOTE: the input line coming from sysfs typically contains a391* trailing newline, so take care of it in the code below
392*/
393while (p < end && *p != '\n')394{395int val, start_value, end_value;396
397/* Find the end of current item, and put it into 'q' */398q = memchr(p, ',', end-p);399if (q == NULL) {400q = end;401}402
403/* Get first value */404p = parse_decimal(p, q, &start_value);405if (p == NULL)406goto BAD_FORMAT;407
408end_value = start_value;409
410/* If we're not at the end of the item, expect a dash and411* and integer; extract end value.
412*/
413if (p < q && *p == '-') {414p = parse_decimal(p+1, q, &end_value);415if (p == NULL)416goto BAD_FORMAT;417}418
419/* Set bits CPU list bits */420for (val = start_value; val <= end_value; val++) {421cpulist_set(list, val);422}423
424/* Jump to next item */425p = q;426if (p < end)427p++;428}429
430BAD_FORMAT:431;432}
433
434/* Read a CPU list from one sysfs file */
435static void436cpulist_read_from(CpuList* list, const char* filename)437{
438char file[64];439int filelen;440
441cpulist_init(list);442
443filelen = read_file(filename, file, sizeof file);444if (filelen < 0) {445D("Could not read %s: %s\n", filename, strerror(errno));446return;447}448
449cpulist_parse(list, file, filelen);450}
/* ELF hardware-capability bit masks, one table per target architecture. */

#if defined(__aarch64__)
// see <uapi/asm/hwcap.h> kernel header
#define HWCAP_FP        (1 << 0)
#define HWCAP_ASIMD     (1 << 1)
#define HWCAP_AES       (1 << 3)
#define HWCAP_PMULL     (1 << 4)
#define HWCAP_SHA1      (1 << 5)
#define HWCAP_SHA2      (1 << 6)
#define HWCAP_CRC32     (1 << 7)
#endif

#if defined(__arm__)

// See <asm/hwcap.h> kernel header.
#define HWCAP_VFP       (1 << 6)
#define HWCAP_IWMMXT    (1 << 9)
#define HWCAP_NEON      (1 << 12)
#define HWCAP_VFPv3     (1 << 13)
#define HWCAP_VFPv3D16  (1 << 14)
#define HWCAP_VFPv4     (1 << 16)
#define HWCAP_IDIVA     (1 << 17)
#define HWCAP_IDIVT     (1 << 18)

// see <uapi/asm/hwcap.h> kernel header
#define HWCAP2_AES      (1 << 0)
#define HWCAP2_PMULL    (1 << 1)
#define HWCAP2_SHA1     (1 << 2)
#define HWCAP2_SHA2     (1 << 3)
#define HWCAP2_CRC32    (1 << 4)

// This is the list of 32-bit ARMv7 optional features that are _always_
// supported by ARMv8 CPUs, as mandated by the ARM Architecture Reference
// Manual.
#define HWCAP_SET_FOR_ARMV8 \
    ( HWCAP_VFP   | \
      HWCAP_NEON  | \
      HWCAP_VFPv3 | \
      HWCAP_VFPv4 | \
      HWCAP_IDIVA | \
      HWCAP_IDIVT )
#endif

#if defined(__mips__)
// see <uapi/asm/hwcap.h> kernel header
#define HWCAP_MIPS_R6   (1 << 0)
#define HWCAP_MIPS_MSA  (1 << 1)
#endif

#if defined(__arm__) || defined(__aarch64__) || defined(__mips__)

/* Auxiliary vector entry types queried by this file. */
#define AT_HWCAP 16
#define AT_HWCAP2 26

// Probe the system's C library for a 'getauxval' function and call it if
// it exists, or return 0 for failure. This function is available since API
// level 20.
//
// This code does *NOT* check for '__ANDROID_API__ >= 20' to support the
// edge case where some NDK developers use headers for a platform that is
// newer than the one really targeted by their application.
// This is typically done to use newer native APIs only when running on more
// recent Android versions, and requires careful symbol management.
//
// Note that getauxval() can't really be re-implemented here, because
// its implementation does not parse /proc/self/auxv. Instead it depends
// on values that are passed by the kernel at process-init time to the
// C runtime initialization layer.
#if 1
// OpenCV calls CPU features check during library initialization stage
// (under other dlopen() call).
// Unfortunately, calling dlopen() recursively is not supported on some old
// Android versions. Android fix is here:
// - https://android-review.googlesource.com/#/c/32951/
// - GitHub mirror: https://github.com/android/platform_bionic/commit/e19d702b8e330cef87e0983733c427b5f7842144

// Weak declaration: the symbol's address compares equal to 0 when the
// C library does not provide getauxval(), which is tested below.
__attribute__((weak)) unsigned long getauxval(unsigned long);

// Return the low 32 bits of getauxval(hwcap_type), or 0 when getauxval()
// is not available.
static uint32_t
get_elf_hwcap_from_getauxval(int hwcap_type) {
    if (getauxval == 0) {
        D("getauxval() is not available\n");
        return 0;
    }
    return (uint32_t)getauxval(hwcap_type);
}
#else
// Alternative implementation (currently compiled out) that looks
// getauxval() up at runtime through dlopen()/dlsym().
static uint32_t
get_elf_hwcap_from_getauxval(int hwcap_type) {
    typedef unsigned long getauxval_func_t(unsigned long);

    dlerror();
    void* libc = dlopen("libc.so", RTLD_NOW);
    if (libc == NULL) {
        D("Could not dlopen() C library: %s\n", dlerror());
        return 0;
    }

    uint32_t hwcap = 0;
    getauxval_func_t* fn = (getauxval_func_t*)dlsym(libc, "getauxval");
    if (fn != NULL) {
        // Note: getauxval() returns 0 on failure. Doesn't touch errno.
        hwcap = (uint32_t)(*fn)(hwcap_type);
    } else {
        D("Could not find getauxval() in C library\n");
    }
    dlclose(libc);
    return hwcap;
}
#endif
#endif

#if defined(__arm__)
// Parse /proc/self/auxv to extract the ELF HW capabilities bitmap for the
// current CPU. Note that this file is not accessible from regular
// application processes on some Android platform releases.
// On success, return new ELF hwcaps, or 0 on failure.
static uint32_t
get_elf_hwcap_from_proc_self_auxv(void) {
    const char filepath[] = "/proc/self/auxv";
    struct { uint32_t tag; uint32_t value; } entry;
    uint32_t hwcap = 0;
    int keep_going = 1;

    int fd = TEMP_FAILURE_RETRY(open(filepath, O_RDONLY));
    if (fd < 0) {
        D("Could not open %s: %s\n", filepath, strerror(errno));
        return 0;
    }

    while (keep_going) {
        int ret = TEMP_FAILURE_RETRY(read(fd, (char*)&entry, sizeof entry));
        if (ret < 0) {
            D("Error while reading %s: %s\n", filepath, strerror(errno));
            keep_going = 0;
        } else if (ret == 0 || (entry.tag == 0 && entry.value == 0)) {
            // End of file, or the all-zero entry that ends the list.
            keep_going = 0;
        } else if (entry.tag == AT_HWCAP) {
            hwcap = entry.value;
            keep_going = 0;
        }
    }
    close(fd);
    return hwcap;
}

/* Compute the ELF HWCAP flags from the content of /proc/cpuinfo.
 * This works by parsing the 'Features' line, which lists which optional
 * features the device's CPU supports, on top of its reference
 * architecture.
 *
 * 'cpuinfo' holds 'cpuinfo_len' bytes of /proc/cpuinfo content.
 */
static uint32_t
get_elf_hwcap_from_proc_cpuinfo(const char* cpuinfo, int cpuinfo_len) {
    /* Maps each name looked up in the 'Features' line to its HWCAP bits. */
    static const struct {
        const char* name;
        uint32_t    bits;
    } kFeatureBits[] = {
        { "vfp",      HWCAP_VFP },
        { "vfpv3",    HWCAP_VFPv3 },
        { "vfpv3d16", HWCAP_VFPv3D16 },
        { "vfpv4",    HWCAP_VFPv4 },
        { "neon",     HWCAP_NEON },
        { "idiva",    HWCAP_IDIVA },
        { "idivt",    HWCAP_IDIVT },
        { "idiv",     HWCAP_IDIVA | HWCAP_IDIVT },
        { "iwmmxt",   HWCAP_IWMMXT },
    };

    uint32_t hwcaps = 0;
    long architecture = 0;
    size_t n;

    char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture");
    if (cpuArch != NULL) {
        architecture = strtol(cpuArch, NULL, 10);
        free(cpuArch);

        if (architecture >= 8L) {
            // This is a 32-bit ARM binary running on a 64-bit ARM64 kernel.
            // The 'Features' line only lists the optional features that the
            // device's CPU supports, compared to its reference architecture
            // which are of no use for this process.
            D("Faking 32-bit ARM HWCaps on ARMv%ld CPU\n", architecture);
            return HWCAP_SET_FOR_ARMV8;
        }
    }

    char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features");
    if (cpuFeatures == NULL)
        return hwcaps;

    D("Found cpuFeatures = '%s'\n", cpuFeatures);

    for (n = 0; n < sizeof(kFeatureBits)/sizeof(kFeatureBits[0]); ++n) {
        if (has_list_item(cpuFeatures, kFeatureBits[n].name))
            hwcaps |= kFeatureBits[n].bits;
    }

    free(cpuFeatures);
    return hwcaps;
}
#endif /* __arm__ */

651/* Return the number of cpus present on a given device.
652*
653* To handle all weird kernel configurations, we need to compute the
654* intersection of the 'present' and 'possible' CPU lists and count
655* the result.
656*/
657static int658get_cpu_count(void)659{
660CpuList cpus_present[1];661CpuList cpus_possible[1];662
663cpulist_read_from(cpus_present, "/sys/devices/system/cpu/present");664cpulist_read_from(cpus_possible, "/sys/devices/system/cpu/possible");665
666/* Compute the intersection of both sets to get the actual number of667* CPU cores that can be used on this device by the kernel.
668*/
669cpulist_and(cpus_present, cpus_possible);670
671return cpulist_count(cpus_present);672}
673
674static void675android_cpuInitFamily(void)676{
677#if defined(__arm__)678g_cpuFamily = ANDROID_CPU_FAMILY_ARM;679#elif defined(__i386__)680g_cpuFamily = ANDROID_CPU_FAMILY_X86;681#elif defined(__mips64)682/* Needs to be before __mips__ since the compiler defines both */
683g_cpuFamily = ANDROID_CPU_FAMILY_MIPS64;684#elif defined(__mips__)685g_cpuFamily = ANDROID_CPU_FAMILY_MIPS;686#elif defined(__aarch64__)687g_cpuFamily = ANDROID_CPU_FAMILY_ARM64;688#elif defined(__x86_64__)689g_cpuFamily = ANDROID_CPU_FAMILY_X86_64;690#else691g_cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN;692#endif693}
694
695static void696android_cpuInit(void)697{
698char* cpuinfo = NULL;699int cpuinfo_len;700
701android_cpuInitFamily();702
703g_cpuFeatures = 0;704g_cpuCount = 1;705g_inited = 1;706
707cpuinfo_len = get_file_size("/proc/cpuinfo");708if (cpuinfo_len < 0) {709D("cpuinfo_len cannot be computed!");710return;711}712cpuinfo = malloc(cpuinfo_len);713if (cpuinfo == NULL) {714D("cpuinfo buffer could not be allocated");715return;716}717cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len);718D("cpuinfo_len is (%d):\n%.*s\n", cpuinfo_len,719cpuinfo_len >= 0 ? cpuinfo_len : 0, cpuinfo);720
721if (cpuinfo_len < 0) /* should not happen */ {722free(cpuinfo);723return;724}725
726/* Count the CPU cores, the value may be 0 for single-core CPUs */727g_cpuCount = get_cpu_count();728if (g_cpuCount == 0) {729g_cpuCount = 1;730}731
732D("found cpuCount = %d\n", g_cpuCount);733
734#ifdef __arm__735{736/* Extract architecture from the "CPU Architecture" field.737* The list is well-known, unlike the the output of
738* the 'Processor' field which can vary greatly.
739*
740* See the definition of the 'proc_arch' array in
741* $KERNEL/arch/arm/kernel/setup.c and the 'c_show' function in
742* same file.
743*/
744char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture");745
746if (cpuArch != NULL) {747char* end;748long archNumber;749int hasARMv7 = 0;750
751D("found cpuArch = '%s'\n", cpuArch);752
753/* read the initial decimal number, ignore the rest */754archNumber = strtol(cpuArch, &end, 10);755
756/* Note that ARMv8 is upwards compatible with ARMv7. */757if (end > cpuArch && archNumber >= 7) {758hasARMv7 = 1;759}760
761/* Unfortunately, it seems that certain ARMv6-based CPUs762* report an incorrect architecture number of 7!
763*
764* See http://code.google.com/p/android/issues/detail?id=10812
765*
766* We try to correct this by looking at the 'elf_format'
767* field reported by the 'Processor' field, which is of the
768* form of "(v7l)" for an ARMv7-based CPU, and "(v6l)" for
769* an ARMv6-one.
770*/
771if (hasARMv7) {772char* cpuProc = extract_cpuinfo_field(cpuinfo, cpuinfo_len,773"Processor");774if (cpuProc != NULL) {775D("found cpuProc = '%s'\n", cpuProc);776if (has_list_item(cpuProc, "(v6l)")) {777D("CPU processor and architecture mismatch!!\n");778hasARMv7 = 0;779}780free(cpuProc);781}782}783
784if (hasARMv7) {785g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_ARMv7;786}787
788/* The LDREX / STREX instructions are available from ARMv6 */789if (archNumber >= 6) {790g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_LDREX_STREX;791}792
793free(cpuArch);794}795
796/* Extract the list of CPU features from ELF hwcaps */797uint32_t hwcaps = 0;798hwcaps = get_elf_hwcap_from_getauxval(AT_HWCAP);799if (!hwcaps) {800D("Parsing /proc/self/auxv to extract ELF hwcaps!\n");801hwcaps = get_elf_hwcap_from_proc_self_auxv();802}803if (!hwcaps) {804// Parsing /proc/self/auxv will fail from regular application805// processes on some Android platform versions, when this happens806// parse proc/cpuinfo instead.807D("Parsing /proc/cpuinfo to extract ELF hwcaps!\n");808hwcaps = get_elf_hwcap_from_proc_cpuinfo(cpuinfo, cpuinfo_len);809}810
811if (hwcaps != 0) {812int has_vfp = (hwcaps & HWCAP_VFP);813int has_vfpv3 = (hwcaps & HWCAP_VFPv3);814int has_vfpv3d16 = (hwcaps & HWCAP_VFPv3D16);815int has_vfpv4 = (hwcaps & HWCAP_VFPv4);816int has_neon = (hwcaps & HWCAP_NEON);817int has_idiva = (hwcaps & HWCAP_IDIVA);818int has_idivt = (hwcaps & HWCAP_IDIVT);819int has_iwmmxt = (hwcaps & HWCAP_IWMMXT);820
821// The kernel does a poor job at ensuring consistency when822// describing CPU features. So lots of guessing is needed.823
824// 'vfpv4' implies VFPv3|VFP_FMA|FP16825if (has_vfpv4)826g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 |827ANDROID_CPU_ARM_FEATURE_VFP_FP16 |828ANDROID_CPU_ARM_FEATURE_VFP_FMA;829
830// 'vfpv3' or 'vfpv3d16' imply VFPv3. Note that unlike GCC,831// a value of 'vfpv3' doesn't necessarily mean that the D32832// feature is present, so be conservative. All CPUs in the833// field that support D32 also support NEON, so this should834// not be a problem in practice.835if (has_vfpv3 || has_vfpv3d16)836g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3;837
838// 'vfp' is super ambiguous. Depending on the kernel, it can839// either mean VFPv2 or VFPv3. Make it depend on ARMv7.840if (has_vfp) {841if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7)842g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3;843else844g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2;845}846
847// Neon implies VFPv3|D32, and if vfpv4 is detected, NEON_FMA848if (has_neon) {849g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 |850ANDROID_CPU_ARM_FEATURE_NEON |851ANDROID_CPU_ARM_FEATURE_VFP_D32;852if (has_vfpv4)853g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_NEON_FMA;854}855
856// VFPv3 implies VFPv2 and ARMv7857if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3)858g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2 |859ANDROID_CPU_ARM_FEATURE_ARMv7;860
861if (has_idiva)862g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM;863if (has_idivt)864g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2;865
866if (has_iwmmxt)867g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_iWMMXt;868}869
870/* Extract the list of CPU features from ELF hwcaps2 */871uint32_t hwcaps2 = 0;872hwcaps2 = get_elf_hwcap_from_getauxval(AT_HWCAP2);873if (hwcaps2 != 0) {874int has_aes = (hwcaps2 & HWCAP2_AES);875int has_pmull = (hwcaps2 & HWCAP2_PMULL);876int has_sha1 = (hwcaps2 & HWCAP2_SHA1);877int has_sha2 = (hwcaps2 & HWCAP2_SHA2);878int has_crc32 = (hwcaps2 & HWCAP2_CRC32);879
880if (has_aes)881g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_AES;882if (has_pmull)883g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_PMULL;884if (has_sha1)885g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_SHA1;886if (has_sha2)887g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_SHA2;888if (has_crc32)889g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_CRC32;890}891/* Extract the cpuid value from various fields */892// The CPUID value is broken up in several entries in /proc/cpuinfo.893// This table is used to rebuild it from the entries.894static const struct CpuIdEntry {895const char* field;896char format;897char bit_lshift;898char bit_length;899} cpu_id_entries[] = {900{ "CPU implementer", 'x', 24, 8 },901{ "CPU variant", 'x', 20, 4 },902{ "CPU part", 'x', 4, 12 },903{ "CPU revision", 'd', 0, 4 },904};905size_t i;906D("Parsing /proc/cpuinfo to recover CPUID\n");907for (i = 0;908i < sizeof(cpu_id_entries)/sizeof(cpu_id_entries[0]);909++i) {910const struct CpuIdEntry* entry = &cpu_id_entries[i];911char* value = extract_cpuinfo_field(cpuinfo,912cpuinfo_len,913entry->field);914if (value == NULL)915continue;916
917D("field=%s value='%s'\n", entry->field, value);918char* value_end = value + strlen(value);919int val = 0;920const char* start = value;921const char* p;922if (value[0] == '0' && (value[1] == 'x' || value[1] == 'X')) {923start += 2;924p = parse_hexadecimal(start, value_end, &val);925} else if (entry->format == 'x')926p = parse_hexadecimal(value, value_end, &val);927else928p = parse_decimal(value, value_end, &val);929
930if (p > (const char*)start) {931val &= ((1 << entry->bit_length)-1);932val <<= entry->bit_lshift;933g_cpuIdArm |= (uint32_t) val;934}935
936free(value);937}938
939// Handle kernel configuration bugs that prevent the correct940// reporting of CPU features.941static const struct CpuFix {942uint32_t cpuid;943uint64_t or_flags;944} cpu_fixes[] = {945/* The Nexus 4 (Qualcomm Krait) kernel configuration946* forgets to report IDIV support. */
947{ 0x510006f2, ANDROID_CPU_ARM_FEATURE_IDIV_ARM |948ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 },949{ 0x510006f3, ANDROID_CPU_ARM_FEATURE_IDIV_ARM |950ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 },951};952size_t n;953for (n = 0; n < sizeof(cpu_fixes)/sizeof(cpu_fixes[0]); ++n) {954const struct CpuFix* entry = &cpu_fixes[n];955
956if (g_cpuIdArm == entry->cpuid)957g_cpuFeatures |= entry->or_flags;958}959
960// Special case: The emulator-specific Android 4.2 kernel fails961// to report support for the 32-bit ARM IDIV instruction.962// Technically, this is a feature of the virtual CPU implemented963// by the emulator. Note that it could also support Thumb IDIV964// in the future, and this will have to be slightly updated.965char* hardware = extract_cpuinfo_field(cpuinfo,966cpuinfo_len,967"Hardware");968if (hardware) {969if (!strcmp(hardware, "Goldfish") &&970g_cpuIdArm == 0x4100c080 &&971(g_cpuFamily & ANDROID_CPU_ARM_FEATURE_ARMv7) != 0) {972g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM;973}974free(hardware);975}976}977#endif /* __arm__ */978#ifdef __aarch64__979{980/* Extract the list of CPU features from ELF hwcaps */981uint32_t hwcaps = 0;982hwcaps = get_elf_hwcap_from_getauxval(AT_HWCAP);983if (hwcaps != 0) {984int has_fp = (hwcaps & HWCAP_FP);985int has_asimd = (hwcaps & HWCAP_ASIMD);986int has_aes = (hwcaps & HWCAP_AES);987int has_pmull = (hwcaps & HWCAP_PMULL);988int has_sha1 = (hwcaps & HWCAP_SHA1);989int has_sha2 = (hwcaps & HWCAP_SHA2);990int has_crc32 = (hwcaps & HWCAP_CRC32);991
992if(has_fp == 0) {993D("ERROR: Floating-point unit missing, but is required by Android on AArch64 CPUs\n");994}995if(has_asimd == 0) {996D("ERROR: ASIMD unit missing, but is required by Android on AArch64 CPUs\n");997}998
999if (has_fp)1000g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_FP;1001if (has_asimd)1002g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_ASIMD;1003if (has_aes)1004g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_AES;1005if (has_pmull)1006g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_PMULL;1007if (has_sha1)1008g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_SHA1;1009if (has_sha2)1010g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_SHA2;1011if (has_crc32)1012g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_CRC32;1013}1014}1015#endif /* __aarch64__ */1016
1017#if defined(__i386__) || defined(__x86_64__)1018int regs[4];1019
1020/* According to http://en.wikipedia.org/wiki/CPUID */
1021#define VENDOR_INTEL_b 0x756e65471022#define VENDOR_INTEL_c 0x6c65746e1023#define VENDOR_INTEL_d 0x49656e691024
1025x86_cpuid(0, regs);1026int vendorIsIntel = (regs[1] == VENDOR_INTEL_b &&1027regs[2] == VENDOR_INTEL_c &&1028regs[3] == VENDOR_INTEL_d);1029
1030x86_cpuid(1, regs);1031if ((regs[2] & (1 << 9)) != 0) {1032g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSSE3;1033}1034if ((regs[2] & (1 << 23)) != 0) {1035g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_POPCNT;1036}1037if ((regs[2] & (1 << 19)) != 0) {1038g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSE4_1;1039}1040if ((regs[2] & (1 << 20)) != 0) {1041g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSE4_2;1042}1043if (vendorIsIntel && (regs[2] & (1 << 22)) != 0) {1044g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_MOVBE;1045}1046if ((regs[2] & (1 << 25)) != 0) {1047g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AES_NI;1048}1049if ((regs[2] & (1 << 28)) != 0) {1050g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AVX;1051}1052if ((regs[2] & (1 << 30)) != 0) {1053g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_RDRAND;1054}1055
1056x86_cpuid(7, regs);1057if ((regs[1] & (1 << 5)) != 0) {1058g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AVX2;1059}1060if ((regs[1] & (1 << 29)) != 0) {1061g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SHA_NI;1062}1063
1064
1065#endif1066#if defined( __mips__)1067{ /* MIPS and MIPS64 */1068/* Extract the list of CPU features from ELF hwcaps */1069uint32_t hwcaps = 0;1070hwcaps = get_elf_hwcap_from_getauxval(AT_HWCAP);1071if (hwcaps != 0) {1072int has_r6 = (hwcaps & HWCAP_MIPS_R6);1073int has_msa = (hwcaps & HWCAP_MIPS_MSA);1074if (has_r6)1075g_cpuFeatures |= ANDROID_CPU_MIPS_FEATURE_R6;1076if (has_msa)1077g_cpuFeatures |= ANDROID_CPU_MIPS_FEATURE_MSA;1078}1079}1080#endif /* __mips__ */1081
1082free(cpuinfo);1083}
1084
1085
1086AndroidCpuFamily
1087android_getCpuFamily(void)1088{
1089pthread_once(&g_once, android_cpuInit);1090return g_cpuFamily;1091}
1092
1093
1094uint64_t
1095android_getCpuFeatures(void)1096{
1097pthread_once(&g_once, android_cpuInit);1098return g_cpuFeatures;1099}
1100
1101
1102int
1103android_getCpuCount(void)1104{
1105pthread_once(&g_once, android_cpuInit);1106return g_cpuCount;1107}
1108
1109static void1110android_cpuInitDummy(void)1111{
1112g_inited = 1;1113}
1114
1115int
1116android_setCpu(int cpu_count, uint64_t cpu_features)1117{
1118/* Fail if the library was already initialized. */1119if (g_inited)1120return 0;1121
1122android_cpuInitFamily();1123g_cpuCount = (cpu_count <= 0 ? 1 : cpu_count);1124g_cpuFeatures = cpu_features;1125pthread_once(&g_once, android_cpuInitDummy);1126
1127return 1;1128}
1129
#ifdef __arm__
/* Return the ARM CPUID value, running the one-time probe first if needed. */
uint32_t
android_getCpuIdArm(void)
{
    (void)pthread_once(&g_once, android_cpuInit);

    return g_cpuIdArm;
}

/* Same as android_setCpu(), but also forces the ARM CPUID value.
 * Returns 0 if android_setCpu() refused the change, 1 otherwise. */
int
android_setCpuArm(int cpu_count, uint64_t cpu_features, uint32_t cpu_id)
{
    if (android_setCpu(cpu_count, cpu_features)) {
        g_cpuIdArm = cpu_id;
        return 1;
    }

    return 0;
}
#endif  /* __arm__ */

1149/*
1150* Technical note: Making sense of ARM's FPU architecture versions.
1151*
1152* FPA was ARM's first attempt at an FPU architecture. There is no Android
1153* device that actually uses it since this technology was already obsolete
1154* when the project started. If you see references to FPA instructions
1155* somewhere, you can be sure that this doesn't apply to Android at all.
1156*
1157* FPA was followed by "VFP", soon renamed "VFPv1" due to the emergence of
1158* new versions / additions to it. ARM considers this obsolete right now,
1159* and no known Android device implements it either.
1160*
1161* VFPv2 added a few instructions to VFPv1, and is an *optional* extension
1162* supported by some ARMv5TE, ARMv6 and ARMv6T2 CPUs. Note that a device
1163* supporting the 'armeabi' ABI doesn't necessarily support these.
1164*
1165* VFPv3-D16 adds a few instructions on top of VFPv2 and is typically used
1166* on ARMv7-A CPUs which implement a FPU. Note that it is also mandated
1167* by the Android 'armeabi-v7a' ABI. The -D16 suffix in its name means
1168* that it provides 16 double-precision FPU registers (d0-d15) and 32
1169* single-precision ones (s0-s31) which happen to be mapped to the same
1170* register banks.
1171*
1172* VFPv3-D32 is the name of an extension to VFPv3-D16 that provides 16
1173* additional double precision registers (d16-d31). Note that there are
1174* still only 32 single precision registers.
1175*
1176* VFPv3xD is a *subset* of VFPv3-D16 that only provides single-precision
1177* registers. It is only used on ARMv7-M (i.e. on micro-controllers) which
1178* are not supported by Android. Note that it is not compatible with VFPv2.
1179*
* NOTE: The term 'VFPv3' usually designates either VFPv3-D16 or VFPv3-D32
1181* depending on context. For example GCC uses it for VFPv3-D32, but
1182* the Linux kernel code uses it for VFPv3-D16 (especially in
1183* /proc/cpuinfo). Always try to use the full designation when
1184* possible.
1185*
1186* NEON, a.k.a. "ARM Advanced SIMD" is an extension that provides
1187* instructions to perform parallel computations on vectors of 8, 16,
* 32, 64 and 128 bit quantities. NEON requires VFPv3-D32 since all
1189* NEON registers are also mapped to the same register banks.
1190*
1191* VFPv4-D16, adds a few instructions on top of VFPv3-D16 in order to
1192* perform fused multiply-accumulate on VFP registers, as well as
1193* half-precision (16-bit) conversion operations.
1194*
1195* VFPv4-D32 is VFPv4-D16 with 32, instead of 16, FPU double precision
1196* registers.
1197*
* VFPv4-NEON is VFPv4-D32 with NEON instructions. It also adds fused
1199* multiply-accumulate instructions that work on the NEON registers.
1200*
1201* NOTE: Similarly, "VFPv4" might either reference VFPv4-D16 or VFPv4-D32
1202* depending on context.
1203*
1204* The following information was determined by scanning the binutils-2.22
1205* sources:
1206*
1207* Basic VFP instruction subsets:
1208*
1209* #define FPU_VFP_EXT_V1xD 0x08000000 // Base VFP instruction set.
1210* #define FPU_VFP_EXT_V1 0x04000000 // Double-precision insns.
1211* #define FPU_VFP_EXT_V2 0x02000000 // ARM10E VFPr1.
1212* #define FPU_VFP_EXT_V3xD 0x01000000 // VFPv3 single-precision.
1213* #define FPU_VFP_EXT_V3 0x00800000 // VFPv3 double-precision.
1214* #define FPU_NEON_EXT_V1 0x00400000 // Neon (SIMD) insns.
1215* #define FPU_VFP_EXT_D32 0x00200000 // Registers D16-D31.
1216* #define FPU_VFP_EXT_FP16 0x00100000 // Half-precision extensions.
1217* #define FPU_NEON_EXT_FMA 0x00080000 // Neon fused multiply-add
1218* #define FPU_VFP_EXT_FMA 0x00040000 // VFP fused multiply-add
1219*
* FPU types (excluding NEON)
*
*   FPU_VFP_V1xD (EXT_V1xD)
*      |
*      +--------------------------+
*      |                          |
*   FPU_VFP_V1 (+EXT_V1)       FPU_VFP_V3xD (+EXT_V2+EXT_V3xD)
*      |                          |
*      |                          |
*   FPU_VFP_V2 (+EXT_V2)       FPU_VFP_V4_SP_D16 (+EXT_FP16+EXT_FMA)
*      |
*   FPU_VFP_V3D16 (+EXT_V3xD+EXT_V3)
*      |
*      +--------------------------+
*      |                          |
*   FPU_VFP_V3 (+EXT_D32)      FPU_VFP_V4D16 (+EXT_FP16+EXT_FMA)
*      |                          |
*      |                       FPU_VFP_V4 (+EXT_D32)
*      |
*   FPU_VFP_HARD (+EXT_FMA+NEON_EXT_FMA)
1240*
* VFP architectures:
*
*   ARCH_VFP_V1xD (EXT_V1xD)
*      |
*      +------------------+
*      |                  |
*      |               ARCH_VFP_V3xD (+EXT_V2+EXT_V3xD)
*      |                  |
*      |               ARCH_VFP_V3xD_FP16 (+EXT_FP16)
*      |                  |
*      |               ARCH_VFP_V4_SP_D16 (+EXT_FMA)
*      |
*   ARCH_VFP_V1 (+EXT_V1)
*      |
*   ARCH_VFP_V2 (+EXT_V2)
*      |
*   ARCH_VFP_V3D16 (+EXT_V3xD+EXT_V3)
*      |
*      +-------------------+
*      |                   |
*      |                ARCH_VFP_V3D16_FP16 (+EXT_FP16)
*      |
*      +-------------------+
*      |                   |
*      |                ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA)
*      |                   |
*      |                ARCH_VFP_V4 (+EXT_D32)
*      |                   |
*      |                ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA)
*      |
*   ARCH_VFP_V3 (+EXT_D32)
*      |
*      +-------------------+
*      |                   |
*      |                ARCH_VFP_V3_FP16 (+EXT_FP16)
*      |
*   ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON)
*      |
*   ARCH_NEON_FP16 (+EXT_FP16)
1280*
* -fpu=<name> values and their correspondence with FPU architectures above:
1282*
1283* {"vfp", FPU_ARCH_VFP_V2},
1284* {"vfp9", FPU_ARCH_VFP_V2},
* {"vfp3", FPU_ARCH_VFP_V3}, // For backwards compatibility.
1286* {"vfp10", FPU_ARCH_VFP_V2},
1287* {"vfp10-r0", FPU_ARCH_VFP_V1},
1288* {"vfpxd", FPU_ARCH_VFP_V1xD},
1289* {"vfpv2", FPU_ARCH_VFP_V2},
1290* {"vfpv3", FPU_ARCH_VFP_V3},
1291* {"vfpv3-fp16", FPU_ARCH_VFP_V3_FP16},
1292* {"vfpv3-d16", FPU_ARCH_VFP_V3D16},
1293* {"vfpv3-d16-fp16", FPU_ARCH_VFP_V3D16_FP16},
1294* {"vfpv3xd", FPU_ARCH_VFP_V3xD},
1295* {"vfpv3xd-fp16", FPU_ARCH_VFP_V3xD_FP16},
1296* {"neon", FPU_ARCH_VFP_V3_PLUS_NEON_V1},
1297* {"neon-fp16", FPU_ARCH_NEON_FP16},
1298* {"vfpv4", FPU_ARCH_VFP_V4},
1299* {"vfpv4-d16", FPU_ARCH_VFP_V4D16},
1300* {"fpv4-sp-d16", FPU_ARCH_VFP_V4_SP_D16},
1301* {"neon-vfpv4", FPU_ARCH_NEON_VFP_V4},
1302*
1303*
1304* Simplified diagram that only includes FPUs supported by Android:
1305* Only ARCH_VFP_V3D16 is actually mandated by the armeabi-v7a ABI,
1306* all others are optional and must be probed at runtime.
1307*
*   ARCH_VFP_V3D16 (EXT_V1xD+EXT_V1+EXT_V2+EXT_V3xD+EXT_V3)
*      |
*      +-------------------+
*      |                   |
*      |                ARCH_VFP_V3D16_FP16 (+EXT_FP16)
*      |
*      +-------------------+
*      |                   |
*      |                ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA)
*      |                   |
*      |                ARCH_VFP_V4 (+EXT_D32)
*      |                   |
*      |                ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA)
*      |
*   ARCH_VFP_V3 (+EXT_D32)
*      |
*      +-------------------+
*      |                   |
*      |                ARCH_VFP_V3_FP16 (+EXT_FP16)
*      |
*   ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON)
*      |
*   ARCH_NEON_FP16 (+EXT_FP16)
1331*
1332*/
1333