diff options
| author | Jussi Pakkanen <jpakkane@gmail.com> | 2017-07-19 15:50:04 +0300 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2017-07-19 15:50:04 +0300 |
| commit | e89b6cdd1037d4c7cfdcb37555f2cbaf66f6ae05 (patch) | |
| tree | e812ee63ecf54b37d17611e8514c5223ff058e47 /test cases/common | |
| parent | acb7e3aaa0a006b36ad8fb86527e46c3b17ff70c (diff) | |
| parent | c8981ff111ccb2419c8689dadc567760e0a20750 (diff) | |
| download | meson-e89b6cdd1037d4c7cfdcb37555f2cbaf66f6ae05.tar.gz | |
Merge pull request #1374 from mesonbuild/simd
Add support for SIMD detection
Diffstat (limited to 'test cases/common')
| -rw-r--r-- | test cases/common/155 simd/fallback.c | 8 | ||||
| -rw-r--r-- | test cases/common/155 simd/meson.build | 43 | ||||
| -rw-r--r-- | test cases/common/155 simd/simd_avx.c | 43 | ||||
| -rw-r--r-- | test cases/common/155 simd/simd_avx2.c | 42 | ||||
| -rw-r--r-- | test cases/common/155 simd/simd_mmx.c | 63 | ||||
| -rw-r--r-- | test cases/common/155 simd/simd_neon.c | 20 | ||||
| -rw-r--r-- | test cases/common/155 simd/simd_sse.c | 29 | ||||
| -rw-r--r-- | test cases/common/155 simd/simd_sse2.c | 37 | ||||
| -rw-r--r-- | test cases/common/155 simd/simd_sse3.c | 38 | ||||
| -rw-r--r-- | test cases/common/155 simd/simd_sse41.c | 40 | ||||
| -rw-r--r-- | test cases/common/155 simd/simd_sse42.c | 43 | ||||
| -rw-r--r-- | test cases/common/155 simd/simd_ssse3.c | 48 | ||||
| -rw-r--r-- | test cases/common/155 simd/simdchecker.c | 93 | ||||
| -rw-r--r-- | test cases/common/155 simd/simdfuncs.h | 67 |
14 files changed, 614 insertions, 0 deletions
diff --git a/test cases/common/155 simd/fallback.c b/test cases/common/155 simd/fallback.c new file mode 100644 index 000000000..ab435f433 --- /dev/null +++ b/test cases/common/155 simd/fallback.c @@ -0,0 +1,8 @@ +#include<simdfuncs.h> + +void increment_fallback(float arr[4]) { + int i; + for(i=0; i<4; i++) { + arr[i]++; + } +} diff --git a/test cases/common/155 simd/meson.build b/test cases/common/155 simd/meson.build new file mode 100644 index 000000000..9da165185 --- /dev/null +++ b/test cases/common/155 simd/meson.build @@ -0,0 +1,43 @@ +project('simd', 'c') + +simd = import('unstable-simd') + +cc = meson.get_compiler('c') + +cdata = configuration_data() + +if not meson.is_cross_build() and host_machine.cpu_family() == 'arm' and cc.get_id() == 'clang' + message('Adding -march=armv7 because assuming that this build happens on Raspbian.') + message('Its Clang seems to be misconfigured and does not support NEON by default.') + add_project_arguments('-march=armv7', language : 'c') +endif + +if cc.get_id() == 'msvc' and cc.version().version_compare('<17') + error('MESON_SKIP_TEST VS2010 produces broken binaries on x86.') +endif + +# FIXME add [a, b] = function() +rval = simd.check('mysimds', + mmx : 'simd_mmx.c', + sse : 'simd_sse.c', + sse2 : 'simd_sse2.c', + sse3 : 'simd_sse3.c', + ssse3 : 'simd_ssse3.c', + sse41 : 'simd_sse41.c', + sse42 : 'simd_sse42.c', + avx : 'simd_avx.c', + avx2 : 'simd_avx2.c', + neon : 'simd_neon.c', + compiler : cc) + +simdlibs = rval[0] +cdata.merge_from(rval[1]) + +configure_file(output : 'simdconfig.h', + configuration : cdata) + +p = executable('simdtest', 'simdchecker.c', 'fallback.c', + link_with : simdlibs) + +test('simdtest', p) + diff --git a/test cases/common/155 simd/simd_avx.c b/test cases/common/155 simd/simd_avx.c new file mode 100644 index 000000000..989620ba3 --- /dev/null +++ b/test cases/common/155 simd/simd_avx.c @@ -0,0 +1,43 @@ +#include<simdconfig.h> +#include<simdfuncs.h> +#include<stdint.h> + +#ifdef _MSC_VER +#include<intrin.h> +int avx_available() { + return 1; +} +#else +#include<immintrin.h> +#include<cpuid.h> + +#ifdef __APPLE__ +/* + * Apple ships a broken __builtin_cpu_supports and + * some machines in the CI farm seem to be too + * old to have AVX so just always return 0 here. + */ +int avx_available() { return 0; } +#else + +int avx_available() { + return __builtin_cpu_supports("avx"); +} +#endif +#endif + +void increment_avx(float arr[4]) { + double darr[4]; + darr[0] = arr[0]; + darr[1] = arr[1]; + darr[2] = arr[2]; + darr[3] = arr[3]; + __m256d val = _mm256_loadu_pd(darr); + __m256d one = _mm256_set1_pd(1.0); + __m256d result = _mm256_add_pd(val, one); + _mm256_storeu_pd(darr, result); + arr[0] = (float)darr[0]; + arr[1] = (float)darr[1]; + arr[2] = (float)darr[2]; + arr[3] = (float)darr[3]; +} diff --git a/test cases/common/155 simd/simd_avx2.c b/test cases/common/155 simd/simd_avx2.c new file mode 100644 index 000000000..15297eb2b --- /dev/null +++ b/test cases/common/155 simd/simd_avx2.c @@ -0,0 +1,42 @@ +#include<simdconfig.h> +#include<simdfuncs.h> +#include<stdint.h> + +/* + * FIXME add proper runtime detection for VS. + */ + +#ifdef _MSC_VER +#include<intrin.h> +int avx2_available() { + return 0; +} +#else +#include<immintrin.h> +#include<cpuid.h> + +#if defined(__APPLE__) +int avx2_available() { return 0; } +#else +int avx2_available() { + return __builtin_cpu_supports("avx2"); +} +#endif +#endif + +void increment_avx2(float arr[4]) { + double darr[4]; + darr[0] = arr[0]; + darr[1] = arr[1]; + darr[2] = arr[2]; + darr[3] = arr[3]; + __m256d val = _mm256_loadu_pd(darr); + __m256d one = _mm256_set1_pd(1.0); + __m256d result = _mm256_add_pd(val, one); + _mm256_storeu_pd(darr, result); + one = _mm256_permute4x64_pd(one, 66); /* A no-op, just here to use AVX2. */ + arr[0] = (float)darr[0]; + arr[1] = (float)darr[1]; + arr[2] = (float)darr[2]; + arr[3] = (float)darr[3]; +} diff --git a/test cases/common/155 simd/simd_mmx.c b/test cases/common/155 simd/simd_mmx.c new file mode 100644 index 000000000..731abd14a --- /dev/null +++ b/test cases/common/155 simd/simd_mmx.c @@ -0,0 +1,63 @@ +#include<simdconfig.h> +#include<simdfuncs.h> + +#include<stdint.h> + +#ifdef _MSC_VER +#include<intrin.h> +int mmx_available() { + return 1; +} +/* Contrary to MSDN documentation, MMX intrinsics + * just plain don't work. + */ +void increment_mmx(float arr[4]) { + arr[0]++; + arr[1]++; + arr[2]++; + arr[3]++; +} +#elif defined(__MINGW32__) +int mmx_available() { + return 1; +} +/* MinGW does not seem to ship with MMX or it is broken. + */ +void increment_mmx(float arr[4]) { + arr[0]++; + arr[1]++; + arr[2]++; + arr[3]++; +} +#else +#include<mmintrin.h> +#include<cpuid.h> + +#if defined(__APPLE__) +int mmx_available() { return 1; } +#else +int mmx_available() { + return __builtin_cpu_supports("mmx"); +} +#endif +void increment_mmx(float arr[4]) { + /* Super ugly but we know that values in arr are always small + * enough to fit in int16; + */ + int i; + __m64 packed = _mm_set_pi16(arr[3], arr[2], arr[1], arr[0]); + __m64 incr = _mm_set1_pi16(1); + __m64 result = _mm_add_pi16(packed, incr); + /* Should be + * int64_t unpacker = _m_to_int64(result); + * but it does not exist on 32 bit platforms for some reason. + */ + int64_t unpacker = (int64_t)(result); + _mm_empty(); + for(i=0; i<4; i++) { + arr[i] = (float)(unpacker & ((1<<16)-1)); + unpacker >>= 16; + } +} + +#endif diff --git a/test cases/common/155 simd/simd_neon.c b/test cases/common/155 simd/simd_neon.c new file mode 100644 index 000000000..20820992b --- /dev/null +++ b/test cases/common/155 simd/simd_neon.c @@ -0,0 +1,20 @@ +#include<simdconfig.h> +#include<simdfuncs.h> + +#include<arm_neon.h> +#include<stdint.h> + +int neon_available() { + return 1; /* Incorrect, but I don't know how to check this properly. */ +} + +void increment_neon(float arr[4]) { + float32x2_t a1, a2, one; + a1 = vld1_f32(arr); + a2 = vld1_f32(&arr[2]); + one = vdup_n_f32(1.0); + a1 = vadd_f32(a1, one); + a2 = vadd_f32(a2, one); + vst1_f32(arr, a1); + vst1_f32(&arr[2], a2); +} diff --git a/test cases/common/155 simd/simd_sse.c b/test cases/common/155 simd/simd_sse.c new file mode 100644 index 000000000..3c9fe622e --- /dev/null +++ b/test cases/common/155 simd/simd_sse.c @@ -0,0 +1,29 @@ +#include<simdconfig.h> +#include<simdfuncs.h> + +#ifdef _MSC_VER +#include<intrin.h> +int sse_available() { + return 1; +} +#else + +#include<xmmintrin.h> +#include<cpuid.h> +#include<stdint.h> + +#if defined(__APPLE__) +int sse_available() { return 1; } +#else +int sse_available() { + return __builtin_cpu_supports("sse"); +} +#endif +#endif + +void increment_sse(float arr[4]) { + __m128 val = _mm_load_ps(arr); + __m128 one = _mm_set_ps1(1.0); + __m128 result = _mm_add_ps(val, one); + _mm_storeu_ps(arr, result); +} diff --git a/test cases/common/155 simd/simd_sse2.c b/test cases/common/155 simd/simd_sse2.c new file mode 100644 index 000000000..02745337b --- /dev/null +++ b/test cases/common/155 simd/simd_sse2.c @@ -0,0 +1,37 @@ +#include<simdconfig.h> +#include<simdfuncs.h> +#include<emmintrin.h> + +#ifdef _MSC_VER +int sse2_available() { + return 1; +} + +#else +#include<cpuid.h> +#include<stdint.h> + +#if defined(__APPLE__) +int sse2_available() { return 1; } +#else +int sse2_available() { + return __builtin_cpu_supports("sse2"); +} +#endif +#endif + +void increment_sse2(float arr[4]) { + double darr[4]; + __m128d val1 = _mm_set_pd(arr[0], arr[1]); + __m128d val2 = _mm_set_pd(arr[2], arr[3]); + __m128d one = _mm_set_pd(1.0, 1.0); + __m128d result = _mm_add_pd(val1, one); + _mm_store_pd(darr, result); + result = _mm_add_pd(val2, one); + _mm_store_pd(&darr[2], result); + arr[0] = (float)darr[1]; + arr[1] = (float)darr[0]; + arr[2] = (float)darr[3]; + arr[3] = (float)darr[2]; +} + diff --git a/test cases/common/155 simd/simd_sse3.c b/test cases/common/155 simd/simd_sse3.c new file mode 100644 index 000000000..e97d10285 --- /dev/null +++ b/test cases/common/155 simd/simd_sse3.c @@ -0,0 +1,38 @@ +#include<simdconfig.h> +#include<simdfuncs.h> + +#ifdef _MSC_VER +#include<intrin.h> +int sse3_available() { + return 1; +} +#else + +#include<pmmintrin.h> +#include<cpuid.h> +#include<stdint.h> + +#if defined(__APPLE__) +int sse3_available() { return 1; } +#else +int sse3_available() { + return __builtin_cpu_supports("sse3"); +} +#endif +#endif + +void increment_sse3(float arr[4]) { + double darr[4]; + __m128d val1 = _mm_set_pd(arr[0], arr[1]); + __m128d val2 = _mm_set_pd(arr[2], arr[3]); + __m128d one = _mm_set_pd(1.0, 1.0); + __m128d result = _mm_add_pd(val1, one); + _mm_store_pd(darr, result); + result = _mm_add_pd(val2, one); + _mm_store_pd(&darr[2], result); + result = _mm_hadd_pd(val1, val2); /* This does nothing. Only here so we use an SSE3 instruction. */ + arr[0] = (float)darr[1]; + arr[1] = (float)darr[0]; + arr[2] = (float)darr[3]; + arr[3] = (float)darr[2]; +} diff --git a/test cases/common/155 simd/simd_sse41.c b/test cases/common/155 simd/simd_sse41.c new file mode 100644 index 000000000..0308c7e49 --- /dev/null +++ b/test cases/common/155 simd/simd_sse41.c @@ -0,0 +1,40 @@ +#include<simdconfig.h> +#include<simdfuncs.h> + +#include<stdint.h> + +#ifdef _MSC_VER +#include<intrin.h> + +int sse41_available() { + return 1; +} + +#else +#include<smmintrin.h> +#include<cpuid.h> + +#if defined(__APPLE__) +int sse41_available() { return 1; } +#else +int sse41_available() { + return __builtin_cpu_supports("sse4.1"); +} +#endif +#endif + +void increment_sse41(float arr[4]) { + double darr[4]; + __m128d val1 = _mm_set_pd(arr[0], arr[1]); + __m128d val2 = _mm_set_pd(arr[2], arr[3]); + __m128d one = _mm_set_pd(1.0, 1.0); + __m128d result = _mm_add_pd(val1, one); + result = _mm_ceil_pd(result); /* A no-op, only here to use a SSE4.1 intrinsic. */ + _mm_store_pd(darr, result); + result = _mm_add_pd(val2, one); + _mm_store_pd(&darr[2], result); + arr[0] = (float)darr[1]; + arr[1] = (float)darr[0]; + arr[2] = (float)darr[3]; + arr[3] = (float)darr[2]; +} diff --git a/test cases/common/155 simd/simd_sse42.c b/test cases/common/155 simd/simd_sse42.c new file mode 100644 index 000000000..137ffc441 --- /dev/null +++ b/test cases/common/155 simd/simd_sse42.c @@ -0,0 +1,43 @@ +#include<simdconfig.h> +#include<simdfuncs.h> +#include<stdint.h> + +#ifdef _MSC_VER +#include<intrin.h> + +int sse42_available() { + return 1; +} + +#else + +#include<nmmintrin.h> +#include<cpuid.h> + +#ifdef __APPLE__ +int sse42_available() { + return 1; +} +#else +int sse42_available() { + return __builtin_cpu_supports("sse4.2"); +} +#endif + +#endif + +void increment_sse42(float arr[4]) { + double darr[4]; + __m128d val1 = _mm_set_pd(arr[0], arr[1]); + __m128d val2 = _mm_set_pd(arr[2], arr[3]); + __m128d one = _mm_set_pd(1.0, 1.0); + __m128d result = _mm_add_pd(val1, one); + _mm_store_pd(darr, result); + result = _mm_add_pd(val2, one); + _mm_store_pd(&darr[2], result); + _mm_crc32_u32(42, 99); /* A no-op, only here to use an SSE4.2 instruction. */ + arr[0] = (float)darr[1]; + arr[1] = (float)darr[0]; + arr[2] = (float)darr[3]; + arr[3] = (float)darr[2]; +} diff --git a/test cases/common/155 simd/simd_ssse3.c b/test cases/common/155 simd/simd_ssse3.c new file mode 100644 index 000000000..ab4dff4f8 --- /dev/null +++ b/test cases/common/155 simd/simd_ssse3.c @@ -0,0 +1,48 @@ +#include<simdconfig.h> +#include<simdfuncs.h> + +#include<emmintrin.h> +#include<tmmintrin.h> + +#ifdef _MSC_VER +#include<intrin.h> + +int ssse3_available() { + return 1; +} + +#else + +#include<cpuid.h> +#include<stdint.h> + +int ssse3_available() { +#ifdef __APPLE__ + return 1; +#elif defined(__clang__) + /* https://github.com/numpy/numpy/issues/8130 */ + return __builtin_cpu_supports("sse4.1"); +#else + return __builtin_cpu_supports("ssse3"); +#endif +} + +#endif + +void increment_ssse3(float arr[4]) { + double darr[4]; + __m128d val1 = _mm_set_pd(arr[0], arr[1]); + __m128d val2 = _mm_set_pd(arr[2], arr[3]); + __m128d one = _mm_set_pd(1.0, 1.0); + __m128d result = _mm_add_pd(val1, one); + __m128i tmp1, tmp2; + tmp1 = tmp2 = _mm_set1_epi16(0); + _mm_store_pd(darr, result); + result = _mm_add_pd(val2, one); + _mm_store_pd(&darr[2], result); + tmp1 = _mm_hadd_epi32(tmp1, tmp2); /* This does nothing. Only here so we use an SSSE3 instruction. */ + arr[0] = (float)darr[1]; + arr[1] = (float)darr[0]; + arr[2] = (float)darr[3]; + arr[3] = (float)darr[2]; +} diff --git a/test cases/common/155 simd/simdchecker.c b/test cases/common/155 simd/simdchecker.c new file mode 100644 index 000000000..222fbf3e3 --- /dev/null +++ b/test cases/common/155 simd/simdchecker.c @@ -0,0 +1,93 @@ +#include<simdfuncs.h> +#include<stdio.h> + +/* + * A function that checks at runtime which simd accelerations are + * available and calls the best one. Falls + * back to plain C implementation if SIMD is not available. + */ + +int main(int argc, char **argv) { + float four[4] = {2.0, 3.0, 4.0, 5.0}; + const float expected[4] = {3.0, 4.0, 5.0, 6.0}; + void (*fptr)(float[4]) = NULL; + const char *type; + int i; + +/* Add here. The first matched one is used so put "better" instruction + * sets at the top. + */ +#if HAVE_NEON + if(fptr == NULL && neon_available()) { + fptr = increment_neon; + type = "NEON"; + } +#endif +#if HAVE_AVX2 + if(fptr == NULL && avx2_available()) { + fptr = increment_avx2; + type = "AVX2"; + } +#endif +#if HAVE_AVX + if(fptr == NULL && avx_available()) { + fptr = increment_avx; + type = "AVX"; + } +#endif +#if HAVE_SSE42 + if(fptr == NULL && sse42_available()) { + fptr = increment_sse42; + type = "SSE42"; + } +#endif +#if HAVE_SSE41 + if(fptr == NULL && sse41_available()) { + fptr = increment_sse41; + type = "SSE41"; + } +#endif +#if HAVE_SSSE3 + if(fptr == NULL && ssse3_available()) { + fptr = increment_ssse3; + type = "SSSE3"; + } +#endif +#if HAVE_SSE3 + if(fptr == NULL && sse3_available()) { + fptr = increment_sse3; + type = "SSE3"; + } +#endif +#if HAVE_SSE2 + if(fptr == NULL && sse2_available()) { + fptr = increment_sse2; + type = "SSE2"; + } +#endif +#if HAVE_SSE + if(fptr == NULL && sse_available()) { + fptr = increment_sse; + type = "SSE"; + } +#endif +#if HAVE_MMX + if(fptr == NULL && mmx_available()) { + fptr = increment_mmx; + type = "MMX"; + } +#endif + if(fptr == NULL) { + fptr = increment_fallback; + type = "fallback"; + } + printf("Using %s.\n", type); + fptr(four); + for(i=0; i<4; i++) { + if(four[i] != expected[i]) { + printf("Increment function failed, got %f expected %f.\n", four[i], expected[i]); + return 1; + } + } + return 0; +} diff --git a/test cases/common/155 simd/simdfuncs.h b/test cases/common/155 simd/simdfuncs.h new file mode 100644 index 000000000..dfb056068 --- /dev/null +++ b/test cases/common/155 simd/simdfuncs.h @@ -0,0 +1,67 @@ +#pragma once + +#include<simdconfig.h> + +/* Yes, I do know that arr[4] decays into a pointer + * as a function argument. Don't do this in real code + * but for this test it is ok. + */ + +void increment_fallback(float arr[4]); + +#if HAVE_MMX +int mmx_available(); +void increment_mmx(float arr[4]); +#endif + +#if HAVE_SSE +int sse_available(); +void increment_sse(float arr[4]); +#endif + +#if HAVE_SSE2 +int sse2_available(); +void increment_sse2(float arr[4]); +#endif + +#if HAVE_SSE3 +int sse3_available(); +void increment_sse3(float arr[4]); +#endif + +#if HAVE_SSSE3 +int ssse3_available(); +void increment_ssse3(float arr[4]); +#endif + +#if HAVE_SSE41 +int sse41_available(); +void increment_sse41(float arr[4]); +#endif + +#if HAVE_SSE42 +int sse42_available(); +void increment_sse42(float arr[4]); +#endif + +#if HAVE_AVX +int avx_available(); +void increment_avx(float arr[4]); +#endif + +#if HAVE_AVX2 +int avx2_available(); +void increment_avx2(float arr[4]); +#endif + +#if HAVE_NEON +int neon_available(); +void increment_neon(float arr[4]); +#endif + +#if HAVE_ALTIVEC +int altivec_available(); +void increment_altivec(float arr[4]); +#endif + +/* And so on. */ |
