From be92a6a3a60666f4dd00c96833cfdbf7706ffb05 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Mon, 29 May 2017 20:17:18 +0300 Subject: Renamed test dir. --- test cases/common/139 simd/fallback.c | 8 --- test cases/common/139 simd/meson.build | 43 --------------- test cases/common/139 simd/simd_avx.c | 43 --------------- test cases/common/139 simd/simd_avx2.c | 42 --------------- test cases/common/139 simd/simd_mmx.c | 63 ---------------------- test cases/common/139 simd/simd_neon.c | 20 ------- test cases/common/139 simd/simd_sse.c | 29 ---------- test cases/common/139 simd/simd_sse2.c | 37 ------------- test cases/common/139 simd/simd_sse3.c | 38 ------------- test cases/common/139 simd/simd_sse41.c | 40 -------------- test cases/common/139 simd/simd_sse42.c | 43 --------------- test cases/common/139 simd/simd_ssse3.c | 48 ----------------- test cases/common/139 simd/simdchecker.c | 93 -------------------------------- test cases/common/139 simd/simdfuncs.h | 67 ----------------------- test cases/common/153 simd/fallback.c | 8 +++ test cases/common/153 simd/meson.build | 43 +++++++++++++++ test cases/common/153 simd/simd_avx.c | 43 +++++++++++++++ test cases/common/153 simd/simd_avx2.c | 42 +++++++++++++++ test cases/common/153 simd/simd_mmx.c | 63 ++++++++++++++++++++++ test cases/common/153 simd/simd_neon.c | 20 +++++++ test cases/common/153 simd/simd_sse.c | 29 ++++++++++ test cases/common/153 simd/simd_sse2.c | 37 +++++++++++++ test cases/common/153 simd/simd_sse3.c | 38 +++++++++++++ test cases/common/153 simd/simd_sse41.c | 40 ++++++++++++++ test cases/common/153 simd/simd_sse42.c | 43 +++++++++++++++ test cases/common/153 simd/simd_ssse3.c | 48 +++++++++++++++++ test cases/common/153 simd/simdchecker.c | 93 ++++++++++++++++++++++++++++++++ test cases/common/153 simd/simdfuncs.h | 67 +++++++++++++++++++++++ 28 files changed, 614 insertions(+), 614 deletions(-) delete mode 100644 test cases/common/139 simd/fallback.c delete mode 100644 test cases/common/139 simd/meson.build delete mode 100644 test cases/common/139 simd/simd_avx.c delete mode 100644 test cases/common/139 simd/simd_avx2.c delete mode 100644 test cases/common/139 simd/simd_mmx.c delete mode 100644 test cases/common/139 simd/simd_neon.c delete mode 100644 test cases/common/139 simd/simd_sse.c delete mode 100644 test cases/common/139 simd/simd_sse2.c delete mode 100644 test cases/common/139 simd/simd_sse3.c delete mode 100644 test cases/common/139 simd/simd_sse41.c delete mode 100644 test cases/common/139 simd/simd_sse42.c delete mode 100644 test cases/common/139 simd/simd_ssse3.c delete mode 100644 test cases/common/139 simd/simdchecker.c delete mode 100644 test cases/common/139 simd/simdfuncs.h create mode 100644 test cases/common/153 simd/fallback.c create mode 100644 test cases/common/153 simd/meson.build create mode 100644 test cases/common/153 simd/simd_avx.c create mode 100644 test cases/common/153 simd/simd_avx2.c create mode 100644 test cases/common/153 simd/simd_mmx.c create mode 100644 test cases/common/153 simd/simd_neon.c create mode 100644 test cases/common/153 simd/simd_sse.c create mode 100644 test cases/common/153 simd/simd_sse2.c create mode 100644 test cases/common/153 simd/simd_sse3.c create mode 100644 test cases/common/153 simd/simd_sse41.c create mode 100644 test cases/common/153 simd/simd_sse42.c create mode 100644 test cases/common/153 simd/simd_ssse3.c create mode 100644 test cases/common/153 simd/simdchecker.c create mode 100644 test cases/common/153 simd/simdfuncs.h diff --git a/test cases/common/139 simd/fallback.c b/test cases/common/139 simd/fallback.c deleted file mode 100644 index ab435f433..000000000 --- a/test cases/common/139 simd/fallback.c +++ /dev/null @@ -1,8 +0,0 @@ -#include - -void increment_fallback(float arr[4]) { - int i; - for(i=0; i<4; i++) { - arr[i]++; - } -} diff --git a/test cases/common/139 simd/meson.build b/test cases/common/139 simd/meson.build deleted file mode 100644 index d84b72248..000000000 --- a/test cases/common/139 simd/meson.build +++ /dev/null @@ -1,43 +0,0 @@ -project('simd', 'c') - -simd = import('simd') - -cc = meson.get_compiler('c') - -cdata = configuration_data() - -if not meson.is_cross_build() and host_machine.cpu_family() == 'arm' and cc.get_id() == 'clang' - message('Adding -march=armv7 because assuming that this build happens on Raspbian.') - message('Its Clang seems to be misconfigured and does not support NEON by default.') - add_project_arguments('-march=armv7', language : 'c') -endif - -if cc.get_id() == 'msvc' and cc.version().version_compare('<17') - error('MESON_SKIP_TEST VS2010 produces broken binaries on x86.') -endif - -# FIXME add [a, b] = function() -rval = simd.check('mysimds', - mmx : 'simd_mmx.c', - sse : 'simd_sse.c', - sse2 : 'simd_sse2.c', - sse3 : 'simd_sse3.c', - ssse3 : 'simd_ssse3.c', - sse41 : 'simd_sse41.c', - sse42 : 'simd_sse42.c', - avx : 'simd_avx.c', - avx2 : 'simd_avx2.c', - neon : 'simd_neon.c', - compiler : cc) - -simdlibs = rval[0] -cdata.merge_from(rval[1]) - -configure_file(output : 'simdconfig.h', - configuration : cdata) - -p = executable('simdtest', 'simdchecker.c', 'fallback.c', - link_with : simdlibs) - -test('simdtest', p) - diff --git a/test cases/common/139 simd/simd_avx.c b/test cases/common/139 simd/simd_avx.c deleted file mode 100644 index 989620ba3..000000000 --- a/test cases/common/139 simd/simd_avx.c +++ /dev/null @@ -1,43 +0,0 @@ -#include -#include -#include - -#ifdef _MSC_VER -#include -int avx_available() { - return 1; -} -#else -#include -#include - -#ifdef __APPLE__ -/* - * Apple ships a broken __builtin_cpu_supports and - * some machines in the CI farm seem to be too - * old to have AVX so just always return 0 here. - */ -int avx_available() { return 0; } -#else - -int avx_available() { - return __builtin_cpu_supports("avx"); -} -#endif -#endif - -void increment_avx(float arr[4]) { - double darr[4]; - darr[0] = arr[0]; - darr[1] = arr[1]; - darr[2] = arr[2]; - darr[3] = arr[3]; - __m256d val = _mm256_loadu_pd(darr); - __m256d one = _mm256_set1_pd(1.0); - __m256d result = _mm256_add_pd(val, one); - _mm256_storeu_pd(darr, result); - arr[0] = (float)darr[0]; - arr[1] = (float)darr[1]; - arr[2] = (float)darr[2]; - arr[3] = (float)darr[3]; -} diff --git a/test cases/common/139 simd/simd_avx2.c b/test cases/common/139 simd/simd_avx2.c deleted file mode 100644 index 15297eb2b..000000000 --- a/test cases/common/139 simd/simd_avx2.c +++ /dev/null @@ -1,42 +0,0 @@ -#include -#include -#include - -/* - * FIXME add proper runtime detection for VS. - */ - -#ifdef _MSC_VER -#include -int avx2_available() { - return 0; -} -#else -#include -#include - -#if defined(__APPLE__) -int avx2_available() { return 0; } -#else -int avx2_available() { - return __builtin_cpu_supports("avx2"); -} -#endif -#endif - -void increment_avx2(float arr[4]) { - double darr[4]; - darr[0] = arr[0]; - darr[1] = arr[1]; - darr[2] = arr[2]; - darr[3] = arr[3]; - __m256d val = _mm256_loadu_pd(darr); - __m256d one = _mm256_set1_pd(1.0); - __m256d result = _mm256_add_pd(val, one); - _mm256_storeu_pd(darr, result); - one = _mm256_permute4x64_pd(one, 66); /* A no-op, just here to use AVX2. */ - arr[0] = (float)darr[0]; - arr[1] = (float)darr[1]; - arr[2] = (float)darr[2]; - arr[3] = (float)darr[3]; -} diff --git a/test cases/common/139 simd/simd_mmx.c b/test cases/common/139 simd/simd_mmx.c deleted file mode 100644 index 731abd14a..000000000 --- a/test cases/common/139 simd/simd_mmx.c +++ /dev/null @@ -1,63 +0,0 @@ -#include -#include - -#include - -#ifdef _MSC_VER -#include -int mmx_available() { - return 1; -} -/* Contrary to MSDN documentation, MMX intrinsics - * just plain don't work. - */ -void increment_mmx(float arr[4]) { - arr[0]++; - arr[1]++; - arr[2]++; - arr[3]++; -} -#elif defined(__MINGW32__) -int mmx_available() { - return 1; -} -/* MinGW does not seem to ship with MMX or it is broken. - */ -void increment_mmx(float arr[4]) { - arr[0]++; - arr[1]++; - arr[2]++; - arr[3]++; -} -#else -#include -#include - -#if defined(__APPLE__) -int mmx_available() { return 1; } -#else -int mmx_available() { - return __builtin_cpu_supports("mmx"); -} -#endif -void increment_mmx(float arr[4]) { - /* Super ugly but we know that values in arr are always small - * enough to fit in int16; - */ - int i; - __m64 packed = _mm_set_pi16(arr[3], arr[2], arr[1], arr[0]); - __m64 incr = _mm_set1_pi16(1); - __m64 result = _mm_add_pi16(packed, incr); - /* Should be - * int64_t unpacker = _m_to_int64(result); - * but it does not exist on 32 bit platforms for some reason. - */ - int64_t unpacker = (int64_t)(result); - _mm_empty(); - for(i=0; i<4; i++) { - arr[i] = (float)(unpacker & ((1<<16)-1)); - unpacker >>= 16; - } -} - -#endif diff --git a/test cases/common/139 simd/simd_neon.c b/test cases/common/139 simd/simd_neon.c deleted file mode 100644 index 20820992b..000000000 --- a/test cases/common/139 simd/simd_neon.c +++ /dev/null @@ -1,20 +0,0 @@ -#include -#include - -#include -#include - -int neon_available() { - return 1; /* Incorrect, but I don't know how to check this properly. */ -} - -void increment_neon(float arr[4]) { - float32x2_t a1, a2, one; - a1 = vld1_f32(arr); - a2 = vld1_f32(&arr[2]); - one = vdup_n_f32(1.0); - a1 = vadd_f32(a1, one); - a2 = vadd_f32(a2, one); - vst1_f32(arr, a1); - vst1_f32(&arr[2], a2); -} diff --git a/test cases/common/139 simd/simd_sse.c b/test cases/common/139 simd/simd_sse.c deleted file mode 100644 index 3c9fe622e..000000000 --- a/test cases/common/139 simd/simd_sse.c +++ /dev/null @@ -1,29 +0,0 @@ -#include -#include - -#ifdef _MSC_VER -#include -int sse_available() { - return 1; -} -#else - -#include -#include -#include - -#if defined(__APPLE__) -int sse_available() { return 1; } -#else -int sse_available() { - return __builtin_cpu_supports("sse"); -} -#endif -#endif - -void increment_sse(float arr[4]) { - __m128 val = _mm_load_ps(arr); - __m128 one = _mm_set_ps1(1.0); - __m128 result = _mm_add_ps(val, one); - _mm_storeu_ps(arr, result); -} diff --git a/test cases/common/139 simd/simd_sse2.c b/test cases/common/139 simd/simd_sse2.c deleted file mode 100644 index 02745337b..000000000 --- a/test cases/common/139 simd/simd_sse2.c +++ /dev/null @@ -1,37 +0,0 @@ -#include -#include -#include - -#ifdef _MSC_VER -int sse2_available() { - return 1; -} - -#else -#include -#include - -#if defined(__APPLE__) -int sse2_available() { return 1; } -#else -int sse2_available() { - return __builtin_cpu_supports("sse2"); -} -#endif -#endif - -void increment_sse2(float arr[4]) { - double darr[4]; - __m128d val1 = _mm_set_pd(arr[0], arr[1]); - __m128d val2 = _mm_set_pd(arr[2], arr[3]); - __m128d one = _mm_set_pd(1.0, 1.0); - __m128d result = _mm_add_pd(val1, one); - _mm_store_pd(darr, result); - result = _mm_add_pd(val2, one); - _mm_store_pd(&darr[2], result); - arr[0] = (float)darr[1]; - arr[1] = (float)darr[0]; - arr[2] = (float)darr[3]; - arr[3] = (float)darr[2]; -} - diff --git a/test cases/common/139 simd/simd_sse3.c b/test cases/common/139 simd/simd_sse3.c deleted file mode 100644 index e97d10285..000000000 --- a/test cases/common/139 simd/simd_sse3.c +++ /dev/null @@ -1,38 +0,0 @@ -#include -#include - -#ifdef _MSC_VER -#include -int sse3_available() { - return 1; -} -#else - -#include -#include -#include - -#if defined(__APPLE__) -int sse3_available() { return 1; } -#else -int sse3_available() { - return __builtin_cpu_supports("sse3"); -} -#endif -#endif - -void increment_sse3(float arr[4]) { - double darr[4]; - __m128d val1 = _mm_set_pd(arr[0], arr[1]); - __m128d val2 = _mm_set_pd(arr[2], arr[3]); - __m128d one = _mm_set_pd(1.0, 1.0); - __m128d result = _mm_add_pd(val1, one); - _mm_store_pd(darr, result); - result = _mm_add_pd(val2, one); - _mm_store_pd(&darr[2], result); - result = _mm_hadd_pd(val1, val2); /* This does nothing. Only here so we use an SSE3 instruction. */ - arr[0] = (float)darr[1]; - arr[1] = (float)darr[0]; - arr[2] = (float)darr[3]; - arr[3] = (float)darr[2]; -} diff --git a/test cases/common/139 simd/simd_sse41.c b/test cases/common/139 simd/simd_sse41.c deleted file mode 100644 index 0308c7e49..000000000 --- a/test cases/common/139 simd/simd_sse41.c +++ /dev/null @@ -1,40 +0,0 @@ -#include -#include - -#include - -#ifdef _MSC_VER -#include - -int sse41_available() { - return 1; -} - -#else -#include -#include - -#if defined(__APPLE__) -int sse41_available() { return 1; } -#else -int sse41_available() { - return __builtin_cpu_supports("sse4.1"); -} -#endif -#endif - -void increment_sse41(float arr[4]) { - double darr[4]; - __m128d val1 = _mm_set_pd(arr[0], arr[1]); - __m128d val2 = _mm_set_pd(arr[2], arr[3]); - __m128d one = _mm_set_pd(1.0, 1.0); - __m128d result = _mm_add_pd(val1, one); - result = _mm_ceil_pd(result); /* A no-op, only here to use a SSE4.1 intrinsic. */ - _mm_store_pd(darr, result); - result = _mm_add_pd(val2, one); - _mm_store_pd(&darr[2], result); - arr[0] = (float)darr[1]; - arr[1] = (float)darr[0]; - arr[2] = (float)darr[3]; - arr[3] = (float)darr[2]; -} diff --git a/test cases/common/139 simd/simd_sse42.c b/test cases/common/139 simd/simd_sse42.c deleted file mode 100644 index 137ffc441..000000000 --- a/test cases/common/139 simd/simd_sse42.c +++ /dev/null @@ -1,43 +0,0 @@ -#include -#include -#include - -#ifdef _MSC_VER -#include - -int sse42_available() { - return 1; -} - -#else - -#include -#include - -#ifdef __APPLE__ -int sse42_available() { - return 1; -} -#else -int sse42_available() { - return __builtin_cpu_supports("sse4.2"); -} -#endif - -#endif - -void increment_sse42(float arr[4]) { - double darr[4]; - __m128d val1 = _mm_set_pd(arr[0], arr[1]); - __m128d val2 = _mm_set_pd(arr[2], arr[3]); - __m128d one = _mm_set_pd(1.0, 1.0); - __m128d result = _mm_add_pd(val1, one); - _mm_store_pd(darr, result); - result = _mm_add_pd(val2, one); - _mm_store_pd(&darr[2], result); - _mm_crc32_u32(42, 99); /* A no-op, only here to use an SSE4.2 instruction. */ - arr[0] = (float)darr[1]; - arr[1] = (float)darr[0]; - arr[2] = (float)darr[3]; - arr[3] = (float)darr[2]; -} diff --git a/test cases/common/139 simd/simd_ssse3.c b/test cases/common/139 simd/simd_ssse3.c deleted file mode 100644 index ab4dff4f8..000000000 --- a/test cases/common/139 simd/simd_ssse3.c +++ /dev/null @@ -1,48 +0,0 @@ -#include -#include - -#include -#include - -#ifdef _MSC_VER -#include - -int ssse3_available() { - return 1; -} - -#else - -#include -#include - -int ssse3_available() { -#ifdef __APPLE__ - return 1; -#elif defined(__clang__) - /* https://github.com/numpy/numpy/issues/8130 */ - return __builtin_cpu_supports("sse4.1"); -#else - return __builtin_cpu_supports("ssse3"); -#endif -} - -#endif - -void increment_ssse3(float arr[4]) { - double darr[4]; - __m128d val1 = _mm_set_pd(arr[0], arr[1]); - __m128d val2 = _mm_set_pd(arr[2], arr[3]); - __m128d one = _mm_set_pd(1.0, 1.0); - __m128d result = _mm_add_pd(val1, one); - __m128i tmp1, tmp2; - tmp1 = tmp2 = _mm_set1_epi16(0); - _mm_store_pd(darr, result); - result = _mm_add_pd(val2, one); - _mm_store_pd(&darr[2], result); - tmp1 = _mm_hadd_epi32(tmp1, tmp2); /* This does nothing. Only here so we use an SSSE3 instruction. */ - arr[0] = (float)darr[1]; - arr[1] = (float)darr[0]; - arr[2] = (float)darr[3]; - arr[3] = (float)darr[2]; -} diff --git a/test cases/common/139 simd/simdchecker.c b/test cases/common/139 simd/simdchecker.c deleted file mode 100644 index 222fbf3e3..000000000 --- a/test cases/common/139 simd/simdchecker.c +++ /dev/null @@ -1,93 +0,0 @@ -#include -#include - -/* - * A function that checks at runtime which simd accelerations are - * available and calls the best one. Falls - * back to plain C implementation if SIMD is not available. - */ - -int main(int argc, char **argv) { - float four[4] = {2.0, 3.0, 4.0, 5.0}; - const float expected[4] = {3.0, 4.0, 5.0, 6.0}; - void (*fptr)(float[4]) = NULL; - const char *type; - int i; - -/* Add here. The first matched one is used so put "better" instruction - * sets at the top. - */ -#if HAVE_NEON - if(fptr == NULL && neon_available()) { - fptr = increment_neon; - type = "NEON"; - } -#endif -#if HAVE_AVX2 - if(fptr == NULL && avx2_available()) { - fptr = increment_avx2; - type = "AVX2"; - } -#endif -#if HAVE_AVX - if(fptr == NULL && avx_available()) { - fptr = increment_avx; - type = "AVX"; - } -#endif -#if HAVE_SSE42 - if(fptr == NULL && sse42_available()) { - fptr = increment_sse42; - type = "SSE42"; - } -#endif -#if HAVE_SSE41 - if(fptr == NULL && sse41_available()) { - fptr = increment_sse41; - type = "SSE41"; - } -#endif -#if HAVE_SSSE3 - if(fptr == NULL && ssse3_available()) { - fptr = increment_ssse3; - type = "SSSE3"; - } -#endif -#if HAVE_SSE3 - if(fptr == NULL && sse3_available()) { - fptr = increment_sse3; - type = "SSE3"; - } -#endif -#if HAVE_SSE2 - if(fptr == NULL && sse2_available()) { - fptr = increment_sse2; - type = "SSE2"; - } -#endif -#if HAVE_SSE - if(fptr == NULL && sse_available()) { - fptr = increment_sse; - type = "SSE"; - } -#endif -#if HAVE_MMX - if(fptr == NULL && mmx_available()) { - fptr = increment_mmx; - type = "MMX"; - } -#endif - if(fptr == NULL) { - fptr = increment_fallback; - type = "fallback"; - } - printf("Using %s.\n", type); - fptr(four); - for(i=0; i<4; i++) { - if(four[i] != expected[i]) { - printf("Increment function failed, got %f expected %f.\n", four[i], expected[i]); - return 1; - } - } - return 0; -} diff --git a/test cases/common/139 simd/simdfuncs.h b/test cases/common/139 simd/simdfuncs.h deleted file mode 100644 index dfb056068..000000000 --- a/test cases/common/139 simd/simdfuncs.h +++ /dev/null @@ -1,67 +0,0 @@ -#pragma once - -#include - -/* Yes, I do know that arr[4] decays into a pointer - * as a function argument. Don't do this in real code - * but for this test it is ok. - */ - -void increment_fallback(float arr[4]); - -#if HAVE_MMX -int mmx_available(); -void increment_mmx(float arr[4]); -#endif - -#if HAVE_SSE -int sse_available(); -void increment_sse(float arr[4]); -#endif - -#if HAVE_SSE2 -int sse2_available(); -void increment_sse2(float arr[4]); -#endif - -#if HAVE_SSE3 -int sse3_available(); -void increment_sse3(float arr[4]); -#endif - -#if HAVE_SSSE3 -int ssse3_available(); -void increment_ssse3(float arr[4]); -#endif - -#if HAVE_SSE41 -int sse41_available(); -void increment_sse41(float arr[4]); -#endif - -#if HAVE_SSE42 -int sse42_available(); -void increment_sse42(float arr[4]); -#endif - -#if HAVE_AVX -int avx_available(); -void increment_avx(float arr[4]); -#endif - -#if HAVE_AVX2 -int avx2_available(); -void increment_avx2(float arr[4]); -#endif - -#if HAVE_NEON -int neon_available(); -void increment_neon(float arr[4]); -#endif - -#if HAVE_ALTIVEC -int altivec_available(); -void increment_altivec(float arr[4]); -#endif - -/* And so on. */ diff --git a/test cases/common/153 simd/fallback.c b/test cases/common/153 simd/fallback.c new file mode 100644 index 000000000..ab435f433 --- /dev/null +++ b/test cases/common/153 simd/fallback.c @@ -0,0 +1,8 @@ +#include + +void increment_fallback(float arr[4]) { + int i; + for(i=0; i<4; i++) { + arr[i]++; + } +} diff --git a/test cases/common/153 simd/meson.build b/test cases/common/153 simd/meson.build new file mode 100644 index 000000000..d84b72248 --- /dev/null +++ b/test cases/common/153 simd/meson.build @@ -0,0 +1,43 @@ +project('simd', 'c') + +simd = import('simd') + +cc = meson.get_compiler('c') + +cdata = configuration_data() + +if not meson.is_cross_build() and host_machine.cpu_family() == 'arm' and cc.get_id() == 'clang' + message('Adding -march=armv7 because assuming that this build happens on Raspbian.') + message('Its Clang seems to be misconfigured and does not support NEON by default.') + add_project_arguments('-march=armv7', language : 'c') +endif + +if cc.get_id() == 'msvc' and cc.version().version_compare('<17') + error('MESON_SKIP_TEST VS2010 produces broken binaries on x86.') +endif + +# FIXME add [a, b] = function() +rval = simd.check('mysimds', + mmx : 'simd_mmx.c', + sse : 'simd_sse.c', + sse2 : 'simd_sse2.c', + sse3 : 'simd_sse3.c', + ssse3 : 'simd_ssse3.c', + sse41 : 'simd_sse41.c', + sse42 : 'simd_sse42.c', + avx : 'simd_avx.c', + avx2 : 'simd_avx2.c', + neon : 'simd_neon.c', + compiler : cc) + +simdlibs = rval[0] +cdata.merge_from(rval[1]) + +configure_file(output : 'simdconfig.h', + configuration : cdata) + +p = executable('simdtest', 'simdchecker.c', 'fallback.c', + link_with : simdlibs) + +test('simdtest', p) + diff --git a/test cases/common/153 simd/simd_avx.c b/test cases/common/153 simd/simd_avx.c new file mode 100644 index 000000000..989620ba3 --- /dev/null +++ b/test cases/common/153 simd/simd_avx.c @@ -0,0 +1,43 @@ +#include +#include +#include + +#ifdef _MSC_VER +#include +int avx_available() { + return 1; +} +#else +#include +#include + +#ifdef __APPLE__ +/* + * Apple ships a broken __builtin_cpu_supports and + * some machines in the CI farm seem to be too + * old to have AVX so just always return 0 here. + */ +int avx_available() { return 0; } +#else + +int avx_available() { + return __builtin_cpu_supports("avx"); +} +#endif +#endif + +void increment_avx(float arr[4]) { + double darr[4]; + darr[0] = arr[0]; + darr[1] = arr[1]; + darr[2] = arr[2]; + darr[3] = arr[3]; + __m256d val = _mm256_loadu_pd(darr); + __m256d one = _mm256_set1_pd(1.0); + __m256d result = _mm256_add_pd(val, one); + _mm256_storeu_pd(darr, result); + arr[0] = (float)darr[0]; + arr[1] = (float)darr[1]; + arr[2] = (float)darr[2]; + arr[3] = (float)darr[3]; +} diff --git a/test cases/common/153 simd/simd_avx2.c b/test cases/common/153 simd/simd_avx2.c new file mode 100644 index 000000000..15297eb2b --- /dev/null +++ b/test cases/common/153 simd/simd_avx2.c @@ -0,0 +1,42 @@ +#include +#include +#include + +/* + * FIXME add proper runtime detection for VS. + */ + +#ifdef _MSC_VER +#include +int avx2_available() { + return 0; +} +#else +#include +#include + +#if defined(__APPLE__) +int avx2_available() { return 0; } +#else +int avx2_available() { + return __builtin_cpu_supports("avx2"); +} +#endif +#endif + +void increment_avx2(float arr[4]) { + double darr[4]; + darr[0] = arr[0]; + darr[1] = arr[1]; + darr[2] = arr[2]; + darr[3] = arr[3]; + __m256d val = _mm256_loadu_pd(darr); + __m256d one = _mm256_set1_pd(1.0); + __m256d result = _mm256_add_pd(val, one); + _mm256_storeu_pd(darr, result); + one = _mm256_permute4x64_pd(one, 66); /* A no-op, just here to use AVX2. */ + arr[0] = (float)darr[0]; + arr[1] = (float)darr[1]; + arr[2] = (float)darr[2]; + arr[3] = (float)darr[3]; +} diff --git a/test cases/common/153 simd/simd_mmx.c b/test cases/common/153 simd/simd_mmx.c new file mode 100644 index 000000000..731abd14a --- /dev/null +++ b/test cases/common/153 simd/simd_mmx.c @@ -0,0 +1,63 @@ +#include +#include + +#include + +#ifdef _MSC_VER +#include +int mmx_available() { + return 1; +} +/* Contrary to MSDN documentation, MMX intrinsics + * just plain don't work. + */ +void increment_mmx(float arr[4]) { + arr[0]++; + arr[1]++; + arr[2]++; + arr[3]++; +} +#elif defined(__MINGW32__) +int mmx_available() { + return 1; +} +/* MinGW does not seem to ship with MMX or it is broken. + */ +void increment_mmx(float arr[4]) { + arr[0]++; + arr[1]++; + arr[2]++; + arr[3]++; +} +#else +#include +#include + +#if defined(__APPLE__) +int mmx_available() { return 1; } +#else +int mmx_available() { + return __builtin_cpu_supports("mmx"); +} +#endif +void increment_mmx(float arr[4]) { + /* Super ugly but we know that values in arr are always small + * enough to fit in int16; + */ + int i; + __m64 packed = _mm_set_pi16(arr[3], arr[2], arr[1], arr[0]); + __m64 incr = _mm_set1_pi16(1); + __m64 result = _mm_add_pi16(packed, incr); + /* Should be + * int64_t unpacker = _m_to_int64(result); + * but it does not exist on 32 bit platforms for some reason. + */ + int64_t unpacker = (int64_t)(result); + _mm_empty(); + for(i=0; i<4; i++) { + arr[i] = (float)(unpacker & ((1<<16)-1)); + unpacker >>= 16; + } +} + +#endif diff --git a/test cases/common/153 simd/simd_neon.c b/test cases/common/153 simd/simd_neon.c new file mode 100644 index 000000000..20820992b --- /dev/null +++ b/test cases/common/153 simd/simd_neon.c @@ -0,0 +1,20 @@ +#include +#include + +#include +#include + +int neon_available() { + return 1; /* Incorrect, but I don't know how to check this properly. */ +} + +void increment_neon(float arr[4]) { + float32x2_t a1, a2, one; + a1 = vld1_f32(arr); + a2 = vld1_f32(&arr[2]); + one = vdup_n_f32(1.0); + a1 = vadd_f32(a1, one); + a2 = vadd_f32(a2, one); + vst1_f32(arr, a1); + vst1_f32(&arr[2], a2); +} diff --git a/test cases/common/153 simd/simd_sse.c b/test cases/common/153 simd/simd_sse.c new file mode 100644 index 000000000..3c9fe622e --- /dev/null +++ b/test cases/common/153 simd/simd_sse.c @@ -0,0 +1,29 @@ +#include +#include + +#ifdef _MSC_VER +#include +int sse_available() { + return 1; +} +#else + +#include +#include +#include + +#if defined(__APPLE__) +int sse_available() { return 1; } +#else +int sse_available() { + return __builtin_cpu_supports("sse"); +} +#endif +#endif + +void increment_sse(float arr[4]) { + __m128 val = _mm_load_ps(arr); + __m128 one = _mm_set_ps1(1.0); + __m128 result = _mm_add_ps(val, one); + _mm_storeu_ps(arr, result); +} diff --git a/test cases/common/153 simd/simd_sse2.c b/test cases/common/153 simd/simd_sse2.c new file mode 100644 index 000000000..02745337b --- /dev/null +++ b/test cases/common/153 simd/simd_sse2.c @@ -0,0 +1,37 @@ +#include +#include +#include + +#ifdef _MSC_VER +int sse2_available() { + return 1; +} + +#else +#include +#include + +#if defined(__APPLE__) +int sse2_available() { return 1; } +#else +int sse2_available() { + return __builtin_cpu_supports("sse2"); +} +#endif +#endif + +void increment_sse2(float arr[4]) { + double darr[4]; + __m128d val1 = _mm_set_pd(arr[0], arr[1]); + __m128d val2 = _mm_set_pd(arr[2], arr[3]); + __m128d one = _mm_set_pd(1.0, 1.0); + __m128d result = _mm_add_pd(val1, one); + _mm_store_pd(darr, result); + result = _mm_add_pd(val2, one); + _mm_store_pd(&darr[2], result); + arr[0] = (float)darr[1]; + arr[1] = (float)darr[0]; + arr[2] = (float)darr[3]; + arr[3] = (float)darr[2]; +} + diff --git a/test cases/common/153 simd/simd_sse3.c b/test cases/common/153 simd/simd_sse3.c new file mode 100644 index 000000000..e97d10285 --- /dev/null +++ b/test cases/common/153 simd/simd_sse3.c @@ -0,0 +1,38 @@ +#include +#include + +#ifdef _MSC_VER +#include +int sse3_available() { + return 1; +} +#else + +#include +#include +#include + +#if defined(__APPLE__) +int sse3_available() { return 1; } +#else +int sse3_available() { + return __builtin_cpu_supports("sse3"); +} +#endif +#endif + +void increment_sse3(float arr[4]) { + double darr[4]; + __m128d val1 = _mm_set_pd(arr[0], arr[1]); + __m128d val2 = _mm_set_pd(arr[2], arr[3]); + __m128d one = _mm_set_pd(1.0, 1.0); + __m128d result = _mm_add_pd(val1, one); + _mm_store_pd(darr, result); + result = _mm_add_pd(val2, one); + _mm_store_pd(&darr[2], result); + result = _mm_hadd_pd(val1, val2); /* This does nothing. Only here so we use an SSE3 instruction. */ + arr[0] = (float)darr[1]; + arr[1] = (float)darr[0]; + arr[2] = (float)darr[3]; + arr[3] = (float)darr[2]; +} diff --git a/test cases/common/153 simd/simd_sse41.c b/test cases/common/153 simd/simd_sse41.c new file mode 100644 index 000000000..0308c7e49 --- /dev/null +++ b/test cases/common/153 simd/simd_sse41.c @@ -0,0 +1,40 @@ +#include +#include + +#include + +#ifdef _MSC_VER +#include + +int sse41_available() { + return 1; +} + +#else +#include +#include + +#if defined(__APPLE__) +int sse41_available() { return 1; } +#else +int sse41_available() { + return __builtin_cpu_supports("sse4.1"); +} +#endif +#endif + +void increment_sse41(float arr[4]) { + double darr[4]; + __m128d val1 = _mm_set_pd(arr[0], arr[1]); + __m128d val2 = _mm_set_pd(arr[2], arr[3]); + __m128d one = _mm_set_pd(1.0, 1.0); + __m128d result = _mm_add_pd(val1, one); + result = _mm_ceil_pd(result); /* A no-op, only here to use a SSE4.1 intrinsic. */ + _mm_store_pd(darr, result); + result = _mm_add_pd(val2, one); + _mm_store_pd(&darr[2], result); + arr[0] = (float)darr[1]; + arr[1] = (float)darr[0]; + arr[2] = (float)darr[3]; + arr[3] = (float)darr[2]; +} diff --git a/test cases/common/153 simd/simd_sse42.c b/test cases/common/153 simd/simd_sse42.c new file mode 100644 index 000000000..137ffc441 --- /dev/null +++ b/test cases/common/153 simd/simd_sse42.c @@ -0,0 +1,43 @@ +#include +#include +#include + +#ifdef _MSC_VER +#include + +int sse42_available() { + return 1; +} + +#else + +#include +#include + +#ifdef __APPLE__ +int sse42_available() { + return 1; +} +#else +int sse42_available() { + return __builtin_cpu_supports("sse4.2"); +} +#endif + +#endif + +void increment_sse42(float arr[4]) { + double darr[4]; + __m128d val1 = _mm_set_pd(arr[0], arr[1]); + __m128d val2 = _mm_set_pd(arr[2], arr[3]); + __m128d one = _mm_set_pd(1.0, 1.0); + __m128d result = _mm_add_pd(val1, one); + _mm_store_pd(darr, result); + result = _mm_add_pd(val2, one); + _mm_store_pd(&darr[2], result); + _mm_crc32_u32(42, 99); /* A no-op, only here to use an SSE4.2 instruction. */ + arr[0] = (float)darr[1]; + arr[1] = (float)darr[0]; + arr[2] = (float)darr[3]; + arr[3] = (float)darr[2]; +} diff --git a/test cases/common/153 simd/simd_ssse3.c b/test cases/common/153 simd/simd_ssse3.c new file mode 100644 index 000000000..ab4dff4f8 --- /dev/null +++ b/test cases/common/153 simd/simd_ssse3.c @@ -0,0 +1,48 @@ +#include +#include + +#include +#include + +#ifdef _MSC_VER +#include + +int ssse3_available() { + return 1; +} + +#else + +#include +#include + +int ssse3_available() { +#ifdef __APPLE__ + return 1; +#elif defined(__clang__) + /* https://github.com/numpy/numpy/issues/8130 */ + return __builtin_cpu_supports("sse4.1"); +#else + return __builtin_cpu_supports("ssse3"); +#endif +} + +#endif + +void increment_ssse3(float arr[4]) { + double darr[4]; + __m128d val1 = _mm_set_pd(arr[0], arr[1]); + __m128d val2 = _mm_set_pd(arr[2], arr[3]); + __m128d one = _mm_set_pd(1.0, 1.0); + __m128d result = _mm_add_pd(val1, one); + __m128i tmp1, tmp2; + tmp1 = tmp2 = _mm_set1_epi16(0); + _mm_store_pd(darr, result); + result = _mm_add_pd(val2, one); + _mm_store_pd(&darr[2], result); + tmp1 = _mm_hadd_epi32(tmp1, tmp2); /* This does nothing. Only here so we use an SSSE3 instruction. */ + arr[0] = (float)darr[1]; + arr[1] = (float)darr[0]; + arr[2] = (float)darr[3]; + arr[3] = (float)darr[2]; +} diff --git a/test cases/common/153 simd/simdchecker.c b/test cases/common/153 simd/simdchecker.c new file mode 100644 index 000000000..222fbf3e3 --- /dev/null +++ b/test cases/common/153 simd/simdchecker.c @@ -0,0 +1,93 @@ +#include +#include + +/* + * A function that checks at runtime which simd accelerations are + * available and calls the best one. Falls + * back to plain C implementation if SIMD is not available. + */ + +int main(int argc, char **argv) { + float four[4] = {2.0, 3.0, 4.0, 5.0}; + const float expected[4] = {3.0, 4.0, 5.0, 6.0}; + void (*fptr)(float[4]) = NULL; + const char *type; + int i; + +/* Add here. The first matched one is used so put "better" instruction + * sets at the top. + */ +#if HAVE_NEON + if(fptr == NULL && neon_available()) { + fptr = increment_neon; + type = "NEON"; + } +#endif +#if HAVE_AVX2 + if(fptr == NULL && avx2_available()) { + fptr = increment_avx2; + type = "AVX2"; + } +#endif +#if HAVE_AVX + if(fptr == NULL && avx_available()) { + fptr = increment_avx; + type = "AVX"; + } +#endif +#if HAVE_SSE42 + if(fptr == NULL && sse42_available()) { + fptr = increment_sse42; + type = "SSE42"; + } +#endif +#if HAVE_SSE41 + if(fptr == NULL && sse41_available()) { + fptr = increment_sse41; + type = "SSE41"; + } +#endif +#if HAVE_SSSE3 + if(fptr == NULL && ssse3_available()) { + fptr = increment_ssse3; + type = "SSSE3"; + } +#endif +#if HAVE_SSE3 + if(fptr == NULL && sse3_available()) { + fptr = increment_sse3; + type = "SSE3"; + } +#endif +#if HAVE_SSE2 + if(fptr == NULL && sse2_available()) { + fptr = increment_sse2; + type = "SSE2"; + } +#endif +#if HAVE_SSE + if(fptr == NULL && sse_available()) { + fptr = increment_sse; + type = "SSE"; + } +#endif +#if HAVE_MMX + if(fptr == NULL && mmx_available()) { + fptr = increment_mmx; + type = "MMX"; + } +#endif + if(fptr == NULL) { + fptr = increment_fallback; + type = "fallback"; + } + printf("Using %s.\n", type); + fptr(four); + for(i=0; i<4; i++) { + if(four[i] != expected[i]) { + printf("Increment function failed, got %f expected %f.\n", four[i], expected[i]); + return 1; + } + } + return 0; +} diff --git a/test cases/common/153 simd/simdfuncs.h b/test cases/common/153 simd/simdfuncs.h new file mode 100644 index 000000000..dfb056068 --- /dev/null +++ b/test cases/common/153 simd/simdfuncs.h @@ -0,0 +1,67 @@ +#pragma once + +#include + +/* Yes, I do know that arr[4] decays into a pointer + * as a function argument. Don't do this in real code + * but for this test it is ok. + */ + +void increment_fallback(float arr[4]); + +#if HAVE_MMX +int mmx_available(); +void increment_mmx(float arr[4]); +#endif + +#if HAVE_SSE +int sse_available(); +void increment_sse(float arr[4]); +#endif + +#if HAVE_SSE2 +int sse2_available(); +void increment_sse2(float arr[4]); +#endif + +#if HAVE_SSE3 +int sse3_available(); +void increment_sse3(float arr[4]); +#endif + +#if HAVE_SSSE3 +int ssse3_available(); +void increment_ssse3(float arr[4]); +#endif + +#if HAVE_SSE41 +int sse41_available(); +void increment_sse41(float arr[4]); +#endif + +#if HAVE_SSE42 +int sse42_available(); +void increment_sse42(float arr[4]); +#endif + +#if HAVE_AVX +int avx_available(); +void increment_avx(float arr[4]); +#endif + +#if HAVE_AVX2 +int avx2_available(); +void increment_avx2(float arr[4]); +#endif + +#if HAVE_NEON +int neon_available(); +void increment_neon(float arr[4]); +#endif + +#if HAVE_ALTIVEC +int altivec_available(); +void increment_altivec(float arr[4]); +#endif + +/* And so on. */ -- cgit v1.2.3