From 621df77049ff9bd264367dc430e6e5fa562ed604 Mon Sep 17 00:00:00 2001 From: Dimitri John Ledkov Date: Wed, 7 Nov 2018 01:30:52 +0000 Subject: Fix simd instruction alignment on 32bit kernels. Resolves segfaults on i686 kernel. Also execute all available simd instructions, not just the best one. Signed-off-by: Dimitri John Ledkov Signed-off-by: Michael Hudson Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=894774 --- test cases/common/152 simd/simd_sse2.c | 3 +- test cases/common/152 simd/simd_sse3.c | 3 +- test cases/common/152 simd/simd_sse41.c | 3 +- test cases/common/152 simd/simd_sse42.c | 3 +- test cases/common/152 simd/simd_ssse3.c | 3 +- test cases/common/152 simd/simdchecker.c | 55 +++++++++++++++++--------------- test cases/common/152 simd/simdtest.h | 9 ++++++ 7 files changed, 49 insertions(+), 30 deletions(-) create mode 100644 test cases/common/152 simd/simdtest.h diff --git a/test cases/common/152 simd/simd_sse2.c b/test cases/common/152 simd/simd_sse2.c index 02745337b..5d412fd41 100644 --- a/test cases/common/152 simd/simd_sse2.c +++ b/test cases/common/152 simd/simd_sse2.c @@ -1,6 +1,7 @@ #include #include #include +#include #ifdef _MSC_VER int sse2_available() { @@ -21,7 +22,7 @@ int sse2_available() { #endif void increment_sse2(float arr[4]) { - double darr[4]; + alignas(16) double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); __m128d val2 = _mm_set_pd(arr[2], arr[3]); __m128d one = _mm_set_pd(1.0, 1.0); diff --git a/test cases/common/152 simd/simd_sse3.c b/test cases/common/152 simd/simd_sse3.c index e97d10285..4aa47fd32 100644 --- a/test cases/common/152 simd/simd_sse3.c +++ b/test cases/common/152 simd/simd_sse3.c @@ -1,5 +1,6 @@ #include #include +#include #ifdef _MSC_VER #include @@ -22,7 +23,7 @@ int sse3_available() { #endif void increment_sse3(float arr[4]) { - double darr[4]; + alignas(16) double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); __m128d val2 = _mm_set_pd(arr[2], arr[3]); __m128d one = _mm_set_pd(1.0, 
1.0); diff --git a/test cases/common/152 simd/simd_sse41.c b/test cases/common/152 simd/simd_sse41.c index 0308c7e49..466b30c5b 100644 --- a/test cases/common/152 simd/simd_sse41.c +++ b/test cases/common/152 simd/simd_sse41.c @@ -1,6 +1,7 @@ #include #include +#include #include #ifdef _MSC_VER @@ -24,7 +25,7 @@ int sse41_available() { #endif void increment_sse41(float arr[4]) { - double darr[4]; + alignas(16) double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); __m128d val2 = _mm_set_pd(arr[2], arr[3]); __m128d one = _mm_set_pd(1.0, 1.0); diff --git a/test cases/common/152 simd/simd_sse42.c b/test cases/common/152 simd/simd_sse42.c index 137ffc441..26d5ba814 100644 --- a/test cases/common/152 simd/simd_sse42.c +++ b/test cases/common/152 simd/simd_sse42.c @@ -1,6 +1,7 @@ #include #include #include +#include #ifdef _MSC_VER #include @@ -27,7 +28,7 @@ int sse42_available() { #endif void increment_sse42(float arr[4]) { - double darr[4]; + alignas(16) double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); __m128d val2 = _mm_set_pd(arr[2], arr[3]); __m128d one = _mm_set_pd(1.0, 1.0); diff --git a/test cases/common/152 simd/simd_ssse3.c b/test cases/common/152 simd/simd_ssse3.c index ab4dff4f8..d09916522 100644 --- a/test cases/common/152 simd/simd_ssse3.c +++ b/test cases/common/152 simd/simd_ssse3.c @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -30,7 +31,7 @@ int ssse3_available() { #endif void increment_ssse3(float arr[4]) { - double darr[4]; + alignas(16) double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); __m128d val2 = _mm_set_pd(arr[2], arr[3]); __m128d one = _mm_set_pd(1.0, 1.0); diff --git a/test cases/common/152 simd/simdchecker.c b/test cases/common/152 simd/simdchecker.c index 222fbf3e3..5e24751f9 100644 --- a/test cases/common/152 simd/simdchecker.c +++ b/test cases/common/152 simd/simdchecker.c @@ -1,5 +1,7 @@ #include +#include #include +#include /* * A function that checks at runtime which simd accelerations are @@ 
-8,86 +10,89 @@ */ int main(int argc, char **argv) { - float four[4] = {2.0, 3.0, 4.0, 5.0}; + static const float four_initial[4] = {2.0, 3.0, 4.0, 5.0}; + alignas(16) float four[4]; const float expected[4] = {3.0, 4.0, 5.0, 6.0}; void (*fptr)(float[4]) = NULL; const char *type; - int i; + int i, r=0; /* Add here. The first matched one is used so put "better" instruction * sets at the top. */ #if HAVE_NEON - if(fptr == NULL && neon_available()) { + if(neon_available()) { fptr = increment_neon; type = "NEON"; + #include } #endif #if HAVE_AVX2 - if(fptr == NULL && avx2_available()) { + if(avx2_available()) { fptr = increment_avx2; type = "AVX2"; + #include } #endif #if HAVE_AVX - if(fptr == NULL && avx_available()) { + if(avx_available()) { fptr = increment_avx; type = "AVX"; + #include } #endif #if HAVE_SSE42 - if(fptr == NULL && sse42_available()) { + if(sse42_available()) { fptr = increment_sse42; type = "SSE42"; + #include } #endif #if HAVE_SSE41 - if(fptr == NULL && sse41_available()) { + if(sse41_available()) { fptr = increment_sse41; type = "SSE41"; + #include } #endif #if HAVE_SSSE3 - if(fptr == NULL && ssse3_available()) { + if(ssse3_available()) { fptr = increment_ssse3; type = "SSSE3"; + #include } #endif #if HAVE_SSE3 - if(fptr == NULL && sse3_available()) { + if(sse3_available()) { fptr = increment_sse3; type = "SSE3"; + #include } #endif #if HAVE_SSE2 - if(fptr == NULL && sse2_available()) { + if(sse2_available()) { fptr = increment_sse2; type = "SSE2"; + #include } #endif #if HAVE_SSE - if(fptr == NULL && sse_available()) { + if(sse_available()) { fptr = increment_sse; type = "SSE"; + #include } #endif #if HAVE_MMX - if(fptr == NULL && mmx_available()) { + if(mmx_available()) { fptr = increment_mmx; type = "MMX"; + #include } #endif - if(fptr == NULL) { - fptr = increment_fallback; - type = "fallback"; - } - printf("Using %s.\n", type); - fptr(four); - for(i=0; i<4; i++) { - if(four[i] != expected[i]) { - printf("Increment function failed, got %f 
expected %f.\n", four[i], expected[i]); - return 1; - } - } - return 0; + fptr = increment_fallback; + type = "fallback"; + #include + + return r; } diff --git a/test cases/common/152 simd/simdtest.h b/test cases/common/152 simd/simdtest.h new file mode 100644 index 000000000..2bd07e7e3 --- /dev/null +++ b/test cases/common/152 simd/simdtest.h @@ -0,0 +1,9 @@ +memcpy(four, four_initial, sizeof(four_initial)); +printf("Using %s.\n", type); +fptr(four); +for(i=0; i<4; i++) { + if(four[i] != expected[i]) { + printf("Increment function failed, got %f expected %f.\n", four[i], expected[i]); + r=1; + } +} -- cgit v1.2.3 From 0859442480d4e0f607100a3bd62224156474a6b2 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Sat, 10 Nov 2018 21:18:03 +0200 Subject: Refactor check to its own function. --- test cases/common/152 simd/simdchecker.c | 134 +++++++++++++++++++++---------- test cases/common/152 simd/simdtest.h | 9 --- 2 files changed, 90 insertions(+), 53 deletions(-) delete mode 100644 test cases/common/152 simd/simdtest.h diff --git a/test cases/common/152 simd/simdchecker.c b/test cases/common/152 simd/simdchecker.c index 5e24751f9..bc7c10abe 100644 --- a/test cases/common/152 simd/simdchecker.c +++ b/test cases/common/152 simd/simdchecker.c @@ -3,96 +3,142 @@ #include #include -/* - * A function that checks at runtime which simd accelerations are - * available and calls the best one. Falls - * back to plain C implementation if SIMD is not available. 
- */ +typedef void (*simd_func)(float*); + +int check_simd_implementation(float *four, + const float *four_initial, + const char *simd_type, + const float *expected, + simd_func fptr, + const int blocksize) { + int rv = 0; + memcpy(four, four_initial, blocksize*sizeof(float)); + printf("Using %s.\n", simd_type); + fptr(four); + for(int i=0; i + r += check_simd_implementation(four, + four_initial, + "NEON", + expected, + increment_neon, + blocksize); } #endif #if HAVE_AVX2 if(avx2_available()) { - fptr = increment_avx2; - type = "AVX2"; - #include + r += check_simd_implementation(four, + four_initial, + "AVX2", + expected, + increment_avx2, + blocksize); } #endif #if HAVE_AVX if(avx_available()) { - fptr = increment_avx; - type = "AVX"; - #include + r += check_simd_implementation(four, + four_initial, + "AVC", + expected, + increment_avx, + blocksize); } #endif #if HAVE_SSE42 if(sse42_available()) { - fptr = increment_sse42; - type = "SSE42"; - #include + r += check_simd_implementation(four, + four_initial, + "SSR42", + expected, + increment_sse42, + blocksize); } #endif #if HAVE_SSE41 if(sse41_available()) { - fptr = increment_sse41; - type = "SSE41"; - #include + r += check_simd_implementation(four, + four_initial, + "SSE41", + expected, + increment_sse41, + blocksize); } #endif #if HAVE_SSSE3 if(ssse3_available()) { - fptr = increment_ssse3; - type = "SSSE3"; - #include + r += check_simd_implementation(four, + four_initial, + "SSSE3", + expected, + increment_ssse3, + blocksize); } #endif #if HAVE_SSE3 if(sse3_available()) { - fptr = increment_sse3; - type = "SSE3"; - #include + r += check_simd_implementation(four, + four_initial, + "SSE3", + expected, + increment_sse3, + blocksize); } #endif #if HAVE_SSE2 if(sse2_available()) { - fptr = increment_sse2; - type = "SSE2"; - #include + r += check_simd_implementation(four, + four_initial, + "SSE2", + expected, + increment_sse2, + blocksize); } #endif #if HAVE_SSE if(sse_available()) { - fptr = increment_sse; - type = 
"SSE"; - #include + r += check_simd_implementation(four, + four_initial, + "SSE", + expected, + increment_sse, + blocksize); } #endif #if HAVE_MMX if(mmx_available()) { - fptr = increment_mmx; - type = "MMX"; - #include + r += check_simd_implementation(four, + four_initial, + "MMX", + expected, + increment_mmx, + blocksize); } #endif - fptr = increment_fallback; - type = "fallback"; - #include - + r += check_simd_implementation(four, + four_initial, + "fallback", + expected, + increment_fallback, + blocksize); return r; } diff --git a/test cases/common/152 simd/simdtest.h b/test cases/common/152 simd/simdtest.h deleted file mode 100644 index 2bd07e7e3..000000000 --- a/test cases/common/152 simd/simdtest.h +++ /dev/null @@ -1,9 +0,0 @@ -memcpy(four, four_initial, sizeof(four_initial)); -printf("Using %s.\n", type); -fptr(four); -for(i=0; i<4; i++) { - if(four[i] != expected[i]) { - printf("Increment function failed, got %f expected %f.\n", four[i], expected[i]); - r=1; - } -} -- cgit v1.2.3 From 75648a2c9ecb85d227beafef68fd2330b73e5ffb Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Sat, 10 Nov 2018 21:58:26 +0200 Subject: Fix alignment with MSVC. 
--- test cases/common/152 simd/simd_sse2.c | 3 +-- test cases/common/152 simd/simd_sse3.c | 3 +-- test cases/common/152 simd/simd_sse41.c | 3 +-- test cases/common/152 simd/simd_sse42.c | 3 +-- test cases/common/152 simd/simd_ssse3.c | 3 +-- test cases/common/152 simd/simdchecker.c | 3 +-- test cases/common/152 simd/simdfuncs.h | 8 ++++++++ 7 files changed, 14 insertions(+), 12 deletions(-) diff --git a/test cases/common/152 simd/simd_sse2.c b/test cases/common/152 simd/simd_sse2.c index 5d412fd41..271022ed1 100644 --- a/test cases/common/152 simd/simd_sse2.c +++ b/test cases/common/152 simd/simd_sse2.c @@ -1,7 +1,6 @@ #include #include #include -#include #ifdef _MSC_VER int sse2_available() { @@ -22,7 +21,7 @@ int sse2_available() { #endif void increment_sse2(float arr[4]) { - alignas(16) double darr[4]; + ALIGN_16 double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); __m128d val2 = _mm_set_pd(arr[2], arr[3]); __m128d one = _mm_set_pd(1.0, 1.0); diff --git a/test cases/common/152 simd/simd_sse3.c b/test cases/common/152 simd/simd_sse3.c index 4aa47fd32..89c2f8b1f 100644 --- a/test cases/common/152 simd/simd_sse3.c +++ b/test cases/common/152 simd/simd_sse3.c @@ -1,6 +1,5 @@ #include #include -#include #ifdef _MSC_VER #include @@ -23,7 +22,7 @@ int sse3_available() { #endif void increment_sse3(float arr[4]) { - alignas(16) double darr[4]; + ALIGN_16 double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); __m128d val2 = _mm_set_pd(arr[2], arr[3]); __m128d one = _mm_set_pd(1.0, 1.0); diff --git a/test cases/common/152 simd/simd_sse41.c b/test cases/common/152 simd/simd_sse41.c index 466b30c5b..859fb4366 100644 --- a/test cases/common/152 simd/simd_sse41.c +++ b/test cases/common/152 simd/simd_sse41.c @@ -1,7 +1,6 @@ #include #include -#include #include #ifdef _MSC_VER @@ -25,7 +24,7 @@ int sse41_available() { #endif void increment_sse41(float arr[4]) { - alignas(16) double darr[4]; + ALIGN_16 double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); __m128d 
val2 = _mm_set_pd(arr[2], arr[3]); __m128d one = _mm_set_pd(1.0, 1.0); diff --git a/test cases/common/152 simd/simd_sse42.c b/test cases/common/152 simd/simd_sse42.c index 26d5ba814..edd6e5b44 100644 --- a/test cases/common/152 simd/simd_sse42.c +++ b/test cases/common/152 simd/simd_sse42.c @@ -1,7 +1,6 @@ #include #include #include -#include #ifdef _MSC_VER #include @@ -28,7 +27,7 @@ int sse42_available() { #endif void increment_sse42(float arr[4]) { - alignas(16) double darr[4]; + ALIGN_16 double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); __m128d val2 = _mm_set_pd(arr[2], arr[3]); __m128d one = _mm_set_pd(1.0, 1.0); diff --git a/test cases/common/152 simd/simd_ssse3.c b/test cases/common/152 simd/simd_ssse3.c index d09916522..0156f7720 100644 --- a/test cases/common/152 simd/simd_ssse3.c +++ b/test cases/common/152 simd/simd_ssse3.c @@ -1,6 +1,5 @@ #include #include -#include #include #include @@ -31,7 +30,7 @@ int ssse3_available() { #endif void increment_ssse3(float arr[4]) { - alignas(16) double darr[4]; + ALIGN_16 double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); __m128d val2 = _mm_set_pd(arr[2], arr[3]); __m128d one = _mm_set_pd(1.0, 1.0); diff --git a/test cases/common/152 simd/simdchecker.c b/test cases/common/152 simd/simdchecker.c index bc7c10abe..cd6fe4f59 100644 --- a/test cases/common/152 simd/simdchecker.c +++ b/test cases/common/152 simd/simdchecker.c @@ -1,5 +1,4 @@ #include -#include #include #include @@ -26,7 +25,7 @@ int check_simd_implementation(float *four, int main(int argc, char **argv) { static const float four_initial[4] = {2.0, 3.0, 4.0, 5.0}; - alignas(16) float four[4]; + ALIGN_16 float four[4]; const float expected[4] = {3.0, 4.0, 5.0, 6.0}; int r=0; const int blocksize = 4; diff --git a/test cases/common/152 simd/simdfuncs.h b/test cases/common/152 simd/simdfuncs.h index dfb056068..c5e16583d 100644 --- a/test cases/common/152 simd/simdfuncs.h +++ b/test cases/common/152 simd/simdfuncs.h @@ -2,6 +2,14 @@ #include 
+#ifdef _MSC_VER +#define ALIGN_16 __declspec(align(16)) +#else +#include <stdalign.h> +#define ALIGN_16 alignas(16) +#endif + + /* Yes, I do know that arr[4] decays into a pointer * as a function argument. Don't do this in real code * but for this test it is ok. -- cgit v1.2.3