DU Results -

Source patches

du5matrix.hpp

161,163c161,164
< 	template<int a, int b, int c, int d> struct shuffle_control {
< 		static const int value = ((d & 0b11) << 6) + ((c & 0b11) << 4) + ((b & 0b11) << 2) + (a & 0b11);
< 	};
---
> 	static constexpr int shuffle_control(int a, int b, int c, int d) {
> 		a &= 0b11; b &= 0b11; c &= 0b11; d &= 0b11;
> 		return (d << 6) + (c << 4) + (b << 2) + a;
> 	}
170c171
< 		const auto swap_rows = shuffle_control<2,3,0,1>::value;
---
> 		const auto swap_rows = shuffle_control(2,3,0,1);
178,181c179,182
< 		res.up = _mm_shufflelo_epi16(block.up, shuffle_control<c1,c1,c1,c1>::value);
< 		res.up = _mm_shufflehi_epi16(res.up,   shuffle_control<c2,c2,c2,c2>::value);
< 		res.dn = _mm_shufflelo_epi16(block.dn, shuffle_control<c3,c3,c3,c3>::value);
< 		res.dn = _mm_shufflehi_epi16(res.dn,   shuffle_control<c4,c4,c4,c4>::value);
---
> 		res.up = _mm_shufflelo_epi16(block.up, shuffle_control(c1,c1,c1,c1));
> 		res.up = _mm_shufflehi_epi16(res.up,   shuffle_control(c2,c2,c2,c2));
> 		res.dn = _mm_shufflelo_epi16(block.dn, shuffle_control(c3,c3,c3,c3));
> 		res.dn = _mm_shufflehi_epi16(res.dn,   shuffle_control(c4,c4,c4,c4));
242,244c243,246
< 	template<int a, int b, int c, int d> struct shuffle_control {
< 		static const int value = ((d & 0b11) << 6) + ((c & 0b11) << 4) + ((b & 0b11) << 2) + (a & 0b11);
< 	};
---
> 	static constexpr int shuffle_control(int a, int b, int c, int d) {
> 		a &= 0b11; b &= 0b11; c &= 0b11; d &= 0b11;
> 		return (d << 6) + (c << 4) + (b << 2) + a;
> 	}
257c259
< 		block = _mm256_permute4x64_epi64(block, shuffle_control<1,0,3,2>::value);
---
> 		block = _mm256_permute4x64_epi64(block, shuffle_control(1,0,3,2));
266,269c268,271
< 		up = _mm256_shufflelo_epi16(block, shuffle_control<c1,c1,c1,c1>::value);
< 		up = _mm256_shufflehi_epi16(up,    shuffle_control<c2,c2,c2,c2>::value);
< 		dn = _mm256_shufflelo_epi16(block, shuffle_control<c3,c3,c3,c3>::value);
< 		dn = _mm256_shufflehi_epi16(dn,    shuffle_control<c4,c4,c4,c4>::value);
---
> 		up = _mm256_shufflelo_epi16(block, shuffle_control(c1,c1,c1,c1));
> 		up = _mm256_shufflehi_epi16(up,    shuffle_control(c2,c2,c2,c2));
> 		dn = _mm256_shufflelo_epi16(block, shuffle_control(c3,c3,c3,c3));
> 		dn = _mm256_shufflehi_epi16(dn,    shuffle_control(c4,c4,c4,c4));
345,347c347,350
< 	template<int a, int b, int c, int d> struct shuffle_control {
< 		static const int value = ((d & 0b11) << 6) + ((c & 0b11) << 4) + ((b & 0b11) << 2) + (a & 0b11);
< 	};
---
> 	static constexpr int shuffle_control(int a, int b, int c, int d) {
> 		a &= 0b11; b &= 0b11; c &= 0b11; d &= 0b11;
> 		return (d << 6) + (c << 4) + (b << 2) + a;
> 	}
360c363
< 		const auto control = shuffle_control<2,3,0,1>::value;
---
> 		const auto control = shuffle_control(2,3,0,1);

Flex error reports

C++ error reports

Linker error reports

Run output diffs

empty.in

size	mul/matrix_zero	mul/matrix_random	mul/matrix_one
64	0.243044	0.242044	0.243044
128	0.232408	0.232408	0.231406
256	0.226296	0.226296	0.226296
512	0.224582	0.224582	0.224582
1024	1.25729	1.3141	1.27032
2048	1.25484	1.25729	1.24774