diff --git a/libs/ardour/sse_functions_avx_linux.cc b/libs/ardour/sse_functions_avx_linux.cc
index aefc32246b..a7d42c9d3a 100644
--- a/libs/ardour/sse_functions_avx_linux.cc
+++ b/libs/ardour/sse_functions_avx_linux.cc
@@ -157,14 +157,6 @@ x86_sse_avx_compute_peak(const float *src, uint32_t nframes, float current)
 
 	vmax = avx_getmax_ps(vmax);
 
-	// zero upper 128 bit of 256 bit ymm register to avoid penalties using non-AVX
-	// instructions.
-
-	// _mm256_zeroupper();
-	// This is probably not needed in 2021 as compilers will insert them
-	// automatically. See stackoverflow reference:
-	// https://stackoverflow.com/questions/68736527/do-i-need-to-use-mm256-zeroupper-in-2021
-
 #if defined(__GNUC__) && (__GNUC__ < 5)
 	return *((float *)&vmax);
 #elif defined(__GNUC__) && (__GNUC__ < 8)
@@ -255,13 +247,6 @@ x86_sse_avx_find_peaks(const float *src, uint32_t nframes, float *minf, float *m
 	vmin = avx_getmin_ps(vmin);
 	vmax = avx_getmax_ps(vmax);
 
-	// There's a penalty going away from AVX mode to SSE mode. This can
-	// be avoided by ensuring to the CPU that rest of the routine is no
-	// longer interested in the upper portion of the YMM register.
-
-	// zero upper 128 bit of 256 bit ymm register to avoid penalties using non-AVX instructions
-	_mm256_zeroupper();
-
 	_mm_store_ss(minf, _mm256_castps256_ps128(vmin));
 	_mm_store_ss(maxf, _mm256_castps256_ps128(vmax));
 }
@@ -318,13 +303,6 @@ x86_sse_avx_apply_gain_to_buffer(float *dst, uint32_t nframes, float gain)
 
 		frames -= 8;
 	}
-
-	// There's a penalty going away from AVX mode to SSE mode. This can
-	// be avoided by ensuring to the CPU that rest of the routine is no
-	// longer interested in the upper portion of the YMM register.
-
-	_mm256_zeroupper(); // zeros the upper portion of YMM register
-
 	// Process the remaining samples
 	do {
 		__m128 g0 = _mm256_castps256_ps128(vgain);
@@ -486,13 +464,6 @@ x86_sse_avx_mix_buffers_with_gain_unaligned(float *dst, const float *src, uint32
 
 		nframes -= 8;
 	}
-
-	// There's a penalty going away from AVX mode to SSE mode. This can
-	// be avoided by ensuring the CPU that rest of the routine is no
-	// longer interested in the upper portion of the YMM register.
-
-	_mm256_zeroupper(); // zeros the upper portion of YMM register
-
 	// Process the remaining samples
 	do {
 		__m128 g0 = _mm_set_ss(gain);
@@ -586,13 +557,6 @@ x86_sse_avx_mix_buffers_with_gain_aligned(float *dst, const float *src, uint32_t
 
 		nframes -= 8;
 	}
-
-	// There's a penalty going from AVX mode to SSE mode. This can
-	// be avoided by ensuring the CPU that rest of the routine is no
-	// longer interested in the upper portion of the YMM register.
-
-	_mm256_zeroupper(); // zeros the upper portion of YMM register
-
 	// Process the remaining samples, one sample at a time.
 	do {
 		__m128 g0 = _mm256_castps256_ps128(vgain); // use the same register
@@ -676,12 +640,6 @@ x86_sse_avx_mix_buffers_no_gain_unaligned(float *dst, const float *src, uint32_t
 		nframes -= 8;
 	}
 
-	// There's a penalty going away from AVX mode to SSE mode. This can
-	// be avoided by ensuring the CPU that rest of the routine is no
-	// longer interested in the upper portion of the YMM register.
-
-	_mm256_zeroupper(); // zeros the upper portion of YMM register
-
 	// Process the remaining samples
 	do {
 		while (nframes > 0) {
@@ -807,12 +765,6 @@ x86_sse_avx_mix_buffers_no_gain_aligned(float *dst, const float *src, uint32_t n
 		nframes -= 8;
 	}
 
-	// There's a penalty going from AVX mode to SSE mode. This can
-	// be avoided by ensuring the CPU that rest of the routine is no
-	// longer interested in the upper portion of the YMM register.
-
-	_mm256_zeroupper(); // zeros the upper portion of YMM register
-
 	// Process the remaining samples
 	do {
 		while (nframes > 0) {
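For reference, the pattern this patch leaves behind looks roughly like the sketch below: an AVX loop followed by SSE/scalar tail code with no explicit _mm256_zeroupper(). Per the StackOverflow link quoted in the removed comments, a compiler building this translation unit with AVX enabled encodes the 128-bit tail intrinsics as VEX instructions and emits vzeroupper before returning to non-AVX code on its own. avx_sum_with_sse_tail is a hypothetical illustration under those assumptions, not a function from this file.

#include <immintrin.h>
#include <stdint.h>

/* Hypothetical example, not part of the patch: sum nframes floats using an
 * AVX loop for the bulk and SSE/scalar code for the tail, mirroring the
 * structure of the routines touched above. */
static float
avx_sum_with_sse_tail (const float *src, uint32_t nframes)
{
	__m256 vsum = _mm256_setzero_ps ();

	// AVX part: 8 floats per iteration (unaligned loads keep the sketch simple)
	while (nframes >= 8) {
		vsum = _mm256_add_ps (vsum, _mm256_loadu_ps (src));
		src += 8;
		nframes -= 8;
	}

	// No explicit _mm256_zeroupper() here: with AVX code generation enabled,
	// the intrinsics below are VEX-encoded and the compiler adds vzeroupper
	// before leaving AVX code, which is what the removal above relies on.

	// Reduce the 8 partial sums to one scalar via SSE
	__m128 lo = _mm256_castps256_ps128 (vsum);
	__m128 hi = _mm256_extractf128_ps (vsum, 1);
	__m128 s  = _mm_add_ps (lo, hi);
	s = _mm_hadd_ps (s, s);
	s = _mm_hadd_ps (s, s);
	float sum = _mm_cvtss_f32 (s);

	// Scalar tail for the remaining 0-7 samples
	while (nframes > 0) {
		sum += *src++;
		--nframes;
	}

	return sum;
}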