Revert "[HVAC] Add AVX implementation"

This reverts commit aef47b1f05d8b9add616745cb5c503f42f916eb6.
This commit is contained in:
Behdad Esfahbod 2024-09-24 14:16:51 -06:00
parent 6983b08dfd
commit 79b8508573

View file

@ -37,118 +37,6 @@
namespace OT {
#include <immintrin.h>
static inline void updatePointsAVXscale(const float* x, const float* y, float scalar, unsigned deltasCount, contour_point_t* points)
{
__m256 scalarVec = _mm256_set1_ps(scalar);
unsigned i = 0;
// Process 8 elements at a time with AVX
for (; i <= deltasCount - 8; i += 8)
{
// Load 8 elements from x and y arrays
__m256 xVec = _mm256_loadu_ps(&x[i]);
__m256 yVec = _mm256_loadu_ps(&y[i]);
// Gather point x and y values
__m256 pointXVec = _mm256_set_ps(points[i+7].x, points[i+6].x, points[i+5].x, points[i+4].x,
points[i+3].x, points[i+2].x, points[i+1].x, points[i].x);
__m256 pointYVec = _mm256_set_ps(points[i+7].y, points[i+6].y, points[i+5].y, points[i+4].y,
points[i+3].y, points[i+2].y, points[i+1].y, points[i].y);
// Multiply x and y vectors by the scalar
xVec = _mm256_mul_ps(xVec, scalarVec);
yVec = _mm256_mul_ps(yVec, scalarVec);
// Add the scaled x and y to the point coordinates
pointXVec = _mm256_add_ps(pointXVec, xVec);
pointYVec = _mm256_add_ps(pointYVec, yVec);
// Store the updated coordinates back to the points array
points[i].x = _mm256_cvtss_f32(pointXVec);
points[i].y = _mm256_cvtss_f32(pointYVec);
points[i+1].x = _mm256_cvtss_f32(_mm256_permute_ps(pointXVec, _MM_SHUFFLE(0, 0, 0, 1)));
points[i+1].y = _mm256_cvtss_f32(_mm256_permute_ps(pointYVec, _MM_SHUFFLE(0, 0, 0, 1)));
points[i+2].x = _mm256_cvtss_f32(_mm256_permute_ps(pointXVec, _MM_SHUFFLE(0, 0, 0, 2)));
points[i+2].y = _mm256_cvtss_f32(_mm256_permute_ps(pointYVec, _MM_SHUFFLE(0, 0, 0, 2)));
points[i+3].x = _mm256_cvtss_f32(_mm256_permute_ps(pointXVec, _MM_SHUFFLE(0, 0, 0, 3)));
points[i+3].y = _mm256_cvtss_f32(_mm256_permute_ps(pointYVec, _MM_SHUFFLE(0, 0, 0, 3)));
points[i+4].x = _mm256_cvtss_f32(_mm256_permute_ps(pointXVec, _MM_SHUFFLE(0, 0, 0, 4)));
points[i+4].y = _mm256_cvtss_f32(_mm256_permute_ps(pointYVec, _MM_SHUFFLE(0, 0, 0, 4)));
points[i+5].x = _mm256_cvtss_f32(_mm256_permute_ps(pointXVec, _MM_SHUFFLE(0, 0, 0, 5)));
points[i+5].y = _mm256_cvtss_f32(_mm256_permute_ps(pointYVec, _MM_SHUFFLE(0, 0, 0, 5)));
points[i+6].x = _mm256_cvtss_f32(_mm256_permute_ps(pointXVec, _MM_SHUFFLE(0, 0, 0, 6)));
points[i+6].y = _mm256_cvtss_f32(_mm256_permute_ps(pointYVec, _MM_SHUFFLE(0, 0, 0, 6)));
points[i+7].x = _mm256_cvtss_f32(_mm256_permute_ps(pointXVec, _MM_SHUFFLE(0, 0, 0, 7)));
points[i+7].y = _mm256_cvtss_f32(_mm256_permute_ps(pointYVec, _MM_SHUFFLE(0, 0, 0, 7)));
}
// Process remaining elements
for (; i < deltasCount; i++)
{
points[i].x += x[i] * scalar;
points[i].y += y[i] * scalar;
}
}
static inline void updatePointsAVX(const float* x, const float* y, unsigned deltasCount, contour_point_t* points)
{
unsigned i = 0;
// Process 8 elements at a time with AVX
for (; i <= deltasCount - 8; i += 8)
{
// Load 8 elements from x and y arrays
__m256 xVec = _mm256_loadu_ps(&x[i]);
__m256 yVec = _mm256_loadu_ps(&y[i]);
// Gather point x and y values into AVX vectors
__m256 pointXVec = _mm256_set_ps(points[i+7].x, points[i+6].x, points[i+5].x, points[i+4].x,
points[i+3].x, points[i+2].x, points[i+1].x, points[i].x);
__m256 pointYVec = _mm256_set_ps(points[i+7].y, points[i+6].y, points[i+5].y, points[i+4].y,
points[i+3].y, points[i+2].y, points[i+1].y, points[i].y);
// Add x and y values to the point coordinates
pointXVec = _mm256_add_ps(pointXVec, xVec);
pointYVec = _mm256_add_ps(pointYVec, yVec);
// Scatter the updated coordinates back to the points array
points[i].x = ((float*)&pointXVec)[0];
points[i].y = ((float*)&pointYVec)[0];
points[i+1].x = ((float*)&pointXVec)[1];
points[i+1].y = ((float*)&pointYVec)[1];
points[i+2].x = ((float*)&pointXVec)[2];
points[i+2].y = ((float*)&pointYVec)[2];
points[i+3].x = ((float*)&pointXVec)[3];
points[i+3].y = ((float*)&pointYVec)[3];
points[i+4].x = ((float*)&pointXVec)[4];
points[i+4].y = ((float*)&pointYVec)[4];
points[i+5].x = ((float*)&pointXVec)[5];
points[i+5].y = ((float*)&pointYVec)[5];
points[i+6].x = ((float*)&pointXVec)[6];
points[i+6].y = ((float*)&pointYVec)[6];
points[i+7].x = ((float*)&pointXVec)[7];
points[i+7].y = ((float*)&pointYVec)[7];
}
// Process remaining elements
for (; i < deltasCount; i++)
{
points[i].x += x[i];
points[i].y += y[i];
}
}
struct GlyphVariationDelta
{
unsigned get_size (unsigned deltasCount) const
@ -162,20 +50,12 @@ struct GlyphVariationDelta
const SparseVarRegionList &varRegionList) const
{
float scalar = varRegionList.evaluate (regionIndex, coords, coords.length);
if (scalar == 0.f)
if (scalar == 0)
return;
const float *x = deltasZ;
const float *y = deltasZ + deltasCount;
if (scalar == 1.f)
updatePointsAVX(x, y, deltasCount, points.arrayZ);
else
updatePointsAVXscale(x, y, scalar, deltasCount, points.arrayZ);
return;
if (scalar == 1.f)
if (scalar == 1)
for (unsigned i = 0; i < deltasCount; i++)
{
auto &point = points[i];