mirror of
https://github.com/harfbuzz/harfbuzz.git
synced 2025-04-05 05:25:05 +00:00
Revert "[HVAC] Add AVX implementation"
This reverts commit aef47b1f05d8b9add616745cb5c503f42f916eb6.
This commit is contained in:
parent
6983b08dfd
commit
79b8508573
1 changed file with 2 additions and 122 deletions
|
@ -37,118 +37,6 @@
|
|||
|
||||
namespace OT {
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
static inline void updatePointsAVXscale(const float* x, const float* y, float scalar, unsigned deltasCount, contour_point_t* points)
|
||||
{
|
||||
__m256 scalarVec = _mm256_set1_ps(scalar);
|
||||
|
||||
unsigned i = 0;
|
||||
|
||||
// Process 8 elements at a time with AVX
|
||||
for (; i <= deltasCount - 8; i += 8)
|
||||
{
|
||||
// Load 8 elements from x and y arrays
|
||||
__m256 xVec = _mm256_loadu_ps(&x[i]);
|
||||
__m256 yVec = _mm256_loadu_ps(&y[i]);
|
||||
|
||||
// Gather point x and y values
|
||||
__m256 pointXVec = _mm256_set_ps(points[i+7].x, points[i+6].x, points[i+5].x, points[i+4].x,
|
||||
points[i+3].x, points[i+2].x, points[i+1].x, points[i].x);
|
||||
__m256 pointYVec = _mm256_set_ps(points[i+7].y, points[i+6].y, points[i+5].y, points[i+4].y,
|
||||
points[i+3].y, points[i+2].y, points[i+1].y, points[i].y);
|
||||
|
||||
// Multiply x and y vectors by the scalar
|
||||
xVec = _mm256_mul_ps(xVec, scalarVec);
|
||||
yVec = _mm256_mul_ps(yVec, scalarVec);
|
||||
|
||||
// Add the scaled x and y to the point coordinates
|
||||
pointXVec = _mm256_add_ps(pointXVec, xVec);
|
||||
pointYVec = _mm256_add_ps(pointYVec, yVec);
|
||||
|
||||
// Store the updated coordinates back to the points array
|
||||
points[i].x = _mm256_cvtss_f32(pointXVec);
|
||||
points[i].y = _mm256_cvtss_f32(pointYVec);
|
||||
|
||||
points[i+1].x = _mm256_cvtss_f32(_mm256_permute_ps(pointXVec, _MM_SHUFFLE(0, 0, 0, 1)));
|
||||
points[i+1].y = _mm256_cvtss_f32(_mm256_permute_ps(pointYVec, _MM_SHUFFLE(0, 0, 0, 1)));
|
||||
|
||||
points[i+2].x = _mm256_cvtss_f32(_mm256_permute_ps(pointXVec, _MM_SHUFFLE(0, 0, 0, 2)));
|
||||
points[i+2].y = _mm256_cvtss_f32(_mm256_permute_ps(pointYVec, _MM_SHUFFLE(0, 0, 0, 2)));
|
||||
|
||||
points[i+3].x = _mm256_cvtss_f32(_mm256_permute_ps(pointXVec, _MM_SHUFFLE(0, 0, 0, 3)));
|
||||
points[i+3].y = _mm256_cvtss_f32(_mm256_permute_ps(pointYVec, _MM_SHUFFLE(0, 0, 0, 3)));
|
||||
|
||||
points[i+4].x = _mm256_cvtss_f32(_mm256_permute_ps(pointXVec, _MM_SHUFFLE(0, 0, 0, 4)));
|
||||
points[i+4].y = _mm256_cvtss_f32(_mm256_permute_ps(pointYVec, _MM_SHUFFLE(0, 0, 0, 4)));
|
||||
|
||||
points[i+5].x = _mm256_cvtss_f32(_mm256_permute_ps(pointXVec, _MM_SHUFFLE(0, 0, 0, 5)));
|
||||
points[i+5].y = _mm256_cvtss_f32(_mm256_permute_ps(pointYVec, _MM_SHUFFLE(0, 0, 0, 5)));
|
||||
|
||||
points[i+6].x = _mm256_cvtss_f32(_mm256_permute_ps(pointXVec, _MM_SHUFFLE(0, 0, 0, 6)));
|
||||
points[i+6].y = _mm256_cvtss_f32(_mm256_permute_ps(pointYVec, _MM_SHUFFLE(0, 0, 0, 6)));
|
||||
|
||||
points[i+7].x = _mm256_cvtss_f32(_mm256_permute_ps(pointXVec, _MM_SHUFFLE(0, 0, 0, 7)));
|
||||
points[i+7].y = _mm256_cvtss_f32(_mm256_permute_ps(pointYVec, _MM_SHUFFLE(0, 0, 0, 7)));
|
||||
}
|
||||
|
||||
// Process remaining elements
|
||||
for (; i < deltasCount; i++)
|
||||
{
|
||||
points[i].x += x[i] * scalar;
|
||||
points[i].y += y[i] * scalar;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void updatePointsAVX(const float* x, const float* y, unsigned deltasCount, contour_point_t* points)
|
||||
{
|
||||
unsigned i = 0;
|
||||
|
||||
// Process 8 elements at a time with AVX
|
||||
for (; i <= deltasCount - 8; i += 8)
|
||||
{
|
||||
// Load 8 elements from x and y arrays
|
||||
__m256 xVec = _mm256_loadu_ps(&x[i]);
|
||||
__m256 yVec = _mm256_loadu_ps(&y[i]);
|
||||
|
||||
// Gather point x and y values into AVX vectors
|
||||
__m256 pointXVec = _mm256_set_ps(points[i+7].x, points[i+6].x, points[i+5].x, points[i+4].x,
|
||||
points[i+3].x, points[i+2].x, points[i+1].x, points[i].x);
|
||||
__m256 pointYVec = _mm256_set_ps(points[i+7].y, points[i+6].y, points[i+5].y, points[i+4].y,
|
||||
points[i+3].y, points[i+2].y, points[i+1].y, points[i].y);
|
||||
|
||||
// Add x and y values to the point coordinates
|
||||
pointXVec = _mm256_add_ps(pointXVec, xVec);
|
||||
pointYVec = _mm256_add_ps(pointYVec, yVec);
|
||||
|
||||
// Scatter the updated coordinates back to the points array
|
||||
points[i].x = ((float*)&pointXVec)[0];
|
||||
points[i].y = ((float*)&pointYVec)[0];
|
||||
points[i+1].x = ((float*)&pointXVec)[1];
|
||||
points[i+1].y = ((float*)&pointYVec)[1];
|
||||
points[i+2].x = ((float*)&pointXVec)[2];
|
||||
points[i+2].y = ((float*)&pointYVec)[2];
|
||||
points[i+3].x = ((float*)&pointXVec)[3];
|
||||
points[i+3].y = ((float*)&pointYVec)[3];
|
||||
points[i+4].x = ((float*)&pointXVec)[4];
|
||||
points[i+4].y = ((float*)&pointYVec)[4];
|
||||
points[i+5].x = ((float*)&pointXVec)[5];
|
||||
points[i+5].y = ((float*)&pointYVec)[5];
|
||||
points[i+6].x = ((float*)&pointXVec)[6];
|
||||
points[i+6].y = ((float*)&pointYVec)[6];
|
||||
points[i+7].x = ((float*)&pointXVec)[7];
|
||||
points[i+7].y = ((float*)&pointYVec)[7];
|
||||
}
|
||||
|
||||
// Process remaining elements
|
||||
for (; i < deltasCount; i++)
|
||||
{
|
||||
points[i].x += x[i];
|
||||
points[i].y += y[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
struct GlyphVariationDelta
|
||||
{
|
||||
unsigned get_size (unsigned deltasCount) const
|
||||
|
@ -162,20 +50,12 @@ struct GlyphVariationDelta
|
|||
const SparseVarRegionList &varRegionList) const
|
||||
{
|
||||
float scalar = varRegionList.evaluate (regionIndex, coords, coords.length);
|
||||
if (scalar == 0.f)
|
||||
if (scalar == 0)
|
||||
return;
|
||||
|
||||
const float *x = deltasZ;
|
||||
const float *y = deltasZ + deltasCount;
|
||||
|
||||
|
||||
if (scalar == 1.f)
|
||||
updatePointsAVX(x, y, deltasCount, points.arrayZ);
|
||||
else
|
||||
updatePointsAVXscale(x, y, scalar, deltasCount, points.arrayZ);
|
||||
return;
|
||||
|
||||
if (scalar == 1.f)
|
||||
if (scalar == 1)
|
||||
for (unsigned i = 0; i < deltasCount; i++)
|
||||
{
|
||||
auto &point = points[i];
|
||||
|
|
Loading…
Add table
Reference in a new issue