[VARC] Speed up MultiVarData::get_delta

This commit is contained in:
Behdad Esfahbod 2025-02-23 21:34:54 -07:00
parent e41dc20c1c
commit 1ba907209f
2 changed files with 123 additions and 11 deletions

View file

@ -1854,6 +1854,121 @@ struct TupleValues
return it;
}
};
struct fetcher_t
{
fetcher_t (const unsigned char *p_, unsigned len_)
: p (p_), end (p_ + len_) {}
private:
const unsigned char *p;
const unsigned char * const end;
signed run_count = 0;
unsigned width = 0;
bool ensure_run ()
{
if (likely (run_count > 0)) return true;
if (unlikely (p >= end))
{
run_count = 0;
return false;
}
unsigned control = *p++;
run_count = (control & VALUE_RUN_COUNT_MASK) + 1;
width = control & VALUES_SIZE_MASK;
switch (width)
{
case VALUES_ARE_ZEROS: width = 0; break;
case VALUES_ARE_BYTES: width = HBINT8::static_size; break;
case VALUES_ARE_WORDS: width = HBINT16::static_size; break;
case VALUES_ARE_LONGS: width = HBINT32::static_size; break;
default: assert (false);
}
if (unlikely (p + run_count * width > end))
{
run_count = 0;
return false;
}
return true;
}
void skip (unsigned n)
{
while (n)
{
if (unlikely (!ensure_run ()))
return;
unsigned i = hb_min (n, (unsigned) run_count);
run_count -= i;
n -= i;
p += i * width;
}
}
template <bool scaled>
void _add_to (hb_array_t<float> out, float scale = 1.0f)
{
unsigned n = out.length;
for (unsigned i = 0; i < n;)
{
if (unlikely (!ensure_run ()))
break;
unsigned count = hb_min (n - i, (unsigned) run_count);
switch (width)
{
case 1:
{
const auto *pp = (const HBINT8 *) p;
for (unsigned j = 0; j < count; j++)
out.arrayZ[i + j] += scaled ? *pp++ * scale : *pp++;
}
break;
case 2:
{
const auto *pp = (const HBINT16 *) p;
for (unsigned j = 0; j < count; j++)
out.arrayZ[i + j] += scaled ? *pp++ * scale : *pp++;
}
break;
case 4:
{
const auto *pp = (const HBINT32 *) p;
for (unsigned j = 0; j < count; j++)
out.arrayZ[i + j] += scaled ? *pp++ * scale : *pp++;
}
break;
}
p += count * width;
run_count -= count;
i += count;
}
}
public:
void add_to (hb_array_t<float> out, float scale = 1.0f)
{
unsigned n = out.length;
if (scale == 0.0f)
{
skip (n);
return;
}
#ifndef HB_OPTIMIZE_SIZE
if (scale == 1.0f)
_add_to<false> (out);
else
#endif
_add_to<true> (out, scale);
}
};
};
struct TupleList : CFF2Index
@ -1863,6 +1978,12 @@ struct TupleList : CFF2Index
auto bytes = CFF2Index::operator [] (i);
return TupleValues::iter_t (bytes.arrayZ, bytes.length);
}
TupleValues::fetcher_t fetcher (unsigned i) const
{
auto bytes = CFF2Index::operator [] (i);
return TupleValues::fetcher_t (bytes.arrayZ, bytes.length);
}
};

View file

@ -3147,23 +3147,14 @@ struct MultiVarData
{
auto &deltaSets = StructAfter<decltype (deltaSetsX)> (regionIndices);
auto values_iter = deltaSets[inner];
auto values_iter = deltaSets.fetcher (inner);
unsigned regionCount = regionIndices.len;
unsigned count = out.length;
for (unsigned regionIndex = 0; regionIndex < regionCount; regionIndex++)
{
float scalar = regions.evaluate (regionIndices.arrayZ[regionIndex],
coords, coord_count,
cache);
if (scalar == 1.f)
for (unsigned i = 0; i < count; i++)
out.arrayZ[i] += *values_iter++;
else if (scalar)
for (unsigned i = 0; i < count; i++)
out.arrayZ[i] += *values_iter++ * scalar;
else
values_iter += count;
values_iter.add_to (out, scalar);
}
}