patch 9.2.0432: blob to string conversion can be improved

Problem:  Blob to string conversion is slow for large blobs, because the
          result is built up one byte at a time.
Solution: Compute the output size up front and use a single alloc plus
          mch_memmove() (Yasuhiro Matsumoto).

Replace per-byte ga_append/snprintf loops with a single up-front
allocation in three hot paths: blob2string() (used by string()) now
emits hex digits from a lookup table, while string_from_blob() and the
UTF-16/UTF-32 (UCS-2/UCS-4) path of f_blob2str() copy the bytes with
mch_memmove(). For a 16 MiB blob, string(blob) is ~28x faster and
blob2str() is ~2x faster.

Benchmark (16 MiB blob, 5 iterations, total seconds):

| Expression | Before (s) | After (s) | Speedup |
|---|---:|---:|---:|
| `string(blob)` | 6.422 | 0.225 | 28.5x |
| `blob2str(b)` | 0.504 | 0.265 | 1.90x |
| `blob2str(b, {encoding: 'utf-8'})` | 0.507 | 0.282 | 1.80x |
| `blob2str(b, {encoding: 'utf-16le'})` | 0.407 | 0.202 | 2.01x |

closes: #20112

Signed-off-by: Yasuhiro Matsumoto <mattn.jp@gmail.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>
This commit is contained in:
Yasuhiro Matsumoto
2026-05-02 15:49:17 +00:00
committed by Christian Brabandt
parent e1e92fea92
commit 3bd25c63b4
3 changed files with 70 additions and 52 deletions
+30 -16
View File
@@ -263,33 +263,47 @@ write_blob(FILE *fd, blob_T *blob)
* Convert a blob to a readable form: "0z00112233.44556677.8899"
*/
char_u *
blob2string(blob_T *blob, char_u **tofree, char_u *numbuf)
blob2string(blob_T *blob, char_u **tofree, char_u *numbuf UNUSED)
{
int i;
garray_T ga;
static const char hex_chars[] = "0123456789ABCDEF";
int blen;
size_t total;
char_u *buf;
char_u *p;
char_u *src;
if (blob == NULL)
if (blob == NULL || (blen = blob_len(blob)) == 0)
{
*tofree = NULL;
return (char_u *)"0z";
}
// Store bytes in the growarray.
ga_init2(&ga, 1, 4000);
GA_CONCAT_LITERAL(&ga, "0z");
for (i = 0; i < blob_len(blob); i++)
// 2 ("0z") + 2 hex per byte + one '.' every 4 bytes + NUL terminator.
total = 2 + (size_t)blen * 2 + (size_t)((blen - 1) / 4) + 1;
buf = alloc(total);
if (buf == NULL)
{
size_t numbuflen;
*tofree = NULL;
return (char_u *)"0z";
}
p = buf;
*p++ = '0';
*p++ = 'z';
src = (char_u *)blob->bv_ga.ga_data;
for (int i = 0; i < blen; i++)
{
unsigned b;
if (i > 0 && (i & 3) == 0)
GA_CONCAT_LITERAL(&ga, ".");
numbuflen = vim_snprintf_safelen((char *)numbuf, NUMBUFLEN,
"%02X", blob_get(blob, i));
ga_concat_len(&ga, numbuf, numbuflen);
*p++ = '.';
b = src[i];
*p++ = hex_chars[b >> 4];
*p++ = hex_chars[b & 0xf];
}
ga_append(&ga, NUL); // append a NUL at the end
*tofree = ga.ga_data;
return *tofree;
*p = NUL;
*tofree = buf;
return buf;
}
/*
+38 -36
View File
@@ -1262,43 +1262,42 @@ blob_from_string(char_u *str, blob_T *blob)
static int
string_from_blob(blob_T *blob, long *start_idx, string_T *ret)
{
garray_T str_ga;
long blen;
int idx;
long blen = blob_len(blob);
long start = *start_idx;
char_u *src;
char_u *nl;
long line_len;
ga_init2(&str_ga, sizeof(char), 80);
blen = blob_len(blob);
for (idx = *start_idx; idx < blen; idx++)
{
char_u byte = (char_u)blob_get(blob, idx);
if (byte == NL)
{
idx++;
break;
}
if (byte == NUL)
byte = NL;
ga_append(&str_ga, byte);
}
if (str_ga.ga_data != NULL)
{
ret->string = vim_strnsave(str_ga.ga_data, str_ga.ga_len);
ret->length = str_ga.ga_len;
}
else
if (start >= blen)
{
ret->string = vim_strsave((char_u *)"");
ret->length = 0;
*start_idx = blen;
return (ret->string == NULL) ? FAIL : OK;
}
*start_idx = idx;
ga_clear(&str_ga);
return (ret->string == NULL) ? FAIL : OK;
src = (char_u *)blob->bv_ga.ga_data + start;
nl = (char_u *)memchr(src, NL, (size_t)(blen - start));
line_len = (nl == NULL) ? (blen - start) : (long)(nl - src);
ret->string = alloc(line_len + 1);
if (ret->string == NULL)
{
ret->length = 0;
return FAIL;
}
if (line_len > 0)
mch_memmove(ret->string, src, (size_t)line_len);
ret->string[line_len] = NUL;
ret->length = (size_t)line_len;
// A NUL byte in the blob represents a NL in the resulting string.
for (long i = 0; i < line_len; i++)
if (ret->string[i] == NUL)
ret->string[i] = NL;
*start_idx = start + line_len + (nl != NULL ? 1 : 0);
return OK;
}
/*
@@ -1486,11 +1485,14 @@ f_blob2str(typval_T *argvars, typval_T *rettv)
garray_T blob_ga;
int nul_size = (from_prop & ENC_4BYTE) ? 4 : 2;
ga_init2(&blob_ga, 1, blen + nul_size);
for (long i = 0; i < blen; i++)
ga_append(&blob_ga, (int)(unsigned char)blob_get(blob, i));
// Add NUL terminator (2 bytes for UTF-16/UCS-2, 4 bytes for UTF-32/UCS-4)
for (int i = 0; i < nul_size; i++)
ga_append(&blob_ga, NUL);
if (ga_grow(&blob_ga, blen + nul_size) == OK)
{
if (blen > 0)
mch_memmove(blob_ga.ga_data, blob->bv_ga.ga_data, (size_t)blen);
// NUL terminator (2 bytes for UTF-16/UCS-2, 4 bytes for UTF-32/UCS-4)
vim_memset((char_u *)blob_ga.ga_data + blen, 0, (size_t)nul_size);
blob_ga.ga_len = blen + nul_size;
}
// Convert the entire blob at once
vimconv_T vimconv;
+2
View File
@@ -729,6 +729,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
432,
/**/
431,
/**/