mirror of
https://github.com/macvim-dev/macvim.git
synced 2026-06-11 15:37:29 +02:00
patch 9.2.0432: blob to string conversion can be improved
Problem: blob to string conversion can be improved
Solution: Compute the output size up front and use a single alloc plus
mch_memmove() (Yasuhiro Matsumoto).
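Replace the per-byte ga_append()/vim_snprintf() loops with a single
up-front allocation in three hot paths: blob2string() (used by string())
now writes the hex digits directly through a lookup table,
string_from_blob() finds the end of the line with memchr() and copies it
with mch_memmove(), and the UTF-16/UCS path of f_blob2str() copies the
whole blob with mch_memmove() after one ga_grow(). For a 16 MiB blob,
string(blob) is ~28x faster and blob2str() is ~2x faster.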
Benchmark (16 MiB blob, 5 iterations; times are total seconds for all iterations):
| Call | Before (s) | After (s) | Speedup |
|---|---:|---:|---:|
| `string(blob)` | 6.422 | 0.225 | 28.5x |
| `blob2str(b)` | 0.504 | 0.265 | 1.90x |
| `blob2str(b, {encoding: 'utf-8'})` | 0.507 | 0.282 | 1.80x |
| `blob2str(b, {encoding: 'utf-16le'})` | 0.407 | 0.202 | 2.01x |
closes: #20112
Signed-off-by: Yasuhiro Matsumoto <mattn.jp@gmail.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>
This commit is contained in:
committed by
Christian Brabandt
parent
e1e92fea92
commit
3bd25c63b4
+30
-16
@@ -263,33 +263,47 @@ write_blob(FILE *fd, blob_T *blob)
|
||||
* Convert a blob to a readable form: "0z00112233.44556677.8899"
|
||||
*/
|
||||
char_u *
|
||||
blob2string(blob_T *blob, char_u **tofree, char_u *numbuf)
|
||||
blob2string(blob_T *blob, char_u **tofree, char_u *numbuf UNUSED)
|
||||
{
|
||||
int i;
|
||||
garray_T ga;
|
||||
static const char hex_chars[] = "0123456789ABCDEF";
|
||||
int blen;
|
||||
size_t total;
|
||||
char_u *buf;
|
||||
char_u *p;
|
||||
char_u *src;
|
||||
|
||||
if (blob == NULL)
|
||||
if (blob == NULL || (blen = blob_len(blob)) == 0)
|
||||
{
|
||||
*tofree = NULL;
|
||||
return (char_u *)"0z";
|
||||
}
|
||||
|
||||
// Store bytes in the growarray.
|
||||
ga_init2(&ga, 1, 4000);
|
||||
GA_CONCAT_LITERAL(&ga, "0z");
|
||||
for (i = 0; i < blob_len(blob); i++)
|
||||
// 2 ("0z") + 2 hex per byte + one '.' every 4 bytes + NUL terminator.
|
||||
total = 2 + (size_t)blen * 2 + (size_t)((blen - 1) / 4) + 1;
|
||||
buf = alloc(total);
|
||||
if (buf == NULL)
|
||||
{
|
||||
size_t numbuflen;
|
||||
*tofree = NULL;
|
||||
return (char_u *)"0z";
|
||||
}
|
||||
|
||||
p = buf;
|
||||
*p++ = '0';
|
||||
*p++ = 'z';
|
||||
src = (char_u *)blob->bv_ga.ga_data;
|
||||
for (int i = 0; i < blen; i++)
|
||||
{
|
||||
unsigned b;
|
||||
|
||||
if (i > 0 && (i & 3) == 0)
|
||||
GA_CONCAT_LITERAL(&ga, ".");
|
||||
numbuflen = vim_snprintf_safelen((char *)numbuf, NUMBUFLEN,
|
||||
"%02X", blob_get(blob, i));
|
||||
ga_concat_len(&ga, numbuf, numbuflen);
|
||||
*p++ = '.';
|
||||
b = src[i];
|
||||
*p++ = hex_chars[b >> 4];
|
||||
*p++ = hex_chars[b & 0xf];
|
||||
}
|
||||
ga_append(&ga, NUL); // append a NUL at the end
|
||||
*tofree = ga.ga_data;
|
||||
return *tofree;
|
||||
*p = NUL;
|
||||
*tofree = buf;
|
||||
return buf;
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
+38
-36
@@ -1262,43 +1262,42 @@ blob_from_string(char_u *str, blob_T *blob)
|
||||
static int
|
||||
string_from_blob(blob_T *blob, long *start_idx, string_T *ret)
|
||||
{
|
||||
garray_T str_ga;
|
||||
long blen;
|
||||
int idx;
|
||||
long blen = blob_len(blob);
|
||||
long start = *start_idx;
|
||||
char_u *src;
|
||||
char_u *nl;
|
||||
long line_len;
|
||||
|
||||
ga_init2(&str_ga, sizeof(char), 80);
|
||||
|
||||
blen = blob_len(blob);
|
||||
|
||||
for (idx = *start_idx; idx < blen; idx++)
|
||||
{
|
||||
char_u byte = (char_u)blob_get(blob, idx);
|
||||
if (byte == NL)
|
||||
{
|
||||
idx++;
|
||||
break;
|
||||
}
|
||||
|
||||
if (byte == NUL)
|
||||
byte = NL;
|
||||
|
||||
ga_append(&str_ga, byte);
|
||||
}
|
||||
|
||||
if (str_ga.ga_data != NULL)
|
||||
{
|
||||
ret->string = vim_strnsave(str_ga.ga_data, str_ga.ga_len);
|
||||
ret->length = str_ga.ga_len;
|
||||
}
|
||||
else
|
||||
if (start >= blen)
|
||||
{
|
||||
ret->string = vim_strsave((char_u *)"");
|
||||
ret->length = 0;
|
||||
*start_idx = blen;
|
||||
return (ret->string == NULL) ? FAIL : OK;
|
||||
}
|
||||
*start_idx = idx;
|
||||
|
||||
ga_clear(&str_ga);
|
||||
return (ret->string == NULL) ? FAIL : OK;
|
||||
src = (char_u *)blob->bv_ga.ga_data + start;
|
||||
nl = (char_u *)memchr(src, NL, (size_t)(blen - start));
|
||||
line_len = (nl == NULL) ? (blen - start) : (long)(nl - src);
|
||||
|
||||
ret->string = alloc(line_len + 1);
|
||||
if (ret->string == NULL)
|
||||
{
|
||||
ret->length = 0;
|
||||
return FAIL;
|
||||
}
|
||||
if (line_len > 0)
|
||||
mch_memmove(ret->string, src, (size_t)line_len);
|
||||
ret->string[line_len] = NUL;
|
||||
ret->length = (size_t)line_len;
|
||||
|
||||
// A NUL byte in the blob represents a NL in the resulting string.
|
||||
for (long i = 0; i < line_len; i++)
|
||||
if (ret->string[i] == NUL)
|
||||
ret->string[i] = NL;
|
||||
|
||||
*start_idx = start + line_len + (nl != NULL ? 1 : 0);
|
||||
return OK;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1486,11 +1485,14 @@ f_blob2str(typval_T *argvars, typval_T *rettv)
|
||||
garray_T blob_ga;
|
||||
int nul_size = (from_prop & ENC_4BYTE) ? 4 : 2;
|
||||
ga_init2(&blob_ga, 1, blen + nul_size);
|
||||
for (long i = 0; i < blen; i++)
|
||||
ga_append(&blob_ga, (int)(unsigned char)blob_get(blob, i));
|
||||
// Add NUL terminator (2 bytes for UTF-16/UCS-2, 4 bytes for UTF-32/UCS-4)
|
||||
for (int i = 0; i < nul_size; i++)
|
||||
ga_append(&blob_ga, NUL);
|
||||
if (ga_grow(&blob_ga, blen + nul_size) == OK)
|
||||
{
|
||||
if (blen > 0)
|
||||
mch_memmove(blob_ga.ga_data, blob->bv_ga.ga_data, (size_t)blen);
|
||||
// NUL terminator (2 bytes for UTF-16/UCS-2, 4 bytes for UTF-32/UCS-4)
|
||||
vim_memset((char_u *)blob_ga.ga_data + blen, 0, (size_t)nul_size);
|
||||
blob_ga.ga_len = blen + nul_size;
|
||||
}
|
||||
|
||||
// Convert the entire blob at once
|
||||
vimconv_T vimconv;
|
||||
|
||||
@@ -729,6 +729,8 @@ static char *(features[]) =
|
||||
|
||||
static int included_patches[] =
|
||||
{ /* Add new patch number below this line */
|
||||
/**/
|
||||
432,
|
||||
/**/
|
||||
431,
|
||||
/**/
|
||||
|
||||
Reference in New Issue
Block a user