mirror of
https://github.com/git/git.git
synced 2025-12-12 20:36:24 +01:00
xdfile_t currently uses chastore_t which is an arena allocator. I
think that xrecord_t used to be a linked list and recs didn't exist
originally. When recs was added, I think xdfile_t.next should have been
removed, but that was overlooked. This dual data structure setup
makes the code somewhat confusing.
Additionally the C type chastore_t isn't FFI friendly, and provides
little to no performance benefit over using realloc to grow an array.
Performance impact of deleting fields from xdfile_t:
Deleting ha is about 5% slower.
Deleting cha is about 5% faster.
Delete ha, but keep cha
time hyperfine --warmup 3 -L exe build_v2.51.0/git,build_delete_ha/git '{exe} log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null'
Benchmark 1: build_v2.51.0/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
Time (mean ± σ): 1.269 s ± 0.017 s [User: 1.135 s, System: 0.128 s]
Range (min … max): 1.249 s … 1.286 s 10 runs
Benchmark 2: build_delete_ha/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
Time (mean ± σ): 1.339 s ± 0.017 s [User: 1.234 s, System: 0.099 s]
Range (min … max): 1.320 s … 1.358 s 10 runs
Summary
build_v2.51.0/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null ran
1.06 ± 0.02 times faster than build_delete_ha/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
Delete cha, but keep ha
time hyperfine --warmup 3 -L exe build_v2.51.0/git,build_delete_chastore/git '{exe} log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null'
Benchmark 1: build_v2.51.0/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
Time (mean ± σ): 1.290 s ± 0.001 s [User: 1.154 s, System: 0.130 s]
Range (min … max): 1.288 s … 1.292 s 10 runs
Benchmark 2: build_delete_chastore/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
Time (mean ± σ): 1.232 s ± 0.017 s [User: 1.105 s, System: 0.121 s]
Range (min … max): 1.205 s … 1.249 s 10 runs
Summary
build_delete_chastore/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null ran
1.05 ± 0.01 times faster than build_v2.51.0/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
Delete ha AND chastore
time hyperfine --warmup 3 -L exe build_v2.51.0/git,build_delete_ha_and_chastore/git '{exe} log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null'
Benchmark 1: build_v2.51.0/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
Time (mean ± σ): 1.291 s ± 0.002 s [User: 1.156 s, System: 0.129 s]
Range (min … max): 1.287 s … 1.295 s 10 runs
Benchmark 2: build_delete_ha_and_chastore/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
Time (mean ± σ): 1.306 s ± 0.001 s [User: 1.195 s, System: 0.105 s]
Range (min … max): 1.305 s … 1.308 s 10 runs
Summary
build_v2.51.0/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null ran
1.01 ± 0.00 times faster than build_delete_ha_and_chastore/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
Best-viewed-with: --color-words
Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
325 lines
7.9 KiB
C
325 lines
7.9 KiB
C
/*
|
|
* LibXDiff by Davide Libenzi ( File Differential Library )
|
|
* Copyright (C) 2003 Davide Libenzi
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, see
|
|
* <http://www.gnu.org/licenses/>.
|
|
*
|
|
* Davide Libenzi <davidel@xmailserver.org>
|
|
*
|
|
*/
|
|
|
|
#include "xinclude.h"
|
|
|
|
|
|
/*
 * Emit record number ri of xdf through ecb, prefixed with the string pre.
 *
 * Returns 0 on success and -1 when xdl_emit_diffrec() reports a negative
 * result.
 */
static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb)
{
	xrecord_t *rec = xdf->recs + ri;

	return (xdl_emit_diffrec(rec->ptr, rec->size, pre, strlen(pre), ecb) < 0) ? -1 : 0;
}
|
|
|
|
/*
 * Return a + b, or LONG_MAX when signed_add_overflows() reports that the
 * addition would overflow.
 */
static long saturating_add(long a, long b)
{
	if (signed_add_overflows(a, b))
		return LONG_MAX;
	return a + b;
}
|
|
|
|
/*
|
|
* Starting at the passed change atom, find the latest change atom to be included
|
|
* inside the differential hunk according to the specified configuration.
|
|
* Also advance xscr if the first changes must be discarded.
|
|
*/
|
|
xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg)
|
|
{
|
|
xdchange_t *xch, *xchp, *lxch;
|
|
long max_common = saturating_add(saturating_add(xecfg->ctxlen,
|
|
xecfg->ctxlen),
|
|
xecfg->interhunkctxlen);
|
|
long max_ignorable = xecfg->ctxlen;
|
|
long ignored = 0; /* number of ignored blank lines */
|
|
|
|
/* remove ignorable changes that are too far before other changes */
|
|
for (xchp = *xscr; xchp && xchp->ignore; xchp = xchp->next) {
|
|
xch = xchp->next;
|
|
|
|
if (xch == NULL ||
|
|
xch->i1 - (xchp->i1 + xchp->chg1) >= max_ignorable)
|
|
*xscr = xch;
|
|
}
|
|
|
|
if (!*xscr)
|
|
return NULL;
|
|
|
|
lxch = *xscr;
|
|
|
|
for (xchp = *xscr, xch = xchp->next; xch; xchp = xch, xch = xch->next) {
|
|
long distance = xch->i1 - (xchp->i1 + xchp->chg1);
|
|
if (distance > max_common)
|
|
break;
|
|
|
|
if (distance < max_ignorable && (!xch->ignore || lxch == xchp)) {
|
|
lxch = xch;
|
|
ignored = 0;
|
|
} else if (distance < max_ignorable && xch->ignore) {
|
|
ignored += xch->chg2;
|
|
} else if (lxch != xchp &&
|
|
xch->i1 + ignored - (lxch->i1 + lxch->chg1) > max_common) {
|
|
break;
|
|
} else if (!xch->ignore) {
|
|
lxch = xch;
|
|
ignored = 0;
|
|
} else {
|
|
ignored += xch->chg2;
|
|
}
|
|
}
|
|
|
|
return lxch;
|
|
}
|
|
|
|
|
|
static long def_ff(const char *rec, long len, char *buf, long sz)
|
|
{
|
|
if (len > 0 &&
|
|
(isalpha((unsigned char)*rec) || /* identifier? */
|
|
*rec == '_' || /* also identifier? */
|
|
*rec == '$')) { /* identifiers from VMS and other esoterico */
|
|
if (len > sz)
|
|
len = sz;
|
|
while (0 < len && isspace((unsigned char)rec[len - 1]))
|
|
len--;
|
|
memcpy(buf, rec, len);
|
|
return len;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
/*
 * Run the function-line matcher on record ri of xdf, writing into buf
 * (capacity sz).  Uses xecfg->find_func, passing xecfg->find_func_priv,
 * when one is configured, and def_ff() otherwise.  Returns the matcher's
 * result.
 */
static long match_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri,
			   char *buf, long sz)
{
	xrecord_t *rec = xdf->recs + ri;

	if (xecfg->find_func)
		return xecfg->find_func(rec->ptr, rec->size, buf, sz,
					xecfg->find_func_priv);

	return def_ff(rec->ptr, rec->size, buf, sz);
}
|
|
|
|
/*
 * Return non-zero when record ri of xdf is recognised as a function line,
 * i.e. when match_func_rec() returns a non-negative length.  Only a
 * one-byte scratch buffer is supplied because the text is not needed.
 */
static int is_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri)
{
	char scratch[1];
	long len = match_func_rec(xdf, xecfg, ri, scratch, sizeof(scratch));

	return len >= 0;
}
|
|
|
|
/*
 * Function-line text to show in a hunk header, as filled in by
 * get_func_line().
 */
struct func_line {
	long len;	/* number of bytes of buf in use */
	char buf[80];	/* function-line text; capacity handed to the matcher */
};
|
|
|
|
/*
 * Search the records of xe->xdf1 for a function line, starting at index
 * start and moving towards limit (exclusive); the direction is backwards
 * when start > limit and forwards otherwise.  The scan also stops as soon
 * as the index leaves [0, xe->xdf1.nrec).
 *
 * When func_line is non-NULL, the matcher writes into func_line->buf and
 * func_line->len is set on a match; when it is NULL a one-byte scratch
 * buffer is used instead.
 *
 * Returns the index of the matching record, or -1 when none was found.
 */
static long get_func_line(xdfenv_t *xe, xdemitconf_t const *xecfg,
			  struct func_line *func_line, long start, long limit)
{
	char scratch[1];
	char *out = scratch;
	long out_size = sizeof(scratch);
	long step = (start > limit) ? -1 : 1;
	long line;

	if (func_line) {
		out = func_line->buf;
		out_size = sizeof(func_line->buf);
	}

	for (line = start; line != limit; line += step) {
		long len;

		if (line < 0 || line >= xe->xdf1.nrec)
			break;

		len = match_func_rec(&xe->xdf1, xecfg, line, out, out_size);
		if (len < 0)
			continue;

		if (func_line)
			func_line->len = len;
		return line;
	}

	return -1;
}
|
|
|
|
/*
 * Return non-zero when every byte of record ri in xdf satisfies
 * XDL_ISSPACE(), which includes the case of a zero-length record.
 */
static int is_empty_rec(xdfile_t *xdf, long ri)
{
	xrecord_t *rec = xdf->recs + ri;
	long pos = 0;

	while (pos < rec->size && XDL_ISSPACE(rec->ptr[pos]))
		pos++;

	return pos == rec->size;
}
|
|
|
|
int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
|
|
xdemitconf_t const *xecfg) {
|
|
long s1, s2, e1, e2, lctx;
|
|
xdchange_t *xch, *xche;
|
|
long funclineprev = -1;
|
|
struct func_line func_line = { 0 };
|
|
|
|
for (xch = xscr; xch; xch = xche->next) {
|
|
xdchange_t *xchp = xch;
|
|
xche = xdl_get_hunk(&xch, xecfg);
|
|
if (!xch)
|
|
break;
|
|
|
|
pre_context_calculation:
|
|
s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0);
|
|
s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0);
|
|
|
|
if (xecfg->flags & XDL_EMIT_FUNCCONTEXT) {
|
|
long fs1, i1 = xch->i1;
|
|
|
|
/* Appended chunk? */
|
|
if (i1 >= xe->xdf1.nrec) {
|
|
long i2 = xch->i2;
|
|
|
|
/*
|
|
* We don't need additional context if
|
|
* a whole function was added.
|
|
*/
|
|
while (i2 < xe->xdf2.nrec) {
|
|
if (is_func_rec(&xe->xdf2, xecfg, i2))
|
|
goto post_context_calculation;
|
|
i2++;
|
|
}
|
|
|
|
/*
|
|
* Otherwise get more context from the
|
|
* pre-image.
|
|
*/
|
|
i1 = xe->xdf1.nrec - 1;
|
|
}
|
|
|
|
fs1 = get_func_line(xe, xecfg, NULL, i1, -1);
|
|
while (fs1 > 0 && !is_empty_rec(&xe->xdf1, fs1 - 1) &&
|
|
!is_func_rec(&xe->xdf1, xecfg, fs1 - 1))
|
|
fs1--;
|
|
if (fs1 < 0)
|
|
fs1 = 0;
|
|
if (fs1 < s1) {
|
|
s2 = XDL_MAX(s2 - (s1 - fs1), 0);
|
|
s1 = fs1;
|
|
|
|
/*
|
|
* Did we extend context upwards into an
|
|
* ignored change?
|
|
*/
|
|
while (xchp != xch &&
|
|
xchp->i1 + xchp->chg1 <= s1 &&
|
|
xchp->i2 + xchp->chg2 <= s2)
|
|
xchp = xchp->next;
|
|
|
|
/* If so, show it after all. */
|
|
if (xchp != xch) {
|
|
xch = xchp;
|
|
goto pre_context_calculation;
|
|
}
|
|
}
|
|
}
|
|
|
|
post_context_calculation:
|
|
lctx = xecfg->ctxlen;
|
|
lctx = XDL_MIN(lctx, xe->xdf1.nrec - (xche->i1 + xche->chg1));
|
|
lctx = XDL_MIN(lctx, xe->xdf2.nrec - (xche->i2 + xche->chg2));
|
|
|
|
e1 = xche->i1 + xche->chg1 + lctx;
|
|
e2 = xche->i2 + xche->chg2 + lctx;
|
|
|
|
if (xecfg->flags & XDL_EMIT_FUNCCONTEXT) {
|
|
long fe1 = get_func_line(xe, xecfg, NULL,
|
|
xche->i1 + xche->chg1,
|
|
xe->xdf1.nrec);
|
|
while (fe1 > 0 && is_empty_rec(&xe->xdf1, fe1 - 1))
|
|
fe1--;
|
|
if (fe1 < 0)
|
|
fe1 = xe->xdf1.nrec;
|
|
if (fe1 > e1) {
|
|
e2 = XDL_MIN(e2 + (fe1 - e1), xe->xdf2.nrec);
|
|
e1 = fe1;
|
|
}
|
|
|
|
/*
|
|
* Overlap with next change? Then include it
|
|
* in the current hunk and start over to find
|
|
* its new end.
|
|
*/
|
|
if (xche->next) {
|
|
long l = XDL_MIN(xche->next->i1,
|
|
xe->xdf1.nrec - 1);
|
|
if (l - xecfg->ctxlen <= e1 ||
|
|
get_func_line(xe, xecfg, NULL, l, e1) < 0) {
|
|
xche = xche->next;
|
|
goto post_context_calculation;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Emit current hunk header.
|
|
*/
|
|
|
|
if (xecfg->flags & XDL_EMIT_FUNCNAMES) {
|
|
get_func_line(xe, xecfg, &func_line,
|
|
s1 - 1, funclineprev);
|
|
funclineprev = s1 - 1;
|
|
}
|
|
if (!(xecfg->flags & XDL_EMIT_NO_HUNK_HDR) &&
|
|
xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2,
|
|
func_line.buf, func_line.len, ecb) < 0)
|
|
return -1;
|
|
|
|
/*
|
|
* Emit pre-context.
|
|
*/
|
|
for (; s2 < xch->i2; s2++)
|
|
if (xdl_emit_record(&xe->xdf2, s2, " ", ecb) < 0)
|
|
return -1;
|
|
|
|
for (s1 = xch->i1, s2 = xch->i2;; xch = xch->next) {
|
|
/*
|
|
* Merge previous with current change atom.
|
|
*/
|
|
for (; s1 < xch->i1 && s2 < xch->i2; s1++, s2++)
|
|
if (xdl_emit_record(&xe->xdf2, s2, " ", ecb) < 0)
|
|
return -1;
|
|
|
|
/*
|
|
* Removes lines from the first file.
|
|
*/
|
|
for (s1 = xch->i1; s1 < xch->i1 + xch->chg1; s1++)
|
|
if (xdl_emit_record(&xe->xdf1, s1, "-", ecb) < 0)
|
|
return -1;
|
|
|
|
/*
|
|
* Adds lines from the second file.
|
|
*/
|
|
for (s2 = xch->i2; s2 < xch->i2 + xch->chg2; s2++)
|
|
if (xdl_emit_record(&xe->xdf2, s2, "+", ecb) < 0)
|
|
return -1;
|
|
|
|
if (xch == xche)
|
|
break;
|
|
s1 = xch->i1 + xch->chg1;
|
|
s2 = xch->i2 + xch->chg2;
|
|
}
|
|
|
|
/*
|
|
* Emit post-context.
|
|
*/
|
|
for (s2 = xche->i2 + xche->chg2; s2 < e2; s2++)
|
|
if (xdl_emit_record(&xe->xdf2, s2, " ", ecb) < 0)
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|