From b7b8449e5ae7c6a0bb3e87c82d9dd6fd382baf62 Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Wed, 29 Apr 2026 22:08:10 +0000 Subject: [PATCH 1/6] xdiff/xdl_cleanup_records: delete local recs pointer Simplify the first 2 for loops by directly indexing the xdfile.recs. recs is unused in the last 2 for loops, remove it. Best viewed with --color-words. Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index cd4fc405eb..d6e1901d2d 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -269,7 +269,6 @@ static bool xdl_clean_mmatch(uint8_t const *action, long i, long s, long e) { */ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { long i, nm, mlim; - xrecord_t *recs; xdlclass_t *rcrec; uint8_t *action1 = NULL, *action2 = NULL; bool need_min = !!(cf->flags & XDF_NEED_MINIMAL); @@ -293,16 +292,18 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd */ if ((mlim = xdl_bogosqrt((long)xdf1->nrec)) > XDL_MAX_EQLIMIT) mlim = XDL_MAX_EQLIMIT; - for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { - rcrec = cf->rcrecs[recs->minimal_perfect_hash]; + for (i = xdf1->dstart; i <= xdf1->dend; i++) { + size_t mph1 = xdf1->recs[i].minimal_perfect_hash; + rcrec = cf->rcrecs[mph1]; nm = rcrec ? rcrec->len2 : 0; action1[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP; } if ((mlim = xdl_bogosqrt((long)xdf2->nrec)) > XDL_MAX_EQLIMIT) mlim = XDL_MAX_EQLIMIT; - for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { - rcrec = cf->rcrecs[recs->minimal_perfect_hash]; + for (i = xdf2->dstart; i <= xdf2->dend; i++) { + size_t mph2 = xdf2->recs[i].minimal_perfect_hash; + rcrec = cf->rcrecs[mph2]; nm = rcrec ? rcrec->len1 : 0; action2[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP; } @@ -312,8 +313,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd * false, or become true. */ xdf1->nreff = 0; - for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; - i <= xdf1->dend; i++, recs++) { + for (i = xdf1->dstart; i <= xdf1->dend; i++) { if (action1[i] == KEEP || (action1[i] == INVESTIGATE && !xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))) { xdf1->reference_index[xdf1->nreff++] = i; @@ -324,8 +324,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd } xdf2->nreff = 0; - for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; - i <= xdf2->dend; i++, recs++) { + for (i = xdf2->dstart; i <= xdf2->dend; i++) { if (action2[i] == KEEP || (action2[i] == INVESTIGATE && !xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))) { xdf2->reference_index[xdf2->nreff++] = i; From c37f8cda0525d7041d9a22e477117a2962f9f675 Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Wed, 29 Apr 2026 22:08:11 +0000 Subject: [PATCH 2/6] xdiff: use unambiguous types in xdl_bogo_sqrt() There is no real square root for a negative number and size_t may not be large enough for certain applications, replace long with uint64_t. Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff/xdiffi.c | 2 +- xdiff/xprepare.c | 4 ++-- xdiff/xutils.c | 4 ++-- xdiff/xutils.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index 4376f943db..88708c12a3 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -348,7 +348,7 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, kvdf += xe->xdf2.nreff + 1; kvdb += xe->xdf2.nreff + 1; - xenv.mxcost = xdl_bogosqrt(ndiags); + xenv.mxcost = (long)xdl_bogosqrt((uint64_t)ndiags); if (xenv.mxcost < XDL_MAX_COST_MIN) xenv.mxcost = XDL_MAX_COST_MIN; xenv.snake_cnt = XDL_SNAKE_CNT; diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index d6e1901d2d..48fb5ce6fe 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -290,7 +290,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd /* * Initialize temporary arrays with DISCARD, KEEP, or INVESTIGATE. */ - if ((mlim = xdl_bogosqrt((long)xdf1->nrec)) > XDL_MAX_EQLIMIT) + if ((mlim = (long)xdl_bogosqrt((uint64_t)xdf1->nrec)) > XDL_MAX_EQLIMIT) mlim = XDL_MAX_EQLIMIT; for (i = xdf1->dstart; i <= xdf1->dend; i++) { size_t mph1 = xdf1->recs[i].minimal_perfect_hash; @@ -299,7 +299,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd action1[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP; } - if ((mlim = xdl_bogosqrt((long)xdf2->nrec)) > XDL_MAX_EQLIMIT) + if ((mlim = (long)xdl_bogosqrt((uint64_t)xdf2->nrec)) > XDL_MAX_EQLIMIT) mlim = XDL_MAX_EQLIMIT; for (i = xdf2->dstart; i <= xdf2->dend; i++) { size_t mph2 = xdf2->recs[i].minimal_perfect_hash; diff --git a/xdiff/xutils.c b/xdiff/xutils.c index 77ee1ad9c8..9a999acdc0 100644 --- a/xdiff/xutils.c +++ b/xdiff/xutils.c @@ -23,8 +23,8 @@ #include "xinclude.h" -long xdl_bogosqrt(long n) { - long i; +uint64_t xdl_bogosqrt(uint64_t n) { + uint64_t i; /* * Classical integer square root approximation using shifts. diff --git a/xdiff/xutils.h b/xdiff/xutils.h index 615b4a9d35..58f9d74cda 100644 --- a/xdiff/xutils.h +++ b/xdiff/xutils.h @@ -25,7 +25,7 @@ -long xdl_bogosqrt(long n); +uint64_t xdl_bogosqrt(uint64_t n); int xdl_emit_diffrec(char const *rec, long size, char const *pre, long psize, xdemitcb_t *ecb); int xdl_cha_init(chastore_t *cha, long isize, long icount); From 216802587ef8505568e533788d06015c0bfc6fef Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Wed, 29 Apr 2026 22:08:12 +0000 Subject: [PATCH 3/6] xdiff/xdl_cleanup_records: use unambiguous types Change the parameters of xdl_clean_mmatch() and the local variables i, nm, mlim in xdl_cleanup_records() to use unambiguous types. Best viewed with --color-words. Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 48fb5ce6fe..386668a92d 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -197,8 +197,8 @@ void xdl_free_env(xdfenv_t *xe) { } -static bool xdl_clean_mmatch(uint8_t const *action, long i, long s, long e) { - long r, rdis0, rpdis0, rdis1, rpdis1; +static bool xdl_clean_mmatch(uint8_t const *action, ptrdiff_t i, ptrdiff_t s, ptrdiff_t e) { + ptrdiff_t r, rdis0, rpdis0, rdis1, rpdis1; /* * Limits the window that is examined during the similar-lines @@ -268,7 +268,7 @@ static bool xdl_clean_mmatch(uint8_t const *action, long i, long s, long e) { * might be potentially discarded if they appear in a run of discardable. */ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { - long i, nm, mlim; + ptrdiff_t i, nm, mlim; xdlclass_t *rcrec; uint8_t *action1 = NULL, *action2 = NULL; bool need_min = !!(cf->flags & XDF_NEED_MINIMAL); From f99a023df2c3024ccfedc4e1259d5b0732154461 Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Wed, 29 Apr 2026 22:08:13 +0000 Subject: [PATCH 4/6] xdiff/xdl_cleanup_records: make limits more clear Make the handling of per-file limits and the minimal-case clearer. * Use explicit per-file limit variables (mlim1, mlim2) and initialize them. * The additional condition `!need_min` is redudant now, remove it. Best viewed with --color-words. Helped-by: Phillip Wood Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 386668a92d..7141dbc058 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -268,7 +268,7 @@ static bool xdl_clean_mmatch(uint8_t const *action, ptrdiff_t i, ptrdiff_t s, pt * might be potentially discarded if they appear in a run of discardable. */ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { - ptrdiff_t i, nm, mlim; + ptrdiff_t i, nm, mlim1, mlim2; xdlclass_t *rcrec; uint8_t *action1 = NULL, *action2 = NULL; bool need_min = !!(cf->flags & XDF_NEED_MINIMAL); @@ -290,22 +290,34 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd /* * Initialize temporary arrays with DISCARD, KEEP, or INVESTIGATE. */ - if ((mlim = (long)xdl_bogosqrt((uint64_t)xdf1->nrec)) > XDL_MAX_EQLIMIT) - mlim = XDL_MAX_EQLIMIT; + if (need_min) { + /* i.e. infinity */ + mlim1 = PTRDIFF_MAX; + } else { + mlim1 = xdl_bogosqrt((uint64_t)xdf1->nrec); + if (mlim1 > XDL_MAX_EQLIMIT) + mlim1 = XDL_MAX_EQLIMIT; + } for (i = xdf1->dstart; i <= xdf1->dend; i++) { size_t mph1 = xdf1->recs[i].minimal_perfect_hash; rcrec = cf->rcrecs[mph1]; nm = rcrec ? rcrec->len2 : 0; - action1[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP; + action1[i] = (nm == 0) ? DISCARD: nm >= mlim1 ? INVESTIGATE: KEEP; } - if ((mlim = (long)xdl_bogosqrt((uint64_t)xdf2->nrec)) > XDL_MAX_EQLIMIT) - mlim = XDL_MAX_EQLIMIT; + if (need_min) { + /* i.e. infinity */ + mlim2 = PTRDIFF_MAX; + } else { + mlim2 = xdl_bogosqrt((uint64_t)xdf2->nrec); + if (mlim2 > XDL_MAX_EQLIMIT) + mlim2 = XDL_MAX_EQLIMIT; + } for (i = xdf2->dstart; i <= xdf2->dend; i++) { size_t mph2 = xdf2->recs[i].minimal_perfect_hash; rcrec = cf->rcrecs[mph2]; nm = rcrec ? rcrec->len1 : 0; - action2[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP; + action2[i] = (nm == 0) ? DISCARD: nm >= mlim2 ? INVESTIGATE: KEEP; } /* From c355f69cda6d2df4972131b77dc0702d82b29d8b Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Wed, 29 Apr 2026 22:08:14 +0000 Subject: [PATCH 5/6] xdiff/xdl_cleanup_records: make setting action easier to follow Rewrite nested ternaries with a clear if/else ladder for action1/action2 to improve readability while preserving behavior. Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 7141dbc058..ddd0577676 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -302,7 +302,12 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd size_t mph1 = xdf1->recs[i].minimal_perfect_hash; rcrec = cf->rcrecs[mph1]; nm = rcrec ? rcrec->len2 : 0; - action1[i] = (nm == 0) ? DISCARD: nm >= mlim1 ? INVESTIGATE: KEEP; + if (nm == 0) + action1[i] = DISCARD; + else if (nm < mlim1) + action1[i] = KEEP; + else /* nm >= mlim1 */ + action1[i] = INVESTIGATE; } if (need_min) { @@ -317,7 +322,12 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd size_t mph2 = xdf2->recs[i].minimal_perfect_hash; rcrec = cf->rcrecs[mph2]; nm = rcrec ? rcrec->len1 : 0; - action2[i] = (nm == 0) ? DISCARD: nm >= mlim2 ? INVESTIGATE: KEEP; + if (nm == 0) + action2[i] = DISCARD; + else if (nm < mlim2) + action2[i] = KEEP; + else /* nm >= mlim2 */ + action2[i] = INVESTIGATE; } /* From f87808b7014cf06db4a7e19b193cf9aa7e965ebc Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Wed, 29 Apr 2026 22:08:15 +0000 Subject: [PATCH 6/6] xdiff/xdl_cleanup_records: make execution of action easier to follow Helped-by: Phillip Wood Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index ddd0577676..beef711067 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -336,24 +336,44 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd */ xdf1->nreff = 0; for (i = xdf1->dstart; i <= xdf1->dend; i++) { - if (action1[i] == KEEP || - (action1[i] == INVESTIGATE && !xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))) { + uint8_t action = action1[i]; + + if (action == INVESTIGATE) { + if (!xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend)) + action = KEEP; + else + action = DISCARD; + } + + if (action == KEEP) { xdf1->reference_index[xdf1->nreff++] = i; - /* changed[i] remains false, i.e. keep */ - } else + /* changed[i] remains false */ + } else if (action == DISCARD) { xdf1->changed[i] = true; - /* i.e. discard */ + } else { + BUG("Illegal state for action"); + } } xdf2->nreff = 0; for (i = xdf2->dstart; i <= xdf2->dend; i++) { - if (action2[i] == KEEP || - (action2[i] == INVESTIGATE && !xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))) { + uint8_t action = action2[i]; + + if (action == INVESTIGATE) { + if (!xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend)) + action = KEEP; + else + action = DISCARD; + } + + if (action == KEEP) { xdf2->reference_index[xdf2->nreff++] = i; - /* changed[i] remains false, i.e. keep */ - } else + /* changed[i] remains false */ + } else if (action == DISCARD) { xdf2->changed[i] = true; - /* i.e. discard */ + } else { + BUG("Illegal state for action"); + } } cleanup: