diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index 5455b4690d..c5a892f91e 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -348,7 +348,7 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, kvdf += xe->xdf2.nreff + 1; kvdb += xe->xdf2.nreff + 1; - xenv.mxcost = xdl_bogosqrt(ndiags); + xenv.mxcost = (long)xdl_bogosqrt((uint64_t)ndiags); if (xenv.mxcost < XDL_MAX_COST_MIN) xenv.mxcost = XDL_MAX_COST_MIN; xenv.snake_cnt = XDL_SNAKE_CNT; diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index cd4fc405eb..beef711067 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -197,8 +197,8 @@ void xdl_free_env(xdfenv_t *xe) { } -static bool xdl_clean_mmatch(uint8_t const *action, long i, long s, long e) { - long r, rdis0, rpdis0, rdis1, rpdis1; +static bool xdl_clean_mmatch(uint8_t const *action, ptrdiff_t i, ptrdiff_t s, ptrdiff_t e) { + ptrdiff_t r, rdis0, rpdis0, rdis1, rpdis1; /* * Limits the window that is examined during the similar-lines @@ -268,8 +268,7 @@ static bool xdl_clean_mmatch(uint8_t const *action, long i, long s, long e) { * might be potentially discarded if they appear in a run of discardable. */ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { - long i, nm, mlim; - xrecord_t *recs; + ptrdiff_t i, nm, mlim1, mlim2; xdlclass_t *rcrec; uint8_t *action1 = NULL, *action2 = NULL; bool need_min = !!(cf->flags & XDF_NEED_MINIMAL); @@ -291,20 +290,44 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd /* * Initialize temporary arrays with DISCARD, KEEP, or INVESTIGATE. */ - if ((mlim = xdl_bogosqrt((long)xdf1->nrec)) > XDL_MAX_EQLIMIT) - mlim = XDL_MAX_EQLIMIT; - for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { - rcrec = cf->rcrecs[recs->minimal_perfect_hash]; + if (need_min) { + /* i.e. infinity */ + mlim1 = PTRDIFF_MAX; + } else { + mlim1 = xdl_bogosqrt((uint64_t)xdf1->nrec); + if (mlim1 > XDL_MAX_EQLIMIT) + mlim1 = XDL_MAX_EQLIMIT; + } + for (i = xdf1->dstart; i <= xdf1->dend; i++) { + size_t mph1 = xdf1->recs[i].minimal_perfect_hash; + rcrec = cf->rcrecs[mph1]; nm = rcrec ? rcrec->len2 : 0; - action1[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP; + if (nm == 0) + action1[i] = DISCARD; + else if (nm < mlim1) + action1[i] = KEEP; + else /* nm >= mlim1 */ + action1[i] = INVESTIGATE; } - if ((mlim = xdl_bogosqrt((long)xdf2->nrec)) > XDL_MAX_EQLIMIT) - mlim = XDL_MAX_EQLIMIT; - for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { - rcrec = cf->rcrecs[recs->minimal_perfect_hash]; + if (need_min) { + /* i.e. infinity */ + mlim2 = PTRDIFF_MAX; + } else { + mlim2 = xdl_bogosqrt((uint64_t)xdf2->nrec); + if (mlim2 > XDL_MAX_EQLIMIT) + mlim2 = XDL_MAX_EQLIMIT; + } + for (i = xdf2->dstart; i <= xdf2->dend; i++) { + size_t mph2 = xdf2->recs[i].minimal_perfect_hash; + rcrec = cf->rcrecs[mph2]; nm = rcrec ? rcrec->len1 : 0; - action2[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP; + if (nm == 0) + action2[i] = DISCARD; + else if (nm < mlim2) + action2[i] = KEEP; + else /* nm >= mlim2 */ + action2[i] = INVESTIGATE; } /* @@ -312,27 +335,45 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd * false, or become true. */ xdf1->nreff = 0; - for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; - i <= xdf1->dend; i++, recs++) { - if (action1[i] == KEEP || - (action1[i] == INVESTIGATE && !xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))) { + for (i = xdf1->dstart; i <= xdf1->dend; i++) { + uint8_t action = action1[i]; + + if (action == INVESTIGATE) { + if (!xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend)) + action = KEEP; + else + action = DISCARD; + } + + if (action == KEEP) { xdf1->reference_index[xdf1->nreff++] = i; - /* changed[i] remains false, i.e. keep */ - } else + /* changed[i] remains false */ + } else if (action == DISCARD) { xdf1->changed[i] = true; - /* i.e. discard */ + } else { + BUG("Illegal state for action"); + } } xdf2->nreff = 0; - for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; - i <= xdf2->dend; i++, recs++) { - if (action2[i] == KEEP || - (action2[i] == INVESTIGATE && !xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))) { + for (i = xdf2->dstart; i <= xdf2->dend; i++) { + uint8_t action = action2[i]; + + if (action == INVESTIGATE) { + if (!xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend)) + action = KEEP; + else + action = DISCARD; + } + + if (action == KEEP) { xdf2->reference_index[xdf2->nreff++] = i; - /* changed[i] remains false, i.e. keep */ - } else + /* changed[i] remains false */ + } else if (action == DISCARD) { xdf2->changed[i] = true; - /* i.e. discard */ + } else { + BUG("Illegal state for action"); + } } cleanup: diff --git a/xdiff/xutils.c b/xdiff/xutils.c index 77ee1ad9c8..9a999acdc0 100644 --- a/xdiff/xutils.c +++ b/xdiff/xutils.c @@ -23,8 +23,8 @@ #include "xinclude.h" -long xdl_bogosqrt(long n) { - long i; +uint64_t xdl_bogosqrt(uint64_t n) { + uint64_t i; /* * Classical integer square root approximation using shifts. diff --git a/xdiff/xutils.h b/xdiff/xutils.h index 615b4a9d35..58f9d74cda 100644 --- a/xdiff/xutils.h +++ b/xdiff/xutils.h @@ -25,7 +25,7 @@ -long xdl_bogosqrt(long n); +uint64_t xdl_bogosqrt(uint64_t n); int xdl_emit_diffrec(char const *rec, long size, char const *pre, long psize, xdemitcb_t *ecb); int xdl_cha_init(chastore_t *cha, long isize, long icount);