Merge branch 'xdiff-new' into xdiff-upstream-v2.52.0

2025-12-18 12:01:36 +01:00 · 2025-11-17 16:26:15 -08:00
parent c4f8b15dd9 d741f0e230
commit d353c6f2c8
11 changed files with 409 additions and 401 deletions
--- a/src/xdiff/xdiff.h
+++ b/src/xdiff/xdiff.h
@@ -87,7 +87,7 @@ typedef struct s_xpparam {
 	size_t ignore_regex_nr;
 #endif

-	/* See Documentation/diff-options.txt. */
+	/* See Documentation/diff-options.adoc. */
 	char **anchors;
 	size_t anchors_nr;
 } xpparam_t;
--- a/src/xdiff/xdiffi.c
+++ b/src/xdiff/xdiffi.c
@@ -22,6 +22,11 @@

 #include "xinclude.h"

+static unsigned long get_hash(xdfile_t *xdf, long index)
+{
+	return xdf->recs[xdf->rindex[index]].ha;
+}
+
 #define XDL_MAX_COST_MIN 256
 #define XDL_HEUR_MIN_COST 256
 #define XDL_LINE_MAX (long)((1UL << (CHAR_BIT * sizeof(long) - 1)) - 1)
@@ -42,8 +47,8 @@ typedef struct s_xdpsplit {
 * using this algorithm, so a little bit of heuristic is needed to cut the
 * search and to return a suboptimal point.
 */
-static long xdl_split(unsigned long const *ha1, long off1, long lim1,
-		      unsigned long const *ha2, long off2, long lim2,
+static long xdl_split(xdfile_t *xdf1, long off1, long lim1,
+		      xdfile_t *xdf2, long off2, long lim2,
 		      long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,
 		      xdalgoenv_t *xenv) {
 	long dmin = off1 - lim2, dmax = lim1 - off2;
@@ -87,7 +92,7 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
 				i1 = kvdf[d + 1];
 			prev1 = i1;
 			i2 = i1 - d;
-			for (; i1 < lim1 && i2 < lim2 && ha1[i1] == ha2[i2]; i1++, i2++);
+			for (; i1 < lim1 && i2 < lim2 && get_hash(xdf1, i1) == get_hash(xdf2, i2); i1++, i2++);
 			if (i1 - prev1 > xenv->snake_cnt)
 				got_snake = 1;
 			kvdf[d] = i1;
@@ -124,7 +129,7 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
 				i1 = kvdb[d + 1] - 1;
 			prev1 = i1;
 			i2 = i1 - d;
-			for (; i1 > off1 && i2 > off2 && ha1[i1 - 1] == ha2[i2 - 1]; i1--, i2--);
+			for (; i1 > off1 && i2 > off2 && get_hash(xdf1, i1 - 1) == get_hash(xdf2, i2 - 1); i1--, i2--);
 			if (prev1 - i1 > xenv->snake_cnt)
 				got_snake = 1;
 			kvdb[d] = i1;
@@ -159,7 +164,7 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
 				if (v > XDL_K_HEUR * ec && v > best &&
 				    off1 + xenv->snake_cnt <= i1 && i1 < lim1 &&
 				    off2 + xenv->snake_cnt <= i2 && i2 < lim2) {
-					for (k = 1; ha1[i1 - k] == ha2[i2 - k]; k++)
+					for (k = 1; get_hash(xdf1, i1 - k) == get_hash(xdf2, i2 - k); k++)
 						if (k == xenv->snake_cnt) {
 							best = v;
 							spl->i1 = i1;
@@ -183,7 +188,7 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
 				if (v > XDL_K_HEUR * ec && v > best &&
 				    off1 < i1 && i1 <= lim1 - xenv->snake_cnt &&
 				    off2 < i2 && i2 <= lim2 - xenv->snake_cnt) {
-					for (k = 0; ha1[i1 + k] == ha2[i2 + k]; k++)
+					for (k = 0; get_hash(xdf1, i1 + k) == get_hash(xdf2, i2 + k); k++)
 						if (k == xenv->snake_cnt - 1) {
 							best = v;
 							spl->i1 = i1;
@@ -211,8 +216,10 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
 			for (d = fmax; d >= fmin; d -= 2) {
 				i1 = XDL_MIN(kvdf[d], lim1);
 				i2 = i1 - d;
-				if (lim2 < i2)
-					i1 = lim2 + d, i2 = lim2;
+				if (lim2 < i2) {
+					i1 = lim2 + d;
+					i2 = lim2;
+				}
 				if (fbest < i1 + i2) {
 					fbest = i1 + i2;
 					fbest1 = i1;
@@ -223,8 +230,10 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
 			for (d = bmax; d >= bmin; d -= 2) {
 				i1 = XDL_MAX(off1, kvdb[d]);
 				i2 = i1 - d;
-				if (i2 < off2)
-					i1 = off2 + d, i2 = off2;
+				if (i2 < off2) {
+					i1 = off2 + d;
+					i2 = off2;
+				}
 				if (i1 + i2 < bbest) {
 					bbest = i1 + i2;
 					bbest1 = i1;
@@ -253,33 +262,26 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
 * sub-boxes by calling the box splitting function. Note that the real job
 * (marking changed lines) is done in the two boundary reaching checks.
 */
-int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
-		 diffdata_t *dd2, long off2, long lim2,
+int xdl_recs_cmp(xdfile_t *xdf1, long off1, long lim1,
+		 xdfile_t *xdf2, long off2, long lim2,
 		 long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv) {
-	unsigned long const *ha1 = dd1->ha, *ha2 = dd2->ha;

 	/*
 	 * Shrink the box by walking through each diagonal snake (SW and NE).
 	 */
-	for (; off1 < lim1 && off2 < lim2 && ha1[off1] == ha2[off2]; off1++, off2++);
-	for (; off1 < lim1 && off2 < lim2 && ha1[lim1 - 1] == ha2[lim2 - 1]; lim1--, lim2--);
+	for (; off1 < lim1 && off2 < lim2 && get_hash(xdf1, off1) == get_hash(xdf2, off2); off1++, off2++);
+	for (; off1 < lim1 && off2 < lim2 && get_hash(xdf1, lim1 - 1) == get_hash(xdf2, lim2 - 1); lim1--, lim2--);

 	/*
 	 * If one dimension is empty, then all records on the other one must
 	 * be obviously changed.
 	 */
 	if (off1 == lim1) {
-		char *rchg2 = dd2->rchg;
-		long *rindex2 = dd2->rindex;
-
 		for (; off2 < lim2; off2++)
-			rchg2[rindex2[off2]] = 1;
+			xdf2->changed[xdf2->rindex[off2]] = true;
 	} else if (off2 == lim2) {
-		char *rchg1 = dd1->rchg;
-		long *rindex1 = dd1->rindex;
-
 		for (; off1 < lim1; off1++)
-			rchg1[rindex1[off1]] = 1;
+			xdf1->changed[xdf1->rindex[off1]] = true;
 	} else {
 		xdpsplit_t spl;
 		spl.i1 = spl.i2 = 0;
@@ -287,7 +289,7 @@ int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
 		/*
 		 * Divide ...
 		 */
-		if (xdl_split(ha1, off1, lim1, ha2, off2, lim2, kvdf, kvdb,
+		if (xdl_split(xdf1, off1, lim1, xdf2, off2, lim2, kvdf, kvdb,
 			      need_min, &spl, xenv) < 0) {

 			return -1;
@@ -296,9 +298,9 @@ int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
 		/*
 		 * ... et Impera.
 		 */
-		if (xdl_recs_cmp(dd1, off1, spl.i1, dd2, off2, spl.i2,
+		if (xdl_recs_cmp(xdf1, off1, spl.i1, xdf2, off2, spl.i2,
 				 kvdf, kvdb, spl.min_lo, xenv) < 0 ||
-		    xdl_recs_cmp(dd1, spl.i1, lim1, dd2, spl.i2, lim2,
+		    xdl_recs_cmp(xdf1, spl.i1, lim1, xdf2, spl.i2, lim2,
 				 kvdf, kvdb, spl.min_hi, xenv) < 0) {

 			return -1;
@@ -314,7 +316,6 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 	long ndiags;
 	long *kvd, *kvdf, *kvdb;
 	xdalgoenv_t xenv;
-	diffdata_t dd1, dd2;
 	int res;

 	if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0)
@@ -353,16 +354,7 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 	xenv.snake_cnt = XDL_SNAKE_CNT;
 	xenv.heur_min = XDL_HEUR_MIN_COST;

-	dd1.nrec = xe->xdf1.nreff;
-	dd1.ha = xe->xdf1.ha;
-	dd1.rchg = xe->xdf1.rchg;
-	dd1.rindex = xe->xdf1.rindex;
-	dd2.nrec = xe->xdf2.nreff;
-	dd2.ha = xe->xdf2.ha;
-	dd2.rchg = xe->xdf2.rchg;
-	dd2.rindex = xe->xdf2.rindex;
-
-	res = xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec,
+	res = xdl_recs_cmp(&xe->xdf1, 0, xe->xdf1.nreff, &xe->xdf2, 0, xe->xdf2.nreff,
 			   kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0,
 			   &xenv);
 	xdl_free(kvd);
@@ -497,13 +489,13 @@ static void measure_split(const xdfile_t *xdf, long split,
 		m->indent = -1;
 	} else {
 		m->end_of_file = 0;
-		m->indent = xget_indent(xdf->recs[split]);
+		m->indent = xget_indent(&xdf->recs[split]);
 	}

 	m->pre_blank = 0;
 	m->pre_indent = -1;
 	for (i = split - 1; i >= 0; i--) {
-		m->pre_indent = xget_indent(xdf->recs[i]);
+		m->pre_indent = xget_indent(&xdf->recs[i]);
 		if (m->pre_indent != -1)
 			break;
 		m->pre_blank += 1;
@@ -516,7 +508,7 @@ static void measure_split(const xdfile_t *xdf, long split,
 	m->post_blank = 0;
 	m->post_indent = -1;
 	for (i = split + 1; i < xdf->nrec; i++) {
-		m->post_indent = xget_indent(xdf->recs[i]);
+		m->post_indent = xget_indent(&xdf->recs[i]);
 		if (m->post_indent != -1)
 			break;
 		m->post_blank += 1;
@@ -716,7 +708,7 @@ struct xdlgroup {
 static void group_init(xdfile_t *xdf, struct xdlgroup *g)
 {
 	g->start = g->end = 0;
-	while (xdf->rchg[g->end])
+	while (xdf->changed[g->end])
 		g->end++;
 }

@@ -730,7 +722,7 @@ static inline int group_next(xdfile_t *xdf, struct xdlgroup *g)
 		return -1;

 	g->start = g->end + 1;
-	for (g->end = g->start; xdf->rchg[g->end]; g->end++)
+	for (g->end = g->start; xdf->changed[g->end]; g->end++)
 		;

 	return 0;
@@ -746,7 +738,7 @@ static inline int group_previous(xdfile_t *xdf, struct xdlgroup *g)
 		return -1;

 	g->end = g->start - 1;
-	for (g->start = g->end; xdf->rchg[g->start - 1]; g->start--)
+	for (g->start = g->end; xdf->changed[g->start - 1]; g->start--)
 		;

 	return 0;
@@ -760,11 +752,11 @@ static inline int group_previous(xdfile_t *xdf, struct xdlgroup *g)
 static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g)
 {
 	if (g->end < xdf->nrec &&
-	    recs_match(xdf->recs[g->start], xdf->recs[g->end])) {
-		xdf->rchg[g->start++] = 0;
-		xdf->rchg[g->end++] = 1;
+	    recs_match(&xdf->recs[g->start], &xdf->recs[g->end])) {
+		xdf->changed[g->start++] = false;
+		xdf->changed[g->end++] = true;

-		while (xdf->rchg[g->end])
+		while (xdf->changed[g->end])
 			g->end++;

 		return 0;
@@ -781,11 +773,11 @@ static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g)
 static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g)
 {
 	if (g->start > 0 &&
-	    recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1])) {
-		xdf->rchg[--g->start] = 1;
-		xdf->rchg[--g->end] = 0;
+	    recs_match(&xdf->recs[g->start - 1], &xdf->recs[g->end - 1])) {
+		xdf->changed[--g->start] = true;
+		xdf->changed[--g->end] = false;

-		while (xdf->rchg[g->start - 1])
+		while (xdf->changed[g->start - 1])
 			g->start--;

 		return 0;
@@ -794,7 +786,7 @@ static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g)
 	}
 }

-static void xdl_bug(const char *msg)
+void xdl_bug(const char *msg)
 {
 	fprintf(stderr, "BUG: %s\n", msg);
 	exit(1);
@@ -946,16 +938,16 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {

 int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) {
 	xdchange_t *cscr = NULL, *xch;
-	char *rchg1 = xe->xdf1.rchg, *rchg2 = xe->xdf2.rchg;
+	bool *changed1 = xe->xdf1.changed, *changed2 = xe->xdf2.changed;
 	long i1, i2, l1, l2;

 	/*
 	 * Trivial. Collects "groups" of changes and creates an edit script.
 	 */
 	for (i1 = xe->xdf1.nrec, i2 = xe->xdf2.nrec; i1 >= 0 || i2 >= 0; i1--, i2--)
-		if (rchg1[i1 - 1] || rchg2[i2 - 1]) {
-			for (l1 = i1; rchg1[i1 - 1]; i1--);
-			for (l2 = i2; rchg2[i2 - 1]; i2--);
+		if (changed1[i1 - 1] || changed2[i2 - 1]) {
+			for (l1 = i1; changed1[i1 - 1]; i1--);
+			for (l2 = i2; changed2[i2 - 1]; i2--);

 			if (!(xch = xdl_add_change(cscr, i1, i2, l1 - i1, l2 - i2))) {
 				xdl_free_script(cscr);
@@ -1002,16 +994,16 @@ static void xdl_mark_ignorable_lines(xdchange_t *xscr, xdfenv_t *xe, long flags)

 	for (xch = xscr; xch; xch = xch->next) {
 		int ignore = 1;
-		xrecord_t **rec;
+		xrecord_t *rec;
 		long i;

 		rec = &xe->xdf1.recs[xch->i1];
 		for (i = 0; i < xch->chg1 && ignore; i++)
-			ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
+			ignore = xdl_blankline(rec[i].ptr, rec[i].size, flags);

 		rec = &xe->xdf2.recs[xch->i2];
 		for (i = 0; i < xch->chg2 && ignore; i++)
-			ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
+			ignore = xdl_blankline(rec[i].ptr, rec[i].size, flags);

 		xch->ignore = ignore;
 	}
@@ -1020,7 +1012,7 @@ static void xdl_mark_ignorable_lines(xdchange_t *xscr, xdfenv_t *xe, long flags)
 #if 0 // unused by Vim
 static int record_matches_regex(xrecord_t *rec, xpparam_t const *xpp) {
 	regmatch_t regmatch;
-	int i;
+	size_t i;

 	for (i = 0; i < xpp->ignore_regex_nr; i++)
 		if (!regexec_buf(xpp->ignore_regex[i], rec->ptr, rec->size, 1,
@@ -1036,7 +1028,7 @@ static void xdl_mark_ignorable_regex(xdchange_t *xscr, const xdfenv_t *xe,
 	xdchange_t *xch;

 	for (xch = xscr; xch; xch = xch->next) {
-		xrecord_t **rec;
+		xrecord_t *rec;
 		int ignore = 1;
 		long i;

@@ -1048,11 +1040,11 @@ static void xdl_mark_ignorable_regex(xdchange_t *xscr, const xdfenv_t *xe,

 		rec = &xe->xdf1.recs[xch->i1];
 		for (i = 0; i < xch->chg1 && ignore; i++)
-			ignore = record_matches_regex(rec[i], xpp);
+			ignore = record_matches_regex(&rec[i], xpp);

 		rec = &xe->xdf2.recs[xch->i2];
 		for (i = 0; i < xch->chg2 && ignore; i++)
-			ignore = record_matches_regex(rec[i], xpp);
+			ignore = record_matches_regex(&rec[i], xpp);

 		xch->ignore = ignore;
 	}
--- a/src/xdiff/xdiffi.h
+++ b/src/xdiff/xdiffi.h
@@ -24,13 +24,6 @@
 #define XDIFFI_H


-typedef struct s_diffdata {
-	long nrec;
-	unsigned long const *ha;
-	long *rindex;
-	char *rchg;
-} diffdata_t;
-
 typedef struct s_xdalgoenv {
 	long mxcost;
 	long snake_cnt;
@@ -46,8 +39,8 @@ typedef struct s_xdchange {



-int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
-		 diffdata_t *dd2, long off2, long lim2,
+int xdl_recs_cmp(xdfile_t *xdf1, long off1, long lim1,
+		 xdfile_t *xdf2, long off2, long lim2,
 		 long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv);
 int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 		xdfenv_t *xe);
--- a/src/xdiff/xemit.c
+++ b/src/xdiff/xemit.c
@@ -22,27 +22,26 @@

 #include "xinclude.h"

-static long xdl_get_rec(xdfile_t *xdf, long ri, char const **rec) {

-	*rec = xdf->recs[ri]->ptr;
-
-	return xdf->recs[ri]->size;
-}
-
-
-static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb) {
-	long size, psize = (long)strlen(pre);
-	char const *rec;
-
-	size = xdl_get_rec(xdf, ri, &rec);
-	if (xdl_emit_diffrec(rec, size, pre, psize, ecb) < 0) {
+static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb)
+{
+	xrecord_t *rec = &xdf->recs[ri];

+	if (xdl_emit_diffrec(rec->ptr, rec->size, pre, (long)strlen(pre), ecb) < 0)
 		return -1;
-	}

 	return 0;
 }

+#define signed_add_overflows(a, b) \
+    ((b) > LONG_MAX - (a))
+
+static long saturating_add(long a, long b)
+{
+	return signed_add_overflows(a, b) ? LONG_MAX : a + b;
+}
+
+#undef signed_add_overflows

 /*
 * Starting at the passed change atom, find the latest change atom to be included
@@ -52,9 +51,11 @@ static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *
 xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg)
 {
 	xdchange_t *xch, *xchp, *lxch;
-	long max_common = 2 * xecfg->ctxlen + xecfg->interhunkctxlen;
+	long max_common = saturating_add(saturating_add(xecfg->ctxlen,
+							xecfg->ctxlen),
+					 xecfg->interhunkctxlen);
 	long max_ignorable = xecfg->ctxlen;
-	unsigned long ignored = 0; /* number of ignored blank lines */
+	long ignored = 0; /* number of ignored blank lines */

 	/* remove ignorable changes that are too far before other changes */
 	for (xchp = *xscr; xchp && xchp->ignore; xchp = xchp->next) {
@@ -81,7 +82,7 @@ xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg)
 		} else if (distance < max_ignorable && xch->ignore) {
 			ignored += xch->chg2;
 		} else if (lxch != xchp &&
-			   xch->i1 + (long)ignored - (lxch->i1 + lxch->chg1) > max_common) {
+			   xch->i1 + ignored - (lxch->i1 + lxch->chg1) > max_common) {
 			break;
 		} else if (!xch->ignore) {
 			lxch = xch;
@@ -117,11 +118,11 @@ static long def_ff(const char *rec, long len, char *buf, long sz)
 static long match_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri,
 			   char *buf, long sz)
 {
-	const char *rec;
-	long len = xdl_get_rec(xdf, ri, &rec);
+	xrecord_t *rec = &xdf->recs[ri];
+
 	if (!xecfg->find_func)
-		return def_ff(rec, len, buf, sz);
-	return xecfg->find_func(rec, len, buf, sz, xecfg->find_func_priv);
+		return def_ff(rec->ptr, rec->size, buf, sz);
+	return xecfg->find_func(rec->ptr, rec->size, buf, sz, xecfg->find_func_priv);
 }
 #endif

@@ -163,14 +164,12 @@ static long get_func_line(xdfenv_t *xe, xdemitconf_t const *xecfg,
 #if 0
 static int is_empty_rec(xdfile_t *xdf, long ri)
 {
-	const char *rec;
-	long len = xdl_get_rec(xdf, ri, &rec);
+	xrecord_t *rec = &xdf->recs[ri];
+	long i = 0;

-	while (len > 0 && XDL_ISSPACE(*rec)) {
-		rec++;
-		len--;
-	}
-	return !len;
+	for (; i < rec->size && XDL_ISSPACE(rec->ptr[i]); i++);
+
+	return i == rec->size;
 }
 #endif

--- a/src/xdiff/xhistogram.c
+++ b/src/xdiff/xhistogram.c
@@ -43,8 +43,8 @@

 #include "xinclude.h"

-#define MAX_PTR	INT_MAX
-#define MAX_CNT	INT_MAX
+#define MAX_PTR	UINT_MAX
+#define MAX_CNT	UINT_MAX

 #define LINE_END(n) (line##n + count##n - 1)
 #define LINE_END_PTR(n) (*line##n + *count##n - 1)
@@ -86,7 +86,7 @@ struct region {
 	((LINE_MAP(index, ptr))->cnt)

 #define REC(env, s, l) \
-	(env->xdf##s.recs[l - 1])
+	(&env->xdf##s.recs[l - 1])

 static int cmp_recs(xrecord_t *r1, xrecord_t *r2)
 {
@@ -102,11 +102,11 @@ static int cmp_recs(xrecord_t *r1, xrecord_t *r2)

 static int scanA(struct histindex *index, int line1, int count1)
 {
-	int ptr, tbl_idx;
+	unsigned int ptr, tbl_idx;
 	unsigned int chain_len;
 	struct record **rec_chain, *rec;

-	for (ptr = LINE_END(1); line1 <= ptr; ptr--) {
+	for (ptr = LINE_END(1); (unsigned int)line1 <= ptr; ptr--) {
 		tbl_idx = TABLE_HASH(index, 1, ptr);
 		rec_chain = index->records + tbl_idx;
 		rec = *rec_chain;
@@ -181,14 +181,14 @@ static int try_lcs(struct histindex *index, struct region *lcs, int b_ptr,
 			be = bs;
 			rc = rec->cnt;

-			while (line1 < (int)as && line2 < (int)bs
+			while ((unsigned int)line1 < as && (unsigned int)line2 < bs
 				&& CMP(index, 1, as - 1, 2, bs - 1)) {
 				as--;
 				bs--;
 				if (1 < rc)
 					rc = XDL_MIN(rc, CNT(index, as));
 			}
-			while ((int)ae < LINE_END(1) && (int)be < LINE_END(2)
+			while (ae < (unsigned int)LINE_END(1) && be < (unsigned int)LINE_END(2)
 				&& CMP(index, 1, ae + 1, 2, be + 1)) {
 				ae++;
 				be++;
@@ -313,16 +313,16 @@ redo:
 	if (count1 <= 0 && count2 <= 0)
 		return 0;

-	if (LINE_END(1) >= MAX_PTR)
+	if ((unsigned int)LINE_END(1) >= MAX_PTR)
 		return -1;

 	if (!count1) {
 		while(count2--)
-			env->xdf2.rchg[line2++ - 1] = 1;
+			env->xdf2.changed[line2++ - 1] = true;
 		return 0;
 	} else if (!count2) {
 		while(count1--)
-			env->xdf1.rchg[line1++ - 1] = 1;
+			env->xdf1.changed[line1++ - 1] = true;
 		return 0;
 	}

@@ -335,9 +335,9 @@ redo:
 	else {
 		if (lcs.begin1 == 0 && lcs.begin2 == 0) {
 			while (count1--)
-				env->xdf1.rchg[line1++ - 1] = 1;
+				env->xdf1.changed[line1++ - 1] = true;
 			while (count2--)
-				env->xdf2.rchg[line2++ - 1] = 1;
+				env->xdf2.changed[line2++ - 1] = true;
 			result = 0;
 		} else {
 			result = histogram_diff(xpp, env,
--- a/src/xdiff/xinclude.h
+++ b/src/xdiff/xinclude.h
@@ -43,6 +43,9 @@
 # define inline __inline
 #endif

+// Vim replacement for BUG()
+void xdl_bug(const char *msg);
+
 #if !defined(XINCLUDE_H)
 #define XINCLUDE_H

--- a/src/xdiff/xpatience.c
+++ b/src/xdiff/xpatience.c
@@ -19,6 +19,7 @@
 *  Davide Libenzi <davidel@xmailserver.org>
 *
 */
+
 #include "xinclude.h"

 /*
@@ -63,7 +64,7 @@ struct hashmap {

 		/*
 		 * If 1, this entry can serve as an anchor. See
-		 * Documentation/diff-options.txt for more information.
+		 * Documentation/diff-options.adoc for more information.
 		 */
 		unsigned anchor : 1;
 	} *entries, *first, *last;
@@ -75,8 +76,8 @@ struct hashmap {

 static int is_anchor(xpparam_t const *xpp, const char *line)
 {
-	int i;
-	for (i = 0; i < (int)xpp->anchors_nr; i++) {
+	size_t i;
+	for (i = 0; i < xpp->anchors_nr; i++) {
 		if (!strncmp(line, xpp->anchors[i], strlen(xpp->anchors[i])))
 			return 1;
 	}
@@ -87,9 +88,9 @@ static int is_anchor(xpparam_t const *xpp, const char *line)
 static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map,
 			  int pass)
 {
-	xrecord_t **records = pass == 1 ?
+	xrecord_t *records = pass == 1 ?
 		map->env->xdf1.recs : map->env->xdf2.recs;
-	xrecord_t *record = records[line - 1];
+	xrecord_t *record = &records[line - 1];
 	/*
 	 * After xdl_prepare_env() (or more precisely, due to
 	 * xdl_classify_record()), the "ha" member of the records (AKA lines)
@@ -120,7 +121,7 @@ static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map,
 		return;
 	map->entries[index].line1 = line;
 	map->entries[index].hash = record->ha;
-	map->entries[index].anchor = is_anchor(xpp, map->env->xdf1.recs[line - 1]->ptr);
+	map->entries[index].anchor = is_anchor(xpp, map->env->xdf1.recs[line - 1].ptr);
 	if (!map->first)
 		map->first = map->entries + index;
 	if (map->last) {
@@ -245,8 +246,8 @@ static int find_longest_common_sequence(struct hashmap *map, struct entry **res)

 static int match(struct hashmap *map, int line1, int line2)
 {
-	xrecord_t *record1 = map->env->xdf1.recs[line1 - 1];
-	xrecord_t *record2 = map->env->xdf2.recs[line2 - 1];
+	xrecord_t *record1 = &map->env->xdf1.recs[line1 - 1];
+	xrecord_t *record2 = &map->env->xdf2.recs[line2 - 1];
 	return record1->ha == record2->ha;
 }

@@ -330,11 +331,11 @@ static int patience_diff(xpparam_t const *xpp, xdfenv_t *env,
 	/* trivial case: one side is empty */
 	if (!count1) {
 		while(count2--)
-			env->xdf2.rchg[line2++ - 1] = 1;
+			env->xdf2.changed[line2++ - 1] = true;
 		return 0;
 	} else if (!count2) {
 		while(count1--)
-			env->xdf1.rchg[line1++ - 1] = 1;
+			env->xdf1.changed[line1++ - 1] = true;
 		return 0;
 	}

@@ -346,9 +347,9 @@ static int patience_diff(xpparam_t const *xpp, xdfenv_t *env,
 	/* are there any matching lines at all? */
 	if (!map.has_matches) {
 		while(count1--)
-			env->xdf1.rchg[line1++ - 1] = 1;
+			env->xdf1.changed[line1++ - 1] = true;
 		while(count2--)
-			env->xdf2.rchg[line2++ - 1] = 1;
+			env->xdf2.changed[line2++ - 1] = true;
 		xdl_free(map.entries);
 		return 0;
 	}
--- a/src/xdiff/xprepare.c
+++ b/src/xdiff/xprepare.c
@@ -29,12 +29,13 @@
 #define XDL_GUESS_NLINES1 256
 #define XDL_GUESS_NLINES2 20

+#define DISCARD 0
+#define KEEP 1
+#define INVESTIGATE 2

 typedef struct s_xdlclass {
 	struct s_xdlclass *next;
-	unsigned long ha;
-	char const *line;
-	long size;
+	xrecord_t rec;
 	long idx;
 	long len1, len2;
 } xdlclass_t;
@@ -53,21 +54,6 @@ typedef struct s_xdlclassifier {



-static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags);
-static void xdl_free_classifier(xdlclassifier_t *cf);
-static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash,
-			       unsigned int hbits, xrecord_t *rec);
-static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp,
-			   xdlclassifier_t *cf, xdfile_t *xdf);
-static void xdl_free_ctx(xdfile_t *xdf);
-static int xdl_clean_mmatch(char const *dis, long i, long s, long e);
-static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2);
-static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2);
-static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2);
-
-
-
-
 static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) {
 	cf->flags = flags;

@@ -106,17 +92,14 @@ static void xdl_free_classifier(xdlclassifier_t *cf) {
 }


-static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash,
-			       unsigned int hbits, xrecord_t *rec) {
+static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t *rec) {
 	long hi;
-	char const *line;
 	xdlclass_t *rcrec;

-	line = rec->ptr;
 	hi = (long) XDL_HASHLONG(rec->ha, cf->hbits);
 	for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next)
-		if (rcrec->ha == rec->ha &&
-				xdl_recmatch(rcrec->line, rcrec->size,
+		if (rcrec->rec.ha == rec->ha &&
+				xdl_recmatch(rcrec->rec.ptr, rcrec->rec.size,
 					rec->ptr, rec->size, cf->flags))
 			break;

@@ -129,9 +112,7 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t
 		if (XDL_ALLOC_GROW(cf->rcrecs, cf->count, cf->alloc))
 				return -1;
 		cf->rcrecs[rcrec->idx] = rcrec;
-		rcrec->line = line;
-		rcrec->size = rec->size;
-		rcrec->ha = rec->ha;
+		rcrec->rec = *rec;
 		rcrec->len1 = rcrec->len2 = 0;
 		rcrec->next = cf->rchash[hi];
 		cf->rchash[hi] = rcrec;
@@ -141,107 +122,263 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t

 	rec->ha = (unsigned long) rcrec->idx;

-	hi = (long) XDL_HASHLONG(rec->ha, hbits);
-	rec->next = rhash[hi];
-	rhash[hi] = rec;
-
 	return 0;
 }


+static void xdl_free_ctx(xdfile_t *xdf)
+{
+	xdl_free(xdf->rindex);
+	xdl_free(xdf->changed - 1);
+	xdl_free(xdf->recs);
+}
+
+
 static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp,
 			   xdlclassifier_t *cf, xdfile_t *xdf) {
-	unsigned int hbits;
-	long nrec, hsize, bsize;
+	long bsize;
 	unsigned long hav;
 	char const *blk, *cur, *top, *prev;
 	xrecord_t *crec;
-	xrecord_t **recs;
-	xrecord_t **rhash;
-	unsigned long *ha;
-	char *rchg;
-	long *rindex;

-	ha = NULL;
-	rindex = NULL;
-	rchg = NULL;
-	rhash = NULL;
-	recs = NULL;
+	xdf->rindex = NULL;
+	xdf->changed = NULL;
+	xdf->recs = NULL;

-	if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0)
-		goto abort;
-	if (!XDL_ALLOC_ARRAY(recs, narec))
+	if (!XDL_ALLOC_ARRAY(xdf->recs, narec))
 		goto abort;

-	hbits = xdl_hashbits((unsigned int) narec);
-	hsize = 1 << hbits;
-	if (!XDL_CALLOC_ARRAY(rhash, hsize))
-		goto abort;
-
-	nrec = 0;
+	xdf->nrec = 0;
 	if ((cur = blk = xdl_mmfile_first(mf, &bsize))) {
 		for (top = blk + bsize; cur < top; ) {
 			prev = cur;
 			hav = xdl_hash_record(&cur, top, xpp->flags);
-			if (XDL_ALLOC_GROW(recs, nrec + 1, narec))
-				goto abort;
-			if (!(crec = xdl_cha_alloc(&xdf->rcha)))
+			if (XDL_ALLOC_GROW(xdf->recs, xdf->nrec + 1, narec))
 				goto abort;
+			crec = &xdf->recs[xdf->nrec++];
 			crec->ptr = prev;
 			crec->size = (long) (cur - prev);
 			crec->ha = hav;
-			recs[nrec++] = crec;
-			if (xdl_classify_record(pass, cf, rhash, hbits, crec) < 0)
+			if (xdl_classify_record(pass, cf, crec) < 0)
 				goto abort;
 		}
 	}

-	if (!XDL_CALLOC_ARRAY(rchg, nrec + 2))
+	if (!XDL_CALLOC_ARRAY(xdf->changed, xdf->nrec + 2))
 		goto abort;

 	if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) &&
 	    (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)) {
-		if (!XDL_ALLOC_ARRAY(rindex, nrec + 1))
-			goto abort;
-		if (!XDL_ALLOC_ARRAY(ha, nrec + 1))
+		if (!XDL_ALLOC_ARRAY(xdf->rindex, xdf->nrec + 1))
 			goto abort;
 	}

-	xdf->nrec = nrec;
-	xdf->recs = recs;
-	xdf->hbits = hbits;
-	xdf->rhash = rhash;
-	xdf->rchg = rchg + 1;
-	xdf->rindex = rindex;
+	xdf->changed += 1;
 	xdf->nreff = 0;
-	xdf->ha = ha;
 	xdf->dstart = 0;
-	xdf->dend = nrec - 1;
+	xdf->dend = xdf->nrec - 1;

 	return 0;

 abort:
-	xdl_free(ha);
-	xdl_free(rindex);
-	xdl_free(rchg);
-	xdl_free(rhash);
-	xdl_free(recs);
-	xdl_cha_free(&xdf->rcha);
+	xdl_free_ctx(xdf);
 	return -1;
 }


-static void xdl_free_ctx(xdfile_t *xdf) {
+void xdl_free_env(xdfenv_t *xe) {

-	xdl_free(xdf->rhash);
-	xdl_free(xdf->rindex);
-	xdl_free(xdf->rchg - 1);
-	xdl_free(xdf->ha);
-	xdl_free(xdf->recs);
-	xdl_cha_free(&xdf->rcha);
+	xdl_free_ctx(&xe->xdf2);
+	xdl_free_ctx(&xe->xdf1);
 }


+static bool xdl_clean_mmatch(uint8_t const *action, long i, long s, long e) {
+	long r, rdis0, rpdis0, rdis1, rpdis1;
+
+	/*
+	 * Limits the window that is examined during the similar-lines
+	 * scan. The loops below stops when action[i - r] == KEEP
+	 * (line that has no match), but there are corner cases where
+	 * the loop proceed all the way to the extremities by causing
+	 * huge performance penalties in case of big files.
+	 */
+	if (i - s > XDL_SIMSCAN_WINDOW)
+		s = i - XDL_SIMSCAN_WINDOW;
+	if (e - i > XDL_SIMSCAN_WINDOW)
+		e = i + XDL_SIMSCAN_WINDOW;
+
+	/*
+	 * Scans the lines before 'i' to find a run of lines that either
+	 * have no match (action[j] == DISCARD) or have multiple matches
+	 * (action[j] == INVESTIGATE). Note that we always call this
+	 * function with action[i] == INVESTIGATE, so the current line
+	 * (i) is already a multimatch line.
+	 */
+	for (r = 1, rdis0 = 0, rpdis0 = 1; (i - r) >= s; r++) {
+		if (action[i - r] == DISCARD)
+			rdis0++;
+		else if (action[i - r] == INVESTIGATE)
+			rpdis0++;
+		else if (action[i - r] == KEEP)
+			break;
+		else
+			xdl_bug("Illegal value for action[i - r]");
+	}
+	/*
+	 * If the run before the line 'i' found only multimatch lines,
+	 * we return false and hence we don't make the current line (i)
+	 * discarded. We want to discard multimatch lines only when
+	 * they appear in the middle of runs with nomatch lines
+	 * (action[j] == DISCARD).
+	 */
+	if (rdis0 == 0)
+		return 0;
+	for (r = 1, rdis1 = 0, rpdis1 = 1; (i + r) <= e; r++) {
+		if (action[i + r] == DISCARD)
+			rdis1++;
+		else if (action[i + r] == INVESTIGATE)
+			rpdis1++;
+		else if (action[i + r] == KEEP)
+			break;
+		else
+			xdl_bug("Illegal value for action[i + r]");
+	}
+	/*
+	 * If the run after the line 'i' found only multimatch lines,
+	 * we return false and hence we don't make the current line (i)
+	 * discarded.
+	 */
+	if (rdis1 == 0)
+		return false;
+	rdis1 += rdis0;
+	rpdis1 += rpdis0;
+
+	return rpdis1 * XDL_KPDIS_RUN < (rpdis1 + rdis1);
+}
+
+
+/*
+ * Try to reduce the problem complexity, discard records that have no
+ * matches on the other file. Also, lines that have multiple matches
+ * might be potentially discarded if they appear in a run of discardable.
+ */
+static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) {
+	long i, nm, nreff, mlim;
+	xrecord_t *recs;
+	xdlclass_t *rcrec;
+	uint8_t *action1 = NULL, *action2 = NULL;
+	bool need_min = !!(cf->flags & XDF_NEED_MINIMAL);
+	int ret = 0;
+
+	/*
+	 * Create temporary arrays that will help us decide if
+	 * changed[i] should remain false, or become true.
+	 */
+	if (!XDL_CALLOC_ARRAY(action1, xdf1->nrec + 1)) {
+		ret = -1;
+		goto cleanup;
+	}
+	if (!XDL_CALLOC_ARRAY(action2, xdf2->nrec + 1)) {
+		ret = -1;
+		goto cleanup;
+	}
+
+	/*
+	 * Initialize temporary arrays with DISCARD, KEEP, or INVESTIGATE.
+	 */
+	if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT)
+		mlim = XDL_MAX_EQLIMIT;
+	for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) {
+		rcrec = cf->rcrecs[recs->ha];
+		nm = rcrec ? rcrec->len2 : 0;
+		action1[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP;
+	}
+
+	if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT)
+		mlim = XDL_MAX_EQLIMIT;
+	for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) {
+		rcrec = cf->rcrecs[recs->ha];
+		nm = rcrec ? rcrec->len1 : 0;
+		action2[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP;
+	}
+
+	/*
+	 * Use temporary arrays to decide if changed[i] should remain
+	 * false, or become true.
+	 */
+	for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart];
+	     i <= xdf1->dend; i++, recs++) {
+		if (action1[i] == KEEP ||
+		    (action1[i] == INVESTIGATE && !xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))) {
+			xdf1->rindex[nreff++] = i;
+			/* changed[i] remains false, i.e. keep */
+		} else
+			xdf1->changed[i] = true;
+			/* i.e. discard */
+	}
+	xdf1->nreff = nreff;
+
+	for (nreff = 0, i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart];
+	     i <= xdf2->dend; i++, recs++) {
+		if (action2[i] == KEEP ||
+		    (action2[i] == INVESTIGATE && !xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))) {
+			xdf2->rindex[nreff++] = i;
+			/* changed[i] remains false, i.e. keep */
+		} else
+			xdf2->changed[i] = true;
+			/* i.e. discard */
+	}
+	xdf2->nreff = nreff;
+
+cleanup:
+	xdl_free(action1);
+	xdl_free(action2);
+
+	return ret;
+}
+
+
+/*
+ * Early trim initial and terminal matching records.
+ */
+static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) {
+	long i, lim;
+	xrecord_t *recs1, *recs2;
+
+	recs1 = xdf1->recs;
+	recs2 = xdf2->recs;
+	for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim;
+	     i++, recs1++, recs2++)
+		if (recs1->ha != recs2->ha)
+			break;
+
+	xdf1->dstart = xdf2->dstart = i;
+
+	recs1 = xdf1->recs + xdf1->nrec - 1;
+	recs2 = xdf2->recs + xdf2->nrec - 1;
+	for (lim -= i, i = 0; i < lim; i++, recs1--, recs2--)
+		if (recs1->ha != recs2->ha)
+			break;
+
+	xdf1->dend = xdf1->nrec - i - 1;
+	xdf2->dend = xdf2->nrec - i - 1;
+
+	return 0;
+}
+
+
+static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) {
+
+	if (xdl_trim_ends(xdf1, xdf2) < 0 ||
+	    xdl_cleanup_records(cf, xdf1, xdf2) < 0) {
+
+		return -1;
+	}
+
+	return 0;
+}
+
 int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 		    xdfenv_t *xe) {
 	long enl1, enl2, sample;
@@ -291,171 +428,3 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,

 	return 0;
 }
-
-
-void xdl_free_env(xdfenv_t *xe) {
-
-	xdl_free_ctx(&xe->xdf2);
-	xdl_free_ctx(&xe->xdf1);
-}
-
-
-static int xdl_clean_mmatch(char const *dis, long i, long s, long e) {
-	long r, rdis0, rpdis0, rdis1, rpdis1;
-
-	/*
-	 * Limits the window the is examined during the similar-lines
-	 * scan. The loops below stops when dis[i - r] == 1 (line that
-	 * has no match), but there are corner cases where the loop
-	 * proceed all the way to the extremities by causing huge
-	 * performance penalties in case of big files.
-	 */
-	if (i - s > XDL_SIMSCAN_WINDOW)
-		s = i - XDL_SIMSCAN_WINDOW;
-	if (e - i > XDL_SIMSCAN_WINDOW)
-		e = i + XDL_SIMSCAN_WINDOW;
-
-	/*
-	 * Scans the lines before 'i' to find a run of lines that either
-	 * have no match (dis[j] == 0) or have multiple matches (dis[j] > 1).
-	 * Note that we always call this function with dis[i] > 1, so the
-	 * current line (i) is already a multimatch line.
-	 */
-	for (r = 1, rdis0 = 0, rpdis0 = 1; (i - r) >= s; r++) {
-		if (!dis[i - r])
-			rdis0++;
-		else if (dis[i - r] == 2)
-			rpdis0++;
-		else
-			break;
-	}
-	/*
-	 * If the run before the line 'i' found only multimatch lines, we
-	 * return 0 and hence we don't make the current line (i) discarded.
-	 * We want to discard multimatch lines only when they appear in the
-	 * middle of runs with nomatch lines (dis[j] == 0).
-	 */
-	if (rdis0 == 0)
-		return 0;
-	for (r = 1, rdis1 = 0, rpdis1 = 1; (i + r) <= e; r++) {
-		if (!dis[i + r])
-			rdis1++;
-		else if (dis[i + r] == 2)
-			rpdis1++;
-		else
-			break;
-	}
-	/*
-	 * If the run after the line 'i' found only multimatch lines, we
-	 * return 0 and hence we don't make the current line (i) discarded.
-	 */
-	if (rdis1 == 0)
-		return 0;
-	rdis1 += rdis0;
-	rpdis1 += rpdis0;
-
-	return rpdis1 * XDL_KPDIS_RUN < (rpdis1 + rdis1);
-}
-
-
-/*
- * Try to reduce the problem complexity, discard records that have no
- * matches on the other file. Also, lines that have multiple matches
- * might be potentially discarded if they happear in a run of discardable.
- */
-static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) {
-	long i, nm, nreff, mlim;
-	xrecord_t **recs;
-	xdlclass_t *rcrec;
-	char *dis, *dis1, *dis2;
-
-	if (!XDL_CALLOC_ARRAY(dis, xdf1->nrec + xdf2->nrec + 2))
-		return -1;
-	dis1 = dis;
-	dis2 = dis1 + xdf1->nrec + 1;
-
-	if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT)
-		mlim = XDL_MAX_EQLIMIT;
-	for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) {
-		rcrec = cf->rcrecs[(*recs)->ha];
-		nm = rcrec ? rcrec->len2 : 0;
-		dis1[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1;
-	}
-
-	if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT)
-		mlim = XDL_MAX_EQLIMIT;
-	for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) {
-		rcrec = cf->rcrecs[(*recs)->ha];
-		nm = rcrec ? rcrec->len1 : 0;
-		dis2[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1;
-	}
-
-	for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart];
-	     i <= xdf1->dend; i++, recs++) {
-		if (dis1[i] == 1 ||
-		    (dis1[i] == 2 && !xdl_clean_mmatch(dis1, i, xdf1->dstart, xdf1->dend))) {
-			xdf1->rindex[nreff] = i;
-			xdf1->ha[nreff] = (*recs)->ha;
-			nreff++;
-		} else
-			xdf1->rchg[i] = 1;
-	}
-	xdf1->nreff = nreff;
-
-	for (nreff = 0, i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart];
-	     i <= xdf2->dend; i++, recs++) {
-		if (dis2[i] == 1 ||
-		    (dis2[i] == 2 && !xdl_clean_mmatch(dis2, i, xdf2->dstart, xdf2->dend))) {
-			xdf2->rindex[nreff] = i;
-			xdf2->ha[nreff] = (*recs)->ha;
-			nreff++;
-		} else
-			xdf2->rchg[i] = 1;
-	}
-	xdf2->nreff = nreff;
-
-	xdl_free(dis);
-
-	return 0;
-}
-
-
-/*
- * Early trim initial and terminal matching records.
- */
-static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) {
-	long i, lim;
-	xrecord_t **recs1, **recs2;
-
-	recs1 = xdf1->recs;
-	recs2 = xdf2->recs;
-	for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim;
-	     i++, recs1++, recs2++)
-		if ((*recs1)->ha != (*recs2)->ha)
-			break;
-
-	xdf1->dstart = xdf2->dstart = i;
-
-	recs1 = xdf1->recs + xdf1->nrec - 1;
-	recs2 = xdf2->recs + xdf2->nrec - 1;
-	for (lim -= i, i = 0; i < lim; i++, recs1--, recs2--)
-		if ((*recs1)->ha != (*recs2)->ha)
-			break;
-
-	xdf1->dend = xdf1->nrec - i - 1;
-	xdf2->dend = xdf2->nrec - i - 1;
-
-	return 0;
-}
-
-
-static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) {
-
-	if (xdl_trim_ends(xdf1, xdf2) < 0 ||
-	    xdl_cleanup_records(cf, xdf1, xdf2) < 0) {
-
-		return -1;
-	}
-
-	return 0;
-}
--- a/src/xdiff/xtypes.h
+++ b/src/xdiff/xtypes.h
@@ -39,23 +39,18 @@ typedef struct s_chastore {
 } chastore_t;

 typedef struct s_xrecord {
-	struct s_xrecord *next;
 	char const *ptr;
 	long size;
 	unsigned long ha;
 } xrecord_t;

 typedef struct s_xdfile {
-	chastore_t rcha;
+	xrecord_t *recs;
 	long nrec;
-	unsigned int hbits;
-	xrecord_t **rhash;
 	long dstart, dend;
-	xrecord_t **recs;
-	char *rchg;
+	bool *changed;
 	long *rindex;
 	long nreff;
-	unsigned long *ha;
 } xdfile_t;

 typedef struct s_xdfenv {
--- a/src/xdiff/xutils.c
+++ b/src/xdiff/xutils.c
@@ -249,7 +249,7 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
 	return 1;
 }

-static unsigned long xdl_hash_record_with_whitespace(char const **data,
+unsigned long xdl_hash_record_with_whitespace(char const **data,
 		char const *top, long flags) {
 	unsigned long ha = 5381;
 	char const *ptr = *data;
@@ -294,19 +294,67 @@ static unsigned long xdl_hash_record_with_whitespace(char const **data,
 	return ha;
 }

-unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
-	unsigned long ha = 5381;
+/*
+ * Compiler reassociation barrier: pretend to modify X and Y to disallow
+ * changing evaluation order with respect to following uses of X and Y.
+ */
+#ifdef __GNUC__
+#define REASSOC_FENCE(x, y) __asm__("" : "+r"(x), "+r"(y))
+#else
+#define REASSOC_FENCE(x, y)
+#endif
+
+unsigned long xdl_hash_record_verbatim(char const **data, char const *top) {
+	unsigned long ha = 5381, c0, c1;
 	char const *ptr = *data;
-
-	if (flags & XDF_WHITESPACE_FLAGS)
-		return xdl_hash_record_with_whitespace(data, top, flags);
-
+#if 0
+	/*
+	 * The baseline form of the optimized loop below. This is the djb2
+	 * hash (the above function uses a variant with XOR instead of ADD).
+	 */
 	for (; ptr < top && *ptr != '\n'; ptr++) {
 		ha += (ha << 5);
-		ha ^= (unsigned long) *ptr;
+		ha += (unsigned long) *ptr;
 	}
 	*data = ptr < top ? ptr + 1: ptr;
+#else
+	/* Process two characters per iteration. */
+	if (top - ptr >= 2) do {
+		if ((c0 = ptr[0]) == '\n') {
+			*data = ptr + 1;
+			return ha;
+		}
+		if ((c1 = ptr[1]) == '\n') {
+			*data = ptr + 2;
+			c0 += ha;
+			REASSOC_FENCE(c0, ha);
+			ha = ha * 32 + c0;
+			return ha;
+		}
+		/*
+		 * Combine characters C0 and C1 into the hash HA. We have
+		 * HA = (HA * 33 + C0) * 33 + C1, and we want to ensure
+		 * that dependency chain over HA is just one multiplication
+		 * and one addition, i.e. we want to evaluate this as
+		 * HA = HA * 33 * 33 + (C0 * 33 + C1), and likewise prefer
+		 * (C0 * 32 + (C0 + C1)) for the expression in parenthesis.
+		 */
+		ha *= 33 * 33;
+		c1 += c0;
+		REASSOC_FENCE(c1, c0);
+		c1 += c0 * 32;
+		REASSOC_FENCE(c1, ha);
+		ha += c1;

+		ptr += 2;
+	} while (ptr < top - 1);
+	*data = top;
+	if (ptr < top && (c0 = ptr[0]) != '\n') {
+		c0 += ha;
+		REASSOC_FENCE(c0, ha);
+		ha = ha * 32 + c0;
+	}
+#endif
 	return ha;
 }

@@ -375,7 +423,7 @@ static int xdl_format_hunk_hdr(long s1, long c1, long s2, long c2,
 	nb += 3;
 	if (func && funclen) {
 		buf[nb++] = ' ';
-		if (funclen > (long)sizeof(buf) - nb - 1)
+		if ((size_t)funclen > sizeof(buf) - nb - 1)
 			funclen = sizeof(buf) - nb - 1;
 		memcpy(buf + nb, func, funclen);
 		nb += funclen;
@@ -416,17 +464,17 @@ int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp,
 	mmfile_t subfile1, subfile2;
 	xdfenv_t env;

-	subfile1.ptr = (char *)diff_env->xdf1.recs[line1 - 1]->ptr;
-	subfile1.size = diff_env->xdf1.recs[line1 + count1 - 2]->ptr +
-		diff_env->xdf1.recs[line1 + count1 - 2]->size - subfile1.ptr;
-	subfile2.ptr = (char *)diff_env->xdf2.recs[line2 - 1]->ptr;
-	subfile2.size = diff_env->xdf2.recs[line2 + count2 - 2]->ptr +
-		diff_env->xdf2.recs[line2 + count2 - 2]->size - subfile2.ptr;
+	subfile1.ptr = (char *)diff_env->xdf1.recs[line1 - 1].ptr;
+	subfile1.size = diff_env->xdf1.recs[line1 + count1 - 2].ptr +
+		diff_env->xdf1.recs[line1 + count1 - 2].size - subfile1.ptr;
+	subfile2.ptr = (char *)diff_env->xdf2.recs[line2 - 1].ptr;
+	subfile2.size = diff_env->xdf2.recs[line2 + count2 - 2].ptr +
+		diff_env->xdf2.recs[line2 + count2 - 2].size - subfile2.ptr;
 	if (xdl_do_diff(&subfile1, &subfile2, xpp, &env) < 0)
 		return -1;

-	memcpy(diff_env->xdf1.rchg + line1 - 1, env.xdf1.rchg, count1);
-	memcpy(diff_env->xdf2.rchg + line2 - 1, env.xdf2.rchg, count2);
+	memcpy(diff_env->xdf1.changed + line1 - 1, env.xdf1.changed, count1);
+	memcpy(diff_env->xdf2.changed + line2 - 1, env.xdf2.changed, count2);

 	xdl_free_env(&env);

@@ -437,7 +485,7 @@ void* xdl_alloc_grow_helper(void *p, long nr, long *alloc, size_t size)
 {
 	void *tmp = NULL;
 	size_t n = ((LONG_MAX - 16) / 2 >= *alloc) ? 2 * *alloc + 16 : LONG_MAX;
-	if (nr > (long)n)
+	if ((size_t)nr > n)
 		n = nr;
 	if (SIZE_MAX / size >= n)
 		tmp = xdl_realloc(p, n * size);
--- a/src/xdiff/xutils.h
+++ b/src/xdiff/xutils.h
@@ -34,7 +34,15 @@ void *xdl_cha_alloc(chastore_t *cha);
 long xdl_guess_lines(mmfile_t *mf, long sample);
 int xdl_blankline(const char *line, long size, long flags);
 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags);
-unsigned long xdl_hash_record(char const **data, char const *top, long flags);
+unsigned long xdl_hash_record_verbatim(char const **data, char const *top);
+unsigned long xdl_hash_record_with_whitespace(char const **data, char const *top, long flags);
+static inline unsigned long xdl_hash_record(char const **data, char const *top, long flags)
+{
+	if (flags & XDF_WHITESPACE_FLAGS)
+		return xdl_hash_record_with_whitespace(data, top, flags);
+	else
+		return xdl_hash_record_verbatim(data, top);
+}
 unsigned int xdl_hashbits(unsigned int size);
 int xdl_num_out(char *out, long val);
 int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2,