Files
git-mirror/versioncmp.c
SZEDER Gábor 51acfa9db5 versioncmp: use earliest-longest contained suffix to determine sorting order
When comparing tagnames, it is possible that a tagname contains more
than one of the configured prerelease suffixes around the first
different character.  After fixing a bug in the previous commit such a
tagname is sorted according to the contained suffix which comes first
in the configuration.  This is, however, not quite the right thing to
do in the following corner cases:

  1.   $ git -c versionsort.suffix=-bar
             -c versionsort.suffix=-foo-baz
             -c versionsort.suffix=-foo-bar
             tag -l --sort=version:refname 'v1*'
       v1.0-foo-bar
       v1.0-foo-baz

     The suffix of the tagname 'v1.0-foo-bar' is clearly '-foo-bar',
     so it should be listed last.  However, as it also contains '-bar'
     around the first different character, it is listed first instead,
     because that '-bar' suffix comes first in the configuration.

  2. One of the configured suffixes starts with the other:

       $ git -c versionsort.prereleasesuffix=-pre \
             -c versionsort.prereleasesuffix=-prerelease \
             tag -l --sort=version:refname 'v2*'
       v2.0-prerelease1
       v2.0-pre1
       v2.0-pre2

     Here the tagname 'v2.0-prerelease1' should be the last.  When
     comparing 'v2.0-pre1' and 'v2.0-prerelease1' the first different
     characters are '1' and 'r', respectively.  Since this first
     different character must be part of the configured suffix, the
     '-pre' suffix is not recognized in the first tagname.  OTOH, the
     '-prerelease' suffix is properly recognized in
     'v2.0-prerelease1', thus it is listed first.

Improve version sort in these corner cases, and

  - look for a configured prerelease suffix containing the first
    different character or ending right before it, so the '-pre'
    suffixes are recognized in case (2).  This also means that
    when comparing tagnames 'v2.0-pre1' and 'v2.0-pre2',
    swap_prereleases() would find the '-pre' suffix in both, but then
    it will return "undecided" and the caller will do the right thing
    by sorting based on '1' and '2'.

  - If the tagname contains more than one suffix, then give precedence
    to the contained suffix that starts at the earliest offset in the
    tagname to address (1).

  - If there is more than one suffix starting at that earliest
    position, then give precedence to the longest of those suffixes,
    thus ensuring that in (2) the tagname 'v2.0-prerelease1' won't be
    sorted based on the '-pre' suffix.

Add tests for these corner cases and adjust the documentation
accordingly.

Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-12-08 11:11:57 -08:00

172 lines
4.4 KiB
C

#include "cache.h"
#include "string-list.h"
/*
* versioncmp(): copied from string/strverscmp.c in glibc commit
* ee9247c38a8def24a59eb5cfb7196a98bef8cfdc, reformatted to Git coding
* style. The implementation is under LGPL-2.1 and Git relicenses it
* to GPLv2.
*/
/*
 * states: S_N: normal, S_I: comparing integral part, S_F: comparing
 * fractional parts, S_Z: idem but with leading Zeroes only
 *
 * The values are multiples of 3 because versioncmp() adds the class of
 * the current character (0: other, 1: digit 1-9, 2: '0') to the state
 * and uses the sum as an index into its lookup tables.
 */
#define S_N 0x0
#define S_I 0x3
#define S_F 0x6
#define S_Z 0x9
/*
 * result_type: CMP: return diff; LEN: compare using len_diff/diff
 *
 * Any other value found in versioncmp()'s result_type[] table (-1 or +1)
 * is returned to the caller as is.
 */
#define CMP 2
#define LEN 3
/*
 * Value returned by the "versionsort.prereleasesuffix" config lookup;
 * versioncmp() only consults it when it is non-NULL.
 */
static const struct string_list *prereleases;
/* Non-zero once versioncmp() has performed the config lookup above. */
static int initialized;
/*
 * The best match of a configured prerelease suffix found so far in one
 * tagname.
 */
struct suffix_match {
	int conf_pos;	/* index of the suffix in the configuration, -1: none yet */
	int start;	/* offset in the tagname where the matched suffix starts */
	int len;	/* length of the matched suffix, -1: none yet */
};

/*
 * Look for suffix in tagname at the offsets from start up to the start
 * of the match recorded in *match, and update *match if a better match
 * is found.
 *
 * A better match either starts earlier or starts at the same offset
 * but is longer.  Hence the offset of the current match itself is only
 * tried when this suffix is longer than the one already recorded.
 */
static void find_better_matching_suffix(const char *tagname, const char *suffix,
					int suffix_len, int start, int conf_pos,
					struct suffix_match *match)
{
	int end = match->len < suffix_len ? match->start : match->start - 1;
	int i;

	for (i = start; i <= end; i++) {
		if (starts_with(tagname + i, suffix)) {
			match->conf_pos = conf_pos;
			match->start = i;
			match->len = suffix_len;
			break;
		}
	}
}

/*
 * off is the offset of the first different character in the two strings
 * s1 and s2. If either s1 or s2 contains a prerelease suffix containing
 * that offset or a suffix ends right before that offset, then that
 * string will be forced to be on top.
 *
 * If both s1 and s2 contain a (different) suffix around that position,
 * their order is determined by the order of those two suffixes in the
 * configuration.
 * If any of the strings contains more than one different suffix around
 * that position, then that string is sorted according to the contained
 * suffix which starts at the earliest offset in that string.
 * If more than one different contained suffix starts at that earliest
 * offset, then that string is sorted according to the longest of those
 * suffixes.
 *
 * Return non-zero if *diff contains the return value for versioncmp().
 * Return zero, leaving *diff untouched, if neither string contains a
 * suffix or both are sorted according to the same suffix.
 */
static int swap_prereleases(const char *s1,
			    const char *s2,
			    int off,
			    int *diff)
{
	int i;
	struct suffix_match match1 = { -1, off, -1 };
	struct suffix_match match2 = { -1, off, -1 };

	for (i = 0; i < prereleases->nr; i++) {
		const char *suffix = prereleases->items[i].string;
		int start, suffix_len = strlen(suffix);

		/*
		 * The earliest offset at which this suffix could still
		 * contain the character at off or end right before it.
		 */
		if (suffix_len < off)
			start = off - suffix_len;
		else
			start = 0;

		find_better_matching_suffix(s1, suffix, suffix_len, start,
					    i, &match1);
		find_better_matching_suffix(s2, suffix, suffix_len, start,
					    i, &match2);
	}

	if (match1.conf_pos == -1 && match2.conf_pos == -1)
		return 0;
	if (match1.conf_pos == match2.conf_pos)
		/* Found the same suffix in both, e.g. "-rc" in "v1.0-rcX"
		 * and "v1.0-rcY": the caller should decide based on "X"
		 * and "Y". */
		return 0;

	if (match1.conf_pos >= 0 && match2.conf_pos >= 0)
		*diff = match1.conf_pos - match2.conf_pos;
	else if (match1.conf_pos >= 0)
		*diff = -1;
	else /* if (match2.conf_pos >= 0) */
		*diff = 1;
	return 1;
}
/*
 * Class of a character as used by the versioncmp() lookup tables:
 * 0 for a non-digit, 1 for the digits '1'..'9' and 2 for '0'.
 */
static int versioncmp_char_class(unsigned char c)
{
	/* Hint: '0' is a digit too. */
	return (c == '0') + (isdigit(c) != 0);
}

/*
 * Compare S1 and S2 as strings holding indices/version numbers,
 * returning less than, equal to or greater than zero if S1 is less
 * than, equal to or greater than S2 (for more info, see the texinfo
 * doc).
 *
 * When the strings differ and the "versionsort.prereleasesuffix"
 * lookup returned a list, swap_prereleases() gets the first say on
 * the ordering; the digit-aware comparison is only used if it leaves
 * the question undecided.
 */
int versioncmp(const char *s1, const char *s2)
{
	const unsigned char *lhs = (const unsigned char *) s1;
	const unsigned char *rhs = (const unsigned char *) s2;
	unsigned char a, b;
	int state, kind, diff;

	/*
	 * Symbol(s)    0       [1-9]   others
	 * Transition   (10) 0  (01) d  (00) x
	 */
	static const uint8_t next_state[] = {
		/* state    x    d    0  */
		/* S_N */  S_N, S_I, S_Z,
		/* S_I */  S_N, S_I, S_I,
		/* S_F */  S_N, S_F, S_F,
		/* S_Z */  S_N, S_F, S_Z
	};

	static const int8_t result_type[] = {
		/* state   x/x  x/d  x/0  d/x  d/d  d/0  0/x  0/d  0/0  */

		/* S_N */  CMP, CMP, CMP, CMP, LEN, CMP, CMP, CMP, CMP,
		/* S_I */  CMP, -1,  -1,  +1,  LEN, LEN, +1,  LEN, LEN,
		/* S_F */  CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP,
		/* S_Z */  CMP, +1,  +1,  -1,  CMP, CMP, -1,  CMP, CMP
	};

	if (lhs == rhs)
		return 0;

	a = *lhs++;
	b = *rhs++;
	state = S_N + versioncmp_char_class(a);

	/* Walk the common prefix, tracking what kind of run we are in. */
	for (;;) {
		diff = a - b;
		if (diff != 0)
			break;
		if (a == '\0')
			return 0;	/* both strings ended: identical */

		state = next_state[state];
		a = *lhs++;
		b = *rhs++;
		state += versioncmp_char_class(a);
	}

	/* Look up the configured suffixes on the first real difference. */
	if (!initialized) {
		initialized = 1;
		prereleases = git_config_get_value_multi("versionsort.prereleasesuffix");
	}

	/*
	 * lhs already points one past the first different character,
	 * hence the "- 1" to get that character's offset.
	 */
	if (prereleases &&
	    swap_prereleases(s1, s2, (const char *) lhs - s1 - 1, &diff))
		return diff;

	kind = result_type[state * 3 + versioncmp_char_class(b)];
	if (kind == CMP)
		return diff;
	if (kind != LEN)
		return kind;	/* the table holds the answer itself: -1 or +1 */

	/* LEN: the string with the longer run of digits is the greater. */
	while (isdigit(*lhs++))
		if (!isdigit(*rhs++))
			return 1;

	return isdigit(*rhs) ? -1 : diff;
}