Merge branch 'jk/parse-int' into jch

Introduce a more robust way to parse a decimal integer stored in a piece of memory that is not necessarily terminated with NUL (which Asan strict-string-check complains even when use of strtol() is safe due to varified existence of whitespace after the digits). * jk/parse-int: fsck: use parse_unsigned_from_buf() for parsing timestamp cache-tree: use parse_int_from_buf() parse: add functions for parsing from non-string buffers parse: prefer bool to int for boolean returns
2025-12-12 20:36:24 +01:00 · 2025-12-12 15:53:07 +09:00
parent 265888763b 247c6598a8
commit d8593af887
8 changed files with 263 additions and 80 deletions
--- a/1
+++ b/1
@@ -1510,6 +1510,7 @@ CLAR_TEST_SUITES += u-mem-pool
 CLAR_TEST_SUITES += u-oid-array
 CLAR_TEST_SUITES += u-oidmap
 CLAR_TEST_SUITES += u-oidtree
+CLAR_TEST_SUITES += u-parse-int
 CLAR_TEST_SUITES += u-prio-queue
 CLAR_TEST_SUITES += u-reftable-basics
 CLAR_TEST_SUITES += u-reftable-block
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -16,6 +16,7 @@
 #include "promisor-remote.h"
 #include "trace.h"
 #include "trace2.h"
+#include "parse.h"

 #ifndef DEBUG_CACHE_TREE
 #define DEBUG_CACHE_TREE 0
@@ -550,32 +551,13 @@ void cache_tree_write(struct strbuf *sb, struct cache_tree *root)

 static int parse_int(const char **ptr, unsigned long *len_p, int *out)
 {
-	const char *s = *ptr;
-	unsigned long len = *len_p;
-	int ret = 0;
-	int sign = 1;
+	const char *ep;

-	while (len && *s == '-') {
-		sign *= -1;
-		s++;
-		len--;
-	}
-
-	while (len) {
-		if (!isdigit(*s))
-			break;
-		ret *= 10;
-		ret += *s - '0';
-		s++;
-		len--;
-	}
-
-	if (s == *ptr)
+	if (!parse_int_from_buf(*ptr, *len_p, &ep, out))
 		return -1;

-	*ptr = s;
-	*len_p = len;
-	*out = sign * ret;
+	*len_p -= ep - *ptr;
+	*ptr = ep;
 	return 0;
 }

--- a/compat/posix.h
+++ b/compat/posix.h
@@ -253,6 +253,8 @@ char *gitdirname(char *);
 typedef uintmax_t timestamp_t;
 #define PRItime PRIuMAX
 #define parse_timestamp strtoumax
+#define parse_timestamp_from_buf(buf, len, ep, result) \
+	parse_unsigned_from_buf((buf), (len), (ep), (result), TIME_MAX)
 #define TIME_MAX UINTMAX_MAX
 #define TIME_MIN 0

--- a/fsck.c
+++ b/fsck.c
@@ -860,28 +860,13 @@ static int verify_headers(const void *data, unsigned long size,
 		FSCK_MSG_UNTERMINATED_HEADER, "unterminated header");
 }

-static timestamp_t parse_timestamp_from_buf(const char **start, const char *end)
-{
-	const char *p = *start;
-	char buf[24]; /* big enough for 2^64 */
-	size_t i = 0;
-
-	while (p < end && isdigit(*p)) {
-		if (i >= ARRAY_SIZE(buf) - 1)
-			return TIME_MAX;
-		buf[i++] = *p++;
-	}
-	buf[i] = '\0';
-	*start = p;
-	return parse_timestamp(buf, NULL, 10);
-}
-
 static int fsck_ident(const char **ident, const char *ident_end,
 		      const struct object_id *oid, enum object_type type,
 		      struct fsck_options *options)
 {
 	const char *p = *ident;
 	const char *nl;
+	timestamp_t timestamp;

 	nl = memchr(p, '\n', ident_end - p);
 	if (!nl)
@@ -933,7 +918,8 @@ static int fsck_ident(const char **ident, const char *ident_end,
 			      "invalid author/committer line - bad date");
 	if (*p == '0' && p[1] != ' ')
 		return report(options, oid, type, FSCK_MSG_ZERO_PADDED_DATE, "invalid author/committer line - zero-padded date");
-	if (date_overflows(parse_timestamp_from_buf(&p, ident_end)))
+	if (!parse_timestamp_from_buf(p, ident_end - p, &p, &timestamp) ||
+	    date_overflows(timestamp))
 		return report(options, oid, type, FSCK_MSG_BAD_DATE_OVERFLOW, "invalid author/committer line - date causes integer overflow");
 	if (*p != ' ')
 		return report(options, oid, type, FSCK_MSG_BAD_DATE, "invalid author/committer line - bad date");
--- a/parse.c
+++ b/parse.c
@@ -15,7 +15,7 @@ static uintmax_t get_unit_factor(const char *end)
 	return 0;
 }

-int git_parse_signed(const char *value, intmax_t *ret, intmax_t max)
+bool git_parse_signed(const char *value, intmax_t *ret, intmax_t max)
 {
 	if (value && *value) {
 		char *end;
@@ -28,30 +28,30 @@ int git_parse_signed(const char *value, intmax_t *ret, intmax_t max)
 		errno = 0;
 		val = strtoimax(value, &end, 0);
 		if (errno == ERANGE)
-			return 0;
+			return false;
 		if (end == value) {
 			errno = EINVAL;
-			return 0;
+			return false;
 		}
 		factor = get_unit_factor(end);
 		if (!factor) {
 			errno = EINVAL;
-			return 0;
+			return false;
 		}
 		if ((val < 0 && (-max - 1) / factor > val) ||
 		    (val > 0 && max / factor < val)) {
 			errno = ERANGE;
-			return 0;
+			return false;
 		}
 		val *= factor;
 		*ret = val;
-		return 1;
+		return true;
 	}
 	errno = EINVAL;
-	return 0;
+	return false;
 }

-int git_parse_unsigned(const char *value, uintmax_t *ret, uintmax_t max)
+bool git_parse_unsigned(const char *value, uintmax_t *ret, uintmax_t max)
 {
 	if (value && *value) {
 		char *end;
@@ -61,71 +61,71 @@ int git_parse_unsigned(const char *value, uintmax_t *ret, uintmax_t max)
 		/* negative values would be accepted by strtoumax */
 		if (strchr(value, '-')) {
 			errno = EINVAL;
-			return 0;
+			return false;
 		}
 		errno = 0;
 		val = strtoumax(value, &end, 0);
 		if (errno == ERANGE)
-			return 0;
+			return false;
 		if (end == value) {
 			errno = EINVAL;
-			return 0;
+			return false;
 		}
 		factor = get_unit_factor(end);
 		if (!factor) {
 			errno = EINVAL;
-			return 0;
+			return false;
 		}
 		if (unsigned_mult_overflows(factor, val) ||
 		    factor * val > max) {
 			errno = ERANGE;
-			return 0;
+			return false;
 		}
 		val *= factor;
 		*ret = val;
-		return 1;
+		return true;
 	}
 	errno = EINVAL;
-	return 0;
+	return false;
 }

-int git_parse_int(const char *value, int *ret)
+bool git_parse_int(const char *value, int *ret)
 {
 	intmax_t tmp;
 	if (!git_parse_signed(value, &tmp, maximum_signed_value_of_type(int)))
-		return 0;
+		return false;
 	*ret = tmp;
-	return 1;
+	return true;
 }

-int git_parse_int64(const char *value, int64_t *ret)
+bool git_parse_int64(const char *value, int64_t *ret)
 {
 	intmax_t tmp;
 	if (!git_parse_signed(value, &tmp, maximum_signed_value_of_type(int64_t)))
-		return 0;
+		return false;
 	*ret = tmp;
-	return 1;
+	return true;
 }

-int git_parse_ulong(const char *value, unsigned long *ret)
+bool git_parse_ulong(const char *value, unsigned long *ret)
 {
 	uintmax_t tmp;
 	if (!git_parse_unsigned(value, &tmp, maximum_unsigned_value_of_type(long)))
-		return 0;
+		return false;
 	*ret = tmp;
-	return 1;
+	return true;
 }

-int git_parse_ssize_t(const char *value, ssize_t *ret)
+bool git_parse_ssize_t(const char *value, ssize_t *ret)
 {
 	intmax_t tmp;
 	if (!git_parse_signed(value, &tmp, maximum_signed_value_of_type(ssize_t)))
-		return 0;
+		return false;
 	*ret = tmp;
-	return 1;
+	return true;
 }

-int git_parse_double(const char *value, double *ret)
+bool git_parse_double(const char *value, double *ret)
 {
 	char *end;
 	double val;
@@ -133,25 +133,25 @@ int git_parse_double(const char *value, double *ret)

 	if (!value || !*value) {
 		errno = EINVAL;
-		return 0;
+		return false;
 	}

 	errno = 0;
 	val = strtod(value, &end);
 	if (errno == ERANGE)
-		return 0;
+		return false;
 	if (end == value) {
 		errno = EINVAL;
-		return 0;
+		return false;
 	}
 	factor = get_unit_factor(end);
 	if (!factor) {
 		errno = EINVAL;
-		return 0;
+		return false;
 	}
 	val *= factor;
 	*ret = val;
-	return 1;
+	return true;
 }

 int git_parse_maybe_bool_text(const char *value)
@@ -209,3 +209,99 @@ unsigned long git_env_ulong(const char *k, unsigned long val)
 		die(_("failed to parse %s"), k);
 	return val;
 }
+
+/*
+ * Helper that handles both signed/unsigned cases. If "negate" is NULL,
+ * negative values are disallowed. If not NULL and the input is negative,
+ * the value is range-checked but the caller is responsible for actually doing
+ * the negatiion. You probably don't want to use this! Use one of
+ * parse_signed_from_buf() or parse_unsigned_from_buf() below.
+ */
+static bool parse_from_buf_internal(const char *buf, size_t len,
+				    const char **ep, bool *negate,
+				    uintmax_t *ret, uintmax_t max)
+{
+	const char *end = buf + len;
+	uintmax_t val = 0;
+
+	while (buf < end && isspace(*buf))
+		buf++;
+
+	if (negate)
+		*negate = false;
+	if (buf < end && *buf == '-') {
+		if (!negate) {
+			errno = EINVAL;
+			return false;
+		}
+		buf++;
+		*negate = true;
+		/* Assume negative range is always one larger than positive. */
+		max = max + 1;
+	} else if (buf < end && *buf == '+') {
+		buf++;
+	}
+
+	if (buf == end || !isdigit(*buf)) {
+		errno = EINVAL;
+		return false;
+	}
+
+	while (buf < end && isdigit(*buf)) {
+		int digit = *buf - '0';
+
+		if (val > max / 10) {
+			errno = ERANGE;
+			return false;
+		}
+		val *= 10;
+		if (val > max - digit) {
+			errno = ERANGE;
+			return false;
+		}
+		val += digit;
+
+		buf++;
+	}
+
+	*ep = buf;
+	*ret = val;
+	return true;
+}
+
+bool parse_unsigned_from_buf(const char *buf, size_t len, const char **ep,
+			     uintmax_t *ret, uintmax_t max)
+{
+	return parse_from_buf_internal(buf, len, ep, NULL, ret, max);
+}
+
+bool parse_signed_from_buf(const char *buf, size_t len, const char **ep,
+			   intmax_t *ret, intmax_t max)
+{
+	uintmax_t u_ret;
+	bool negate;
+
+	if (!parse_from_buf_internal(buf, len, ep, &negate, &u_ret, max))
+		return false;
+	/*
+	 * Range already checked internally, but we must apply negation
+	 * ourselves since only we have the signed integer type.
+	 */
+	if (negate) {
+		*ret = u_ret;
+		*ret = -*ret;
+	} else {
+		*ret = u_ret;
+	}
+	return true;
+}
+
+bool parse_int_from_buf(const char *buf, size_t len, const char **ep, int *ret)
+{
+	intmax_t tmp;
+	if (!parse_signed_from_buf(buf, len, ep, &tmp,
+				   maximum_signed_value_of_type(int)))
+		return false;
+	*ret = tmp;
+	return true;
+}
--- a/parse.h
+++ b/parse.h
@@ -1,13 +1,13 @@
 #ifndef PARSE_H
 #define PARSE_H

-int git_parse_signed(const char *value, intmax_t *ret, intmax_t max);
-int git_parse_unsigned(const char *value, uintmax_t *ret, uintmax_t max);
-int git_parse_ssize_t(const char *, ssize_t *);
-int git_parse_ulong(const char *, unsigned long *);
-int git_parse_int(const char *value, int *ret);
-int git_parse_int64(const char *value, int64_t *ret);
-int git_parse_double(const char *value, double *ret);
+bool git_parse_signed(const char *value, intmax_t *ret, intmax_t max);
+bool git_parse_unsigned(const char *value, uintmax_t *ret, uintmax_t max);
+bool git_parse_ssize_t(const char *, ssize_t *);
+bool git_parse_ulong(const char *, unsigned long *);
+bool git_parse_int(const char *value, int *ret);
+bool git_parse_int64(const char *value, int64_t *ret);
+bool git_parse_double(const char *value, double *ret);

 /**
 * Same as `git_config_bool`, except that it returns -1 on error rather
@@ -19,4 +19,21 @@ int git_parse_maybe_bool_text(const char *value);
 int git_env_bool(const char *, int);
 unsigned long git_env_ulong(const char *, unsigned long);

+/*
+ * These functions parse an integer from a buffer that does not need to be
+ * NUL-terminated. They return true on success, or false if no integer is found
+ * (in which case errno is set to EINVAL) or if the integer is out of the
+ * allowable range (in which case errno is ERANGE).
+ *
+ * You must pass in a non-NULL value for "ep", which returns a pointer to the
+ * next character in the buf (similar to strtol(), etc).
+ *
+ * These functions always parse in base 10 (and do not allow input like "0xff"
+ * to switch to base 16). They do not allow unit suffixes like git_parse_int(),
+ * above.
+ */
+bool parse_unsigned_from_buf(const char *buf, size_t len, const char **ep, uintmax_t *ret, uintmax_t max);
+bool parse_signed_from_buf(const char *buf, size_t len, const char **ep, intmax_t *ret, intmax_t max);
+bool parse_int_from_buf(const char *buf, size_t len, const char **ep, int *ret);
+
 #endif /* PARSE_H */
--- a/t/meson.build
+++ b/t/meson.build
@@ -8,6 +8,7 @@ clar_test_suites = [
  'unit-tests/u-oid-array.c',
  'unit-tests/u-oidmap.c',
  'unit-tests/u-oidtree.c',
+  'unit-tests/u-parse-int.c',
  'unit-tests/u-prio-queue.c',
  'unit-tests/u-reftable-basics.c',
  'unit-tests/u-reftable-block.c',
--- a/t/unit-tests/u-parse-int.c
+++ b/t/unit-tests/u-parse-int.c
@@ -0,0 +1,98 @@
+#include "unit-test.h"
+#include "parse.h"
+
+static void check_int(const char *buf, size_t len,
+		      size_t expect_ep_ofs, int expect_errno,
+		      int expect_result)
+{
+	const char *ep;
+	int result;
+	bool ok = parse_int_from_buf(buf, len, &ep, &result);
+
+	if (expect_errno) {
+		cl_assert(!ok);
+		cl_assert_equal_i(expect_errno, errno);
+		return;
+	}
+
+	cl_assert(ok);
+	cl_assert_equal_i(expect_result, result);
+	cl_assert_equal_i(expect_ep_ofs, ep - buf);
+}
+
+static void check_int_str(const char *buf, size_t ofs, int err, int res)
+{
+	check_int(buf, strlen(buf), ofs, err, res);
+}
+
+static void check_int_full(const char *buf, int res)
+{
+	check_int_str(buf, strlen(buf), 0, res);
+}
+
+static void check_int_err(const char *buf, int err)
+{
+	check_int(buf, strlen(buf), 0, err, 0);
+}
+
+void test_parse_int__basic(void)
+{
+	cl_invoke(check_int_full("0", 0));
+	cl_invoke(check_int_full("11", 11));
+	cl_invoke(check_int_full("-23", -23));
+	cl_invoke(check_int_full("+23", 23));
+
+	cl_invoke(check_int_str("  31337  ", 7, 0, 31337));
+
+	cl_invoke(check_int_err("  garbage", EINVAL));
+	cl_invoke(check_int_err("", EINVAL));
+	cl_invoke(check_int_err("-", EINVAL));
+
+	cl_invoke(check_int("123", 2, 2, 0, 12));
+}
+
+void test_parse_int__range(void)
+{
+	/*
+	 * These assume a 32-bit int. We could avoid that with some
+	 * conditionals, but it's probably better for the test to
+	 * fail noisily and we can decide how to handle it then.
+	 */
+	cl_invoke(check_int_full("2147483647", 2147483647));
+	cl_invoke(check_int_err("2147483648", ERANGE));
+	cl_invoke(check_int_full("-2147483647", -2147483647));
+	cl_invoke(check_int_full("-2147483648", -2147483648));
+	cl_invoke(check_int_err("-2147483649", ERANGE));
+}
+
+static void check_unsigned(const char *buf, uintmax_t max,
+			   int expect_errno, uintmax_t expect_result)
+{
+	const char *ep;
+	uintmax_t result;
+	bool ok = parse_unsigned_from_buf(buf, strlen(buf), &ep, &result, max);
+
+	if (expect_errno) {
+		cl_assert(!ok);
+		cl_assert_equal_i(expect_errno, errno);
+		return;
+	}
+
+	cl_assert(ok);
+	cl_assert_equal_s(ep, "");
+	/*
+	 * Do not use cl_assert_equal_i_fmt(..., PRIuMAX) here. The macro
+	 * casts to int under the hood, corrupting the values.
+	 */
+	clar__assert_equal(CLAR_CURRENT_FILE, CLAR_CURRENT_FUNC,
+			   CLAR_CURRENT_LINE,
+			   "expect_result != result", 1,
+			   "%"PRIuMAX, expect_result, result);
+}
+
+void test_parse_int__unsigned(void)
+{
+	cl_invoke(check_unsigned("4294967295", UINT_MAX, 0, 4294967295U));
+	cl_invoke(check_unsigned("1053", 1000, ERANGE, 0));
+	cl_invoke(check_unsigned("-17", UINT_MAX, EINVAL, 0));
+}