Merge branch 'jk/parse-int' into jch

Introduce a more robust way to parse a decimal integer stored in a
piece of memory that is not necessarily terminated with NUL (which
Asan strict-string-check complains even when use of strtol() is
safe due to varified existence of whitespace after the digits).

* jk/parse-int:
  fsck: use parse_unsigned_from_buf() for parsing timestamp
  cache-tree: use parse_int_from_buf()
  parse: add functions for parsing from non-string buffers
  parse: prefer bool to int for boolean returns
This commit is contained in:
Junio C Hamano
2025-12-12 15:53:07 +09:00
8 changed files with 263 additions and 80 deletions

View File

@@ -1510,6 +1510,7 @@ CLAR_TEST_SUITES += u-mem-pool
CLAR_TEST_SUITES += u-oid-array
CLAR_TEST_SUITES += u-oidmap
CLAR_TEST_SUITES += u-oidtree
CLAR_TEST_SUITES += u-parse-int
CLAR_TEST_SUITES += u-prio-queue
CLAR_TEST_SUITES += u-reftable-basics
CLAR_TEST_SUITES += u-reftable-block

View File

@@ -16,6 +16,7 @@
#include "promisor-remote.h"
#include "trace.h"
#include "trace2.h"
#include "parse.h"
#ifndef DEBUG_CACHE_TREE
#define DEBUG_CACHE_TREE 0
@@ -550,32 +551,13 @@ void cache_tree_write(struct strbuf *sb, struct cache_tree *root)
static int parse_int(const char **ptr, unsigned long *len_p, int *out)
{
const char *s = *ptr;
unsigned long len = *len_p;
int ret = 0;
int sign = 1;
const char *ep;
while (len && *s == '-') {
sign *= -1;
s++;
len--;
}
while (len) {
if (!isdigit(*s))
break;
ret *= 10;
ret += *s - '0';
s++;
len--;
}
if (s == *ptr)
if (!parse_int_from_buf(*ptr, *len_p, &ep, out))
return -1;
*ptr = s;
*len_p = len;
*out = sign * ret;
*len_p -= ep - *ptr;
*ptr = ep;
return 0;
}

View File

@@ -253,6 +253,8 @@ char *gitdirname(char *);
typedef uintmax_t timestamp_t;
#define PRItime PRIuMAX
#define parse_timestamp strtoumax
#define parse_timestamp_from_buf(buf, len, ep, result) \
parse_unsigned_from_buf((buf), (len), (ep), (result), TIME_MAX)
#define TIME_MAX UINTMAX_MAX
#define TIME_MIN 0

20
fsck.c
View File

@@ -860,28 +860,13 @@ static int verify_headers(const void *data, unsigned long size,
FSCK_MSG_UNTERMINATED_HEADER, "unterminated header");
}
static timestamp_t parse_timestamp_from_buf(const char **start, const char *end)
{
const char *p = *start;
char buf[24]; /* big enough for 2^64 */
size_t i = 0;
while (p < end && isdigit(*p)) {
if (i >= ARRAY_SIZE(buf) - 1)
return TIME_MAX;
buf[i++] = *p++;
}
buf[i] = '\0';
*start = p;
return parse_timestamp(buf, NULL, 10);
}
static int fsck_ident(const char **ident, const char *ident_end,
const struct object_id *oid, enum object_type type,
struct fsck_options *options)
{
const char *p = *ident;
const char *nl;
timestamp_t timestamp;
nl = memchr(p, '\n', ident_end - p);
if (!nl)
@@ -933,7 +918,8 @@ static int fsck_ident(const char **ident, const char *ident_end,
"invalid author/committer line - bad date");
if (*p == '0' && p[1] != ' ')
return report(options, oid, type, FSCK_MSG_ZERO_PADDED_DATE, "invalid author/committer line - zero-padded date");
if (date_overflows(parse_timestamp_from_buf(&p, ident_end)))
if (!parse_timestamp_from_buf(p, ident_end - p, &p, &timestamp) ||
date_overflows(timestamp))
return report(options, oid, type, FSCK_MSG_BAD_DATE_OVERFLOW, "invalid author/committer line - date causes integer overflow");
if (*p != ' ')
return report(options, oid, type, FSCK_MSG_BAD_DATE, "invalid author/committer line - bad date");

162
parse.c
View File

@@ -15,7 +15,7 @@ static uintmax_t get_unit_factor(const char *end)
return 0;
}
int git_parse_signed(const char *value, intmax_t *ret, intmax_t max)
bool git_parse_signed(const char *value, intmax_t *ret, intmax_t max)
{
if (value && *value) {
char *end;
@@ -28,30 +28,30 @@ int git_parse_signed(const char *value, intmax_t *ret, intmax_t max)
errno = 0;
val = strtoimax(value, &end, 0);
if (errno == ERANGE)
return 0;
return false;
if (end == value) {
errno = EINVAL;
return 0;
return false;
}
factor = get_unit_factor(end);
if (!factor) {
errno = EINVAL;
return 0;
return false;
}
if ((val < 0 && (-max - 1) / factor > val) ||
(val > 0 && max / factor < val)) {
errno = ERANGE;
return 0;
return false;
}
val *= factor;
*ret = val;
return 1;
return true;
}
errno = EINVAL;
return 0;
return false;
}
int git_parse_unsigned(const char *value, uintmax_t *ret, uintmax_t max)
bool git_parse_unsigned(const char *value, uintmax_t *ret, uintmax_t max)
{
if (value && *value) {
char *end;
@@ -61,71 +61,71 @@ int git_parse_unsigned(const char *value, uintmax_t *ret, uintmax_t max)
/* negative values would be accepted by strtoumax */
if (strchr(value, '-')) {
errno = EINVAL;
return 0;
return false;
}
errno = 0;
val = strtoumax(value, &end, 0);
if (errno == ERANGE)
return 0;
return false;
if (end == value) {
errno = EINVAL;
return 0;
return false;
}
factor = get_unit_factor(end);
if (!factor) {
errno = EINVAL;
return 0;
return false;
}
if (unsigned_mult_overflows(factor, val) ||
factor * val > max) {
errno = ERANGE;
return 0;
return false;
}
val *= factor;
*ret = val;
return 1;
return true;
}
errno = EINVAL;
return 0;
return false;
}
int git_parse_int(const char *value, int *ret)
bool git_parse_int(const char *value, int *ret)
{
intmax_t tmp;
if (!git_parse_signed(value, &tmp, maximum_signed_value_of_type(int)))
return 0;
return false;
*ret = tmp;
return 1;
return true;
}
int git_parse_int64(const char *value, int64_t *ret)
bool git_parse_int64(const char *value, int64_t *ret)
{
intmax_t tmp;
if (!git_parse_signed(value, &tmp, maximum_signed_value_of_type(int64_t)))
return 0;
return false;
*ret = tmp;
return 1;
return true;
}
int git_parse_ulong(const char *value, unsigned long *ret)
bool git_parse_ulong(const char *value, unsigned long *ret)
{
uintmax_t tmp;
if (!git_parse_unsigned(value, &tmp, maximum_unsigned_value_of_type(long)))
return 0;
return false;
*ret = tmp;
return 1;
return true;
}
int git_parse_ssize_t(const char *value, ssize_t *ret)
bool git_parse_ssize_t(const char *value, ssize_t *ret)
{
intmax_t tmp;
if (!git_parse_signed(value, &tmp, maximum_signed_value_of_type(ssize_t)))
return 0;
return false;
*ret = tmp;
return 1;
return true;
}
int git_parse_double(const char *value, double *ret)
bool git_parse_double(const char *value, double *ret)
{
char *end;
double val;
@@ -133,25 +133,25 @@ int git_parse_double(const char *value, double *ret)
if (!value || !*value) {
errno = EINVAL;
return 0;
return false;
}
errno = 0;
val = strtod(value, &end);
if (errno == ERANGE)
return 0;
return false;
if (end == value) {
errno = EINVAL;
return 0;
return false;
}
factor = get_unit_factor(end);
if (!factor) {
errno = EINVAL;
return 0;
return false;
}
val *= factor;
*ret = val;
return 1;
return true;
}
int git_parse_maybe_bool_text(const char *value)
@@ -209,3 +209,99 @@ unsigned long git_env_ulong(const char *k, unsigned long val)
die(_("failed to parse %s"), k);
return val;
}
/*
* Helper that handles both signed/unsigned cases. If "negate" is NULL,
* negative values are disallowed. If not NULL and the input is negative,
* the value is range-checked but the caller is responsible for actually doing
* the negatiion. You probably don't want to use this! Use one of
* parse_signed_from_buf() or parse_unsigned_from_buf() below.
*/
static bool parse_from_buf_internal(const char *buf, size_t len,
const char **ep, bool *negate,
uintmax_t *ret, uintmax_t max)
{
const char *end = buf + len;
uintmax_t val = 0;
while (buf < end && isspace(*buf))
buf++;
if (negate)
*negate = false;
if (buf < end && *buf == '-') {
if (!negate) {
errno = EINVAL;
return false;
}
buf++;
*negate = true;
/* Assume negative range is always one larger than positive. */
max = max + 1;
} else if (buf < end && *buf == '+') {
buf++;
}
if (buf == end || !isdigit(*buf)) {
errno = EINVAL;
return false;
}
while (buf < end && isdigit(*buf)) {
int digit = *buf - '0';
if (val > max / 10) {
errno = ERANGE;
return false;
}
val *= 10;
if (val > max - digit) {
errno = ERANGE;
return false;
}
val += digit;
buf++;
}
*ep = buf;
*ret = val;
return true;
}
bool parse_unsigned_from_buf(const char *buf, size_t len, const char **ep,
uintmax_t *ret, uintmax_t max)
{
return parse_from_buf_internal(buf, len, ep, NULL, ret, max);
}
bool parse_signed_from_buf(const char *buf, size_t len, const char **ep,
intmax_t *ret, intmax_t max)
{
uintmax_t u_ret;
bool negate;
if (!parse_from_buf_internal(buf, len, ep, &negate, &u_ret, max))
return false;
/*
* Range already checked internally, but we must apply negation
* ourselves since only we have the signed integer type.
*/
if (negate) {
*ret = u_ret;
*ret = -*ret;
} else {
*ret = u_ret;
}
return true;
}
bool parse_int_from_buf(const char *buf, size_t len, const char **ep, int *ret)
{
intmax_t tmp;
if (!parse_signed_from_buf(buf, len, ep, &tmp,
maximum_signed_value_of_type(int)))
return false;
*ret = tmp;
return true;
}

31
parse.h
View File

@@ -1,13 +1,13 @@
#ifndef PARSE_H
#define PARSE_H
int git_parse_signed(const char *value, intmax_t *ret, intmax_t max);
int git_parse_unsigned(const char *value, uintmax_t *ret, uintmax_t max);
int git_parse_ssize_t(const char *, ssize_t *);
int git_parse_ulong(const char *, unsigned long *);
int git_parse_int(const char *value, int *ret);
int git_parse_int64(const char *value, int64_t *ret);
int git_parse_double(const char *value, double *ret);
bool git_parse_signed(const char *value, intmax_t *ret, intmax_t max);
bool git_parse_unsigned(const char *value, uintmax_t *ret, uintmax_t max);
bool git_parse_ssize_t(const char *, ssize_t *);
bool git_parse_ulong(const char *, unsigned long *);
bool git_parse_int(const char *value, int *ret);
bool git_parse_int64(const char *value, int64_t *ret);
bool git_parse_double(const char *value, double *ret);
/**
* Same as `git_config_bool`, except that it returns -1 on error rather
@@ -19,4 +19,21 @@ int git_parse_maybe_bool_text(const char *value);
int git_env_bool(const char *, int);
unsigned long git_env_ulong(const char *, unsigned long);
/*
* These functions parse an integer from a buffer that does not need to be
* NUL-terminated. They return true on success, or false if no integer is found
* (in which case errno is set to EINVAL) or if the integer is out of the
* allowable range (in which case errno is ERANGE).
*
* You must pass in a non-NULL value for "ep", which returns a pointer to the
* next character in the buf (similar to strtol(), etc).
*
* These functions always parse in base 10 (and do not allow input like "0xff"
* to switch to base 16). They do not allow unit suffixes like git_parse_int(),
* above.
*/
bool parse_unsigned_from_buf(const char *buf, size_t len, const char **ep, uintmax_t *ret, uintmax_t max);
bool parse_signed_from_buf(const char *buf, size_t len, const char **ep, intmax_t *ret, intmax_t max);
bool parse_int_from_buf(const char *buf, size_t len, const char **ep, int *ret);
#endif /* PARSE_H */

View File

@@ -8,6 +8,7 @@ clar_test_suites = [
'unit-tests/u-oid-array.c',
'unit-tests/u-oidmap.c',
'unit-tests/u-oidtree.c',
'unit-tests/u-parse-int.c',
'unit-tests/u-prio-queue.c',
'unit-tests/u-reftable-basics.c',
'unit-tests/u-reftable-block.c',

View File

@@ -0,0 +1,98 @@
#include "unit-test.h"
#include "parse.h"
static void check_int(const char *buf, size_t len,
size_t expect_ep_ofs, int expect_errno,
int expect_result)
{
const char *ep;
int result;
bool ok = parse_int_from_buf(buf, len, &ep, &result);
if (expect_errno) {
cl_assert(!ok);
cl_assert_equal_i(expect_errno, errno);
return;
}
cl_assert(ok);
cl_assert_equal_i(expect_result, result);
cl_assert_equal_i(expect_ep_ofs, ep - buf);
}
static void check_int_str(const char *buf, size_t ofs, int err, int res)
{
check_int(buf, strlen(buf), ofs, err, res);
}
static void check_int_full(const char *buf, int res)
{
check_int_str(buf, strlen(buf), 0, res);
}
static void check_int_err(const char *buf, int err)
{
check_int(buf, strlen(buf), 0, err, 0);
}
void test_parse_int__basic(void)
{
cl_invoke(check_int_full("0", 0));
cl_invoke(check_int_full("11", 11));
cl_invoke(check_int_full("-23", -23));
cl_invoke(check_int_full("+23", 23));
cl_invoke(check_int_str(" 31337 ", 7, 0, 31337));
cl_invoke(check_int_err(" garbage", EINVAL));
cl_invoke(check_int_err("", EINVAL));
cl_invoke(check_int_err("-", EINVAL));
cl_invoke(check_int("123", 2, 2, 0, 12));
}
void test_parse_int__range(void)
{
/*
* These assume a 32-bit int. We could avoid that with some
* conditionals, but it's probably better for the test to
* fail noisily and we can decide how to handle it then.
*/
cl_invoke(check_int_full("2147483647", 2147483647));
cl_invoke(check_int_err("2147483648", ERANGE));
cl_invoke(check_int_full("-2147483647", -2147483647));
cl_invoke(check_int_full("-2147483648", -2147483648));
cl_invoke(check_int_err("-2147483649", ERANGE));
}
static void check_unsigned(const char *buf, uintmax_t max,
int expect_errno, uintmax_t expect_result)
{
const char *ep;
uintmax_t result;
bool ok = parse_unsigned_from_buf(buf, strlen(buf), &ep, &result, max);
if (expect_errno) {
cl_assert(!ok);
cl_assert_equal_i(expect_errno, errno);
return;
}
cl_assert(ok);
cl_assert_equal_s(ep, "");
/*
* Do not use cl_assert_equal_i_fmt(..., PRIuMAX) here. The macro
* casts to int under the hood, corrupting the values.
*/
clar__assert_equal(CLAR_CURRENT_FILE, CLAR_CURRENT_FUNC,
CLAR_CURRENT_LINE,
"expect_result != result", 1,
"%"PRIuMAX, expect_result, result);
}
void test_parse_int__unsigned(void)
{
cl_invoke(check_unsigned("4294967295", UINT_MAX, 0, 4294967295U));
cl_invoke(check_unsigned("1053", 1000, ERANGE, 0));
cl_invoke(check_unsigned("-17", UINT_MAX, EINVAL, 0));
}