[libc++] Fixes std::to_chars for bases != 10.
While working on D70631, Microsoft's unit tests discovered an issue. Our `std::to_chars` implementation for bases != 10 uses the range `[first,last)` as temporary buffer. This violates the contract for to_chars: [charconv.to.chars]/1 http://eel.is/c++draft/charconv#to.chars-1 `to_chars_result to_chars(char* first, char* last, see below value, int base = 10);` "If the member ec of the return value is such that the value is equal to the value of a value-initialized errc, the conversion was successful and the member ptr is the one-past-the-end pointer of the characters written." Our implementation modifies the range `[member ptr, last)`, which causes Microsoft's test to fail. Their test verifies the buffer `[member ptr, last)` is unchanged. (The test is only done when the conversion is successful.) While looking at the code I noticed the performance for bases != 10 also is suboptimal. This is tracked in D97705. This patch fixes the issue and adds a benchmark. This benchmark will be used as baseline for D97705. Reviewed By: #libc, Quuxplusone, zoecarver Differential Revision: https://reviews.llvm.org/D100722
This commit is contained in:
parent
ba631240ae
commit
9393060f90
|
|
@ -0,0 +1,58 @@
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <charconv>
|
||||||
|
#include <random>
|
||||||
|
|
||||||
|
#include "benchmark/benchmark.h"
|
||||||
|
#include "test_macros.h"
|
||||||
|
|
||||||
|
static const std::array<unsigned, 1000> input = [] {
|
||||||
|
std::mt19937 generator;
|
||||||
|
std::uniform_int_distribution<unsigned> distribution(0, std::numeric_limits<unsigned>::max());
|
||||||
|
std::array<unsigned, 1000> result;
|
||||||
|
std::generate_n(result.begin(), result.size(), [&] { return distribution(generator); });
|
||||||
|
return result;
|
||||||
|
}();
|
||||||
|
|
||||||
|
static void BM_to_chars_good(benchmark::State& state) {
|
||||||
|
char buffer[128];
|
||||||
|
int base = state.range(0);
|
||||||
|
while (state.KeepRunningBatch(input.size()))
|
||||||
|
for (auto value : input)
|
||||||
|
benchmark::DoNotOptimize(std::to_chars(buffer, &buffer[128], value, base));
|
||||||
|
}
|
||||||
|
BENCHMARK(BM_to_chars_good)->DenseRange(2, 36, 1);
|
||||||
|
|
||||||
|
static void BM_to_chars_bad(benchmark::State& state) {
|
||||||
|
char buffer[128];
|
||||||
|
int base = state.range(0);
|
||||||
|
struct sample {
|
||||||
|
unsigned size;
|
||||||
|
unsigned value;
|
||||||
|
};
|
||||||
|
std::array<sample, 1000> data;
|
||||||
|
// Assume the failure occurs, on average, halfway during the conversion.
|
||||||
|
std::transform(input.begin(), input.end(), data.begin(), [&](unsigned value) {
|
||||||
|
std::to_chars_result result = std::to_chars(buffer, &buffer[128], value, base);
|
||||||
|
return sample{unsigned((result.ptr - buffer) / 2), value};
|
||||||
|
});
|
||||||
|
|
||||||
|
while (state.KeepRunningBatch(data.size()))
|
||||||
|
for (auto element : data)
|
||||||
|
benchmark::DoNotOptimize(std::to_chars(buffer, &buffer[element.size], element.value, base));
|
||||||
|
}
|
||||||
|
BENCHMARK(BM_to_chars_bad)->DenseRange(2, 36, 1);
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
benchmark::Initialize(&argc, argv);
|
||||||
|
if (benchmark::ReportUnrecognizedArguments(argc, argv))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
benchmark::RunSpecifiedBenchmarks();
|
||||||
|
}
|
||||||
|
|
@ -79,6 +79,7 @@ namespace std {
|
||||||
#include <__utility/to_underlying.h>
|
#include <__utility/to_underlying.h>
|
||||||
#include <cmath> // for log2f
|
#include <cmath> // for log2f
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
#include <cstdlib> // for _LIBCPP_UNREACHABLE
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
@ -400,33 +401,54 @@ __to_chars_integral(char* __first, char* __last, _Tp __value, int __base,
|
||||||
return __to_chars_integral(__first, __last, __x, __base, false_type());
|
return __to_chars_integral(__first, __last, __x, __base, false_type());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename _Tp>
|
||||||
|
_LIBCPP_AVAILABILITY_TO_CHARS _LIBCPP_INLINE_VISIBILITY int __to_chars_integral_width(_Tp __value, unsigned __base) {
|
||||||
|
_LIBCPP_ASSERT(__value >= 0, "The function requires a non-negative value.");
|
||||||
|
|
||||||
|
unsigned __base_2 = __base * __base;
|
||||||
|
unsigned __base_3 = __base_2 * __base;
|
||||||
|
unsigned __base_4 = __base_2 * __base_2;
|
||||||
|
|
||||||
|
int __r = 0;
|
||||||
|
while (true) {
|
||||||
|
if (__value < __base)
|
||||||
|
return __r + 1;
|
||||||
|
if (__value < __base_2)
|
||||||
|
return __r + 2;
|
||||||
|
if (__value < __base_3)
|
||||||
|
return __r + 3;
|
||||||
|
if (__value < __base_4)
|
||||||
|
return __r + 4;
|
||||||
|
|
||||||
|
__value /= __base_4;
|
||||||
|
__r += 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
_LIBCPP_UNREACHABLE();
|
||||||
|
}
|
||||||
|
|
||||||
template <typename _Tp>
|
template <typename _Tp>
|
||||||
_LIBCPP_AVAILABILITY_TO_CHARS
|
_LIBCPP_AVAILABILITY_TO_CHARS
|
||||||
inline _LIBCPP_INLINE_VISIBILITY to_chars_result
|
inline _LIBCPP_INLINE_VISIBILITY to_chars_result
|
||||||
__to_chars_integral(char* __first, char* __last, _Tp __value, int __base,
|
__to_chars_integral(char* __first, char* __last, _Tp __value, int __base,
|
||||||
false_type)
|
false_type)
|
||||||
{
|
{
|
||||||
if (__base == 10)
|
if (__base == 10)
|
||||||
return __to_chars_itoa(__first, __last, __value, false_type());
|
return __to_chars_itoa(__first, __last, __value, false_type());
|
||||||
|
|
||||||
auto __p = __last;
|
ptrdiff_t __cap = __last - __first;
|
||||||
while (__p != __first)
|
int __n = __to_chars_integral_width(__value, __base);
|
||||||
{
|
if (__n > __cap)
|
||||||
auto __c = __value % __base;
|
return {__last, errc::value_too_large};
|
||||||
__value /= __base;
|
|
||||||
*--__p = "0123456789abcdefghijklmnopqrstuvwxyz"[__c];
|
|
||||||
if (__value == 0)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto __len = __last - __p;
|
__last = __first + __n;
|
||||||
if (__value != 0 || !__len)
|
char* __p = __last;
|
||||||
return {__last, errc::value_too_large};
|
do {
|
||||||
else
|
unsigned __c = __value % __base;
|
||||||
{
|
__value /= __base;
|
||||||
_VSTD::memmove(__first, __p, __len);
|
*--__p = "0123456789abcdefghijklmnopqrstuvwxyz"[__c];
|
||||||
return {__first + __len, {}};
|
} while (__value != 0);
|
||||||
}
|
return {__last, errc(0)};
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename _Tp, typename enable_if<is_integral<_Tp>::value, int>::type = 0>
|
template <typename _Tp, typename enable_if<is_integral<_Tp>::value, int>::type = 0>
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@
|
||||||
#include <charconv>
|
#include <charconv>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
#include <numeric>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
|
@ -105,8 +106,13 @@ struct to_chars_test_base
|
||||||
using std::to_chars;
|
using std::to_chars;
|
||||||
std::to_chars_result r;
|
std::to_chars_result r;
|
||||||
|
|
||||||
|
// Poison the buffer for testing whether a successful std::to_chars
|
||||||
|
// doesn't modify data beyond r.ptr.
|
||||||
|
std::iota(buf, buf + sizeof(buf), 1);
|
||||||
r = to_chars(buf, buf + sizeof(buf), v, args...);
|
r = to_chars(buf, buf + sizeof(buf), v, args...);
|
||||||
assert(r.ec == std::errc{});
|
assert(r.ec == std::errc{});
|
||||||
|
for (size_t i = r.ptr - buf; i < sizeof(buf); ++i)
|
||||||
|
assert(buf[i] == static_cast<char>(i + 1));
|
||||||
*r.ptr = '\0';
|
*r.ptr = '\0';
|
||||||
|
|
||||||
auto a = fromchars(buf, r.ptr, args...);
|
auto a = fromchars(buf, r.ptr, args...);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue