[libc++] Fixes std::to_chars for bases != 10.

While working on D70631, Microsoft's unit tests discovered an issue.
Our `std::to_chars` implementation for bases != 10 uses the range
`[first,last)` as temporary buffer. This violates the contract for
to_chars:
[charconv.to.chars]/1 http://eel.is/c++draft/charconv#to.chars-1
`to_chars_result to_chars(char* first, char* last, see below value, int base = 10);`
"If the member ec of the return value is such that the value is equal to
the value of a value-initialized errc, the conversion was successful and
the member ptr is the one-past-the-end pointer of the characters
written."

Our implementation modifies the range `[member ptr, last)`, which causes
Microsoft's test to fail. Their test verifies the buffer
`[member ptr, last)` is unchanged. (The test is only done when the
conversion is successful.)

While looking at the code I noticed the performance for bases != 10 also
is suboptimal. This is tracked in D97705.

This patch fixes the issue and adds a benchmark. This benchmark will be
used as baseline for D97705.

Reviewed By: #libc, Quuxplusone, zoecarver

Differential Revision: https://reviews.llvm.org/D100722
This commit is contained in:
Mark de Wever 2021-02-27 16:52:39 +01:00
parent ba631240ae
commit 9393060f90
3 changed files with 105 additions and 19 deletions

View File

@ -0,0 +1,58 @@
//===----------------------------------------------------------------------===//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <array>
#include <charconv>
#include <random>
#include "benchmark/benchmark.h"
#include "test_macros.h"
static const std::array<unsigned, 1000> input = [] {
std::mt19937 generator;
std::uniform_int_distribution<unsigned> distribution(0, std::numeric_limits<unsigned>::max());
std::array<unsigned, 1000> result;
std::generate_n(result.begin(), result.size(), [&] { return distribution(generator); });
return result;
}();
static void BM_to_chars_good(benchmark::State& state) {
char buffer[128];
int base = state.range(0);
while (state.KeepRunningBatch(input.size()))
for (auto value : input)
benchmark::DoNotOptimize(std::to_chars(buffer, &buffer[128], value, base));
}
BENCHMARK(BM_to_chars_good)->DenseRange(2, 36, 1);
static void BM_to_chars_bad(benchmark::State& state) {
char buffer[128];
int base = state.range(0);
struct sample {
unsigned size;
unsigned value;
};
std::array<sample, 1000> data;
// Assume the failure occurs, on average, halfway during the conversion.
std::transform(input.begin(), input.end(), data.begin(), [&](unsigned value) {
std::to_chars_result result = std::to_chars(buffer, &buffer[128], value, base);
return sample{unsigned((result.ptr - buffer) / 2), value};
});
while (state.KeepRunningBatch(data.size()))
for (auto element : data)
benchmark::DoNotOptimize(std::to_chars(buffer, &buffer[element.size], element.value, base));
}
BENCHMARK(BM_to_chars_bad)->DenseRange(2, 36, 1);
int main(int argc, char** argv) {
benchmark::Initialize(&argc, argv);
if (benchmark::ReportUnrecognizedArguments(argc, argv))
return 1;
benchmark::RunSpecifiedBenchmarks();
}

View File

@ -79,6 +79,7 @@ namespace std {
#include <__utility/to_underlying.h>
#include <cmath> // for log2f
#include <cstdint>
#include <cstdlib> // for _LIBCPP_UNREACHABLE
#include <cstring>
#include <limits>
#include <type_traits>
@ -400,33 +401,54 @@ __to_chars_integral(char* __first, char* __last, _Tp __value, int __base,
return __to_chars_integral(__first, __last, __x, __base, false_type());
}
template <typename _Tp>
_LIBCPP_AVAILABILITY_TO_CHARS _LIBCPP_INLINE_VISIBILITY int __to_chars_integral_width(_Tp __value, unsigned __base) {
_LIBCPP_ASSERT(__value >= 0, "The function requires a non-negative value.");
unsigned __base_2 = __base * __base;
unsigned __base_3 = __base_2 * __base;
unsigned __base_4 = __base_2 * __base_2;
int __r = 0;
while (true) {
if (__value < __base)
return __r + 1;
if (__value < __base_2)
return __r + 2;
if (__value < __base_3)
return __r + 3;
if (__value < __base_4)
return __r + 4;
__value /= __base_4;
__r += 4;
}
_LIBCPP_UNREACHABLE();
}
template <typename _Tp>
_LIBCPP_AVAILABILITY_TO_CHARS
inline _LIBCPP_INLINE_VISIBILITY to_chars_result
__to_chars_integral(char* __first, char* __last, _Tp __value, int __base,
false_type)
{
if (__base == 10)
return __to_chars_itoa(__first, __last, __value, false_type());
if (__base == 10)
return __to_chars_itoa(__first, __last, __value, false_type());
auto __p = __last;
while (__p != __first)
{
auto __c = __value % __base;
__value /= __base;
*--__p = "0123456789abcdefghijklmnopqrstuvwxyz"[__c];
if (__value == 0)
break;
}
ptrdiff_t __cap = __last - __first;
int __n = __to_chars_integral_width(__value, __base);
if (__n > __cap)
return {__last, errc::value_too_large};
auto __len = __last - __p;
if (__value != 0 || !__len)
return {__last, errc::value_too_large};
else
{
_VSTD::memmove(__first, __p, __len);
return {__first + __len, {}};
}
__last = __first + __n;
char* __p = __last;
do {
unsigned __c = __value % __base;
__value /= __base;
*--__p = "0123456789abcdefghijklmnopqrstuvwxyz"[__c];
} while (__value != 0);
return {__last, errc(0)};
}
template <typename _Tp, typename enable_if<is_integral<_Tp>::value, int>::type = 0>

View File

@ -12,6 +12,7 @@
#include <charconv>
#include <cassert>
#include <limits>
#include <numeric>
#include <string.h>
#include <stdlib.h>
@ -105,8 +106,13 @@ struct to_chars_test_base
using std::to_chars;
std::to_chars_result r;
// Poison the buffer for testing whether a successful std::to_chars
// doesn't modify data beyond r.ptr.
std::iota(buf, buf + sizeof(buf), 1);
r = to_chars(buf, buf + sizeof(buf), v, args...);
assert(r.ec == std::errc{});
for (size_t i = r.ptr - buf; i < sizeof(buf); ++i)
assert(buf[i] == static_cast<char>(i + 1));
*r.ptr = '\0';
auto a = fromchars(buf, r.ptr, args...);