[clang-tidy] extend bugprone-signed-char-misuse check.
Summary: Cover a new use case when using a 'signed char' as an integer might lead to issue with non-ASCII characters. Comparing a 'signed char' with an 'unsigned char' using equality / unequality operator produces an unexpected result for non-ASCII characters. Reviewers: aaron.ballman, alexfh, hokein, njames93 Reviewed By: njames93 Subscribers: xazax.hun, cfe-commits Tags: #clang, #clang-tools-extra Differential Revision: https://reviews.llvm.org/D75749
This commit is contained in:
parent
ee862adf60
commit
04410c565a
|
|
@ -18,6 +18,8 @@ namespace clang {
|
|||
namespace tidy {
|
||||
namespace bugprone {
|
||||
|
||||
static constexpr int UnsignedASCIIUpperBound = 127;
|
||||
|
||||
static Matcher<TypedefDecl> hasAnyListedName(const std::string &Names) {
|
||||
const std::vector<std::string> NameList =
|
||||
utils::options::parseStringList(Names);
|
||||
|
|
@ -33,25 +35,29 @@ void SignedCharMisuseCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
|
|||
Options.store(Opts, "CharTypdefsToIgnore", CharTypdefsToIgnoreList);
|
||||
}
|
||||
|
||||
void SignedCharMisuseCheck::registerMatchers(MatchFinder *Finder) {
|
||||
// Create a matcher for char -> integer cast.
|
||||
BindableMatcher<clang::Stmt> SignedCharMisuseCheck::charCastExpression(
|
||||
bool IsSigned, const Matcher<clang::QualType> &IntegerType,
|
||||
const std::string &CastBindName) const {
|
||||
// We can ignore typedefs which are some kind of integer types
|
||||
// (e.g. typedef char sal_Int8). In this case, we don't need to
|
||||
// worry about the misinterpretation of char values.
|
||||
const auto IntTypedef = qualType(
|
||||
hasDeclaration(typedefDecl(hasAnyListedName(CharTypdefsToIgnoreList))));
|
||||
|
||||
const auto SignedCharType = expr(hasType(qualType(
|
||||
allOf(isAnyCharacter(), isSignedInteger(), unless(IntTypedef)))));
|
||||
auto CharTypeExpr = expr();
|
||||
if (IsSigned) {
|
||||
CharTypeExpr = expr(hasType(
|
||||
qualType(isAnyCharacter(), isSignedInteger(), unless(IntTypedef))));
|
||||
} else {
|
||||
CharTypeExpr = expr(hasType(qualType(
|
||||
isAnyCharacter(), unless(isSignedInteger()), unless(IntTypedef))));
|
||||
}
|
||||
|
||||
const auto IntegerType = qualType(allOf(isInteger(), unless(isAnyCharacter()),
|
||||
unless(booleanType())))
|
||||
.bind("integerType");
|
||||
|
||||
// We are interested in signed char -> integer conversion.
|
||||
const auto ImplicitCastExpr =
|
||||
implicitCastExpr(hasSourceExpression(SignedCharType),
|
||||
implicitCastExpr(hasSourceExpression(CharTypeExpr),
|
||||
hasImplicitDestinationType(IntegerType))
|
||||
.bind("castExpression");
|
||||
.bind(CastBindName);
|
||||
|
||||
const auto CStyleCastExpr = cStyleCastExpr(has(ImplicitCastExpr));
|
||||
const auto StaticCastExpr = cxxStaticCastExpr(has(ImplicitCastExpr));
|
||||
|
|
@ -59,44 +65,84 @@ void SignedCharMisuseCheck::registerMatchers(MatchFinder *Finder) {
|
|||
|
||||
// We catch any type of casts to an integer. We need to have these cast
|
||||
// expressions explicitly to catch only those casts which are direct children
|
||||
// of an assignment/declaration.
|
||||
const auto CastExpr = expr(anyOf(ImplicitCastExpr, CStyleCastExpr,
|
||||
StaticCastExpr, FunctionalCastExpr));
|
||||
// of the checked expressions. (e.g. assignment, declaration).
|
||||
return expr(anyOf(ImplicitCastExpr, CStyleCastExpr, StaticCastExpr,
|
||||
FunctionalCastExpr));
|
||||
}
|
||||
|
||||
// Catch assignments with the suspicious type conversion.
|
||||
const auto AssignmentOperatorExpr = expr(binaryOperator(
|
||||
hasOperatorName("="), hasLHS(hasType(IntegerType)), hasRHS(CastExpr)));
|
||||
void SignedCharMisuseCheck::registerMatchers(MatchFinder *Finder) {
|
||||
const auto IntegerType =
|
||||
qualType(isInteger(), unless(isAnyCharacter()), unless(booleanType()))
|
||||
.bind("integerType");
|
||||
const auto SignedCharCastExpr =
|
||||
charCastExpression(true, IntegerType, "signedCastExpression");
|
||||
const auto UnSignedCharCastExpr =
|
||||
charCastExpression(false, IntegerType, "unsignedCastExpression");
|
||||
|
||||
// Catch assignments with singed char -> integer conversion.
|
||||
const auto AssignmentOperatorExpr =
|
||||
expr(binaryOperator(hasOperatorName("="), hasLHS(hasType(IntegerType)),
|
||||
hasRHS(SignedCharCastExpr)));
|
||||
|
||||
Finder->addMatcher(AssignmentOperatorExpr, this);
|
||||
|
||||
// Catch declarations with the suspicious type conversion.
|
||||
const auto Declaration =
|
||||
varDecl(isDefinition(), hasType(IntegerType), hasInitializer(CastExpr));
|
||||
// Catch declarations with singed char -> integer conversion.
|
||||
const auto Declaration = varDecl(isDefinition(), hasType(IntegerType),
|
||||
hasInitializer(SignedCharCastExpr));
|
||||
|
||||
Finder->addMatcher(Declaration, this);
|
||||
|
||||
// Catch signed char/unsigned char comparison.
|
||||
const auto CompareOperator =
|
||||
expr(binaryOperator(hasAnyOperatorName("==", "!="),
|
||||
anyOf(allOf(hasLHS(SignedCharCastExpr),
|
||||
hasRHS(UnSignedCharCastExpr)),
|
||||
allOf(hasLHS(UnSignedCharCastExpr),
|
||||
hasRHS(SignedCharCastExpr)))))
|
||||
.bind("comparison");
|
||||
|
||||
Finder->addMatcher(CompareOperator, this);
|
||||
}
|
||||
|
||||
void SignedCharMisuseCheck::check(const MatchFinder::MatchResult &Result) {
|
||||
const auto *CastExpression =
|
||||
Result.Nodes.getNodeAs<ImplicitCastExpr>("castExpression");
|
||||
const auto *IntegerType = Result.Nodes.getNodeAs<QualType>("integerType");
|
||||
assert(CastExpression);
|
||||
assert(IntegerType);
|
||||
const auto *SignedCastExpression =
|
||||
Result.Nodes.getNodeAs<ImplicitCastExpr>("signedCastExpression");
|
||||
|
||||
// Ignore the match if we know that the value is not negative.
|
||||
// Ignore the match if we know that the signed char's value is not negative.
|
||||
// The potential misinterpretation happens for negative values only.
|
||||
Expr::EvalResult EVResult;
|
||||
if (!CastExpression->isValueDependent() &&
|
||||
CastExpression->getSubExpr()->EvaluateAsInt(EVResult, *Result.Context)) {
|
||||
llvm::APSInt Value1 = EVResult.Val.getInt();
|
||||
if (Value1.isNonNegative())
|
||||
if (!SignedCastExpression->isValueDependent() &&
|
||||
SignedCastExpression->getSubExpr()->EvaluateAsInt(EVResult,
|
||||
*Result.Context)) {
|
||||
llvm::APSInt Value = EVResult.Val.getInt();
|
||||
if (Value.isNonNegative())
|
||||
return;
|
||||
}
|
||||
|
||||
diag(CastExpression->getBeginLoc(),
|
||||
"'signed char' to %0 conversion; "
|
||||
"consider casting to 'unsigned char' first.")
|
||||
<< *IntegerType;
|
||||
if (const auto *Comparison = Result.Nodes.getNodeAs<Expr>("comparison")) {
|
||||
const auto *UnSignedCastExpression =
|
||||
Result.Nodes.getNodeAs<ImplicitCastExpr>("unsignedCastExpression");
|
||||
|
||||
// We can ignore the ASCII value range also for unsigned char.
|
||||
Expr::EvalResult EVResult;
|
||||
if (!UnSignedCastExpression->isValueDependent() &&
|
||||
UnSignedCastExpression->getSubExpr()->EvaluateAsInt(EVResult,
|
||||
*Result.Context)) {
|
||||
llvm::APSInt Value = EVResult.Val.getInt();
|
||||
if (Value <= UnsignedASCIIUpperBound)
|
||||
return;
|
||||
}
|
||||
|
||||
diag(Comparison->getBeginLoc(),
|
||||
"comparison between 'signed char' and 'unsigned char'");
|
||||
} else if (const auto *IntegerType =
|
||||
Result.Nodes.getNodeAs<QualType>("integerType")) {
|
||||
diag(SignedCastExpression->getBeginLoc(),
|
||||
"'signed char' to %0 conversion; "
|
||||
"consider casting to 'unsigned char' first.")
|
||||
<< *IntegerType;
|
||||
} else
|
||||
llvm_unreachable("Unexpected match");
|
||||
}
|
||||
|
||||
} // namespace bugprone
|
||||
|
|
|
|||
|
|
@ -15,13 +15,11 @@ namespace clang {
|
|||
namespace tidy {
|
||||
namespace bugprone {
|
||||
|
||||
/// Finds ``signed char`` -> integer conversions which might indicate a programming
|
||||
/// error. The basic problem with the ``signed char``, that it might store the
|
||||
/// non-ASCII characters as negative values. The human programmer probably
|
||||
/// expects that after an integer conversion the converted value matches with the
|
||||
/// character code (a value from [0..255]), however, the actual value is in
|
||||
/// [-128..127] interval. This also applies to the plain ``char`` type on
|
||||
/// those implementations which represent ``char`` similar to ``signed char``.
|
||||
/// Finds those ``signed char`` -> integer conversions which might indicate a
|
||||
/// programming error. The basic problem with the ``signed char``, that it might
|
||||
/// store the non-ASCII characters as negative values. This behavior can cause a
|
||||
/// misunderstanding of the written code both when an explicit and when an
|
||||
/// implicit conversion happens.
|
||||
///
|
||||
/// For the user-facing documentation see:
|
||||
/// http://clang.llvm.org/extra/clang-tidy/checks/bugprone-signed-char-misuse.html
|
||||
|
|
@ -34,6 +32,11 @@ public:
|
|||
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
|
||||
|
||||
private:
|
||||
ast_matchers::internal::BindableMatcher<clang::Stmt> charCastExpression(
|
||||
bool IsSigned,
|
||||
const ast_matchers::internal::Matcher<clang::QualType> &IntegerType,
|
||||
const std::string &CastBindName) const;
|
||||
|
||||
const std::string CharTypdefsToIgnoreList;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -3,27 +3,39 @@
|
|||
bugprone-signed-char-misuse
|
||||
===========================
|
||||
|
||||
Finds ``signed char`` -> integer conversions which might indicate a programming
|
||||
error. The basic problem with the ``signed char``, that it might store the
|
||||
non-ASCII characters as negative values. The human programmer probably
|
||||
expects that after an integer conversion the converted value matches with the
|
||||
Finds those ``signed char`` -> integer conversions which might indicate a
|
||||
programming error. The basic problem with the ``signed char``, that it might
|
||||
store the non-ASCII characters as negative values. This behavior can cause a
|
||||
misunderstanding of the written code both when an explicit and when an
|
||||
implicit conversion happens.
|
||||
|
||||
When the code contains an explicit ``signed char`` -> integer conversion, the
|
||||
human programmer probably expects that the converted value matches with the
|
||||
character code (a value from [0..255]), however, the actual value is in
|
||||
[-128..127] interval. This also applies to the plain ``char`` type on
|
||||
those implementations which represent ``char`` similar to ``signed char``.
|
||||
[-128..127] interval. To avoid this kind of misinterpretation, the desired way
|
||||
of converting from a ``signed char`` to an integer value is converting to
|
||||
``unsigned char`` first, which stores all the characters in the positive [0..255]
|
||||
interval which matches the known character codes.
|
||||
|
||||
To avoid this kind of misinterpretation, the desired way of converting from a
|
||||
``signed char`` to an integer value is converting to ``unsigned char`` first,
|
||||
which stores all the characters in the positive [0..255] interval which matches
|
||||
with the known character codes.
|
||||
In case of implicit conversion, the programmer might not actually be aware
|
||||
that a conversion happened and char value is used as an integer. There are
|
||||
some use cases when this unawareness might lead to a functionally imperfect code.
|
||||
For example, checking the equality of a ``signed char`` and an ``unsigned char``
|
||||
variable is something we should avoid in C++ code. During this comparison,
|
||||
the two variables are converted to integers which have different value ranges.
|
||||
For ``signed char``, the non-ASCII characters are stored as a value in [-128..-1]
|
||||
interval, while the same characters are stored in the [128..255] interval for
|
||||
an ``unsigned char``.
|
||||
|
||||
It depends on the actual platform whether ``char`` is handled as ``signed char``
|
||||
It depends on the actual platform whether plain ``char`` is handled as ``signed char``
|
||||
by default and so it is caught by this check or not. To change the default behavior
|
||||
you can use ``-funsigned-char`` and ``-fsigned-char`` compilation options.
|
||||
|
||||
Currently, this check is limited to assignments and variable declarations,
|
||||
where a ``signed char`` is assigned to an integer variable. There are other
|
||||
use cases where the same misinterpretation might lead to similar bogus
|
||||
behavior.
|
||||
where a ``signed char`` is assigned to an integer variable and to
|
||||
equality/inequality comparisons between ``signed char`` and ``unsigned char``.
|
||||
There are other use cases where the unexpected value ranges might lead to
|
||||
similar bogus behavior.
|
||||
|
||||
See also:
|
||||
`STR34-C. Cast characters to unsigned char before converting to larger integer sizes
|
||||
|
|
@ -67,6 +79,29 @@ an ``unsigned char`` value first.
|
|||
return IChar;
|
||||
}
|
||||
|
||||
Another use case is checking the equality of two ``char`` variables with
|
||||
different signedness. Inside the non-ASCII value range this comparison between
|
||||
a ``signed char`` and an ``unsigned char`` always returns ``false``.
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
bool compare(signed char SChar, unsigned char USChar) {
|
||||
if (SChar == USChar)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
The easiest way to fix this kind of comparison is casting one of the arguments,
|
||||
so both arguments will have the same type.
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
bool compare(signed char SChar, unsigned char USChar) {
|
||||
if (static_cast<unsigned char>(SChar) == USChar)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
.. option:: CharTypdefsToIgnore
|
||||
|
||||
A semicolon-separated list of typedef names. In this list, we can list
|
||||
|
|
|
|||
|
|
@ -62,6 +62,34 @@ int CharPointer(signed char *CCharacter) {
|
|||
return NCharacter;
|
||||
}
|
||||
|
||||
int SignedUnsignedCharEquality(signed char SCharacter) {
|
||||
unsigned char USCharacter = 'a';
|
||||
if (SCharacter == USCharacter) // CHECK-MESSAGES: [[@LINE]]:7: warning: comparison between 'signed char' and 'unsigned char' [bugprone-signed-char-misuse]
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int SignedUnsignedCharIneqiality(signed char SCharacter) {
|
||||
unsigned char USCharacter = 'a';
|
||||
if (SCharacter != USCharacter) // CHECK-MESSAGES: [[@LINE]]:7: warning: comparison between 'signed char' and 'unsigned char' [bugprone-signed-char-misuse]
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int CompareWithNonAsciiConstant(unsigned char USCharacter) {
|
||||
const signed char SCharacter = -5;
|
||||
if (USCharacter == SCharacter) // CHECK-MESSAGES: [[@LINE]]:7: warning: comparison between 'signed char' and 'unsigned char' [bugprone-signed-char-misuse]
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int CompareWithUnsignedNonAsciiConstant(signed char SCharacter) {
|
||||
const unsigned char USCharacter = 128;
|
||||
if (USCharacter == SCharacter) // CHECK-MESSAGES: [[@LINE]]:7: warning: comparison between 'signed char' and 'unsigned char' [bugprone-signed-char-misuse]
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
/// Test cases correctly ignored by the check.
|
||||
|
||||
|
|
@ -121,3 +149,61 @@ unsigned char CharToCharCast() {
|
|||
|
||||
return USCharacter;
|
||||
}
|
||||
|
||||
int FixComparisonWithSignedCharCast(signed char SCharacter) {
|
||||
unsigned char USCharacter = 'a';
|
||||
if (SCharacter == static_cast<signed char>(USCharacter))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int FixComparisonWithUnSignedCharCast(signed char SCharacter) {
|
||||
unsigned char USCharacter = 'a';
|
||||
if (static_cast<unsigned char>(SCharacter) == USCharacter)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Make sure we don't catch other type of char comparison.
|
||||
int SameCharTypeComparison(signed char SCharacter) {
|
||||
signed char SCharacter2 = 'a';
|
||||
if (SCharacter == SCharacter2)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Make sure we don't catch other type of char comparison.
|
||||
int SameCharTypeComparison2(unsigned char USCharacter) {
|
||||
unsigned char USCharacter2 = 'a';
|
||||
if (USCharacter == USCharacter2)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Make sure we don't catch integer - char comparison.
|
||||
int CharIntComparison(signed char SCharacter) {
|
||||
int ICharacter = 10;
|
||||
if (SCharacter == ICharacter)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int CompareWithAsciiLiteral(unsigned char USCharacter) {
|
||||
if (USCharacter == 'x') // no warning
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int CompareWithAsciiConstant(unsigned char USCharacter) {
|
||||
const signed char SCharacter = 'a';
|
||||
if (USCharacter == SCharacter)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int CompareWithUnsignedAsciiConstant(signed char SCharacter) {
|
||||
const unsigned char USCharacter = 'a';
|
||||
if (USCharacter == SCharacter)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue