[clang-tidy] extend bugprone-signed-char-misuse check.

Summary:
Cover a new use case when using a 'signed char' as an integer
might lead to issue with non-ASCII characters. Comparing
a 'signed char' with an 'unsigned char' using equality / unequality
operator produces an unexpected result for non-ASCII characters.

Reviewers: aaron.ballman, alexfh, hokein, njames93

Reviewed By: njames93

Subscribers: xazax.hun, cfe-commits

Tags: #clang, #clang-tools-extra

Differential Revision: https://reviews.llvm.org/D75749
This commit is contained in:
Tamás Zolnai 2020-03-14 17:57:02 +01:00
parent ee862adf60
commit 04410c565a
4 changed files with 224 additions and 54 deletions

View File

@ -18,6 +18,8 @@ namespace clang {
namespace tidy { namespace tidy {
namespace bugprone { namespace bugprone {
static constexpr int UnsignedASCIIUpperBound = 127;
static Matcher<TypedefDecl> hasAnyListedName(const std::string &Names) { static Matcher<TypedefDecl> hasAnyListedName(const std::string &Names) {
const std::vector<std::string> NameList = const std::vector<std::string> NameList =
utils::options::parseStringList(Names); utils::options::parseStringList(Names);
@ -33,25 +35,29 @@ void SignedCharMisuseCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
Options.store(Opts, "CharTypdefsToIgnore", CharTypdefsToIgnoreList); Options.store(Opts, "CharTypdefsToIgnore", CharTypdefsToIgnoreList);
} }
void SignedCharMisuseCheck::registerMatchers(MatchFinder *Finder) { // Create a matcher for char -> integer cast.
BindableMatcher<clang::Stmt> SignedCharMisuseCheck::charCastExpression(
bool IsSigned, const Matcher<clang::QualType> &IntegerType,
const std::string &CastBindName) const {
// We can ignore typedefs which are some kind of integer types // We can ignore typedefs which are some kind of integer types
// (e.g. typedef char sal_Int8). In this case, we don't need to // (e.g. typedef char sal_Int8). In this case, we don't need to
// worry about the misinterpretation of char values. // worry about the misinterpretation of char values.
const auto IntTypedef = qualType( const auto IntTypedef = qualType(
hasDeclaration(typedefDecl(hasAnyListedName(CharTypdefsToIgnoreList)))); hasDeclaration(typedefDecl(hasAnyListedName(CharTypdefsToIgnoreList))));
const auto SignedCharType = expr(hasType(qualType( auto CharTypeExpr = expr();
allOf(isAnyCharacter(), isSignedInteger(), unless(IntTypedef))))); if (IsSigned) {
CharTypeExpr = expr(hasType(
qualType(isAnyCharacter(), isSignedInteger(), unless(IntTypedef))));
} else {
CharTypeExpr = expr(hasType(qualType(
isAnyCharacter(), unless(isSignedInteger()), unless(IntTypedef))));
}
const auto IntegerType = qualType(allOf(isInteger(), unless(isAnyCharacter()),
unless(booleanType())))
.bind("integerType");
// We are interested in signed char -> integer conversion.
const auto ImplicitCastExpr = const auto ImplicitCastExpr =
implicitCastExpr(hasSourceExpression(SignedCharType), implicitCastExpr(hasSourceExpression(CharTypeExpr),
hasImplicitDestinationType(IntegerType)) hasImplicitDestinationType(IntegerType))
.bind("castExpression"); .bind(CastBindName);
const auto CStyleCastExpr = cStyleCastExpr(has(ImplicitCastExpr)); const auto CStyleCastExpr = cStyleCastExpr(has(ImplicitCastExpr));
const auto StaticCastExpr = cxxStaticCastExpr(has(ImplicitCastExpr)); const auto StaticCastExpr = cxxStaticCastExpr(has(ImplicitCastExpr));
@ -59,44 +65,84 @@ void SignedCharMisuseCheck::registerMatchers(MatchFinder *Finder) {
// We catch any type of casts to an integer. We need to have these cast // We catch any type of casts to an integer. We need to have these cast
// expressions explicitly to catch only those casts which are direct children // expressions explicitly to catch only those casts which are direct children
// of an assignment/declaration. // of the checked expressions. (e.g. assignment, declaration).
const auto CastExpr = expr(anyOf(ImplicitCastExpr, CStyleCastExpr, return expr(anyOf(ImplicitCastExpr, CStyleCastExpr, StaticCastExpr,
StaticCastExpr, FunctionalCastExpr)); FunctionalCastExpr));
}
// Catch assignments with the suspicious type conversion. void SignedCharMisuseCheck::registerMatchers(MatchFinder *Finder) {
const auto AssignmentOperatorExpr = expr(binaryOperator( const auto IntegerType =
hasOperatorName("="), hasLHS(hasType(IntegerType)), hasRHS(CastExpr))); qualType(isInteger(), unless(isAnyCharacter()), unless(booleanType()))
.bind("integerType");
const auto SignedCharCastExpr =
charCastExpression(true, IntegerType, "signedCastExpression");
const auto UnSignedCharCastExpr =
charCastExpression(false, IntegerType, "unsignedCastExpression");
// Catch assignments with singed char -> integer conversion.
const auto AssignmentOperatorExpr =
expr(binaryOperator(hasOperatorName("="), hasLHS(hasType(IntegerType)),
hasRHS(SignedCharCastExpr)));
Finder->addMatcher(AssignmentOperatorExpr, this); Finder->addMatcher(AssignmentOperatorExpr, this);
// Catch declarations with the suspicious type conversion. // Catch declarations with singed char -> integer conversion.
const auto Declaration = const auto Declaration = varDecl(isDefinition(), hasType(IntegerType),
varDecl(isDefinition(), hasType(IntegerType), hasInitializer(CastExpr)); hasInitializer(SignedCharCastExpr));
Finder->addMatcher(Declaration, this); Finder->addMatcher(Declaration, this);
// Catch signed char/unsigned char comparison.
const auto CompareOperator =
expr(binaryOperator(hasAnyOperatorName("==", "!="),
anyOf(allOf(hasLHS(SignedCharCastExpr),
hasRHS(UnSignedCharCastExpr)),
allOf(hasLHS(UnSignedCharCastExpr),
hasRHS(SignedCharCastExpr)))))
.bind("comparison");
Finder->addMatcher(CompareOperator, this);
} }
void SignedCharMisuseCheck::check(const MatchFinder::MatchResult &Result) { void SignedCharMisuseCheck::check(const MatchFinder::MatchResult &Result) {
const auto *CastExpression = const auto *SignedCastExpression =
Result.Nodes.getNodeAs<ImplicitCastExpr>("castExpression"); Result.Nodes.getNodeAs<ImplicitCastExpr>("signedCastExpression");
const auto *IntegerType = Result.Nodes.getNodeAs<QualType>("integerType");
assert(CastExpression);
assert(IntegerType);
// Ignore the match if we know that the value is not negative. // Ignore the match if we know that the signed char's value is not negative.
// The potential misinterpretation happens for negative values only. // The potential misinterpretation happens for negative values only.
Expr::EvalResult EVResult; Expr::EvalResult EVResult;
if (!CastExpression->isValueDependent() && if (!SignedCastExpression->isValueDependent() &&
CastExpression->getSubExpr()->EvaluateAsInt(EVResult, *Result.Context)) { SignedCastExpression->getSubExpr()->EvaluateAsInt(EVResult,
llvm::APSInt Value1 = EVResult.Val.getInt(); *Result.Context)) {
if (Value1.isNonNegative()) llvm::APSInt Value = EVResult.Val.getInt();
if (Value.isNonNegative())
return; return;
} }
diag(CastExpression->getBeginLoc(), if (const auto *Comparison = Result.Nodes.getNodeAs<Expr>("comparison")) {
"'signed char' to %0 conversion; " const auto *UnSignedCastExpression =
"consider casting to 'unsigned char' first.") Result.Nodes.getNodeAs<ImplicitCastExpr>("unsignedCastExpression");
<< *IntegerType;
// We can ignore the ASCII value range also for unsigned char.
Expr::EvalResult EVResult;
if (!UnSignedCastExpression->isValueDependent() &&
UnSignedCastExpression->getSubExpr()->EvaluateAsInt(EVResult,
*Result.Context)) {
llvm::APSInt Value = EVResult.Val.getInt();
if (Value <= UnsignedASCIIUpperBound)
return;
}
diag(Comparison->getBeginLoc(),
"comparison between 'signed char' and 'unsigned char'");
} else if (const auto *IntegerType =
Result.Nodes.getNodeAs<QualType>("integerType")) {
diag(SignedCastExpression->getBeginLoc(),
"'signed char' to %0 conversion; "
"consider casting to 'unsigned char' first.")
<< *IntegerType;
} else
llvm_unreachable("Unexpected match");
} }
} // namespace bugprone } // namespace bugprone

View File

@ -15,13 +15,11 @@ namespace clang {
namespace tidy { namespace tidy {
namespace bugprone { namespace bugprone {
/// Finds ``signed char`` -> integer conversions which might indicate a programming /// Finds those ``signed char`` -> integer conversions which might indicate a
/// error. The basic problem with the ``signed char``, that it might store the /// programming error. The basic problem with the ``signed char``, that it might
/// non-ASCII characters as negative values. The human programmer probably /// store the non-ASCII characters as negative values. This behavior can cause a
/// expects that after an integer conversion the converted value matches with the /// misunderstanding of the written code both when an explicit and when an
/// character code (a value from [0..255]), however, the actual value is in /// implicit conversion happens.
/// [-128..127] interval. This also applies to the plain ``char`` type on
/// those implementations which represent ``char`` similar to ``signed char``.
/// ///
/// For the user-facing documentation see: /// For the user-facing documentation see:
/// http://clang.llvm.org/extra/clang-tidy/checks/bugprone-signed-char-misuse.html /// http://clang.llvm.org/extra/clang-tidy/checks/bugprone-signed-char-misuse.html
@ -34,6 +32,11 @@ public:
void check(const ast_matchers::MatchFinder::MatchResult &Result) override; void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
private: private:
ast_matchers::internal::BindableMatcher<clang::Stmt> charCastExpression(
bool IsSigned,
const ast_matchers::internal::Matcher<clang::QualType> &IntegerType,
const std::string &CastBindName) const;
const std::string CharTypdefsToIgnoreList; const std::string CharTypdefsToIgnoreList;
}; };

View File

@ -3,27 +3,39 @@
bugprone-signed-char-misuse bugprone-signed-char-misuse
=========================== ===========================
Finds ``signed char`` -> integer conversions which might indicate a programming Finds those ``signed char`` -> integer conversions which might indicate a
error. The basic problem with the ``signed char``, that it might store the programming error. The basic problem with the ``signed char``, that it might
non-ASCII characters as negative values. The human programmer probably store the non-ASCII characters as negative values. This behavior can cause a
expects that after an integer conversion the converted value matches with the misunderstanding of the written code both when an explicit and when an
implicit conversion happens.
When the code contains an explicit ``signed char`` -> integer conversion, the
human programmer probably expects that the converted value matches with the
character code (a value from [0..255]), however, the actual value is in character code (a value from [0..255]), however, the actual value is in
[-128..127] interval. This also applies to the plain ``char`` type on [-128..127] interval. To avoid this kind of misinterpretation, the desired way
those implementations which represent ``char`` similar to ``signed char``. of converting from a ``signed char`` to an integer value is converting to
``unsigned char`` first, which stores all the characters in the positive [0..255]
interval which matches the known character codes.
To avoid this kind of misinterpretation, the desired way of converting from a In case of implicit conversion, the programmer might not actually be aware
``signed char`` to an integer value is converting to ``unsigned char`` first, that a conversion happened and char value is used as an integer. There are
which stores all the characters in the positive [0..255] interval which matches some use cases when this unawareness might lead to a functionally imperfect code.
with the known character codes. For example, checking the equality of a ``signed char`` and an ``unsigned char``
variable is something we should avoid in C++ code. During this comparison,
the two variables are converted to integers which have different value ranges.
For ``signed char``, the non-ASCII characters are stored as a value in [-128..-1]
interval, while the same characters are stored in the [128..255] interval for
an ``unsigned char``.
It depends on the actual platform whether ``char`` is handled as ``signed char`` It depends on the actual platform whether plain ``char`` is handled as ``signed char``
by default and so it is caught by this check or not. To change the default behavior by default and so it is caught by this check or not. To change the default behavior
you can use ``-funsigned-char`` and ``-fsigned-char`` compilation options. you can use ``-funsigned-char`` and ``-fsigned-char`` compilation options.
Currently, this check is limited to assignments and variable declarations, Currently, this check is limited to assignments and variable declarations,
where a ``signed char`` is assigned to an integer variable. There are other where a ``signed char`` is assigned to an integer variable and to
use cases where the same misinterpretation might lead to similar bogus equality/inequality comparisons between ``signed char`` and ``unsigned char``.
behavior. There are other use cases where the unexpected value ranges might lead to
similar bogus behavior.
See also: See also:
`STR34-C. Cast characters to unsigned char before converting to larger integer sizes `STR34-C. Cast characters to unsigned char before converting to larger integer sizes
@ -67,6 +79,29 @@ an ``unsigned char`` value first.
return IChar; return IChar;
} }
Another use case is checking the equality of two ``char`` variables with
different signedness. Inside the non-ASCII value range this comparison between
a ``signed char`` and an ``unsigned char`` always returns ``false``.
.. code-block:: c++
bool compare(signed char SChar, unsigned char USChar) {
if (SChar == USChar)
return true;
return false;
}
The easiest way to fix this kind of comparison is casting one of the arguments,
so both arguments will have the same type.
.. code-block:: c++
bool compare(signed char SChar, unsigned char USChar) {
if (static_cast<unsigned char>(SChar) == USChar)
return true;
return false;
}
.. option:: CharTypdefsToIgnore .. option:: CharTypdefsToIgnore
A semicolon-separated list of typedef names. In this list, we can list A semicolon-separated list of typedef names. In this list, we can list

View File

@ -62,6 +62,34 @@ int CharPointer(signed char *CCharacter) {
return NCharacter; return NCharacter;
} }
int SignedUnsignedCharEquality(signed char SCharacter) {
unsigned char USCharacter = 'a';
if (SCharacter == USCharacter) // CHECK-MESSAGES: [[@LINE]]:7: warning: comparison between 'signed char' and 'unsigned char' [bugprone-signed-char-misuse]
return 1;
return 0;
}
int SignedUnsignedCharIneqiality(signed char SCharacter) {
unsigned char USCharacter = 'a';
if (SCharacter != USCharacter) // CHECK-MESSAGES: [[@LINE]]:7: warning: comparison between 'signed char' and 'unsigned char' [bugprone-signed-char-misuse]
return 1;
return 0;
}
int CompareWithNonAsciiConstant(unsigned char USCharacter) {
const signed char SCharacter = -5;
if (USCharacter == SCharacter) // CHECK-MESSAGES: [[@LINE]]:7: warning: comparison between 'signed char' and 'unsigned char' [bugprone-signed-char-misuse]
return 1;
return 0;
}
int CompareWithUnsignedNonAsciiConstant(signed char SCharacter) {
const unsigned char USCharacter = 128;
if (USCharacter == SCharacter) // CHECK-MESSAGES: [[@LINE]]:7: warning: comparison between 'signed char' and 'unsigned char' [bugprone-signed-char-misuse]
return 1;
return 0;
}
/////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////
/// Test cases correctly ignored by the check. /// Test cases correctly ignored by the check.
@ -121,3 +149,61 @@ unsigned char CharToCharCast() {
return USCharacter; return USCharacter;
} }
int FixComparisonWithSignedCharCast(signed char SCharacter) {
unsigned char USCharacter = 'a';
if (SCharacter == static_cast<signed char>(USCharacter))
return 1;
return 0;
}
int FixComparisonWithUnSignedCharCast(signed char SCharacter) {
unsigned char USCharacter = 'a';
if (static_cast<unsigned char>(SCharacter) == USCharacter)
return 1;
return 0;
}
// Make sure we don't catch other type of char comparison.
int SameCharTypeComparison(signed char SCharacter) {
signed char SCharacter2 = 'a';
if (SCharacter == SCharacter2)
return 1;
return 0;
}
// Make sure we don't catch other type of char comparison.
int SameCharTypeComparison2(unsigned char USCharacter) {
unsigned char USCharacter2 = 'a';
if (USCharacter == USCharacter2)
return 1;
return 0;
}
// Make sure we don't catch integer - char comparison.
int CharIntComparison(signed char SCharacter) {
int ICharacter = 10;
if (SCharacter == ICharacter)
return 1;
return 0;
}
int CompareWithAsciiLiteral(unsigned char USCharacter) {
if (USCharacter == 'x') // no warning
return 1;
return 0;
}
int CompareWithAsciiConstant(unsigned char USCharacter) {
const signed char SCharacter = 'a';
if (USCharacter == SCharacter)
return 1;
return 0;
}
int CompareWithUnsignedAsciiConstant(signed char SCharacter) {
const unsigned char USCharacter = 'a';
if (USCharacter == SCharacter)
return 1;
return 0;
}