Implement hex escape sequences in string and character literals, e.g. L"\x12345"

llvm-svn: 39462
This commit is contained in:
Chris Lattner 2007-05-20 05:00:58 +00:00
parent db5ac601e7
commit c10adde406
1 changed files with 34 additions and 11 deletions

View File

@ -34,7 +34,8 @@ static int HexDigitValue(char C) {
/// either a character or a string literal.
static unsigned ProcessCharEscape(const char *&ThisTokBuf,
const char *ThisTokEnd, bool &HadError,
SourceLocation Loc, Preprocessor &PP) {
SourceLocation Loc, bool IsWide,
Preprocessor &PP) {
// Skip the '\' char.
++ThisTokBuf;
@ -74,19 +75,36 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf,
break;
//case 'u': case 'U': // FIXME: UCNs.
case 'x': // Hex escape.
if (ThisTokBuf == ThisTokEnd ||
(ResultChar = HexDigitValue(*ThisTokBuf)) == ~0U) {
case 'x': { // Hex escape.
ResultChar = 0;
if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
PP.Diag(Loc, diag::err_hex_escape_no_digits);
HadError = 1;
ResultChar = 0;
break;
}
++ThisTokBuf; // Consumed one hex digit.
// FIXME: warn_hex_escape_too_large. '\x12345'
assert(0 && "hex escape: unimp!");
bool Overflow = false;
for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
int CharVal = HexDigitValue(ThisTokBuf[0]);
if (CharVal == -1) break;
Overflow |= ResultChar & 0xF0000000; // About to shift out a digit?
ResultChar <<= 4;
ResultChar |= CharVal;
}
// See if any bits will be truncated when evaluated as a character.
unsigned CharWidth = IsWide ? PP.getTargetInfo().getWCharWidth(Loc)
: PP.getTargetInfo().getCharWidth(Loc);
if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
Overflow = true;
ResultChar &= ~0U >> (32-CharWidth);
}
// Check for overflow.
if (Overflow) // Too many digits to fit in
PP.Diag(Loc, diag::warn_hex_escape_too_large);
break;
}
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
// Octal escapes.
@ -429,7 +447,7 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
if (begin[0] != '\\') // If this is a normal character, consume it.
ResultChar = *begin++;
else // Otherwise, this is an escape character.
ResultChar = ProcessCharEscape(begin, end, HadError, Loc, PP);
ResultChar = ProcessCharEscape(begin, end, HadError, Loc, IsWide, PP);
// If this is a multi-character constant (e.g. 'abc'), handle it. These are
// implementation defined (C99 6.4.4.4p10).
@ -575,7 +593,11 @@ StringLiteralParser(const LexerToken *StringToks, unsigned NumStringToks,
// TODO: Input character set mapping support.
// Skip L marker for wide strings.
if (ThisTokBuf[0] == 'L') ++ThisTokBuf;
bool ThisIsWide = false;
if (ThisTokBuf[0] == 'L') {
++ThisTokBuf;
ThisIsWide = true;
}
assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
++ThisTokBuf;
@ -607,7 +629,8 @@ StringLiteralParser(const LexerToken *StringToks, unsigned NumStringToks,
// Otherwise, this is an escape character. Process it.
unsigned ResultChar = ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
StringToks[i].getLocation(), PP);
StringToks[i].getLocation(),
ThisIsWide, PP);
// Note: our internal rep of wide char tokens is always little-endian.
*ResultPtr++ = ResultChar & 0xFF;