Implement hex escape sequences in string and character literals, e.g. L"\x12345"

llvm-svn: 39462
2007-05-20 05:00:58 +00:00 · 2007-05-20 05:00:58 +00:00 · c10adde406
parent db5ac601e7
commit c10adde406
1 changed files with 34 additions and 11 deletions
--- a/clang/Lex/LiteralSupport.cpp
+++ b/clang/Lex/LiteralSupport.cpp
@ -34,7 +34,8 @@ static int HexDigitValue(char C) {
 /// either a character or a string literal.
 static unsigned ProcessCharEscape(const char *&ThisTokBuf,
                                  const char *ThisTokEnd, bool &HadError,
-                                  SourceLocation Loc, Preprocessor &PP) {
+                                  SourceLocation Loc, bool IsWide,
+                                  Preprocessor &PP) {
  // Skip the '\' char.
  ++ThisTokBuf;

@ -74,19 +75,36 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf,
    break;
    
    //case 'u': case 'U':  // FIXME: UCNs.
-  case 'x': // Hex escape.
-    if (ThisTokBuf == ThisTokEnd ||
-        (ResultChar = HexDigitValue(*ThisTokBuf)) == ~0U) {
+  case 'x': { // Hex escape.
+    ResultChar = 0;
+    if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
      PP.Diag(Loc, diag::err_hex_escape_no_digits);
      HadError = 1;
-      ResultChar = 0;
      break;
    }
-    ++ThisTokBuf; // Consumed one hex digit.
    
-    // FIXME: warn_hex_escape_too_large. '\x12345'
-    assert(0 && "hex escape: unimp!");
+    bool Overflow = false;
+    for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
+      int CharVal = HexDigitValue(ThisTokBuf[0]);
+      if (CharVal == -1) break;
+      Overflow |= ResultChar & 0xF0000000;  // About to shift out a digit?
+      ResultChar <<= 4;
+      ResultChar |= CharVal;
+    }
+
+    // See if any bits will be truncated when evaluated as a character.
+    unsigned CharWidth = IsWide ? PP.getTargetInfo().getWCharWidth(Loc)
+                                : PP.getTargetInfo().getCharWidth(Loc);
+    if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
+      Overflow = true;
+      ResultChar &= ~0U >> (32-CharWidth);
+    }
+    
+    // Check for overflow.
+    if (Overflow)   // Too many digits to fit in
+      PP.Diag(Loc, diag::warn_hex_escape_too_large);
    break;
+  }
  case '0': case '1': case '2': case '3':
  case '4': case '5': case '6': case '7':
    // Octal escapes.
@ -429,7 +447,7 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
    if (begin[0] != '\\')     // If this is a normal character, consume it.
      ResultChar = *begin++;
    else                      // Otherwise, this is an escape character.
-      ResultChar = ProcessCharEscape(begin, end, HadError, Loc, PP);
+      ResultChar = ProcessCharEscape(begin, end, HadError, Loc, IsWide, PP);

    // If this is a multi-character constant (e.g. 'abc'), handle it.  These are
    // implementation defined (C99 6.4.4.4p10).
@ -575,7 +593,11 @@ StringLiteralParser(const LexerToken *StringToks, unsigned NumStringToks,
    // TODO: Input character set mapping support.
    
    // Skip L marker for wide strings.
-    if (ThisTokBuf[0] == 'L') ++ThisTokBuf;
+    bool ThisIsWide = false;
+    if (ThisTokBuf[0] == 'L') {
+      ++ThisTokBuf;
+      ThisIsWide = true;
+    }
    
    assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
    ++ThisTokBuf;
@ -607,7 +629,8 @@ StringLiteralParser(const LexerToken *StringToks, unsigned NumStringToks,
      
      // Otherwise, this is an escape character.  Process it.
      unsigned ResultChar = ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
-                                              StringToks[i].getLocation(), PP);
+                                              StringToks[i].getLocation(),
+                                              ThisIsWide, PP);
      
      // Note: our internal rep of wide char tokens is always little-endian.
      *ResultPtr++ = ResultChar & 0xFF;