1266 lines
		
	
	
		
			28 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			1266 lines
		
	
	
		
			28 KiB
		
	
	
	
		
			C++
		
	
	
	
| //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
 | |
| //
 | |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | |
| // See https://llvm.org/LICENSE.txt for license information.
 | |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| //
 | |
| // This file defines a demangler for Rust v0 mangled symbols as specified in
 | |
| // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| #include "llvm/Demangle/Demangle.h"
 | |
| #include "llvm/Demangle/StringView.h"
 | |
| #include "llvm/Demangle/Utility.h"
 | |
| 
 | |
| #include <algorithm>
 | |
| #include <cassert>
 | |
| #include <cstdint>
 | |
| #include <cstring>
 | |
| #include <limits>
 | |
| 
 | |
| using namespace llvm;
 | |
| 
 | |
| using llvm::itanium_demangle::OutputBuffer;
 | |
| using llvm::itanium_demangle::ScopedOverride;
 | |
| using llvm::itanium_demangle::StringView;
 | |
| 
 | |
| namespace {
 | |
| 
 | |
| struct Identifier {
 | |
|   StringView Name;
 | |
|   bool Punycode;
 | |
| 
 | |
|   bool empty() const { return Name.empty(); }
 | |
| };
 | |
| 
 | |
| enum class BasicType {
 | |
|   Bool,
 | |
|   Char,
 | |
|   I8,
 | |
|   I16,
 | |
|   I32,
 | |
|   I64,
 | |
|   I128,
 | |
|   ISize,
 | |
|   U8,
 | |
|   U16,
 | |
|   U32,
 | |
|   U64,
 | |
|   U128,
 | |
|   USize,
 | |
|   F32,
 | |
|   F64,
 | |
|   Str,
 | |
|   Placeholder,
 | |
|   Unit,
 | |
|   Variadic,
 | |
|   Never,
 | |
| };
 | |
| 
 | |
| enum class IsInType {
 | |
|   No,
 | |
|   Yes,
 | |
| };
 | |
| 
 | |
| enum class LeaveGenericsOpen {
 | |
|   No,
 | |
|   Yes,
 | |
| };
 | |
| 
 | |
| class Demangler {
 | |
|   // Maximum recursion level. Used to avoid stack overflow.
 | |
|   size_t MaxRecursionLevel;
 | |
|   // Current recursion level.
 | |
|   size_t RecursionLevel;
 | |
|   size_t BoundLifetimes;
 | |
|   // Input string that is being demangled with "_R" prefix removed.
 | |
|   StringView Input;
 | |
|   // Position in the input string.
 | |
|   size_t Position;
 | |
|   // When true, print methods append the output to the stream.
 | |
|   // When false, the output is suppressed.
 | |
|   bool Print;
 | |
|   // True if an error occurred.
 | |
|   bool Error;
 | |
| 
 | |
| public:
 | |
|   // Demangled output.
 | |
|   OutputBuffer Output;
 | |
| 
 | |
|   Demangler(size_t MaxRecursionLevel = 500);
 | |
| 
 | |
|   bool demangle(StringView MangledName);
 | |
| 
 | |
| private:
 | |
|   bool demanglePath(IsInType Type,
 | |
|                     LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
 | |
|   void demangleImplPath(IsInType InType);
 | |
|   void demangleGenericArg();
 | |
|   void demangleType();
 | |
|   void demangleFnSig();
 | |
|   void demangleDynBounds();
 | |
|   void demangleDynTrait();
 | |
|   void demangleOptionalBinder();
 | |
|   void demangleConst();
 | |
|   void demangleConstInt();
 | |
|   void demangleConstBool();
 | |
|   void demangleConstChar();
 | |
| 
 | |
|   template <typename Callable> void demangleBackref(Callable Demangler) {
 | |
|     uint64_t Backref = parseBase62Number();
 | |
|     if (Error || Backref >= Position) {
 | |
|       Error = true;
 | |
|       return;
 | |
|     }
 | |
| 
 | |
|     if (!Print)
 | |
|       return;
 | |
| 
 | |
|     ScopedOverride<size_t> SavePosition(Position, Position);
 | |
|     Position = Backref;
 | |
|     Demangler();
 | |
|   }
 | |
| 
 | |
|   Identifier parseIdentifier();
 | |
|   uint64_t parseOptionalBase62Number(char Tag);
 | |
|   uint64_t parseBase62Number();
 | |
|   uint64_t parseDecimalNumber();
 | |
|   uint64_t parseHexNumber(StringView &HexDigits);
 | |
| 
 | |
|   void print(char C);
 | |
|   void print(StringView S);
 | |
|   void printDecimalNumber(uint64_t N);
 | |
|   void printBasicType(BasicType);
 | |
|   void printLifetime(uint64_t Index);
 | |
|   void printIdentifier(Identifier Ident);
 | |
| 
 | |
|   char look() const;
 | |
|   char consume();
 | |
|   bool consumeIf(char Prefix);
 | |
| 
 | |
|   bool addAssign(uint64_t &A, uint64_t B);
 | |
|   bool mulAssign(uint64_t &A, uint64_t B);
 | |
| };
 | |
| 
 | |
| } // namespace
 | |
| 
 | |
| char *llvm::rustDemangle(const char *MangledName) {
 | |
|   if (MangledName == nullptr)
 | |
|     return nullptr;
 | |
| 
 | |
|   // Return early if mangled name doesn't look like a Rust symbol.
 | |
|   StringView Mangled(MangledName);
 | |
|   if (!Mangled.startsWith("_R"))
 | |
|     return nullptr;
 | |
| 
 | |
|   Demangler D;
 | |
|   if (!initializeOutputBuffer(nullptr, nullptr, D.Output, 1024))
 | |
|     return nullptr;
 | |
| 
 | |
|   if (!D.demangle(Mangled)) {
 | |
|     std::free(D.Output.getBuffer());
 | |
|     return nullptr;
 | |
|   }
 | |
| 
 | |
|   D.Output += '\0';
 | |
| 
 | |
|   return D.Output.getBuffer();
 | |
| }
 | |
| 
 | |
| Demangler::Demangler(size_t MaxRecursionLevel)
 | |
|     : MaxRecursionLevel(MaxRecursionLevel) {}
 | |
| 
 | |
| static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
 | |
| 
 | |
| static inline bool isHexDigit(const char C) {
 | |
|   return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
 | |
| }
 | |
| 
 | |
| static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
 | |
| 
 | |
| static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
 | |
| 
 | |
| /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
 | |
| static inline bool isValid(const char C) {
 | |
|   return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
 | |
| }
 | |
| 
 | |
| // Demangles Rust v0 mangled symbol. Returns true when successful, and false
 | |
| // otherwise. The demangled symbol is stored in Output field. It is
 | |
| // responsibility of the caller to free the memory behind the output stream.
 | |
| //
 | |
| // <symbol-name> = "_R" <path> [<instantiating-crate>]
 | |
| bool Demangler::demangle(StringView Mangled) {
 | |
|   Position = 0;
 | |
|   Error = false;
 | |
|   Print = true;
 | |
|   RecursionLevel = 0;
 | |
|   BoundLifetimes = 0;
 | |
| 
 | |
|   if (!Mangled.consumeFront("_R")) {
 | |
|     Error = true;
 | |
|     return false;
 | |
|   }
 | |
|   size_t Dot = Mangled.find('.');
 | |
|   Input = Mangled.substr(0, Dot);
 | |
|   StringView Suffix = Mangled.dropFront(Dot);
 | |
| 
 | |
|   demanglePath(IsInType::No);
 | |
| 
 | |
|   if (Position != Input.size()) {
 | |
|     ScopedOverride<bool> SavePrint(Print, false);
 | |
|     demanglePath(IsInType::No);
 | |
|   }
 | |
| 
 | |
|   if (Position != Input.size())
 | |
|     Error = true;
 | |
| 
 | |
|   if (!Suffix.empty()) {
 | |
|     print(" (");
 | |
|     print(Suffix);
 | |
|     print(")");
 | |
|   }
 | |
| 
 | |
|   return !Error;
 | |
| }
 | |
| 
 | |
| // Demangles a path. InType indicates whether a path is inside a type. When
 | |
| // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
 | |
| // output. Return value indicates whether generics arguments have been left
 | |
| // open.
 | |
| //
 | |
| // <path> = "C" <identifier>               // crate root
 | |
| //        | "M" <impl-path> <type>         // <T> (inherent impl)
 | |
| //        | "X" <impl-path> <type> <path>  // <T as Trait> (trait impl)
 | |
| //        | "Y" <type> <path>              // <T as Trait> (trait definition)
 | |
| //        | "N" <ns> <path> <identifier>   // ...::ident (nested path)
 | |
| //        | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
 | |
| //        | <backref>
 | |
| // <identifier> = [<disambiguator>] <undisambiguated-identifier>
 | |
| // <ns> = "C"      // closure
 | |
| //      | "S"      // shim
 | |
| //      | <A-Z>    // other special namespaces
 | |
| //      | <a-z>    // internal namespaces
 | |
| bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
 | |
|   if (Error || RecursionLevel >= MaxRecursionLevel) {
 | |
|     Error = true;
 | |
|     return false;
 | |
|   }
 | |
|   ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
 | |
| 
 | |
|   switch (consume()) {
 | |
|   case 'C': {
 | |
|     parseOptionalBase62Number('s');
 | |
|     printIdentifier(parseIdentifier());
 | |
|     break;
 | |
|   }
 | |
|   case 'M': {
 | |
|     demangleImplPath(InType);
 | |
|     print("<");
 | |
|     demangleType();
 | |
|     print(">");
 | |
|     break;
 | |
|   }
 | |
|   case 'X': {
 | |
|     demangleImplPath(InType);
 | |
|     print("<");
 | |
|     demangleType();
 | |
|     print(" as ");
 | |
|     demanglePath(IsInType::Yes);
 | |
|     print(">");
 | |
|     break;
 | |
|   }
 | |
|   case 'Y': {
 | |
|     print("<");
 | |
|     demangleType();
 | |
|     print(" as ");
 | |
|     demanglePath(IsInType::Yes);
 | |
|     print(">");
 | |
|     break;
 | |
|   }
 | |
|   case 'N': {
 | |
|     char NS = consume();
 | |
|     if (!isLower(NS) && !isUpper(NS)) {
 | |
|       Error = true;
 | |
|       break;
 | |
|     }
 | |
|     demanglePath(InType);
 | |
| 
 | |
|     uint64_t Disambiguator = parseOptionalBase62Number('s');
 | |
|     Identifier Ident = parseIdentifier();
 | |
| 
 | |
|     if (isUpper(NS)) {
 | |
|       // Special namespaces
 | |
|       print("::{");
 | |
|       if (NS == 'C')
 | |
|         print("closure");
 | |
|       else if (NS == 'S')
 | |
|         print("shim");
 | |
|       else
 | |
|         print(NS);
 | |
|       if (!Ident.empty()) {
 | |
|         print(":");
 | |
|         printIdentifier(Ident);
 | |
|       }
 | |
|       print('#');
 | |
|       printDecimalNumber(Disambiguator);
 | |
|       print('}');
 | |
|     } else {
 | |
|       // Implementation internal namespaces.
 | |
|       if (!Ident.empty()) {
 | |
|         print("::");
 | |
|         printIdentifier(Ident);
 | |
|       }
 | |
|     }
 | |
|     break;
 | |
|   }
 | |
|   case 'I': {
 | |
|     demanglePath(InType);
 | |
|     // Omit "::" when in a type, where it is optional.
 | |
|     if (InType == IsInType::No)
 | |
|       print("::");
 | |
|     print("<");
 | |
|     for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
 | |
|       if (I > 0)
 | |
|         print(", ");
 | |
|       demangleGenericArg();
 | |
|     }
 | |
|     if (LeaveOpen == LeaveGenericsOpen::Yes)
 | |
|       return true;
 | |
|     else
 | |
|       print(">");
 | |
|     break;
 | |
|   }
 | |
|   case 'B': {
 | |
|     bool IsOpen = false;
 | |
|     demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
 | |
|     return IsOpen;
 | |
|   }
 | |
|   default:
 | |
|     Error = true;
 | |
|     break;
 | |
|   }
 | |
| 
 | |
|   return false;
 | |
| }
 | |
| 
 | |
| // <impl-path> = [<disambiguator>] <path>
 | |
| // <disambiguator> = "s" <base-62-number>
 | |
| void Demangler::demangleImplPath(IsInType InType) {
 | |
|   ScopedOverride<bool> SavePrint(Print, false);
 | |
|   parseOptionalBase62Number('s');
 | |
|   demanglePath(InType);
 | |
| }
 | |
| 
 | |
| // <generic-arg> = <lifetime>
 | |
| //               | <type>
 | |
| //               | "K" <const>
 | |
| // <lifetime> = "L" <base-62-number>
 | |
| void Demangler::demangleGenericArg() {
 | |
|   if (consumeIf('L'))
 | |
|     printLifetime(parseBase62Number());
 | |
|   else if (consumeIf('K'))
 | |
|     demangleConst();
 | |
|   else
 | |
|     demangleType();
 | |
| }
 | |
| 
 | |
| // <basic-type> = "a"      // i8
 | |
| //              | "b"      // bool
 | |
| //              | "c"      // char
 | |
| //              | "d"      // f64
 | |
| //              | "e"      // str
 | |
| //              | "f"      // f32
 | |
| //              | "h"      // u8
 | |
| //              | "i"      // isize
 | |
| //              | "j"      // usize
 | |
| //              | "l"      // i32
 | |
| //              | "m"      // u32
 | |
| //              | "n"      // i128
 | |
| //              | "o"      // u128
 | |
| //              | "s"      // i16
 | |
| //              | "t"      // u16
 | |
| //              | "u"      // ()
 | |
| //              | "v"      // ...
 | |
| //              | "x"      // i64
 | |
| //              | "y"      // u64
 | |
| //              | "z"      // !
 | |
| //              | "p"      // placeholder (e.g. for generic params), shown as _
 | |
| static bool parseBasicType(char C, BasicType &Type) {
 | |
|   switch (C) {
 | |
|   case 'a':
 | |
|     Type = BasicType::I8;
 | |
|     return true;
 | |
|   case 'b':
 | |
|     Type = BasicType::Bool;
 | |
|     return true;
 | |
|   case 'c':
 | |
|     Type = BasicType::Char;
 | |
|     return true;
 | |
|   case 'd':
 | |
|     Type = BasicType::F64;
 | |
|     return true;
 | |
|   case 'e':
 | |
|     Type = BasicType::Str;
 | |
|     return true;
 | |
|   case 'f':
 | |
|     Type = BasicType::F32;
 | |
|     return true;
 | |
|   case 'h':
 | |
|     Type = BasicType::U8;
 | |
|     return true;
 | |
|   case 'i':
 | |
|     Type = BasicType::ISize;
 | |
|     return true;
 | |
|   case 'j':
 | |
|     Type = BasicType::USize;
 | |
|     return true;
 | |
|   case 'l':
 | |
|     Type = BasicType::I32;
 | |
|     return true;
 | |
|   case 'm':
 | |
|     Type = BasicType::U32;
 | |
|     return true;
 | |
|   case 'n':
 | |
|     Type = BasicType::I128;
 | |
|     return true;
 | |
|   case 'o':
 | |
|     Type = BasicType::U128;
 | |
|     return true;
 | |
|   case 'p':
 | |
|     Type = BasicType::Placeholder;
 | |
|     return true;
 | |
|   case 's':
 | |
|     Type = BasicType::I16;
 | |
|     return true;
 | |
|   case 't':
 | |
|     Type = BasicType::U16;
 | |
|     return true;
 | |
|   case 'u':
 | |
|     Type = BasicType::Unit;
 | |
|     return true;
 | |
|   case 'v':
 | |
|     Type = BasicType::Variadic;
 | |
|     return true;
 | |
|   case 'x':
 | |
|     Type = BasicType::I64;
 | |
|     return true;
 | |
|   case 'y':
 | |
|     Type = BasicType::U64;
 | |
|     return true;
 | |
|   case 'z':
 | |
|     Type = BasicType::Never;
 | |
|     return true;
 | |
|   default:
 | |
|     return false;
 | |
|   }
 | |
| }
 | |
| 
 | |
| void Demangler::printBasicType(BasicType Type) {
 | |
|   switch (Type) {
 | |
|   case BasicType::Bool:
 | |
|     print("bool");
 | |
|     break;
 | |
|   case BasicType::Char:
 | |
|     print("char");
 | |
|     break;
 | |
|   case BasicType::I8:
 | |
|     print("i8");
 | |
|     break;
 | |
|   case BasicType::I16:
 | |
|     print("i16");
 | |
|     break;
 | |
|   case BasicType::I32:
 | |
|     print("i32");
 | |
|     break;
 | |
|   case BasicType::I64:
 | |
|     print("i64");
 | |
|     break;
 | |
|   case BasicType::I128:
 | |
|     print("i128");
 | |
|     break;
 | |
|   case BasicType::ISize:
 | |
|     print("isize");
 | |
|     break;
 | |
|   case BasicType::U8:
 | |
|     print("u8");
 | |
|     break;
 | |
|   case BasicType::U16:
 | |
|     print("u16");
 | |
|     break;
 | |
|   case BasicType::U32:
 | |
|     print("u32");
 | |
|     break;
 | |
|   case BasicType::U64:
 | |
|     print("u64");
 | |
|     break;
 | |
|   case BasicType::U128:
 | |
|     print("u128");
 | |
|     break;
 | |
|   case BasicType::USize:
 | |
|     print("usize");
 | |
|     break;
 | |
|   case BasicType::F32:
 | |
|     print("f32");
 | |
|     break;
 | |
|   case BasicType::F64:
 | |
|     print("f64");
 | |
|     break;
 | |
|   case BasicType::Str:
 | |
|     print("str");
 | |
|     break;
 | |
|   case BasicType::Placeholder:
 | |
|     print("_");
 | |
|     break;
 | |
|   case BasicType::Unit:
 | |
|     print("()");
 | |
|     break;
 | |
|   case BasicType::Variadic:
 | |
|     print("...");
 | |
|     break;
 | |
|   case BasicType::Never:
 | |
|     print("!");
 | |
|     break;
 | |
|   }
 | |
| }
 | |
| 
 | |
| // <type> = | <basic-type>
 | |
| //          | <path>                      // named type
 | |
| //          | "A" <type> <const>          // [T; N]
 | |
| //          | "S" <type>                  // [T]
 | |
| //          | "T" {<type>} "E"            // (T1, T2, T3, ...)
 | |
| //          | "R" [<lifetime>] <type>     // &T
 | |
| //          | "Q" [<lifetime>] <type>     // &mut T
 | |
| //          | "P" <type>                  // *const T
 | |
| //          | "O" <type>                  // *mut T
 | |
| //          | "F" <fn-sig>                // fn(...) -> ...
 | |
| //          | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
 | |
| //          | <backref>                   // backref
 | |
| void Demangler::demangleType() {
 | |
|   if (Error || RecursionLevel >= MaxRecursionLevel) {
 | |
|     Error = true;
 | |
|     return;
 | |
|   }
 | |
|   ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
 | |
| 
 | |
|   size_t Start = Position;
 | |
|   char C = consume();
 | |
|   BasicType Type;
 | |
|   if (parseBasicType(C, Type))
 | |
|     return printBasicType(Type);
 | |
| 
 | |
|   switch (C) {
 | |
|   case 'A':
 | |
|     print("[");
 | |
|     demangleType();
 | |
|     print("; ");
 | |
|     demangleConst();
 | |
|     print("]");
 | |
|     break;
 | |
|   case 'S':
 | |
|     print("[");
 | |
|     demangleType();
 | |
|     print("]");
 | |
|     break;
 | |
|   case 'T': {
 | |
|     print("(");
 | |
|     size_t I = 0;
 | |
|     for (; !Error && !consumeIf('E'); ++I) {
 | |
|       if (I > 0)
 | |
|         print(", ");
 | |
|       demangleType();
 | |
|     }
 | |
|     if (I == 1)
 | |
|       print(",");
 | |
|     print(")");
 | |
|     break;
 | |
|   }
 | |
|   case 'R':
 | |
|   case 'Q':
 | |
|     print('&');
 | |
|     if (consumeIf('L')) {
 | |
|       if (auto Lifetime = parseBase62Number()) {
 | |
|         printLifetime(Lifetime);
 | |
|         print(' ');
 | |
|       }
 | |
|     }
 | |
|     if (C == 'Q')
 | |
|       print("mut ");
 | |
|     demangleType();
 | |
|     break;
 | |
|   case 'P':
 | |
|     print("*const ");
 | |
|     demangleType();
 | |
|     break;
 | |
|   case 'O':
 | |
|     print("*mut ");
 | |
|     demangleType();
 | |
|     break;
 | |
|   case 'F':
 | |
|     demangleFnSig();
 | |
|     break;
 | |
|   case 'D':
 | |
|     demangleDynBounds();
 | |
|     if (consumeIf('L')) {
 | |
|       if (auto Lifetime = parseBase62Number()) {
 | |
|         print(" + ");
 | |
|         printLifetime(Lifetime);
 | |
|       }
 | |
|     } else {
 | |
|       Error = true;
 | |
|     }
 | |
|     break;
 | |
|   case 'B':
 | |
|     demangleBackref([&] { demangleType(); });
 | |
|     break;
 | |
|   default:
 | |
|     Position = Start;
 | |
|     demanglePath(IsInType::Yes);
 | |
|     break;
 | |
|   }
 | |
| }
 | |
| 
 | |
| // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
 | |
| // <abi> = "C"
 | |
| //       | <undisambiguated-identifier>
 | |
| void Demangler::demangleFnSig() {
 | |
|   ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
 | |
|   demangleOptionalBinder();
 | |
| 
 | |
|   if (consumeIf('U'))
 | |
|     print("unsafe ");
 | |
| 
 | |
|   if (consumeIf('K')) {
 | |
|     print("extern \"");
 | |
|     if (consumeIf('C')) {
 | |
|       print("C");
 | |
|     } else {
 | |
|       Identifier Ident = parseIdentifier();
 | |
|       if (Ident.Punycode)
 | |
|         Error = true;
 | |
|       for (char C : Ident.Name) {
 | |
|         // When mangling ABI string, the "-" is replaced with "_".
 | |
|         if (C == '_')
 | |
|           C = '-';
 | |
|         print(C);
 | |
|       }
 | |
|     }
 | |
|     print("\" ");
 | |
|   }
 | |
| 
 | |
|   print("fn(");
 | |
|   for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
 | |
|     if (I > 0)
 | |
|       print(", ");
 | |
|     demangleType();
 | |
|   }
 | |
|   print(")");
 | |
| 
 | |
|   if (consumeIf('u')) {
 | |
|     // Skip the unit type from the output.
 | |
|   } else {
 | |
|     print(" -> ");
 | |
|     demangleType();
 | |
|   }
 | |
| }
 | |
| 
 | |
| // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
 | |
| void Demangler::demangleDynBounds() {
 | |
|   ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
 | |
|   print("dyn ");
 | |
|   demangleOptionalBinder();
 | |
|   for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
 | |
|     if (I > 0)
 | |
|       print(" + ");
 | |
|     demangleDynTrait();
 | |
|   }
 | |
| }
 | |
| 
 | |
| // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
 | |
| // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
 | |
| void Demangler::demangleDynTrait() {
 | |
|   bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
 | |
|   while (!Error && consumeIf('p')) {
 | |
|     if (!IsOpen) {
 | |
|       IsOpen = true;
 | |
|       print('<');
 | |
|     } else {
 | |
|       print(", ");
 | |
|     }
 | |
|     print(parseIdentifier().Name);
 | |
|     print(" = ");
 | |
|     demangleType();
 | |
|   }
 | |
|   if (IsOpen)
 | |
|     print(">");
 | |
| }
 | |
| 
 | |
| // Demangles optional binder and updates the number of bound lifetimes.
 | |
| //
 | |
| // <binder> = "G" <base-62-number>
 | |
| void Demangler::demangleOptionalBinder() {
 | |
|   uint64_t Binder = parseOptionalBase62Number('G');
 | |
|   if (Error || Binder == 0)
 | |
|     return;
 | |
| 
 | |
|   // In valid inputs each bound lifetime is referenced later. Referencing a
 | |
|   // lifetime requires at least one byte of input. Reject inputs that are too
 | |
|   // short to reference all bound lifetimes. Otherwise demangling of invalid
 | |
|   // binders could generate excessive amounts of output.
 | |
|   if (Binder >= Input.size() - BoundLifetimes) {
 | |
|     Error = true;
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   print("for<");
 | |
|   for (size_t I = 0; I != Binder; ++I) {
 | |
|     BoundLifetimes += 1;
 | |
|     if (I > 0)
 | |
|       print(", ");
 | |
|     printLifetime(1);
 | |
|   }
 | |
|   print("> ");
 | |
| }
 | |
| 
 | |
| // <const> = <basic-type> <const-data>
 | |
| //         | "p"                          // placeholder
 | |
| //         | <backref>
 | |
| void Demangler::demangleConst() {
 | |
|   if (Error || RecursionLevel >= MaxRecursionLevel) {
 | |
|     Error = true;
 | |
|     return;
 | |
|   }
 | |
|   ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
 | |
| 
 | |
|   char C = consume();
 | |
|   BasicType Type;
 | |
|   if (parseBasicType(C, Type)) {
 | |
|     switch (Type) {
 | |
|     case BasicType::I8:
 | |
|     case BasicType::I16:
 | |
|     case BasicType::I32:
 | |
|     case BasicType::I64:
 | |
|     case BasicType::I128:
 | |
|     case BasicType::ISize:
 | |
|     case BasicType::U8:
 | |
|     case BasicType::U16:
 | |
|     case BasicType::U32:
 | |
|     case BasicType::U64:
 | |
|     case BasicType::U128:
 | |
|     case BasicType::USize:
 | |
|       demangleConstInt();
 | |
|       break;
 | |
|     case BasicType::Bool:
 | |
|       demangleConstBool();
 | |
|       break;
 | |
|     case BasicType::Char:
 | |
|       demangleConstChar();
 | |
|       break;
 | |
|     case BasicType::Placeholder:
 | |
|       print('_');
 | |
|       break;
 | |
|     default:
 | |
|       Error = true;
 | |
|       break;
 | |
|     }
 | |
|   } else if (C == 'B') {
 | |
|     demangleBackref([&] { demangleConst(); });
 | |
|   } else {
 | |
|     Error = true;
 | |
|   }
 | |
| }
 | |
| 
 | |
| // <const-data> = ["n"] <hex-number>
 | |
| void Demangler::demangleConstInt() {
 | |
|   if (consumeIf('n'))
 | |
|     print('-');
 | |
| 
 | |
|   StringView HexDigits;
 | |
|   uint64_t Value = parseHexNumber(HexDigits);
 | |
|   if (HexDigits.size() <= 16) {
 | |
|     printDecimalNumber(Value);
 | |
|   } else {
 | |
|     print("0x");
 | |
|     print(HexDigits);
 | |
|   }
 | |
| }
 | |
| 
 | |
| // <const-data> = "0_" // false
 | |
| //              | "1_" // true
 | |
| void Demangler::demangleConstBool() {
 | |
|   StringView HexDigits;
 | |
|   parseHexNumber(HexDigits);
 | |
|   if (HexDigits == "0")
 | |
|     print("false");
 | |
|   else if (HexDigits == "1")
 | |
|     print("true");
 | |
|   else
 | |
|     Error = true;
 | |
| }
 | |
| 
 | |
| /// Returns true if CodePoint represents a printable ASCII character.
 | |
| static bool isAsciiPrintable(uint64_t CodePoint) {
 | |
|   return 0x20 <= CodePoint && CodePoint <= 0x7e;
 | |
| }
 | |
| 
 | |
| // <const-data> = <hex-number>
 | |
| void Demangler::demangleConstChar() {
 | |
|   StringView HexDigits;
 | |
|   uint64_t CodePoint = parseHexNumber(HexDigits);
 | |
|   if (Error || HexDigits.size() > 6) {
 | |
|     Error = true;
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   print("'");
 | |
|   switch (CodePoint) {
 | |
|   case '\t':
 | |
|     print(R"(\t)");
 | |
|     break;
 | |
|   case '\r':
 | |
|     print(R"(\r)");
 | |
|     break;
 | |
|   case '\n':
 | |
|     print(R"(\n)");
 | |
|     break;
 | |
|   case '\\':
 | |
|     print(R"(\\)");
 | |
|     break;
 | |
|   case '"':
 | |
|     print(R"(")");
 | |
|     break;
 | |
|   case '\'':
 | |
|     print(R"(\')");
 | |
|     break;
 | |
|   default:
 | |
|     if (isAsciiPrintable(CodePoint)) {
 | |
|       char C = CodePoint;
 | |
|       print(C);
 | |
|     } else {
 | |
|       print(R"(\u{)");
 | |
|       print(HexDigits);
 | |
|       print('}');
 | |
|     }
 | |
|     break;
 | |
|   }
 | |
|   print('\'');
 | |
| }
 | |
| 
 | |
| // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
 | |
| Identifier Demangler::parseIdentifier() {
 | |
|   bool Punycode = consumeIf('u');
 | |
|   uint64_t Bytes = parseDecimalNumber();
 | |
| 
 | |
|   // Underscore resolves the ambiguity when identifier starts with a decimal
 | |
|   // digit or another underscore.
 | |
|   consumeIf('_');
 | |
| 
 | |
|   if (Error || Bytes > Input.size() - Position) {
 | |
|     Error = true;
 | |
|     return {};
 | |
|   }
 | |
|   StringView S = Input.substr(Position, Bytes);
 | |
|   Position += Bytes;
 | |
| 
 | |
|   if (!std::all_of(S.begin(), S.end(), isValid)) {
 | |
|     Error = true;
 | |
|     return {};
 | |
|   }
 | |
| 
 | |
|   return {S, Punycode};
 | |
| }
 | |
| 
 | |
| // Parses optional base 62 number. The presence of a number is determined using
 | |
| // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
 | |
| //
 | |
| // This function is indended for parsing disambiguators and binders which when
 | |
| // not present have their value interpreted as 0, and otherwise as decoded
 | |
| // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
 | |
| // 2. When "G" is absent value is 0.
 | |
| uint64_t Demangler::parseOptionalBase62Number(char Tag) {
 | |
|   if (!consumeIf(Tag))
 | |
|     return 0;
 | |
| 
 | |
|   uint64_t N = parseBase62Number();
 | |
|   if (Error || !addAssign(N, 1))
 | |
|     return 0;
 | |
| 
 | |
|   return N;
 | |
| }
 | |
| 
 | |
| // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
 | |
| // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
 | |
| // "1_" encodes 2, etc.
 | |
| //
 | |
| // <base-62-number> = {<0-9a-zA-Z>} "_"
 | |
| uint64_t Demangler::parseBase62Number() {
 | |
|   if (consumeIf('_'))
 | |
|     return 0;
 | |
| 
 | |
|   uint64_t Value = 0;
 | |
| 
 | |
|   while (true) {
 | |
|     uint64_t Digit;
 | |
|     char C = consume();
 | |
| 
 | |
|     if (C == '_') {
 | |
|       break;
 | |
|     } else if (isDigit(C)) {
 | |
|       Digit = C - '0';
 | |
|     } else if (isLower(C)) {
 | |
|       Digit = 10 + (C - 'a');
 | |
|     } else if (isUpper(C)) {
 | |
|       Digit = 10 + 26 + (C - 'A');
 | |
|     } else {
 | |
|       Error = true;
 | |
|       return 0;
 | |
|     }
 | |
| 
 | |
|     if (!mulAssign(Value, 62))
 | |
|       return 0;
 | |
| 
 | |
|     if (!addAssign(Value, Digit))
 | |
|       return 0;
 | |
|   }
 | |
| 
 | |
|   if (!addAssign(Value, 1))
 | |
|     return 0;
 | |
| 
 | |
|   return Value;
 | |
| }
 | |
| 
 | |
| // Parses a decimal number that had been encoded without any leading zeros.
 | |
| //
 | |
| // <decimal-number> = "0"
 | |
| //                  | <1-9> {<0-9>}
 | |
| uint64_t Demangler::parseDecimalNumber() {
 | |
|   char C = look();
 | |
|   if (!isDigit(C)) {
 | |
|     Error = true;
 | |
|     return 0;
 | |
|   }
 | |
| 
 | |
|   if (C == '0') {
 | |
|     consume();
 | |
|     return 0;
 | |
|   }
 | |
| 
 | |
|   uint64_t Value = 0;
 | |
| 
 | |
|   while (isDigit(look())) {
 | |
|     if (!mulAssign(Value, 10)) {
 | |
|       Error = true;
 | |
|       return 0;
 | |
|     }
 | |
| 
 | |
|     uint64_t D = consume() - '0';
 | |
|     if (!addAssign(Value, D))
 | |
|       return 0;
 | |
|   }
 | |
| 
 | |
|   return Value;
 | |
| }
 | |
| 
 | |
| // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
 | |
| // value and stores hex digits in HexDigits. The return value is unspecified if
 | |
| // HexDigits.size() > 16.
 | |
| //
 | |
| // <hex-number> = "0_"
 | |
| //              | <1-9a-f> {<0-9a-f>} "_"
 | |
| uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
 | |
|   size_t Start = Position;
 | |
|   uint64_t Value = 0;
 | |
| 
 | |
|   if (!isHexDigit(look()))
 | |
|     Error = true;
 | |
| 
 | |
|   if (consumeIf('0')) {
 | |
|     if (!consumeIf('_'))
 | |
|       Error = true;
 | |
|   } else {
 | |
|     while (!Error && !consumeIf('_')) {
 | |
|       char C = consume();
 | |
|       Value *= 16;
 | |
|       if (isDigit(C))
 | |
|         Value += C - '0';
 | |
|       else if ('a' <= C && C <= 'f')
 | |
|         Value += 10 + (C - 'a');
 | |
|       else
 | |
|         Error = true;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if (Error) {
 | |
|     HexDigits = StringView();
 | |
|     return 0;
 | |
|   }
 | |
| 
 | |
|   size_t End = Position - 1;
 | |
|   assert(Start < End);
 | |
|   HexDigits = Input.substr(Start, End - Start);
 | |
|   return Value;
 | |
| }
 | |
| 
 | |
| void Demangler::print(char C) {
 | |
|   if (Error || !Print)
 | |
|     return;
 | |
| 
 | |
|   Output += C;
 | |
| }
 | |
| 
 | |
| void Demangler::print(StringView S) {
 | |
|   if (Error || !Print)
 | |
|     return;
 | |
| 
 | |
|   Output += S;
 | |
| }
 | |
| 
 | |
| void Demangler::printDecimalNumber(uint64_t N) {
 | |
|   if (Error || !Print)
 | |
|     return;
 | |
| 
 | |
|   Output << N;
 | |
| }
 | |
| 
 | |
| // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
 | |
| // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
 | |
| // bound by one of the enclosing binders.
 | |
| void Demangler::printLifetime(uint64_t Index) {
 | |
|   if (Index == 0) {
 | |
|     print("'_");
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   if (Index - 1 >= BoundLifetimes) {
 | |
|     Error = true;
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   uint64_t Depth = BoundLifetimes - Index;
 | |
|   print('\'');
 | |
|   if (Depth < 26) {
 | |
|     char C = 'a' + Depth;
 | |
|     print(C);
 | |
|   } else {
 | |
|     print('z');
 | |
|     printDecimalNumber(Depth - 26 + 1);
 | |
|   }
 | |
| }
 | |
| 
 | |
| static inline bool decodePunycodeDigit(char C, size_t &Value) {
 | |
|   if (isLower(C)) {
 | |
|     Value = C - 'a';
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   if (isDigit(C)) {
 | |
|     Value = 26 + (C - '0');
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   return false;
 | |
| }
 | |
| 
 | |
| static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
 | |
|   char *Buffer = Output.getBuffer();
 | |
|   char *Start = Buffer + StartIdx;
 | |
|   char *End = Buffer + Output.getCurrentPosition();
 | |
|   Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
 | |
| }
 | |
| 
 | |
| // Encodes code point as UTF-8 and stores results in Output. Returns false if
 | |
| // CodePoint is not a valid unicode scalar value.
 | |
| static inline bool encodeUTF8(size_t CodePoint, char *Output) {
 | |
|   if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
 | |
|     return false;
 | |
| 
 | |
|   if (CodePoint <= 0x7F) {
 | |
|     Output[0] = CodePoint;
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   if (CodePoint <= 0x7FF) {
 | |
|     Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
 | |
|     Output[1] = 0x80 | (CodePoint & 0x3F);
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   if (CodePoint <= 0xFFFF) {
 | |
|     Output[0] = 0xE0 | (CodePoint >> 12);
 | |
|     Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
 | |
|     Output[2] = 0x80 | (CodePoint & 0x3F);
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   if (CodePoint <= 0x10FFFF) {
 | |
|     Output[0] = 0xF0 | (CodePoint >> 18);
 | |
|     Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
 | |
|     Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
 | |
|     Output[3] = 0x80 | (CodePoint & 0x3F);
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   return false;
 | |
| }
 | |
| 
 | |
| // Decodes string encoded using punycode and appends results to Output.
 | |
| // Returns true if decoding was successful.
 | |
| static bool decodePunycode(StringView Input, OutputBuffer &Output) {
 | |
|   size_t OutputSize = Output.getCurrentPosition();
 | |
|   size_t InputIdx = 0;
 | |
| 
 | |
|   // Rust uses an underscore as a delimiter.
 | |
|   size_t DelimiterPos = StringView::npos;
 | |
|   for (size_t I = 0; I != Input.size(); ++I)
 | |
|     if (Input[I] == '_')
 | |
|       DelimiterPos = I;
 | |
| 
 | |
|   if (DelimiterPos != StringView::npos) {
 | |
|     // Copy basic code points before the last delimiter to the output.
 | |
|     for (; InputIdx != DelimiterPos; ++InputIdx) {
 | |
|       char C = Input[InputIdx];
 | |
|       if (!isValid(C))
 | |
|         return false;
 | |
|       // Code points are padded with zeros while decoding is in progress.
 | |
|       char UTF8[4] = {C};
 | |
|       Output += StringView(UTF8, UTF8 + 4);
 | |
|     }
 | |
|     // Skip over the delimiter.
 | |
|     ++InputIdx;
 | |
|   }
 | |
| 
 | |
|   size_t Base = 36;
 | |
|   size_t Skew = 38;
 | |
|   size_t Bias = 72;
 | |
|   size_t N = 0x80;
 | |
|   size_t TMin = 1;
 | |
|   size_t TMax = 26;
 | |
|   size_t Damp = 700;
 | |
| 
 | |
|   auto Adapt = [&](size_t Delta, size_t NumPoints) {
 | |
|     Delta /= Damp;
 | |
|     Delta += Delta / NumPoints;
 | |
|     Damp = 2;
 | |
| 
 | |
|     size_t K = 0;
 | |
|     while (Delta > (Base - TMin) * TMax / 2) {
 | |
|       Delta /= Base - TMin;
 | |
|       K += Base;
 | |
|     }
 | |
|     return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
 | |
|   };
 | |
| 
 | |
|   // Main decoding loop.
 | |
|   for (size_t I = 0; InputIdx != Input.size(); I += 1) {
 | |
|     size_t OldI = I;
 | |
|     size_t W = 1;
 | |
|     size_t Max = std::numeric_limits<size_t>::max();
 | |
|     for (size_t K = Base; true; K += Base) {
 | |
|       if (InputIdx == Input.size())
 | |
|         return false;
 | |
|       char C = Input[InputIdx++];
 | |
|       size_t Digit = 0;
 | |
|       if (!decodePunycodeDigit(C, Digit))
 | |
|         return false;
 | |
| 
 | |
|       if (Digit > (Max - I) / W)
 | |
|         return false;
 | |
|       I += Digit * W;
 | |
| 
 | |
|       size_t T;
 | |
|       if (K <= Bias)
 | |
|         T = TMin;
 | |
|       else if (K >= Bias + TMax)
 | |
|         T = TMax;
 | |
|       else
 | |
|         T = K - Bias;
 | |
| 
 | |
|       if (Digit < T)
 | |
|         break;
 | |
| 
 | |
|       if (W > Max / (Base - T))
 | |
|         return false;
 | |
|       W *= (Base - T);
 | |
|     }
 | |
|     size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
 | |
|     Bias = Adapt(I - OldI, NumPoints);
 | |
| 
 | |
|     if (I / NumPoints > Max - N)
 | |
|       return false;
 | |
|     N += I / NumPoints;
 | |
|     I = I % NumPoints;
 | |
| 
 | |
|     // Insert N at position I in the output.
 | |
|     char UTF8[4] = {};
 | |
|     if (!encodeUTF8(N, UTF8))
 | |
|       return false;
 | |
|     Output.insert(OutputSize + I * 4, UTF8, 4);
 | |
|   }
 | |
| 
 | |
|   removeNullBytes(Output, OutputSize);
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| void Demangler::printIdentifier(Identifier Ident) {
 | |
|   if (Error || !Print)
 | |
|     return;
 | |
| 
 | |
|   if (Ident.Punycode) {
 | |
|     if (!decodePunycode(Ident.Name, Output))
 | |
|       Error = true;
 | |
|   } else {
 | |
|     print(Ident.Name);
 | |
|   }
 | |
| }
 | |
| 
 | |
| char Demangler::look() const {
 | |
|   if (Error || Position >= Input.size())
 | |
|     return 0;
 | |
| 
 | |
|   return Input[Position];
 | |
| }
 | |
| 
 | |
| char Demangler::consume() {
 | |
|   if (Error || Position >= Input.size()) {
 | |
|     Error = true;
 | |
|     return 0;
 | |
|   }
 | |
| 
 | |
|   return Input[Position++];
 | |
| }
 | |
| 
 | |
| bool Demangler::consumeIf(char Prefix) {
 | |
|   if (Error || Position >= Input.size() || Input[Position] != Prefix)
 | |
|     return false;
 | |
| 
 | |
|   Position += 1;
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| /// Computes A + B. When computation wraps around sets the error and returns
 | |
| /// false. Otherwise assigns the result to A and returns true.
 | |
| bool Demangler::addAssign(uint64_t &A, uint64_t B) {
 | |
|   if (A > std::numeric_limits<uint64_t>::max() - B) {
 | |
|     Error = true;
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   A += B;
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| /// Computes A * B. When computation wraps around sets the error and returns
 | |
| /// false. Otherwise assigns the result to A and returns true.
 | |
| bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
 | |
|   if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
 | |
|     Error = true;
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   A *= B;
 | |
|   return true;
 | |
| }
 |