82 lines
		
	
	
		
			2.8 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			82 lines
		
	
	
		
			2.8 KiB
		
	
	
	
		
			C++
		
	
	
	
| //===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
 | |
| //
 | |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | |
| // See https://llvm.org/LICENSE.txt for license information.
 | |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| //
 | |
| // This tablegen backend emits an efficient function to translate HTML named
 | |
| // character references to UTF-8 sequences.
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| #include "TableGenBackends.h"
 | |
| #include "llvm/ADT/SmallString.h"
 | |
| #include "llvm/Support/ConvertUTF.h"
 | |
| #include "llvm/TableGen/Error.h"
 | |
| #include "llvm/TableGen/Record.h"
 | |
| #include "llvm/TableGen/StringMatcher.h"
 | |
| #include "llvm/TableGen/TableGenBackend.h"
 | |
| #include <vector>
 | |
| 
 | |
| using namespace llvm;
 | |
| 
 | |
| /// Convert a code point to the corresponding UTF-8 sequence represented
 | |
| /// as a C string literal.
 | |
| ///
 | |
| /// \returns true on success.
 | |
| static bool translateCodePointToUTF8(unsigned CodePoint,
 | |
|                                      SmallVectorImpl<char> &CLiteral) {
 | |
|   char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
 | |
|   char *TranslatedPtr = Translated;
 | |
|   if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr))
 | |
|     return false;
 | |
| 
 | |
|   StringRef UTF8(Translated, TranslatedPtr - Translated);
 | |
| 
 | |
|   raw_svector_ostream OS(CLiteral);
 | |
|   OS << "\"";
 | |
|   for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
 | |
|     OS << "\\x";
 | |
|     OS.write_hex(static_cast<unsigned char>(UTF8[i]));
 | |
|   }
 | |
|   OS << "\"";
 | |
| 
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| void clang::EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
 | |
|                                                          raw_ostream &OS) {
 | |
|   std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
 | |
|   std::vector<StringMatcher::StringPair> NameToUTF8;
 | |
|   SmallString<32> CLiteral;
 | |
|   for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
 | |
|        I != E; ++I) {
 | |
|     Record &Tag = **I;
 | |
|     std::string Spelling = std::string(Tag.getValueAsString("Spelling"));
 | |
|     uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
 | |
|     CLiteral.clear();
 | |
|     CLiteral.append("return ");
 | |
|     if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
 | |
|       SrcMgr.PrintMessage(Tag.getLoc().front(),
 | |
|                           SourceMgr::DK_Error,
 | |
|                           Twine("invalid code point"));
 | |
|       continue;
 | |
|     }
 | |
|     CLiteral.append(";");
 | |
| 
 | |
|     StringMatcher::StringPair Match(Spelling, std::string(CLiteral.str()));
 | |
|     NameToUTF8.push_back(Match);
 | |
|   }
 | |
| 
 | |
|   emitSourceFileHeader("HTML named character reference to UTF-8 "
 | |
|                        "translation", OS);
 | |
| 
 | |
|   OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
 | |
|         "                                             StringRef Name) {\n";
 | |
|   StringMatcher("Name", NameToUTF8, OS).Emit();
 | |
|   OS << "  return StringRef();\n"
 | |
|      << "}\n\n";
 | |
| }
 |