ClangCommentHTMLNamedCharacterReferenceEmitter.cpp 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. //===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This tablegen backend emits an efficient function to translate HTML named
  10. // character references to UTF-8 sequences.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "TableGenBackends.h"
  14. #include "llvm/ADT/SmallString.h"
  15. #include "llvm/Support/ConvertUTF.h"
  16. #include "llvm/TableGen/Error.h"
  17. #include "llvm/TableGen/Record.h"
  18. #include "llvm/TableGen/StringMatcher.h"
  19. #include "llvm/TableGen/TableGenBackend.h"
  20. #include <vector>
  21. using namespace llvm;
  22. /// Convert a code point to the corresponding UTF-8 sequence represented
  23. /// as a C string literal.
  24. ///
  25. /// \returns true on success.
  26. static bool translateCodePointToUTF8(unsigned CodePoint,
  27. SmallVectorImpl<char> &CLiteral) {
  28. char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
  29. char *TranslatedPtr = Translated;
  30. if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr))
  31. return false;
  32. StringRef UTF8(Translated, TranslatedPtr - Translated);
  33. raw_svector_ostream OS(CLiteral);
  34. OS << "\"";
  35. for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
  36. OS << "\\x";
  37. OS.write_hex(static_cast<unsigned char>(UTF8[i]));
  38. }
  39. OS << "\"";
  40. return true;
  41. }
  42. void clang::EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
  43. raw_ostream &OS) {
  44. std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
  45. std::vector<StringMatcher::StringPair> NameToUTF8;
  46. SmallString<32> CLiteral;
  47. for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
  48. I != E; ++I) {
  49. Record &Tag = **I;
  50. std::string Spelling = std::string(Tag.getValueAsString("Spelling"));
  51. uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
  52. CLiteral.clear();
  53. CLiteral.append("return ");
  54. if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
  55. SrcMgr.PrintMessage(Tag.getLoc().front(),
  56. SourceMgr::DK_Error,
  57. Twine("invalid code point"));
  58. continue;
  59. }
  60. CLiteral.append(";");
  61. StringMatcher::StringPair Match(Spelling, std::string(CLiteral.str()));
  62. NameToUTF8.push_back(Match);
  63. }
  64. emitSourceFileHeader("HTML named character reference to UTF-8 "
  65. "translation", OS);
  66. OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
  67. " StringRef Name) {\n";
  68. StringMatcher("Name", NameToUTF8, OS).Emit();
  69. OS << " return StringRef();\n"
  70. << "}\n\n";
  71. }