SarifDiagnostics.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
  1. //===--- SarifDiagnostics.cpp - Sarif Diagnostics for Paths -----*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the SarifDiagnostics object.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "clang/Analysis/MacroExpansionContext.h"
  13. #include "clang/Analysis/PathDiagnostic.h"
  14. #include "clang/Basic/FileManager.h"
  15. #include "clang/Basic/Version.h"
  16. #include "clang/Lex/Preprocessor.h"
  17. #include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h"
  18. #include "llvm/ADT/STLExtras.h"
  19. #include "llvm/ADT/StringMap.h"
  20. #include "llvm/Support/ConvertUTF.h"
  21. #include "llvm/Support/JSON.h"
  22. #include "llvm/Support/Path.h"
  23. using namespace llvm;
  24. using namespace clang;
  25. using namespace ento;
  26. namespace {
  27. class SarifDiagnostics : public PathDiagnosticConsumer {
  28. std::string OutputFile;
  29. const LangOptions &LO;
  30. public:
  31. SarifDiagnostics(const std::string &Output, const LangOptions &LO)
  32. : OutputFile(Output), LO(LO) {}
  33. ~SarifDiagnostics() override = default;
  34. void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags,
  35. FilesMade *FM) override;
  36. StringRef getName() const override { return "SarifDiagnostics"; }
  37. PathGenerationScheme getGenerationScheme() const override { return Minimal; }
  38. bool supportsLogicalOpControlFlow() const override { return true; }
  39. bool supportsCrossFileDiagnostics() const override { return true; }
  40. };
  41. } // end anonymous namespace
  42. void ento::createSarifDiagnosticConsumer(
  43. PathDiagnosticConsumerOptions DiagOpts, PathDiagnosticConsumers &C,
  44. const std::string &Output, const Preprocessor &PP,
  45. const cross_tu::CrossTranslationUnitContext &CTU,
  46. const MacroExpansionContext &MacroExpansions) {
  47. // TODO: Emit an error here.
  48. if (Output.empty())
  49. return;
  50. C.push_back(new SarifDiagnostics(Output, PP.getLangOpts()));
  51. createTextMinimalPathDiagnosticConsumer(std::move(DiagOpts), C, Output, PP,
  52. CTU, MacroExpansions);
  53. }
  54. static StringRef getFileName(const FileEntry &FE) {
  55. StringRef Filename = FE.tryGetRealPathName();
  56. if (Filename.empty())
  57. Filename = FE.getName();
  58. return Filename;
  59. }
  60. static std::string percentEncodeURICharacter(char C) {
  61. // RFC 3986 claims alpha, numeric, and this handful of
  62. // characters are not reserved for the path component and
  63. // should be written out directly. Otherwise, percent
  64. // encode the character and write that out instead of the
  65. // reserved character.
  66. if (llvm::isAlnum(C) ||
  67. StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C))
  68. return std::string(&C, 1);
  69. return "%" + llvm::toHex(StringRef(&C, 1));
  70. }
  71. static std::string fileNameToURI(StringRef Filename) {
  72. llvm::SmallString<32> Ret = StringRef("file://");
  73. // Get the root name to see if it has a URI authority.
  74. StringRef Root = sys::path::root_name(Filename);
  75. if (Root.startswith("//")) {
  76. // There is an authority, so add it to the URI.
  77. Ret += Root.drop_front(2).str();
  78. } else if (!Root.empty()) {
  79. // There is no authority, so end the component and add the root to the URI.
  80. Ret += Twine("/" + Root).str();
  81. }
  82. auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename);
  83. assert(Iter != End && "Expected there to be a non-root path component.");
  84. // Add the rest of the path components, encoding any reserved characters;
  85. // we skip past the first path component, as it was handled it above.
  86. std::for_each(++Iter, End, [&Ret](StringRef Component) {
  87. // For reasons unknown to me, we may get a backslash with Windows native
  88. // paths for the initial backslash following the drive component, which
  89. // we need to ignore as a URI path part.
  90. if (Component == "\\")
  91. return;
  92. // Add the separator between the previous path part and the one being
  93. // currently processed.
  94. Ret += "/";
  95. // URI encode the part.
  96. for (char C : Component) {
  97. Ret += percentEncodeURICharacter(C);
  98. }
  99. });
  100. return std::string(Ret);
  101. }
  102. static json::Object createArtifactLocation(const FileEntry &FE) {
  103. return json::Object{{"uri", fileNameToURI(getFileName(FE))}};
  104. }
  105. static json::Object createArtifact(const FileEntry &FE) {
  106. return json::Object{{"location", createArtifactLocation(FE)},
  107. {"roles", json::Array{"resultFile"}},
  108. {"length", FE.getSize()},
  109. {"mimeType", "text/plain"}};
  110. }
  111. static json::Object createArtifactLocation(const FileEntry &FE,
  112. json::Array &Artifacts) {
  113. std::string FileURI = fileNameToURI(getFileName(FE));
  114. // See if the Artifacts array contains this URI already. If it does not,
  115. // create a new artifact object to add to the array.
  116. auto I = llvm::find_if(Artifacts, [&](const json::Value &File) {
  117. if (const json::Object *Obj = File.getAsObject()) {
  118. if (const json::Object *FileLoc = Obj->getObject("location")) {
  119. Optional<StringRef> URI = FileLoc->getString("uri");
  120. return URI && URI->equals(FileURI);
  121. }
  122. }
  123. return false;
  124. });
  125. // Calculate the index within the artifact array so it can be stored in
  126. // the JSON object.
  127. auto Index = static_cast<unsigned>(std::distance(Artifacts.begin(), I));
  128. if (I == Artifacts.end())
  129. Artifacts.push_back(createArtifact(FE));
  130. return json::Object{{"uri", FileURI}, {"index", Index}};
  131. }
  132. static unsigned int adjustColumnPos(const SourceManager &SM, SourceLocation Loc,
  133. unsigned int TokenLen = 0) {
  134. assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");
  135. std::pair<FileID, unsigned> LocInfo = SM.getDecomposedExpansionLoc(Loc);
  136. assert(LocInfo.second > SM.getExpansionColumnNumber(Loc) &&
  137. "position in file is before column number?");
  138. Optional<MemoryBufferRef> Buf = SM.getBufferOrNone(LocInfo.first);
  139. assert(Buf && "got an invalid buffer for the location's file");
  140. assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) &&
  141. "token extends past end of buffer?");
  142. // Adjust the offset to be the start of the line, since we'll be counting
  143. // Unicode characters from there until our column offset.
  144. unsigned int Off = LocInfo.second - (SM.getExpansionColumnNumber(Loc) - 1);
  145. unsigned int Ret = 1;
  146. while (Off < (LocInfo.second + TokenLen)) {
  147. Off += getNumBytesForUTF8(Buf->getBuffer()[Off]);
  148. Ret++;
  149. }
  150. return Ret;
  151. }
  152. static json::Object createTextRegion(const LangOptions &LO, SourceRange R,
  153. const SourceManager &SM) {
  154. json::Object Region{
  155. {"startLine", SM.getExpansionLineNumber(R.getBegin())},
  156. {"startColumn", adjustColumnPos(SM, R.getBegin())},
  157. };
  158. if (R.getBegin() == R.getEnd()) {
  159. Region["endColumn"] = adjustColumnPos(SM, R.getBegin());
  160. } else {
  161. Region["endLine"] = SM.getExpansionLineNumber(R.getEnd());
  162. Region["endColumn"] = adjustColumnPos(
  163. SM, R.getEnd(),
  164. Lexer::MeasureTokenLength(R.getEnd(), SM, LO));
  165. }
  166. return Region;
  167. }
  168. static json::Object createPhysicalLocation(const LangOptions &LO,
  169. SourceRange R, const FileEntry &FE,
  170. const SourceManager &SMgr,
  171. json::Array &Artifacts) {
  172. return json::Object{
  173. {{"artifactLocation", createArtifactLocation(FE, Artifacts)},
  174. {"region", createTextRegion(LO, R, SMgr)}}};
  175. }
  176. enum class Importance { Important, Essential, Unimportant };
  177. static StringRef importanceToStr(Importance I) {
  178. switch (I) {
  179. case Importance::Important:
  180. return "important";
  181. case Importance::Essential:
  182. return "essential";
  183. case Importance::Unimportant:
  184. return "unimportant";
  185. }
  186. llvm_unreachable("Fully covered switch is not so fully covered");
  187. }
  188. static json::Object createThreadFlowLocation(json::Object &&Location,
  189. Importance I) {
  190. return json::Object{{"location", std::move(Location)},
  191. {"importance", importanceToStr(I)}};
  192. }
  193. static json::Object createMessage(StringRef Text) {
  194. return json::Object{{"text", Text.str()}};
  195. }
  196. static json::Object createLocation(json::Object &&PhysicalLocation,
  197. StringRef Message = "") {
  198. json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}};
  199. if (!Message.empty())
  200. Ret.insert({"message", createMessage(Message)});
  201. return Ret;
  202. }
  203. static Importance calculateImportance(const PathDiagnosticPiece &Piece) {
  204. switch (Piece.getKind()) {
  205. case PathDiagnosticPiece::Call:
  206. case PathDiagnosticPiece::Macro:
  207. case PathDiagnosticPiece::Note:
  208. case PathDiagnosticPiece::PopUp:
  209. // FIXME: What should be reported here?
  210. break;
  211. case PathDiagnosticPiece::Event:
  212. return Piece.getTagStr() == "ConditionBRVisitor" ? Importance::Important
  213. : Importance::Essential;
  214. case PathDiagnosticPiece::ControlFlow:
  215. return Importance::Unimportant;
  216. }
  217. return Importance::Unimportant;
  218. }
  219. static json::Object createThreadFlow(const LangOptions &LO,
  220. const PathPieces &Pieces,
  221. json::Array &Artifacts) {
  222. const SourceManager &SMgr = Pieces.front()->getLocation().getManager();
  223. json::Array Locations;
  224. for (const auto &Piece : Pieces) {
  225. const PathDiagnosticLocation &P = Piece->getLocation();
  226. Locations.push_back(createThreadFlowLocation(
  227. createLocation(createPhysicalLocation(
  228. LO, P.asRange(),
  229. *P.asLocation().getExpansionLoc().getFileEntry(),
  230. SMgr, Artifacts),
  231. Piece->getString()),
  232. calculateImportance(*Piece)));
  233. }
  234. return json::Object{{"locations", std::move(Locations)}};
  235. }
  236. static json::Object createCodeFlow(const LangOptions &LO,
  237. const PathPieces &Pieces,
  238. json::Array &Artifacts) {
  239. return json::Object{
  240. {"threadFlows", json::Array{createThreadFlow(LO, Pieces, Artifacts)}}};
  241. }
  242. static json::Object createResult(const LangOptions &LO,
  243. const PathDiagnostic &Diag,
  244. json::Array &Artifacts,
  245. const StringMap<unsigned> &RuleMapping) {
  246. const PathPieces &Path = Diag.path.flatten(false);
  247. const SourceManager &SMgr = Path.front()->getLocation().getManager();
  248. auto Iter = RuleMapping.find(Diag.getCheckerName());
  249. assert(Iter != RuleMapping.end() && "Rule ID is not in the array index map?");
  250. return json::Object{
  251. {"message", createMessage(Diag.getVerboseDescription())},
  252. {"codeFlows", json::Array{createCodeFlow(LO, Path, Artifacts)}},
  253. {"locations",
  254. json::Array{createLocation(createPhysicalLocation(
  255. LO, Diag.getLocation().asRange(),
  256. *Diag.getLocation().asLocation().getExpansionLoc().getFileEntry(),
  257. SMgr, Artifacts))}},
  258. {"ruleIndex", Iter->getValue()},
  259. {"ruleId", Diag.getCheckerName()}};
  260. }
  261. static StringRef getRuleDescription(StringRef CheckName) {
  262. return llvm::StringSwitch<StringRef>(CheckName)
  263. #define GET_CHECKERS
  264. #define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN) \
  265. .Case(FULLNAME, HELPTEXT)
  266. #include "clang/StaticAnalyzer/Checkers/Checkers.inc"
  267. #undef CHECKER
  268. #undef GET_CHECKERS
  269. ;
  270. }
  271. static StringRef getRuleHelpURIStr(StringRef CheckName) {
  272. return llvm::StringSwitch<StringRef>(CheckName)
  273. #define GET_CHECKERS
  274. #define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN) \
  275. .Case(FULLNAME, DOC_URI)
  276. #include "clang/StaticAnalyzer/Checkers/Checkers.inc"
  277. #undef CHECKER
  278. #undef GET_CHECKERS
  279. ;
  280. }
  281. static json::Object createRule(const PathDiagnostic &Diag) {
  282. StringRef CheckName = Diag.getCheckerName();
  283. json::Object Ret{
  284. {"fullDescription", createMessage(getRuleDescription(CheckName))},
  285. {"name", CheckName},
  286. {"id", CheckName}};
  287. std::string RuleURI = std::string(getRuleHelpURIStr(CheckName));
  288. if (!RuleURI.empty())
  289. Ret["helpUri"] = RuleURI;
  290. return Ret;
  291. }
  292. static json::Array createRules(std::vector<const PathDiagnostic *> &Diags,
  293. StringMap<unsigned> &RuleMapping) {
  294. json::Array Rules;
  295. llvm::StringSet<> Seen;
  296. llvm::for_each(Diags, [&](const PathDiagnostic *D) {
  297. StringRef RuleID = D->getCheckerName();
  298. std::pair<llvm::StringSet<>::iterator, bool> P = Seen.insert(RuleID);
  299. if (P.second) {
  300. RuleMapping[RuleID] = Rules.size(); // Maps RuleID to an Array Index.
  301. Rules.push_back(createRule(*D));
  302. }
  303. });
  304. return Rules;
  305. }
  306. static json::Object createTool(std::vector<const PathDiagnostic *> &Diags,
  307. StringMap<unsigned> &RuleMapping) {
  308. return json::Object{
  309. {"driver", json::Object{{"name", "clang"},
  310. {"fullName", "clang static analyzer"},
  311. {"language", "en-US"},
  312. {"version", getClangFullVersion()},
  313. {"rules", createRules(Diags, RuleMapping)}}}};
  314. }
  315. static json::Object createRun(const LangOptions &LO,
  316. std::vector<const PathDiagnostic *> &Diags) {
  317. json::Array Results, Artifacts;
  318. StringMap<unsigned> RuleMapping;
  319. json::Object Tool = createTool(Diags, RuleMapping);
  320. llvm::for_each(Diags, [&](const PathDiagnostic *D) {
  321. Results.push_back(createResult(LO, *D, Artifacts, RuleMapping));
  322. });
  323. return json::Object{{"tool", std::move(Tool)},
  324. {"results", std::move(Results)},
  325. {"artifacts", std::move(Artifacts)},
  326. {"columnKind", "unicodeCodePoints"}};
  327. }
  328. void SarifDiagnostics::FlushDiagnosticsImpl(
  329. std::vector<const PathDiagnostic *> &Diags, FilesMade *) {
  330. // We currently overwrite the file if it already exists. However, it may be
  331. // useful to add a feature someday that allows the user to append a run to an
  332. // existing SARIF file. One danger from that approach is that the size of the
  333. // file can become large very quickly, so decoding into JSON to append a run
  334. // may be an expensive operation.
  335. std::error_code EC;
  336. llvm::raw_fd_ostream OS(OutputFile, EC, llvm::sys::fs::OF_TextWithCRLF);
  337. if (EC) {
  338. llvm::errs() << "warning: could not create file: " << EC.message() << '\n';
  339. return;
  340. }
  341. json::Object Sarif{
  342. {"$schema",
  343. "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json"},
  344. {"version", "2.1.0"},
  345. {"runs", json::Array{createRun(LO, Diags)}}};
  346. OS << llvm::formatv("{0:2}\n", json::Value(std::move(Sarif)));
  347. }