Sarif.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. //===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This file contains the implementation of the SarifDocumentWriter class,
  11. /// which makes use of associated builders such as:
  12. /// - \ref SarifArtifact
  13. /// - \ref SarifArtifactLocation
  14. /// - \ref SarifRule
  15. /// - \ref SarifResult
  16. //===----------------------------------------------------------------------===//
  #include "clang/Basic/Sarif.h"
  #include "clang/Basic/SourceLocation.h"
  #include "clang/Basic/SourceManager.h"
  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/StringMap.h"
  #include "llvm/ADT/StringRef.h"
  #include "llvm/Support/ConvertUTF.h"
  #include "llvm/Support/JSON.h"
  #include "llvm/Support/Path.h"
  #include <optional>
  #include <string>
  #include <utility>
  using namespace clang;
  using namespace llvm;
  using clang::detail::SarifArtifact;
  using clang::detail::SarifArtifactLocation;
  34. static StringRef getFileName(const FileEntry &FE) {
  35. StringRef Filename = FE.tryGetRealPathName();
  36. if (Filename.empty())
  37. Filename = FE.getName();
  38. return Filename;
  39. }
  40. /// \name URI
  41. /// @{
  42. /// \internal
  43. /// \brief
  44. /// Return the RFC3986 encoding of the input character.
  45. ///
  46. /// \param C Character to encode to RFC3986.
  47. ///
  48. /// \return The RFC3986 representation of \c C.
  49. static std::string percentEncodeURICharacter(char C) {
  50. // RFC 3986 claims alpha, numeric, and this handful of
  51. // characters are not reserved for the path component and
  52. // should be written out directly. Otherwise, percent
  53. // encode the character and write that out instead of the
  54. // reserved character.
  55. if (llvm::isAlnum(C) ||
  56. StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C))
  57. return std::string(&C, 1);
  58. return "%" + llvm::toHex(StringRef(&C, 1));
  59. }
  60. /// \internal
  61. /// \brief Return a URI representing the given file name.
  62. ///
  63. /// \param Filename The filename to be represented as URI.
  64. ///
  65. /// \return RFC3986 URI representing the input file name.
  66. static std::string fileNameToURI(StringRef Filename) {
  67. SmallString<32> Ret = StringRef("file://");
  68. // Get the root name to see if it has a URI authority.
  69. StringRef Root = sys::path::root_name(Filename);
  70. if (Root.startswith("//")) {
  71. // There is an authority, so add it to the URI.
  72. Ret += Root.drop_front(2).str();
  73. } else if (!Root.empty()) {
  74. // There is no authority, so end the component and add the root to the URI.
  75. Ret += Twine("/" + Root).str();
  76. }
  77. auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename);
  78. assert(Iter != End && "Expected there to be a non-root path component.");
  79. // Add the rest of the path components, encoding any reserved characters;
  80. // we skip past the first path component, as it was handled it above.
  81. std::for_each(++Iter, End, [&Ret](StringRef Component) {
  82. // For reasons unknown to me, we may get a backslash with Windows native
  83. // paths for the initial backslash following the drive component, which
  84. // we need to ignore as a URI path part.
  85. if (Component == "\\")
  86. return;
  87. // Add the separator between the previous path part and the one being
  88. // currently processed.
  89. Ret += "/";
  90. // URI encode the part.
  91. for (char C : Component) {
  92. Ret += percentEncodeURICharacter(C);
  93. }
  94. });
  95. return std::string(Ret);
  96. }
  97. /// @}
  98. /// \brief Calculate the column position expressed in the number of UTF-8 code
  99. /// points from column start to the source location
  100. ///
  101. /// \param Loc The source location whose column needs to be calculated.
  102. /// \param TokenLen Optional hint for when the token is multiple bytes long.
  103. ///
  104. /// \return The column number as a UTF-8 aware byte offset from column start to
  105. /// the effective source location.
  106. static unsigned int adjustColumnPos(FullSourceLoc Loc,
  107. unsigned int TokenLen = 0) {
  108. assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");
  109. std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedExpansionLoc();
  110. std::optional<MemoryBufferRef> Buf =
  111. Loc.getManager().getBufferOrNone(LocInfo.first);
  112. assert(Buf && "got an invalid buffer for the location's file");
  113. assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) &&
  114. "token extends past end of buffer?");
  115. // Adjust the offset to be the start of the line, since we'll be counting
  116. // Unicode characters from there until our column offset.
  117. unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1);
  118. unsigned int Ret = 1;
  119. while (Off < (LocInfo.second + TokenLen)) {
  120. Off += getNumBytesForUTF8(Buf->getBuffer()[Off]);
  121. Ret++;
  122. }
  123. return Ret;
  124. }
  125. /// \name SARIF Utilities
  126. /// @{
  127. /// \internal
  128. json::Object createMessage(StringRef Text) {
  129. return json::Object{{"text", Text.str()}};
  130. }
  131. /// \internal
  132. /// \pre CharSourceRange must be a token range
  133. static json::Object createTextRegion(const SourceManager &SM,
  134. const CharSourceRange &R) {
  135. FullSourceLoc BeginCharLoc{R.getBegin(), SM};
  136. FullSourceLoc EndCharLoc{R.getEnd(), SM};
  137. json::Object Region{{"startLine", BeginCharLoc.getExpansionLineNumber()},
  138. {"startColumn", adjustColumnPos(BeginCharLoc)}};
  139. if (BeginCharLoc == EndCharLoc) {
  140. Region["endColumn"] = adjustColumnPos(BeginCharLoc);
  141. } else {
  142. Region["endLine"] = EndCharLoc.getExpansionLineNumber();
  143. Region["endColumn"] = adjustColumnPos(EndCharLoc);
  144. }
  145. return Region;
  146. }
  147. static json::Object createLocation(json::Object &&PhysicalLocation,
  148. StringRef Message = "") {
  149. json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}};
  150. if (!Message.empty())
  151. Ret.insert({"message", createMessage(Message)});
  152. return Ret;
  153. }
  154. static StringRef importanceToStr(ThreadFlowImportance I) {
  155. switch (I) {
  156. case ThreadFlowImportance::Important:
  157. return "important";
  158. case ThreadFlowImportance::Essential:
  159. return "essential";
  160. case ThreadFlowImportance::Unimportant:
  161. return "unimportant";
  162. }
  163. llvm_unreachable("Fully covered switch is not so fully covered");
  164. }
  165. static StringRef resultLevelToStr(SarifResultLevel R) {
  166. switch (R) {
  167. case SarifResultLevel::None:
  168. return "none";
  169. case SarifResultLevel::Note:
  170. return "note";
  171. case SarifResultLevel::Warning:
  172. return "warning";
  173. case SarifResultLevel::Error:
  174. return "error";
  175. }
  176. llvm_unreachable("Potentially un-handled SarifResultLevel. "
  177. "Is the switch not fully covered?");
  178. }
  179. static json::Object
  180. createThreadFlowLocation(json::Object &&Location,
  181. const ThreadFlowImportance &Importance) {
  182. return json::Object{{"location", std::move(Location)},
  183. {"importance", importanceToStr(Importance)}};
  184. }
  185. /// @}
  186. json::Object
  187. SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) {
  188. assert(R.isValid() &&
  189. "Cannot create a physicalLocation from invalid SourceRange!");
  190. assert(R.isCharRange() &&
  191. "Cannot create a physicalLocation from a token range!");
  192. FullSourceLoc Start{R.getBegin(), SourceMgr};
  193. const FileEntry *FE = Start.getExpansionLoc().getFileEntry();
  194. assert(FE != nullptr && "Diagnostic does not exist within a valid file!");
  195. const std::string &FileURI = fileNameToURI(getFileName(*FE));
  196. auto I = CurrentArtifacts.find(FileURI);
  197. if (I == CurrentArtifacts.end()) {
  198. uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size());
  199. const SarifArtifactLocation &Location =
  200. SarifArtifactLocation::create(FileURI).setIndex(Idx);
  201. const SarifArtifact &Artifact = SarifArtifact::create(Location)
  202. .setRoles({"resultFile"})
  203. .setLength(FE->getSize())
  204. .setMimeType("text/plain");
  205. auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact});
  206. // If inserted, ensure the original iterator points to the newly inserted
  207. // element, so it can be used downstream.
  208. if (StatusIter.second)
  209. I = StatusIter.first;
  210. }
  211. assert(I != CurrentArtifacts.end() && "Failed to insert new artifact");
  212. const SarifArtifactLocation &Location = I->second.Location;
  213. json::Object ArtifactLocationObject{{"uri", Location.URI}};
  214. if (Location.Index.has_value())
  215. ArtifactLocationObject["index"] = *Location.Index;
  216. return json::Object{{{"artifactLocation", std::move(ArtifactLocationObject)},
  217. {"region", createTextRegion(SourceMgr, R)}}};
  218. }
  219. json::Object &SarifDocumentWriter::getCurrentTool() {
  220. assert(!Closed && "SARIF Document is closed. "
  221. "Need to call createRun() before using getcurrentTool!");
  222. // Since Closed = false here, expect there to be at least 1 Run, anything
  223. // else is an invalid state.
  224. assert(!Runs.empty() && "There are no runs associated with the document!");
  225. return *Runs.back().getAsObject()->get("tool")->getAsObject();
  226. }
  227. void SarifDocumentWriter::reset() {
  228. CurrentRules.clear();
  229. CurrentArtifacts.clear();
  230. }
  231. void SarifDocumentWriter::endRun() {
  232. // Exit early if trying to close a closed Document.
  233. if (Closed) {
  234. reset();
  235. return;
  236. }
  237. // Since Closed = false here, expect there to be at least 1 Run, anything
  238. // else is an invalid state.
  239. assert(!Runs.empty() && "There are no runs associated with the document!");
  240. // Flush all the rules.
  241. json::Object &Tool = getCurrentTool();
  242. json::Array Rules;
  243. for (const SarifRule &R : CurrentRules) {
  244. json::Object Config{
  245. {"enabled", R.DefaultConfiguration.Enabled},
  246. {"level", resultLevelToStr(R.DefaultConfiguration.Level)},
  247. {"rank", R.DefaultConfiguration.Rank}};
  248. json::Object Rule{
  249. {"name", R.Name},
  250. {"id", R.Id},
  251. {"fullDescription", json::Object{{"text", R.Description}}},
  252. {"defaultConfiguration", std::move(Config)}};
  253. if (!R.HelpURI.empty())
  254. Rule["helpUri"] = R.HelpURI;
  255. Rules.emplace_back(std::move(Rule));
  256. }
  257. json::Object &Driver = *Tool.getObject("driver");
  258. Driver["rules"] = std::move(Rules);
  259. // Flush all the artifacts.
  260. json::Object &Run = getCurrentRun();
  261. json::Array *Artifacts = Run.getArray("artifacts");
  262. for (const auto &Pair : CurrentArtifacts) {
  263. const SarifArtifact &A = Pair.getValue();
  264. json::Object Loc{{"uri", A.Location.URI}};
  265. if (A.Location.Index.has_value()) {
  266. Loc["index"] = static_cast<int64_t>(*A.Location.Index);
  267. }
  268. json::Object Artifact;
  269. Artifact["location"] = std::move(Loc);
  270. if (A.Length.has_value())
  271. Artifact["length"] = static_cast<int64_t>(*A.Length);
  272. if (!A.Roles.empty())
  273. Artifact["roles"] = json::Array(A.Roles);
  274. if (!A.MimeType.empty())
  275. Artifact["mimeType"] = A.MimeType;
  276. if (A.Offset.has_value())
  277. Artifact["offset"] = *A.Offset;
  278. Artifacts->push_back(json::Value(std::move(Artifact)));
  279. }
  280. // Clear, reset temporaries before next run.
  281. reset();
  282. // Mark the document as closed.
  283. Closed = true;
  284. }
  285. json::Array
  286. SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) {
  287. json::Object Ret{{"locations", json::Array{}}};
  288. json::Array Locs;
  289. for (const auto &ThreadFlow : ThreadFlows) {
  290. json::Object PLoc = createPhysicalLocation(ThreadFlow.Range);
  291. json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message);
  292. Locs.emplace_back(
  293. createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance));
  294. }
  295. Ret["locations"] = std::move(Locs);
  296. return json::Array{std::move(Ret)};
  297. }
  298. json::Object
  299. SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) {
  300. return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}};
  301. }
  302. void SarifDocumentWriter::createRun(StringRef ShortToolName,
  303. StringRef LongToolName,
  304. StringRef ToolVersion) {
  305. // Clear resources associated with a previous run.
  306. endRun();
  307. // Signify a new run has begun.
  308. Closed = false;
  309. json::Object Tool{
  310. {"driver",
  311. json::Object{{"name", ShortToolName},
  312. {"fullName", LongToolName},
  313. {"language", "en-US"},
  314. {"version", ToolVersion},
  315. {"informationUri",
  316. "https://clang.llvm.org/docs/UsersManual.html"}}}};
  317. json::Object TheRun{{"tool", std::move(Tool)},
  318. {"results", {}},
  319. {"artifacts", {}},
  320. {"columnKind", "unicodeCodePoints"}};
  321. Runs.emplace_back(std::move(TheRun));
  322. }
  323. json::Object &SarifDocumentWriter::getCurrentRun() {
  324. assert(!Closed &&
  325. "SARIF Document is closed. "
  326. "Can only getCurrentRun() if document is opened via createRun(), "
  327. "create a run first");
  328. // Since Closed = false here, expect there to be at least 1 Run, anything
  329. // else is an invalid state.
  330. assert(!Runs.empty() && "There are no runs associated with the document!");
  331. return *Runs.back().getAsObject();
  332. }
  333. size_t SarifDocumentWriter::createRule(const SarifRule &Rule) {
  334. size_t Ret = CurrentRules.size();
  335. CurrentRules.emplace_back(Rule);
  336. return Ret;
  337. }
  338. void SarifDocumentWriter::appendResult(const SarifResult &Result) {
  339. size_t RuleIdx = Result.RuleIdx;
  340. assert(RuleIdx < CurrentRules.size() &&
  341. "Trying to reference a rule that doesn't exist");
  342. const SarifRule &Rule = CurrentRules[RuleIdx];
  343. assert(Rule.DefaultConfiguration.Enabled &&
  344. "Cannot add a result referencing a disabled Rule");
  345. json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)},
  346. {"ruleIndex", static_cast<int64_t>(RuleIdx)},
  347. {"ruleId", Rule.Id}};
  348. if (!Result.Locations.empty()) {
  349. json::Array Locs;
  350. for (auto &Range : Result.Locations) {
  351. Locs.emplace_back(createLocation(createPhysicalLocation(Range)));
  352. }
  353. Ret["locations"] = std::move(Locs);
  354. }
  355. if (!Result.ThreadFlows.empty())
  356. Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)};
  357. Ret["level"] = resultLevelToStr(
  358. Result.LevelOverride.value_or(Rule.DefaultConfiguration.Level));
  359. json::Object &Run = getCurrentRun();
  360. json::Array *Results = Run.getArray("results");
  361. Results->emplace_back(std::move(Ret));
  362. }
  363. json::Object SarifDocumentWriter::createDocument() {
  364. // Flush all temporaries to their destinations if needed.
  365. endRun();
  366. json::Object Doc{
  367. {"$schema", SchemaURI},
  368. {"version", SchemaVersion},
  369. };
  370. if (!Runs.empty())
  371. Doc["runs"] = json::Array(Runs);
  372. return Doc;
  373. }