contentParser.ts 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. import { HoppRESTReqBody } from "@hoppscotch/data"
  2. import * as O from "fp-ts/Option"
  3. import * as RA from "fp-ts/ReadonlyArray"
  4. import * as S from "fp-ts/string"
  5. import { pipe, flow } from "fp-ts/function"
  6. import { tupleToRecord } from "~/helpers/functional/record"
  7. import { safeParseJSON } from "~/helpers/functional/json"
  8. import { optionChoose } from "~/helpers/functional/option"
  /** Reports whether the raw string is accepted by `safeParseJSON`. */
  const isJSON = (rawData: string) => O.isSome(safeParseJSON(rawData))
  /**
   * Reports whether the raw string is XML: it must contain something that
   * looks like a tag and `prettifyXml` must succeed on it.
   */
  const isXML = (rawData: string) => {
    // Cheap tag-shaped check first so plain text never reaches prettifyXml.
    if (!/<\/?[a-zA-Z][\s\S]*>/i.test(rawData)) {
      return false
    }
    return O.isSome(prettifyXml(rawData))
  }
  /** Reports whether the raw string contains anything shaped like a markup tag. */
  const isHTML = (rawData: string) => /<\/?[a-zA-Z][\s\S]*>/i.test(rawData)
  /**
   * Reports whether the raw string starts with a multipart boundary line:
   * two or more dashes, an alphanumeric token, then the literal characters `\r\n`.
   */
  const isFormData = (rawData: string) =>
    /^-{2,}[A-Za-z0-9]+\\r\\n/.test(rawData)
  /** Reports whether the raw string contains at least one `key=value` shaped pair. */
  const isXWWWFormUrlEncoded = (rawData: string) =>
    /([^&=]+)=([^&=]*)/.test(rawData)
  /**
   * Guesses the content type of a raw request body.
   *
   * The checks run in order and the first match wins: empty input (null),
   * JSON, multipart form data, XML, HTML, URL-encoded form. Anything that
   * matches none of them is reported as plain text.
   * @param rawData String for which content type is to be detected
   * @returns Content type of the data
   */
  export const detectContentType = (
    rawData: string
  ): HoppRESTReqBody["contentType"] => {
    const detected = pipe(
      rawData,
      optionChoose([
        [(input) => !input, null],
        [isJSON, "application/json" as const],
        [isFormData, "multipart/form-data" as const],
        [isXML, "application/xml" as const],
        [isHTML, "text/html" as const],
        [isXWWWFormUrlEncoded, "application/x-www-form-urlencoded" as const],
      ])
    )

    return pipe(
      detected,
      O.getOrElseW(() => "text/plain" as const)
    )
  }
  /**
   * Helpers for picking apart a raw `multipart/form-data` body.
   *
   * NOTE(review): line breaks are matched as the literal four characters `\r\n`
   * (backslash, r, backslash, n), not as real CR/LF bytes. Presumably the raw
   * data still carries its escape sequences; confirm against the caller.
   */
  const multipartFunctions = {
    /**
     * Resolves the part delimiter (the boundary prefixed with `--`).
     * Uses the `boundary=` parameter of `rawContentType` when a non-empty
     * content type is supplied, otherwise sniffs the boundary from the body.
     */
    getBoundary(rawData: string, rawContentType: string | undefined) {
      return pipe(
        rawContentType,
        O.fromNullable,
        O.filter((rct) => rct.length > 0),
        O.match(
          () => this.getBoundaryFromRawData(rawData),
          (rct) => this.getBoundaryFromRawContentType(rawData, rct)
        )
      )
    },

    /**
     * Finds the first `--token` delimiter line in the body.
     * @returns The delimiter without its trailing literal `\r\n`
     */
    getBoundaryFromRawData(rawData: string) {
      return pipe(
        rawData.match(/(-{2,}[A-Za-z0-9]+)\\r\\n/g),
        O.fromNullable,
        O.filter((boundaryMatch) => boundaryMatch.length > 0),
        // Each match ends with the 4 literal characters `\r\n`; drop them.
        O.map((matches) => matches[0].slice(0, -4))
      )
    },

    /**
     * Reads the boundary from the `boundary=` parameter of the content type and
     * accepts it only when the body (literal `\r\n` removed) ends with the
     * matching closing delimiter `--boundary--`.
     */
    getBoundaryFromRawContentType(rawData: string, rawContentType: string) {
      return pipe(
        rawContentType.match(/boundary=(.+)/),
        O.fromNullable,
        O.filter((boundaryContentMatch) => boundaryContentMatch.length > 1),
        O.filter((matches) =>
          rawData.replaceAll("\\r\\n", "").endsWith("--" + matches[1] + "--")
        ),
        O.map((matches) => "--" + matches[1])
      )
    },

    /**
     * Splits the body into parts on the delimiter, keeps the parts that mention
     * `name`, and splits each part into its non-empty lines.
     */
    splitUsingBoundaryAndNewLines(rawData: string, boundary: string) {
      // A boundary taken from the content type can contain regex metacharacters
      // (for example `+`, `.`, `(`, `?`), so escape it before building a pattern.
      const escapedBoundary = boundary.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")

      return pipe(
        rawData,
        S.split(RegExp(`${escapedBoundary}-*`)),
        RA.filter((p) => p !== "" && p.includes("name")),
        RA.map((p) =>
          pipe(
            // Collapse runs of the whole literal `\r\n` sequence. The quantifier
            // must cover the full sequence: applied to the last `n` only, it
            // would swallow leading "n" characters of the following line.
            p.replaceAll(/(?:\\r\\n)+/g, "\\r\\n"),
            S.split("\\r\\n"),
            RA.filter((q) => q !== "")
          )
        )
      )
    },

    /**
     * Builds a `[name, value]` tuple from the lines of one part.
     * The first line must carry ` name="..."`; the second line is the value,
     * except that parts whose first line mentions `filename` get an empty value.
     * @returns None when the part has fewer than two lines or no name
     */
    getNameValuePair(pair: readonly string[]) {
      return pipe(
        pair,
        O.fromPredicate((lines) => lines.length > 1),
        O.chain((lines) => O.fromNullable(lines[0].match(/ name="(\w+)"/))),
        O.map(
          (nameMatch) =>
            [nameMatch[1], pair[0].includes("filename") ? "" : pair[1]] as [
              string,
              string
            ]
        )
      )
    },
  }
  /**
   * Parses a multipart/form-data body into a record of field names to values.
   * Yields None when no boundary can be determined or no field is found.
   */
  const getFormDataBody = (rawData: string, rawContentType: string | undefined) =>
    pipe(
      multipartFunctions.getBoundary(rawData, rawContentType),
      O.map((delimiter) =>
        multipartFunctions.splitUsingBoundaryAndNewLines(rawData, delimiter)
      ),
      O.map(RA.filterMap((lines) => multipartFunctions.getNameValuePair(lines))),
      O.map(RA.toArray),
      O.filter((entries) => entries.length > 0),
      O.map(tupleToRecord)
    )
  /** Formats the HTML body and wraps the result in `Some`. */
  const getHTMLBody = (rawData: string) => O.some(formatHTML(rawData))
  /** Prettifies the XML body, falling back to the untouched input when that fails. */
  const getXMLBody = (rawData: string) => {
    const prettified = prettifyXml(rawData)
    return O.isSome(prettified) ? prettified : O.some(rawData)
  }
  /**
   * Pretty-prints a JSON body with two-space indentation.
   * Backslash-escaped double quotes are unescaped before parsing; input that
   * still fails to parse is replaced by "{ }".
   */
  const getFormattedJSON = (jsonString: string) => {
    const unescaped = jsonString.replaceAll('\\"', '"')

    return pipe(
      safeParseJSON(unescaped),
      O.fold(
        () => "{ }",
        (parsedJSON) => JSON.stringify(parsedJSON, null, 2)
      ),
      O.some
    )
  }
  /**
   * Decodes one percent-encoded component, returning it unchanged when it is
   * malformed (decodeURIComponent throws URIError on input such as a lone "%").
   */
  const decodeFormComponent = (component: string) => {
    try {
      return decodeURIComponent(component)
    } catch {
      return component
    }
  }

  /**
   * Turns an application/x-www-form-urlencoded body into `key: value` lines
   * joined by newlines. Yields None when the body contains no pair at all.
   *
   * Pairs are split on the raw string first and decoded afterwards, so an
   * encoded `&` or `=` inside a key or value is not mistaken for a separator.
   */
  const getXWWWFormUrlEncodedBody = flow(
    (rawData: string) => rawData.match(/(([^&=]+)=?([^&=]*))/g),
    O.fromNullable,
    O.map((pairs) =>
      pairs
        .map((pair) => {
          const separatorIndex = pair.indexOf("=")

          // A bare key with no "=" is emitted as-is (decoded).
          if (separatorIndex === -1) {
            return decodeFormComponent(pair)
          }

          const key = decodeFormComponent(pair.slice(0, separatorIndex))
          const value = decodeFormComponent(pair.slice(separatorIndex + 1))
          return `${key}: ${value}`
        })
        .join("\n")
    )
  )
  /**
   * Parses the raw body with the parser that matches its content type.
   * Content types without a dedicated parser (including text/plain) are
   * returned unchanged.
   * @param rawData Data to be parsed
   * @param contentType Content type of the data
   * @param rawContentType Optional raw content type header, used to read the
   * boundary for multipart/form-data
   * @returns Option of the parsed body: a string, or a Record for multipart/form-data
   */
  export function parseBody(
    rawData: string,
    contentType: HoppRESTReqBody["contentType"],
    rawContentType?: string
  ): O.Option<string | Record<string, string>> {
    const isJSONFlavour =
      contentType === "application/hal+json" ||
      contentType === "application/ld+json" ||
      contentType === "application/vnd.api+json" ||
      contentType === "application/json"

    if (isJSONFlavour) {
      return getFormattedJSON(rawData)
    }
    if (contentType === "application/x-www-form-urlencoded") {
      return getXWWWFormUrlEncodedBody(rawData)
    }
    if (contentType === "multipart/form-data") {
      return getFormDataBody(rawData, rawContentType)
    }
    if (contentType === "text/html") {
      return getHTMLBody(rawData)
    }
    if (contentType === "application/xml") {
      return getXMLBody(rawData)
    }

    // text/plain and every other content type pass through untouched.
    return O.some(rawData)
  }
  190. /**
  191. * Formatter Functions
  192. */
  /**
   * Re-indents an XML string by running it through an identity XSLT transform
   * with indented output.
   * @param sourceXml The string to format
   * @returns Some with the serialized result, or None when anything throws,
   * including when the parser reports a `parsererror` element
   */
  function prettifyXml(sourceXml: string) {
    // Stylesheet describing how we want to modify the XML - indent everything.
    const stylesheetSource = [
      '<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform">',
      ' <xsl:strip-space elements="*"/>',
      ' <xsl:template match="para[content-style][not(text())]">', // change to just text() to strip space in text nodes
      ' <xsl:value-of select="normalize-space(.)"/>',
      " </xsl:template>",
      ' <xsl:template match="node()|@*">',
      ' <xsl:copy><xsl:apply-templates select="node()|@*"/></xsl:copy>',
      " </xsl:template>",
      ' <xsl:output indent="yes"/>',
      "</xsl:stylesheet>",
    ].join("\n")

    return O.tryCatch(() => {
      const parser = new DOMParser()

      const inputDocument = parser.parseFromString(sourceXml, "application/xml")
      if (inputDocument.querySelector("parsererror")) {
        throw new Error("Unstructured Body")
      }

      const stylesheetDocument = parser.parseFromString(
        stylesheetSource,
        "application/xml"
      )

      const processor = new XSLTProcessor()
      processor.importStylesheet(stylesheetDocument)

      const transformed = processor.transformToDocument(inputDocument)
      return new XMLSerializer().serializeToString(transformed)
    })
  }
  /**
   * Prettifies HTML string
   *
   * The input is split between adjacent tags (`>` optional whitespace `<`) and
   * each chunk is written on its own line. A closing tag reduces the indent
   * before it is written; an opening tag that is neither self-closing nor a
   * void element increases the indent for the chunks that follow.
   * @param htmlString The string to format
   * @returns Indented HTML string (uses spaces)
   */
  function formatHTML(htmlString: string) {
    const tab = " "
    // Void elements never have a closing tag, so they must not open a level.
    const voidTags = [
      "area",
      "base",
      "br",
      "col",
      "embed",
      "hr",
      "img",
      "input",
      "link",
      "meta",
      "param",
      "source",
      "track",
      "wbr",
    ]

    let result = ""
    let indent = ""

    htmlString.split(/>\s*</).forEach((element) => {
      if (/^\/\w/.test(element)) {
        indent = indent.substring(tab.length)
      }

      result += indent + "<" + element + ">\n"

      // The first chunk still carries its leading "<", so allow for it, and
      // lower-case the name so that tags like <BR> are recognised as void.
      const tagName = (
        element.match(/^<?([a-z]*)/i)?.[1] ?? ""
      ).toLowerCase()

      if (/^<?\w[^>]*[^/]$/.test(element) && !voidTags.includes(tagName)) {
        indent += tab
      }
    })

    // Drop the "<" prepended to the first chunk and the ">\n" appended to the last.
    return result.substring(1, result.length - 2)
  }