NeonEmitter.cpp 80 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631
  1. //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This tablegen backend is responsible for emitting arm_neon.h, which includes
  10. // a declaration and definition of each function specified by the ARM NEON
  11. // compiler interface. See ARM document DUI0348B.
  12. //
  13. // Each NEON instruction is implemented in terms of 1 or more functions which
  14. // are suffixed with the element type of the input vectors. Functions may be
  15. // implemented in terms of generic vector operations such as +, *, -, etc. or
  16. // by calling a __builtin_-prefixed function which will be handled by clang's
  17. // CodeGen library.
  18. //
  19. // Additional validation code can be generated by this file when runHeader() is
  20. // called, rather than the normal run() entry point.
  21. //
  22. // See also the documentation in include/clang/Basic/arm_neon.td.
  23. //
  24. //===----------------------------------------------------------------------===//
  25. #include "TableGenBackends.h"
  26. #include "llvm/ADT/ArrayRef.h"
  27. #include "llvm/ADT/DenseMap.h"
  28. #include "llvm/ADT/None.h"
  29. #include "llvm/ADT/Optional.h"
  30. #include "llvm/ADT/STLExtras.h"
  31. #include "llvm/ADT/SmallVector.h"
  32. #include "llvm/ADT/StringExtras.h"
  33. #include "llvm/ADT/StringRef.h"
  34. #include "llvm/Support/Casting.h"
  35. #include "llvm/Support/ErrorHandling.h"
  36. #include "llvm/Support/raw_ostream.h"
  37. #include "llvm/TableGen/Error.h"
  38. #include "llvm/TableGen/Record.h"
  39. #include "llvm/TableGen/SetTheory.h"
  40. #include <algorithm>
  41. #include <cassert>
  42. #include <cctype>
  43. #include <cstddef>
  44. #include <cstdint>
  45. #include <deque>
  46. #include <map>
  47. #include <set>
  48. #include <sstream>
  49. #include <string>
  50. #include <utility>
  51. #include <vector>
  52. using namespace llvm;
  53. namespace {
  54. // While globals are generally bad, this one allows us to perform assertions
  55. // liberally and somehow still trace them back to the def they indirectly
  56. // came from.
  57. static Record *CurrentRecord = nullptr;
  58. static void assert_with_loc(bool Assertion, const std::string &Str) {
  59. if (!Assertion) {
  60. if (CurrentRecord)
  61. PrintFatalError(CurrentRecord->getLoc(), Str);
  62. else
  63. PrintFatalError(Str);
  64. }
  65. }
  /// The instruction class an intrinsic belongs to.
  enum ClassKind {
    ClassNone,

    // Generic integer instruction, e.g., "i8" suffix.
    ClassI,

    // Signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix.
    ClassS,

    // Width-specific instruction, e.g., "8" suffix.
    ClassW,

    // Bitcast arguments with enum argument to specify type.
    ClassB,

    // Logical instructions which are op instructions but for which we must
    // not emit any suffix in our tests.
    ClassL,

    // Instructions which we do not test since they are not TRUE
    // instructions.
    ClassNoTest
  };
  /// NeonTypeFlags - Flags identifying the types of overloaded Neon builtins.
  /// These must be kept in sync with the flags in
  /// include/clang/Basic/TargetBuiltins.h.
  namespace NeonTypeFlags {

  // Bit layout of a flags value: the low four bits hold an EltType, followed
  // by the "unsigned" bit and the "quad" bit.
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };

  // Element type codes; the numeric values follow declaration order.
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Poly64,
    Poly128,
    Float16,
    Float32,
    Float64,
    BFloat16
  };

  } // end namespace NeonTypeFlags

  class NeonEmitter;
  99. //===----------------------------------------------------------------------===//
  100. // TypeSpec
  101. //===----------------------------------------------------------------------===//
  102. /// A TypeSpec is just a simple wrapper around a string, but gets its own type
  103. /// for strong typing purposes.
  104. ///
  105. /// A TypeSpec can be used to create a type.
  106. class TypeSpec : public std::string {
  107. public:
  108. static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) {
  109. std::vector<TypeSpec> Ret;
  110. TypeSpec Acc;
  111. for (char I : Str.str()) {
  112. if (islower(I)) {
  113. Acc.push_back(I);
  114. Ret.push_back(TypeSpec(Acc));
  115. Acc.clear();
  116. } else {
  117. Acc.push_back(I);
  118. }
  119. }
  120. return Ret;
  121. }
  122. };
  123. //===----------------------------------------------------------------------===//
  124. // Type
  125. //===----------------------------------------------------------------------===//
  126. /// A Type. Not much more to say here.
  127. class Type {
  128. private:
  129. TypeSpec TS;
  130. enum TypeKind {
  131. Void,
  132. Float,
  133. SInt,
  134. UInt,
  135. Poly,
  136. BFloat16,
  137. };
  138. TypeKind Kind;
  139. bool Immediate, Constant, Pointer;
  140. // ScalarForMangling and NoManglingQ are really not suited to live here as
  141. // they are not related to the type. But they live in the TypeSpec (not the
  142. // prototype), so this is really the only place to store them.
  143. bool ScalarForMangling, NoManglingQ;
  144. unsigned Bitwidth, ElementBitwidth, NumVectors;
  145. public:
  146. Type()
  147. : Kind(Void), Immediate(false), Constant(false),
  148. Pointer(false), ScalarForMangling(false), NoManglingQ(false),
  149. Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}
  150. Type(TypeSpec TS, StringRef CharMods)
  151. : TS(std::move(TS)), Kind(Void), Immediate(false),
  152. Constant(false), Pointer(false), ScalarForMangling(false),
  153. NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {
  154. applyModifiers(CharMods);
  155. }
  156. /// Returns a type representing "void".
  157. static Type getVoid() { return Type(); }
  158. bool operator==(const Type &Other) const { return str() == Other.str(); }
  159. bool operator!=(const Type &Other) const { return !operator==(Other); }
  160. //
  161. // Query functions
  162. //
  163. bool isScalarForMangling() const { return ScalarForMangling; }
  164. bool noManglingQ() const { return NoManglingQ; }
  165. bool isPointer() const { return Pointer; }
  166. bool isValue() const { return !isVoid() && !isPointer(); }
  167. bool isScalar() const { return isValue() && NumVectors == 0; }
  168. bool isVector() const { return isValue() && NumVectors > 0; }
  169. bool isConstPointer() const { return Constant; }
  170. bool isFloating() const { return Kind == Float; }
  171. bool isInteger() const { return Kind == SInt || Kind == UInt; }
  172. bool isPoly() const { return Kind == Poly; }
  173. bool isSigned() const { return Kind == SInt; }
  174. bool isImmediate() const { return Immediate; }
  175. bool isFloat() const { return isFloating() && ElementBitwidth == 32; }
  176. bool isDouble() const { return isFloating() && ElementBitwidth == 64; }
  177. bool isHalf() const { return isFloating() && ElementBitwidth == 16; }
  178. bool isChar() const { return ElementBitwidth == 8; }
  179. bool isShort() const { return isInteger() && ElementBitwidth == 16; }
  180. bool isInt() const { return isInteger() && ElementBitwidth == 32; }
  181. bool isLong() const { return isInteger() && ElementBitwidth == 64; }
  182. bool isVoid() const { return Kind == Void; }
  183. bool isBFloat16() const { return Kind == BFloat16; }
  184. unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
  185. unsigned getSizeInBits() const { return Bitwidth; }
  186. unsigned getElementSizeInBits() const { return ElementBitwidth; }
  187. unsigned getNumVectors() const { return NumVectors; }
  188. //
  189. // Mutator functions
  190. //
  191. void makeUnsigned() {
  192. assert(!isVoid() && "not a potentially signed type");
  193. Kind = UInt;
  194. }
  195. void makeSigned() {
  196. assert(!isVoid() && "not a potentially signed type");
  197. Kind = SInt;
  198. }
  199. void makeInteger(unsigned ElemWidth, bool Sign) {
  200. assert(!isVoid() && "converting void to int probably not useful");
  201. Kind = Sign ? SInt : UInt;
  202. Immediate = false;
  203. ElementBitwidth = ElemWidth;
  204. }
  205. void makeImmediate(unsigned ElemWidth) {
  206. Kind = SInt;
  207. Immediate = true;
  208. ElementBitwidth = ElemWidth;
  209. }
  210. void makeScalar() {
  211. Bitwidth = ElementBitwidth;
  212. NumVectors = 0;
  213. }
  214. void makeOneVector() {
  215. assert(isVector());
  216. NumVectors = 1;
  217. }
  218. void make32BitElement() {
  219. assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!");
  220. ElementBitwidth = 32;
  221. }
  222. void doubleLanes() {
  223. assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
  224. Bitwidth = 128;
  225. }
  226. void halveLanes() {
  227. assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!");
  228. Bitwidth = 64;
  229. }
  230. /// Return the C string representation of a type, which is the typename
  231. /// defined in stdint.h or arm_neon.h.
  232. std::string str() const;
  233. /// Return the string representation of a type, which is an encoded
  234. /// string for passing to the BUILTIN() macro in Builtins.def.
  235. std::string builtin_str() const;
  236. /// Return the value in NeonTypeFlags for this type.
  237. unsigned getNeonEnum() const;
  238. /// Parse a type from a stdint.h or arm_neon.h typedef name,
  239. /// for example uint32x2_t or int64_t.
  240. static Type fromTypedefName(StringRef Name);
  241. private:
  242. /// Creates the type based on the typespec string in TS.
  243. /// Sets "Quad" to true if the "Q" or "H" modifiers were
  244. /// seen. This is needed by applyModifier as some modifiers
  245. /// only take effect if the type size was changed by "Q" or "H".
  246. void applyTypespec(bool &Quad);
  247. /// Applies prototype modifiers to the type.
  248. void applyModifiers(StringRef Mods);
  249. };
  250. //===----------------------------------------------------------------------===//
  251. // Variable
  252. //===----------------------------------------------------------------------===//
  253. /// A variable is a simple class that just has a type and a name.
  254. class Variable {
  255. Type T;
  256. std::string N;
  257. public:
  258. Variable() : T(Type::getVoid()) {}
  259. Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {}
  260. Type getType() const { return T; }
  261. std::string getName() const { return "__" + N; }
  262. };
  263. //===----------------------------------------------------------------------===//
  264. // Intrinsic
  265. //===----------------------------------------------------------------------===//
  266. /// The main grunt class. This represents an instantiation of an intrinsic with
  267. /// a particular typespec and prototype.
  268. class Intrinsic {
  269. /// The Record this intrinsic was created from.
  270. Record *R;
  271. /// The unmangled name.
  272. std::string Name;
  273. /// The input and output typespecs. InTS == OutTS except when
  274. /// CartesianProductWith is non-empty - this is the case for vreinterpret.
  275. TypeSpec OutTS, InTS;
  276. /// The base class kind. Most intrinsics use ClassS, which has full type
  277. /// info for integers (s32/u32). Some use ClassI, which doesn't care about
  278. /// signedness (i32), while some (ClassB) have no type at all, only a width
  279. /// (32).
  280. ClassKind CK;
  281. /// The list of DAGs for the body. May be empty, in which case we should
  282. /// emit a builtin call.
  283. ListInit *Body;
  284. /// The architectural #ifdef guard.
  285. std::string Guard;
  286. /// Set if the Unavailable bit is 1. This means we don't generate a body,
  287. /// just an "unavailable" attribute on a declaration.
  288. bool IsUnavailable;
  289. /// Is this intrinsic safe for big-endian? or does it need its arguments
  290. /// reversing?
  291. bool BigEndianSafe;
  292. /// The types of return value [0] and parameters [1..].
  293. std::vector<Type> Types;
  294. /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls.
  295. int PolymorphicKeyType;
  296. /// The local variables defined.
  297. std::map<std::string, Variable> Variables;
  298. /// NeededEarly - set if any other intrinsic depends on this intrinsic.
  299. bool NeededEarly;
  300. /// UseMacro - set if we should implement using a macro or unset for a
  301. /// function.
  302. bool UseMacro;
  303. /// The set of intrinsics that this intrinsic uses/requires.
  304. std::set<Intrinsic *> Dependencies;
  305. /// The "base type", which is Type('d', OutTS). InBaseType is only
  306. /// different if CartesianProductWith is non-empty (for vreinterpret).
  307. Type BaseType, InBaseType;
  308. /// The return variable.
  309. Variable RetVar;
  310. /// A postfix to apply to every variable. Defaults to "".
  311. std::string VariablePostfix;
  312. NeonEmitter &Emitter;
  313. std::stringstream OS;
  314. bool isBigEndianSafe() const {
  315. if (BigEndianSafe)
  316. return true;
  317. for (const auto &T : Types){
  318. if (T.isVector() && T.getNumElements() > 1)
  319. return false;
  320. }
  321. return true;
  322. }
  323. public:
  324. Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
  325. TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
  326. StringRef Guard, bool IsUnavailable, bool BigEndianSafe)
  327. : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body),
  328. Guard(Guard.str()), IsUnavailable(IsUnavailable),
  329. BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false),
  330. UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."),
  331. Emitter(Emitter) {
  332. // Modify the TypeSpec per-argument to get a concrete Type, and create
  333. // known variables for each.
  334. // Types[0] is the return value.
  335. unsigned Pos = 0;
  336. Types.emplace_back(OutTS, getNextModifiers(Proto, Pos));
  337. StringRef Mods = getNextModifiers(Proto, Pos);
  338. while (!Mods.empty()) {
  339. Types.emplace_back(InTS, Mods);
  340. if (Mods.contains('!'))
  341. PolymorphicKeyType = Types.size() - 1;
  342. Mods = getNextModifiers(Proto, Pos);
  343. }
  344. for (auto Type : Types) {
  345. // If this builtin takes an immediate argument, we need to #define it rather
  346. // than use a standard declaration, so that SemaChecking can range check
  347. // the immediate passed by the user.
  348. // Pointer arguments need to use macros to avoid hiding aligned attributes
  349. // from the pointer type.
  350. // It is not permitted to pass or return an __fp16 by value, so intrinsics
  351. // taking a scalar float16_t must be implemented as macros.
  352. if (Type.isImmediate() || Type.isPointer() ||
  353. (Type.isScalar() && Type.isHalf()))
  354. UseMacro = true;
  355. }
  356. }
  357. /// Get the Record that this intrinsic is based off.
  358. Record *getRecord() const { return R; }
  359. /// Get the set of Intrinsics that this intrinsic calls.
  360. /// this is the set of immediate dependencies, NOT the
  361. /// transitive closure.
  362. const std::set<Intrinsic *> &getDependencies() const { return Dependencies; }
  363. /// Get the architectural guard string (#ifdef).
  364. std::string getGuard() const { return Guard; }
  365. /// Get the non-mangled name.
  366. std::string getName() const { return Name; }
  367. /// Return true if the intrinsic takes an immediate operand.
  368. bool hasImmediate() const {
  369. return llvm::any_of(Types, [](const Type &T) { return T.isImmediate(); });
  370. }
  371. /// Return the parameter index of the immediate operand.
  372. unsigned getImmediateIdx() const {
  373. for (unsigned Idx = 0; Idx < Types.size(); ++Idx)
  374. if (Types[Idx].isImmediate())
  375. return Idx - 1;
  376. llvm_unreachable("Intrinsic has no immediate");
  377. }
  378. unsigned getNumParams() const { return Types.size() - 1; }
  379. Type getReturnType() const { return Types[0]; }
  380. Type getParamType(unsigned I) const { return Types[I + 1]; }
  381. Type getBaseType() const { return BaseType; }
  382. Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; }
  383. /// Return true if the prototype has a scalar argument.
  384. bool protoHasScalar() const;
  385. /// Return the index that parameter PIndex will sit at
  386. /// in a generated function call. This is often just PIndex,
  387. /// but may not be as things such as multiple-vector operands
  388. /// and sret parameters need to be taken into accont.
  389. unsigned getGeneratedParamIdx(unsigned PIndex) {
  390. unsigned Idx = 0;
  391. if (getReturnType().getNumVectors() > 1)
  392. // Multiple vectors are passed as sret.
  393. ++Idx;
  394. for (unsigned I = 0; I < PIndex; ++I)
  395. Idx += std::max(1U, getParamType(I).getNumVectors());
  396. return Idx;
  397. }
  398. bool hasBody() const { return Body && !Body->getValues().empty(); }
  399. void setNeededEarly() { NeededEarly = true; }
  400. bool operator<(const Intrinsic &Other) const {
  401. // Sort lexicographically on a two-tuple (Guard, Name)
  402. if (Guard != Other.Guard)
  403. return Guard < Other.Guard;
  404. return Name < Other.Name;
  405. }
  406. ClassKind getClassKind(bool UseClassBIfScalar = false) {
  407. if (UseClassBIfScalar && !protoHasScalar())
  408. return ClassB;
  409. return CK;
  410. }
  411. /// Return the name, mangled with type information.
  412. /// If ForceClassS is true, use ClassS (u32/s32) instead
  413. /// of the intrinsic's own type class.
  414. std::string getMangledName(bool ForceClassS = false) const;
  415. /// Return the type code for a builtin function call.
  416. std::string getInstTypeCode(Type T, ClassKind CK) const;
  417. /// Return the type string for a BUILTIN() macro in Builtins.def.
  418. std::string getBuiltinTypeStr();
  419. /// Generate the intrinsic, returning code.
  420. std::string generate();
  421. /// Perform type checking and populate the dependency graph, but
  422. /// don't generate code yet.
  423. void indexBody();
  424. private:
  425. StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const;
  426. std::string mangleName(std::string Name, ClassKind CK) const;
  427. void initVariables();
  428. std::string replaceParamsIn(std::string S);
  429. void emitBodyAsBuiltinCall();
  430. void generateImpl(bool ReverseArguments,
  431. StringRef NamePrefix, StringRef CallPrefix);
  432. void emitReturn();
  433. void emitBody(StringRef CallPrefix);
  434. void emitShadowedArgs();
  435. void emitArgumentReversal();
  436. void emitReturnVarDecl();
  437. void emitReturnReversal();
  438. void emitReverseVariable(Variable &Dest, Variable &Src);
  439. void emitNewLine();
  440. void emitClosingBrace();
  441. void emitOpeningBrace();
  442. void emitPrototype(StringRef NamePrefix);
  443. class DagEmitter {
  444. Intrinsic &Intr;
  445. StringRef CallPrefix;
  446. public:
  447. DagEmitter(Intrinsic &Intr, StringRef CallPrefix) :
  448. Intr(Intr), CallPrefix(CallPrefix) {
  449. }
  450. std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName);
  451. std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI);
  452. std::pair<Type, std::string> emitDagSplat(DagInit *DI);
  453. std::pair<Type, std::string> emitDagDup(DagInit *DI);
  454. std::pair<Type, std::string> emitDagDupTyped(DagInit *DI);
  455. std::pair<Type, std::string> emitDagShuffle(DagInit *DI);
  456. std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast);
  457. std::pair<Type, std::string> emitDagCall(DagInit *DI,
  458. bool MatchMangledName);
  459. std::pair<Type, std::string> emitDagNameReplace(DagInit *DI);
  460. std::pair<Type, std::string> emitDagLiteral(DagInit *DI);
  461. std::pair<Type, std::string> emitDagOp(DagInit *DI);
  462. std::pair<Type, std::string> emitDag(DagInit *DI);
  463. };
  464. };
  465. //===----------------------------------------------------------------------===//
  466. // NeonEmitter
  467. //===----------------------------------------------------------------------===//
  468. class NeonEmitter {
  469. RecordKeeper &Records;
  470. DenseMap<Record *, ClassKind> ClassMap;
  471. std::map<std::string, std::deque<Intrinsic>> IntrinsicMap;
  472. unsigned UniqueNumber;
  473. void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out);
  474. void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs);
  475. void genOverloadTypeCheckCode(raw_ostream &OS,
  476. SmallVectorImpl<Intrinsic *> &Defs);
  477. void genIntrinsicRangeCheckCode(raw_ostream &OS,
  478. SmallVectorImpl<Intrinsic *> &Defs);
  479. public:
  480. /// Called by Intrinsic - this attempts to get an intrinsic that takes
  481. /// the given types as arguments.
  482. Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types,
  483. Optional<std::string> MangledName);
  484. /// Called by Intrinsic - returns a globally-unique number.
  485. unsigned getUniqueNumber() { return UniqueNumber++; }
  486. NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) {
  487. Record *SI = R.getClass("SInst");
  488. Record *II = R.getClass("IInst");
  489. Record *WI = R.getClass("WInst");
  490. Record *SOpI = R.getClass("SOpInst");
  491. Record *IOpI = R.getClass("IOpInst");
  492. Record *WOpI = R.getClass("WOpInst");
  493. Record *LOpI = R.getClass("LOpInst");
  494. Record *NoTestOpI = R.getClass("NoTestOpInst");
  495. ClassMap[SI] = ClassS;
  496. ClassMap[II] = ClassI;
  497. ClassMap[WI] = ClassW;
  498. ClassMap[SOpI] = ClassS;
  499. ClassMap[IOpI] = ClassI;
  500. ClassMap[WOpI] = ClassW;
  501. ClassMap[LOpI] = ClassL;
  502. ClassMap[NoTestOpI] = ClassNoTest;
  503. }
  504. // Emit arm_neon.h.inc
  505. void run(raw_ostream &o);
  506. // Emit arm_fp16.h.inc
  507. void runFP16(raw_ostream &o);
  508. // Emit arm_bf16.h.inc
  509. void runBF16(raw_ostream &o);
  510. // Emit all the __builtin prototypes used in arm_neon.h, arm_fp16.h and
  511. // arm_bf16.h
  512. void runHeader(raw_ostream &o);
  513. };
  514. } // end anonymous namespace
  515. //===----------------------------------------------------------------------===//
  516. // Type implementation
  517. //===----------------------------------------------------------------------===//
  518. std::string Type::str() const {
  519. if (isVoid())
  520. return "void";
  521. std::string S;
  522. if (isInteger() && !isSigned())
  523. S += "u";
  524. if (isPoly())
  525. S += "poly";
  526. else if (isFloating())
  527. S += "float";
  528. else if (isBFloat16())
  529. S += "bfloat";
  530. else
  531. S += "int";
  532. S += utostr(ElementBitwidth);
  533. if (isVector())
  534. S += "x" + utostr(getNumElements());
  535. if (NumVectors > 1)
  536. S += "x" + utostr(NumVectors);
  537. S += "_t";
  538. if (Constant)
  539. S += " const";
  540. if (Pointer)
  541. S += " *";
  542. return S;
  543. }
  544. std::string Type::builtin_str() const {
  545. std::string S;
  546. if (isVoid())
  547. return "v";
  548. if (isPointer()) {
  549. // All pointers are void pointers.
  550. S = "v";
  551. if (isConstPointer())
  552. S += "C";
  553. S += "*";
  554. return S;
  555. } else if (isInteger())
  556. switch (ElementBitwidth) {
  557. case 8: S += "c"; break;
  558. case 16: S += "s"; break;
  559. case 32: S += "i"; break;
  560. case 64: S += "Wi"; break;
  561. case 128: S += "LLLi"; break;
  562. default: llvm_unreachable("Unhandled case!");
  563. }
  564. else if (isBFloat16()) {
  565. assert(ElementBitwidth == 16 && "BFloat16 can only be 16 bits");
  566. S += "y";
  567. } else
  568. switch (ElementBitwidth) {
  569. case 16: S += "h"; break;
  570. case 32: S += "f"; break;
  571. case 64: S += "d"; break;
  572. default: llvm_unreachable("Unhandled case!");
  573. }
  574. // FIXME: NECESSARY???????????????????????????????????????????????????????????????????????
  575. if (isChar() && !isPointer() && isSigned())
  576. // Make chars explicitly signed.
  577. S = "S" + S;
  578. else if (isInteger() && !isSigned())
  579. S = "U" + S;
  580. // Constant indices are "int", but have the "constant expression" modifier.
  581. if (isImmediate()) {
  582. assert(isInteger() && isSigned());
  583. S = "I" + S;
  584. }
  585. if (isScalar())
  586. return S;
  587. std::string Ret;
  588. for (unsigned I = 0; I < NumVectors; ++I)
  589. Ret += "V" + utostr(getNumElements()) + S;
  590. return Ret;
  591. }
  592. unsigned Type::getNeonEnum() const {
  593. unsigned Addend;
  594. switch (ElementBitwidth) {
  595. case 8: Addend = 0; break;
  596. case 16: Addend = 1; break;
  597. case 32: Addend = 2; break;
  598. case 64: Addend = 3; break;
  599. case 128: Addend = 4; break;
  600. default: llvm_unreachable("Unhandled element bitwidth!");
  601. }
  602. unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend;
  603. if (isPoly()) {
  604. // Adjustment needed because Poly32 doesn't exist.
  605. if (Addend >= 2)
  606. --Addend;
  607. Base = (unsigned)NeonTypeFlags::Poly8 + Addend;
  608. }
  609. if (isFloating()) {
  610. assert(Addend != 0 && "Float8 doesn't exist!");
  611. Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1);
  612. }
  613. if (isBFloat16()) {
  614. assert(Addend == 1 && "BFloat16 is only 16 bit");
  615. Base = (unsigned)NeonTypeFlags::BFloat16;
  616. }
  617. if (Bitwidth == 128)
  618. Base |= (unsigned)NeonTypeFlags::QuadFlag;
  619. if (isInteger() && !isSigned())
  620. Base |= (unsigned)NeonTypeFlags::UnsignedFlag;
  621. return Base;
  622. }
  623. Type Type::fromTypedefName(StringRef Name) {
  624. Type T;
  625. T.Kind = SInt;
  626. if (Name.front() == 'u') {
  627. T.Kind = UInt;
  628. Name = Name.drop_front();
  629. }
  630. if (Name.startswith("float")) {
  631. T.Kind = Float;
  632. Name = Name.drop_front(5);
  633. } else if (Name.startswith("poly")) {
  634. T.Kind = Poly;
  635. Name = Name.drop_front(4);
  636. } else if (Name.startswith("bfloat")) {
  637. T.Kind = BFloat16;
  638. Name = Name.drop_front(6);
  639. } else {
  640. assert(Name.startswith("int"));
  641. Name = Name.drop_front(3);
  642. }
  643. unsigned I = 0;
  644. for (I = 0; I < Name.size(); ++I) {
  645. if (!isdigit(Name[I]))
  646. break;
  647. }
  648. Name.substr(0, I).getAsInteger(10, T.ElementBitwidth);
  649. Name = Name.drop_front(I);
  650. T.Bitwidth = T.ElementBitwidth;
  651. T.NumVectors = 1;
  652. if (Name.front() == 'x') {
  653. Name = Name.drop_front();
  654. unsigned I = 0;
  655. for (I = 0; I < Name.size(); ++I) {
  656. if (!isdigit(Name[I]))
  657. break;
  658. }
  659. unsigned NumLanes;
  660. Name.substr(0, I).getAsInteger(10, NumLanes);
  661. Name = Name.drop_front(I);
  662. T.Bitwidth = T.ElementBitwidth * NumLanes;
  663. } else {
  664. // Was scalar.
  665. T.NumVectors = 0;
  666. }
  667. if (Name.front() == 'x') {
  668. Name = Name.drop_front();
  669. unsigned I = 0;
  670. for (I = 0; I < Name.size(); ++I) {
  671. if (!isdigit(Name[I]))
  672. break;
  673. }
  674. Name.substr(0, I).getAsInteger(10, T.NumVectors);
  675. Name = Name.drop_front(I);
  676. }
  677. assert(Name.startswith("_t") && "Malformed typedef!");
  678. return T;
  679. }
  680. void Type::applyTypespec(bool &Quad) {
  681. std::string S = TS;
  682. ScalarForMangling = false;
  683. Kind = SInt;
  684. ElementBitwidth = ~0U;
  685. NumVectors = 1;
  686. for (char I : S) {
  687. switch (I) {
  688. case 'S':
  689. ScalarForMangling = true;
  690. break;
  691. case 'H':
  692. NoManglingQ = true;
  693. Quad = true;
  694. break;
  695. case 'Q':
  696. Quad = true;
  697. break;
  698. case 'P':
  699. Kind = Poly;
  700. break;
  701. case 'U':
  702. Kind = UInt;
  703. break;
  704. case 'c':
  705. ElementBitwidth = 8;
  706. break;
  707. case 'h':
  708. Kind = Float;
  709. LLVM_FALLTHROUGH;
  710. case 's':
  711. ElementBitwidth = 16;
  712. break;
  713. case 'f':
  714. Kind = Float;
  715. LLVM_FALLTHROUGH;
  716. case 'i':
  717. ElementBitwidth = 32;
  718. break;
  719. case 'd':
  720. Kind = Float;
  721. LLVM_FALLTHROUGH;
  722. case 'l':
  723. ElementBitwidth = 64;
  724. break;
  725. case 'k':
  726. ElementBitwidth = 128;
  727. // Poly doesn't have a 128x1 type.
  728. if (isPoly())
  729. NumVectors = 0;
  730. break;
  731. case 'b':
  732. Kind = BFloat16;
  733. ElementBitwidth = 16;
  734. break;
  735. default:
  736. llvm_unreachable("Unhandled type code!");
  737. }
  738. }
  739. assert(ElementBitwidth != ~0U && "Bad element bitwidth!");
  740. Bitwidth = Quad ? 128 : 64;
  741. }
  742. void Type::applyModifiers(StringRef Mods) {
  743. bool AppliedQuad = false;
  744. applyTypespec(AppliedQuad);
  745. for (char Mod : Mods) {
  746. switch (Mod) {
  747. case '.':
  748. break;
  749. case 'v':
  750. Kind = Void;
  751. break;
  752. case 'S':
  753. Kind = SInt;
  754. break;
  755. case 'U':
  756. Kind = UInt;
  757. break;
  758. case 'B':
  759. Kind = BFloat16;
  760. ElementBitwidth = 16;
  761. break;
  762. case 'F':
  763. Kind = Float;
  764. break;
  765. case 'P':
  766. Kind = Poly;
  767. break;
  768. case '>':
  769. assert(ElementBitwidth < 128);
  770. ElementBitwidth *= 2;
  771. break;
  772. case '<':
  773. assert(ElementBitwidth > 8);
  774. ElementBitwidth /= 2;
  775. break;
  776. case '1':
  777. NumVectors = 0;
  778. break;
  779. case '2':
  780. NumVectors = 2;
  781. break;
  782. case '3':
  783. NumVectors = 3;
  784. break;
  785. case '4':
  786. NumVectors = 4;
  787. break;
  788. case '*':
  789. Pointer = true;
  790. break;
  791. case 'c':
  792. Constant = true;
  793. break;
  794. case 'Q':
  795. Bitwidth = 128;
  796. break;
  797. case 'q':
  798. Bitwidth = 64;
  799. break;
  800. case 'I':
  801. Kind = SInt;
  802. ElementBitwidth = Bitwidth = 32;
  803. NumVectors = 0;
  804. Immediate = true;
  805. break;
  806. case 'p':
  807. if (isPoly())
  808. Kind = UInt;
  809. break;
  810. case '!':
  811. // Key type, handled elsewhere.
  812. break;
  813. default:
  814. llvm_unreachable("Unhandled character!");
  815. }
  816. }
  817. }
  818. //===----------------------------------------------------------------------===//
  819. // Intrinsic implementation
  820. //===----------------------------------------------------------------------===//
  821. StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const {
  822. if (Proto.size() == Pos)
  823. return StringRef();
  824. else if (Proto[Pos] != '(')
  825. return Proto.substr(Pos++, 1);
  826. size_t Start = Pos + 1;
  827. size_t End = Proto.find(')', Start);
  828. assert_with_loc(End != StringRef::npos, "unmatched modifier group paren");
  829. Pos = End + 1;
  830. return Proto.slice(Start, End);
  831. }
  832. std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
  833. char typeCode = '\0';
  834. bool printNumber = true;
  835. if (CK == ClassB)
  836. return "";
  837. if (T.isBFloat16())
  838. return "bf16";
  839. if (T.isPoly())
  840. typeCode = 'p';
  841. else if (T.isInteger())
  842. typeCode = T.isSigned() ? 's' : 'u';
  843. else
  844. typeCode = 'f';
  845. if (CK == ClassI) {
  846. switch (typeCode) {
  847. default:
  848. break;
  849. case 's':
  850. case 'u':
  851. case 'p':
  852. typeCode = 'i';
  853. break;
  854. }
  855. }
  856. if (CK == ClassB) {
  857. typeCode = '\0';
  858. }
  859. std::string S;
  860. if (typeCode != '\0')
  861. S.push_back(typeCode);
  862. if (printNumber)
  863. S += utostr(T.getElementSizeInBits());
  864. return S;
  865. }
  866. std::string Intrinsic::getBuiltinTypeStr() {
  867. ClassKind LocalCK = getClassKind(true);
  868. std::string S;
  869. Type RetT = getReturnType();
  870. if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() &&
  871. !RetT.isFloating() && !RetT.isBFloat16())
  872. RetT.makeInteger(RetT.getElementSizeInBits(), false);
  873. // Since the return value must be one type, return a vector type of the
  874. // appropriate width which we will bitcast. An exception is made for
  875. // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
  876. // fashion, storing them to a pointer arg.
  877. if (RetT.getNumVectors() > 1) {
  878. S += "vv*"; // void result with void* first argument
  879. } else {
  880. if (RetT.isPoly())
  881. RetT.makeInteger(RetT.getElementSizeInBits(), false);
  882. if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned())
  883. RetT.makeSigned();
  884. if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar())
  885. // Cast to vector of 8-bit elements.
  886. RetT.makeInteger(8, true);
  887. S += RetT.builtin_str();
  888. }
  889. for (unsigned I = 0; I < getNumParams(); ++I) {
  890. Type T = getParamType(I);
  891. if (T.isPoly())
  892. T.makeInteger(T.getElementSizeInBits(), false);
  893. if (LocalCK == ClassB && !T.isScalar())
  894. T.makeInteger(8, true);
  895. // Halves always get converted to 8-bit elements.
  896. if (T.isHalf() && T.isVector() && !T.isScalarForMangling())
  897. T.makeInteger(8, true);
  898. if (LocalCK == ClassI && T.isInteger())
  899. T.makeSigned();
  900. if (hasImmediate() && getImmediateIdx() == I)
  901. T.makeImmediate(32);
  902. S += T.builtin_str();
  903. }
  904. // Extra constant integer to hold type class enum for this function, e.g. s8
  905. if (LocalCK == ClassB)
  906. S += "i";
  907. return S;
  908. }
  909. std::string Intrinsic::getMangledName(bool ForceClassS) const {
  910. // Check if the prototype has a scalar operand with the type of the vector
  911. // elements. If not, bitcasting the args will take care of arg checking.
  912. // The actual signedness etc. will be taken care of with special enums.
  913. ClassKind LocalCK = CK;
  914. if (!protoHasScalar())
  915. LocalCK = ClassB;
  916. return mangleName(Name, ForceClassS ? ClassS : LocalCK);
  917. }
  918. std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const {
  919. std::string typeCode = getInstTypeCode(BaseType, LocalCK);
  920. std::string S = Name;
  921. if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" ||
  922. Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32" ||
  923. Name == "vcvt_f32_bf16")
  924. return Name;
  925. if (!typeCode.empty()) {
  926. // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN.
  927. if (Name.size() >= 3 && isdigit(Name.back()) &&
  928. Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_')
  929. S.insert(S.length() - 3, "_" + typeCode);
  930. else
  931. S += "_" + typeCode;
  932. }
  933. if (BaseType != InBaseType) {
  934. // A reinterpret - out the input base type at the end.
  935. S += "_" + getInstTypeCode(InBaseType, LocalCK);
  936. }
  937. if (LocalCK == ClassB)
  938. S += "_v";
  939. // Insert a 'q' before the first '_' character so that it ends up before
  940. // _lane or _n on vector-scalar operations.
  941. if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) {
  942. size_t Pos = S.find('_');
  943. S.insert(Pos, "q");
  944. }
  945. char Suffix = '\0';
  946. if (BaseType.isScalarForMangling()) {
  947. switch (BaseType.getElementSizeInBits()) {
  948. case 8: Suffix = 'b'; break;
  949. case 16: Suffix = 'h'; break;
  950. case 32: Suffix = 's'; break;
  951. case 64: Suffix = 'd'; break;
  952. default: llvm_unreachable("Bad suffix!");
  953. }
  954. }
  955. if (Suffix != '\0') {
  956. size_t Pos = S.find('_');
  957. S.insert(Pos, &Suffix, 1);
  958. }
  959. return S;
  960. }
  961. std::string Intrinsic::replaceParamsIn(std::string S) {
  962. while (S.find('$') != std::string::npos) {
  963. size_t Pos = S.find('$');
  964. size_t End = Pos + 1;
  965. while (isalpha(S[End]))
  966. ++End;
  967. std::string VarName = S.substr(Pos + 1, End - Pos - 1);
  968. assert_with_loc(Variables.find(VarName) != Variables.end(),
  969. "Variable not defined!");
  970. S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName());
  971. }
  972. return S;
  973. }
  974. void Intrinsic::initVariables() {
  975. Variables.clear();
  976. // Modify the TypeSpec per-argument to get a concrete Type, and create
  977. // known variables for each.
  978. for (unsigned I = 1; I < Types.size(); ++I) {
  979. char NameC = '0' + (I - 1);
  980. std::string Name = "p";
  981. Name.push_back(NameC);
  982. Variables[Name] = Variable(Types[I], Name + VariablePostfix);
  983. }
  984. RetVar = Variable(Types[0], "ret" + VariablePostfix);
  985. }
  986. void Intrinsic::emitPrototype(StringRef NamePrefix) {
  987. if (UseMacro)
  988. OS << "#define ";
  989. else
  990. OS << "__ai " << Types[0].str() << " ";
  991. OS << NamePrefix.str() << mangleName(Name, ClassS) << "(";
  992. for (unsigned I = 0; I < getNumParams(); ++I) {
  993. if (I != 0)
  994. OS << ", ";
  995. char NameC = '0' + I;
  996. std::string Name = "p";
  997. Name.push_back(NameC);
  998. assert(Variables.find(Name) != Variables.end());
  999. Variable &V = Variables[Name];
  1000. if (!UseMacro)
  1001. OS << V.getType().str() << " ";
  1002. OS << V.getName();
  1003. }
  1004. OS << ")";
  1005. }
  1006. void Intrinsic::emitOpeningBrace() {
  1007. if (UseMacro)
  1008. OS << " __extension__ ({";
  1009. else
  1010. OS << " {";
  1011. emitNewLine();
  1012. }
  1013. void Intrinsic::emitClosingBrace() {
  1014. if (UseMacro)
  1015. OS << "})";
  1016. else
  1017. OS << "}";
  1018. }
  1019. void Intrinsic::emitNewLine() {
  1020. if (UseMacro)
  1021. OS << " \\\n";
  1022. else
  1023. OS << "\n";
  1024. }
  1025. void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) {
  1026. if (Dest.getType().getNumVectors() > 1) {
  1027. emitNewLine();
  1028. for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) {
  1029. OS << " " << Dest.getName() << ".val[" << K << "] = "
  1030. << "__builtin_shufflevector("
  1031. << Src.getName() << ".val[" << K << "], "
  1032. << Src.getName() << ".val[" << K << "]";
  1033. for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J)
  1034. OS << ", " << J;
  1035. OS << ");";
  1036. emitNewLine();
  1037. }
  1038. } else {
  1039. OS << " " << Dest.getName()
  1040. << " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName();
  1041. for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J)
  1042. OS << ", " << J;
  1043. OS << ");";
  1044. emitNewLine();
  1045. }
  1046. }
  1047. void Intrinsic::emitArgumentReversal() {
  1048. if (isBigEndianSafe())
  1049. return;
  1050. // Reverse all vector arguments.
  1051. for (unsigned I = 0; I < getNumParams(); ++I) {
  1052. std::string Name = "p" + utostr(I);
  1053. std::string NewName = "rev" + utostr(I);
  1054. Variable &V = Variables[Name];
  1055. Variable NewV(V.getType(), NewName + VariablePostfix);
  1056. if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1)
  1057. continue;
  1058. OS << " " << NewV.getType().str() << " " << NewV.getName() << ";";
  1059. emitReverseVariable(NewV, V);
  1060. V = NewV;
  1061. }
  1062. }
  1063. void Intrinsic::emitReturnVarDecl() {
  1064. assert(RetVar.getType() == Types[0]);
  1065. // Create a return variable, if we're not void.
  1066. if (!RetVar.getType().isVoid()) {
  1067. OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";";
  1068. emitNewLine();
  1069. }
  1070. }
  1071. void Intrinsic::emitReturnReversal() {
  1072. if (isBigEndianSafe())
  1073. return;
  1074. if (!getReturnType().isVector() || getReturnType().isVoid() ||
  1075. getReturnType().getNumElements() == 1)
  1076. return;
  1077. emitReverseVariable(RetVar, RetVar);
  1078. }
  1079. void Intrinsic::emitShadowedArgs() {
  1080. // Macro arguments are not type-checked like inline function arguments,
  1081. // so assign them to local temporaries to get the right type checking.
  1082. if (!UseMacro)
  1083. return;
  1084. for (unsigned I = 0; I < getNumParams(); ++I) {
  1085. // Do not create a temporary for an immediate argument.
  1086. // That would defeat the whole point of using a macro!
  1087. if (getParamType(I).isImmediate())
  1088. continue;
  1089. // Do not create a temporary for pointer arguments. The input
  1090. // pointer may have an alignment hint.
  1091. if (getParamType(I).isPointer())
  1092. continue;
  1093. std::string Name = "p" + utostr(I);
  1094. assert(Variables.find(Name) != Variables.end());
  1095. Variable &V = Variables[Name];
  1096. std::string NewName = "s" + utostr(I);
  1097. Variable V2(V.getType(), NewName + VariablePostfix);
  1098. OS << " " << V2.getType().str() << " " << V2.getName() << " = "
  1099. << V.getName() << ";";
  1100. emitNewLine();
  1101. V = V2;
  1102. }
  1103. }
  1104. bool Intrinsic::protoHasScalar() const {
  1105. return llvm::any_of(
  1106. Types, [](const Type &T) { return T.isScalar() && !T.isImmediate(); });
  1107. }
  1108. void Intrinsic::emitBodyAsBuiltinCall() {
  1109. std::string S;
  1110. // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
  1111. // sret-like argument.
  1112. bool SRet = getReturnType().getNumVectors() >= 2;
  1113. StringRef N = Name;
  1114. ClassKind LocalCK = CK;
  1115. if (!protoHasScalar())
  1116. LocalCK = ClassB;
  1117. if (!getReturnType().isVoid() && !SRet)
  1118. S += "(" + RetVar.getType().str() + ") ";
  1119. S += "__builtin_neon_" + mangleName(std::string(N), LocalCK) + "(";
  1120. if (SRet)
  1121. S += "&" + RetVar.getName() + ", ";
  1122. for (unsigned I = 0; I < getNumParams(); ++I) {
  1123. Variable &V = Variables["p" + utostr(I)];
  1124. Type T = V.getType();
  1125. // Handle multiple-vector values specially, emitting each subvector as an
  1126. // argument to the builtin.
  1127. if (T.getNumVectors() > 1) {
  1128. // Check if an explicit cast is needed.
  1129. std::string Cast;
  1130. if (LocalCK == ClassB) {
  1131. Type T2 = T;
  1132. T2.makeOneVector();
  1133. T2.makeInteger(8, /*Sign=*/true);
  1134. Cast = "(" + T2.str() + ")";
  1135. }
  1136. for (unsigned J = 0; J < T.getNumVectors(); ++J)
  1137. S += Cast + V.getName() + ".val[" + utostr(J) + "], ";
  1138. continue;
  1139. }
  1140. std::string Arg = V.getName();
  1141. Type CastToType = T;
  1142. // Check if an explicit cast is needed.
  1143. if (CastToType.isVector() &&
  1144. (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling()))) {
  1145. CastToType.makeInteger(8, true);
  1146. Arg = "(" + CastToType.str() + ")" + Arg;
  1147. } else if (CastToType.isVector() && LocalCK == ClassI) {
  1148. if (CastToType.isInteger())
  1149. CastToType.makeSigned();
  1150. Arg = "(" + CastToType.str() + ")" + Arg;
  1151. }
  1152. S += Arg + ", ";
  1153. }
  1154. // Extra constant integer to hold type class enum for this function, e.g. s8
  1155. if (getClassKind(true) == ClassB) {
  1156. S += utostr(getPolymorphicKeyType().getNeonEnum());
  1157. } else {
  1158. // Remove extraneous ", ".
  1159. S.pop_back();
  1160. S.pop_back();
  1161. }
  1162. S += ");";
  1163. std::string RetExpr;
  1164. if (!SRet && !RetVar.getType().isVoid())
  1165. RetExpr = RetVar.getName() + " = ";
  1166. OS << " " << RetExpr << S;
  1167. emitNewLine();
  1168. }
  1169. void Intrinsic::emitBody(StringRef CallPrefix) {
  1170. std::vector<std::string> Lines;
  1171. if (!Body || Body->getValues().empty()) {
  1172. // Nothing specific to output - must output a builtin.
  1173. emitBodyAsBuiltinCall();
  1174. return;
  1175. }
  1176. // We have a list of "things to output". The last should be returned.
  1177. for (auto *I : Body->getValues()) {
  1178. if (StringInit *SI = dyn_cast<StringInit>(I)) {
  1179. Lines.push_back(replaceParamsIn(SI->getAsString()));
  1180. } else if (DagInit *DI = dyn_cast<DagInit>(I)) {
  1181. DagEmitter DE(*this, CallPrefix);
  1182. Lines.push_back(DE.emitDag(DI).second + ";");
  1183. }
  1184. }
  1185. assert(!Lines.empty() && "Empty def?");
  1186. if (!RetVar.getType().isVoid())
  1187. Lines.back().insert(0, RetVar.getName() + " = ");
  1188. for (auto &L : Lines) {
  1189. OS << " " << L;
  1190. emitNewLine();
  1191. }
  1192. }
  1193. void Intrinsic::emitReturn() {
  1194. if (RetVar.getType().isVoid())
  1195. return;
  1196. if (UseMacro)
  1197. OS << " " << RetVar.getName() << ";";
  1198. else
  1199. OS << " return " << RetVar.getName() << ";";
  1200. emitNewLine();
  1201. }
  1202. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) {
  1203. // At this point we should only be seeing a def.
  1204. DefInit *DefI = cast<DefInit>(DI->getOperator());
  1205. std::string Op = DefI->getAsString();
  1206. if (Op == "cast" || Op == "bitcast")
  1207. return emitDagCast(DI, Op == "bitcast");
  1208. if (Op == "shuffle")
  1209. return emitDagShuffle(DI);
  1210. if (Op == "dup")
  1211. return emitDagDup(DI);
  1212. if (Op == "dup_typed")
  1213. return emitDagDupTyped(DI);
  1214. if (Op == "splat")
  1215. return emitDagSplat(DI);
  1216. if (Op == "save_temp")
  1217. return emitDagSaveTemp(DI);
  1218. if (Op == "op")
  1219. return emitDagOp(DI);
  1220. if (Op == "call" || Op == "call_mangled")
  1221. return emitDagCall(DI, Op == "call_mangled");
  1222. if (Op == "name_replace")
  1223. return emitDagNameReplace(DI);
  1224. if (Op == "literal")
  1225. return emitDagLiteral(DI);
  1226. assert_with_loc(false, "Unknown operation!");
  1227. return std::make_pair(Type::getVoid(), "");
  1228. }
  1229. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) {
  1230. std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
  1231. if (DI->getNumArgs() == 2) {
  1232. // Unary op.
  1233. std::pair<Type, std::string> R =
  1234. emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
  1235. return std::make_pair(R.first, Op + R.second);
  1236. } else {
  1237. assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!");
  1238. std::pair<Type, std::string> R1 =
  1239. emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
  1240. std::pair<Type, std::string> R2 =
  1241. emitDagArg(DI->getArg(2), std::string(DI->getArgNameStr(2)));
  1242. assert_with_loc(R1.first == R2.first, "Argument type mismatch!");
  1243. return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second);
  1244. }
  1245. }
  1246. std::pair<Type, std::string>
  1247. Intrinsic::DagEmitter::emitDagCall(DagInit *DI, bool MatchMangledName) {
  1248. std::vector<Type> Types;
  1249. std::vector<std::string> Values;
  1250. for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
  1251. std::pair<Type, std::string> R =
  1252. emitDagArg(DI->getArg(I + 1), std::string(DI->getArgNameStr(I + 1)));
  1253. Types.push_back(R.first);
  1254. Values.push_back(R.second);
  1255. }
  1256. // Look up the called intrinsic.
  1257. std::string N;
  1258. if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0)))
  1259. N = SI->getAsUnquotedString();
  1260. else
  1261. N = emitDagArg(DI->getArg(0), "").second;
  1262. Optional<std::string> MangledName;
  1263. if (MatchMangledName) {
  1264. if (Intr.getRecord()->getValueAsBit("isLaneQ"))
  1265. N += "q";
  1266. MangledName = Intr.mangleName(N, ClassS);
  1267. }
  1268. Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types, MangledName);
  1269. // Make sure the callee is known as an early def.
  1270. Callee.setNeededEarly();
  1271. Intr.Dependencies.insert(&Callee);
  1272. // Now create the call itself.
  1273. std::string S;
  1274. if (!Callee.isBigEndianSafe())
  1275. S += CallPrefix.str();
  1276. S += Callee.getMangledName(true) + "(";
  1277. for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
  1278. if (I != 0)
  1279. S += ", ";
  1280. S += Values[I];
  1281. }
  1282. S += ")";
  1283. return std::make_pair(Callee.getReturnType(), S);
  1284. }
  1285. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI,
  1286. bool IsBitCast){
  1287. // (cast MOD* VAL) -> cast VAL to type given by MOD.
  1288. std::pair<Type, std::string> R =
  1289. emitDagArg(DI->getArg(DI->getNumArgs() - 1),
  1290. std::string(DI->getArgNameStr(DI->getNumArgs() - 1)));
  1291. Type castToType = R.first;
  1292. for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) {
  1293. // MOD can take several forms:
  1294. // 1. $X - take the type of parameter / variable X.
  1295. // 2. The value "R" - take the type of the return type.
  1296. // 3. a type string
  1297. // 4. The value "U" or "S" to switch the signedness.
  1298. // 5. The value "H" or "D" to half or double the bitwidth.
  1299. // 6. The value "8" to convert to 8-bit (signed) integer lanes.
  1300. if (!DI->getArgNameStr(ArgIdx).empty()) {
  1301. assert_with_loc(Intr.Variables.find(std::string(
  1302. DI->getArgNameStr(ArgIdx))) != Intr.Variables.end(),
  1303. "Variable not found");
  1304. castToType =
  1305. Intr.Variables[std::string(DI->getArgNameStr(ArgIdx))].getType();
  1306. } else {
  1307. StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx));
  1308. assert_with_loc(SI, "Expected string type or $Name for cast type");
  1309. if (SI->getAsUnquotedString() == "R") {
  1310. castToType = Intr.getReturnType();
  1311. } else if (SI->getAsUnquotedString() == "U") {
  1312. castToType.makeUnsigned();
  1313. } else if (SI->getAsUnquotedString() == "S") {
  1314. castToType.makeSigned();
  1315. } else if (SI->getAsUnquotedString() == "H") {
  1316. castToType.halveLanes();
  1317. } else if (SI->getAsUnquotedString() == "D") {
  1318. castToType.doubleLanes();
  1319. } else if (SI->getAsUnquotedString() == "8") {
  1320. castToType.makeInteger(8, true);
  1321. } else if (SI->getAsUnquotedString() == "32") {
  1322. castToType.make32BitElement();
  1323. } else {
  1324. castToType = Type::fromTypedefName(SI->getAsUnquotedString());
  1325. assert_with_loc(!castToType.isVoid(), "Unknown typedef");
  1326. }
  1327. }
  1328. }
  1329. std::string S;
  1330. if (IsBitCast) {
  1331. // Emit a reinterpret cast. The second operand must be an lvalue, so create
  1332. // a temporary.
  1333. std::string N = "reint";
  1334. unsigned I = 0;
  1335. while (Intr.Variables.find(N) != Intr.Variables.end())
  1336. N = "reint" + utostr(++I);
  1337. Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix);
  1338. Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = "
  1339. << R.second << ";";
  1340. Intr.emitNewLine();
  1341. S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + "";
  1342. } else {
  1343. // Emit a normal (static) cast.
  1344. S = "(" + castToType.str() + ")(" + R.second + ")";
  1345. }
  1346. return std::make_pair(castToType, S);
  1347. }
  1348. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){
  1349. // See the documentation in arm_neon.td for a description of these operators.
  1350. class LowHalf : public SetTheory::Operator {
  1351. public:
  1352. void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
  1353. ArrayRef<SMLoc> Loc) override {
  1354. SetTheory::RecSet Elts2;
  1355. ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
  1356. Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2));
  1357. }
  1358. };
  1359. class HighHalf : public SetTheory::Operator {
  1360. public:
  1361. void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
  1362. ArrayRef<SMLoc> Loc) override {
  1363. SetTheory::RecSet Elts2;
  1364. ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
  1365. Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end());
  1366. }
  1367. };
  1368. class Rev : public SetTheory::Operator {
  1369. unsigned ElementSize;
  1370. public:
  1371. Rev(unsigned ElementSize) : ElementSize(ElementSize) {}
  1372. void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
  1373. ArrayRef<SMLoc> Loc) override {
  1374. SetTheory::RecSet Elts2;
  1375. ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc);
  1376. int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue();
  1377. VectorSize /= ElementSize;
  1378. std::vector<Record *> Revved;
  1379. for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) {
  1380. for (int LI = VectorSize - 1; LI >= 0; --LI) {
  1381. Revved.push_back(Elts2[VI + LI]);
  1382. }
  1383. }
  1384. Elts.insert(Revved.begin(), Revved.end());
  1385. }
  1386. };
  1387. class MaskExpander : public SetTheory::Expander {
  1388. unsigned N;
  1389. public:
  1390. MaskExpander(unsigned N) : N(N) {}
  1391. void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) override {
  1392. unsigned Addend = 0;
  1393. if (R->getName() == "mask0")
  1394. Addend = 0;
  1395. else if (R->getName() == "mask1")
  1396. Addend = N;
  1397. else
  1398. return;
  1399. for (unsigned I = 0; I < N; ++I)
  1400. Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend)));
  1401. }
  1402. };
  1403. // (shuffle arg1, arg2, sequence)
  1404. std::pair<Type, std::string> Arg1 =
  1405. emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
  1406. std::pair<Type, std::string> Arg2 =
  1407. emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
  1408. assert_with_loc(Arg1.first == Arg2.first,
  1409. "Different types in arguments to shuffle!");
  1410. SetTheory ST;
  1411. SetTheory::RecSet Elts;
  1412. ST.addOperator("lowhalf", std::make_unique<LowHalf>());
  1413. ST.addOperator("highhalf", std::make_unique<HighHalf>());
  1414. ST.addOperator("rev",
  1415. std::make_unique<Rev>(Arg1.first.getElementSizeInBits()));
  1416. ST.addExpander("MaskExpand",
  1417. std::make_unique<MaskExpander>(Arg1.first.getNumElements()));
  1418. ST.evaluate(DI->getArg(2), Elts, None);
  1419. std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second;
  1420. for (auto &E : Elts) {
  1421. StringRef Name = E->getName();
  1422. assert_with_loc(Name.startswith("sv"),
  1423. "Incorrect element kind in shuffle mask!");
  1424. S += ", " + Name.drop_front(2).str();
  1425. }
  1426. S += ")";
  1427. // Recalculate the return type - the shuffle may have halved or doubled it.
  1428. Type T(Arg1.first);
  1429. if (Elts.size() > T.getNumElements()) {
  1430. assert_with_loc(
  1431. Elts.size() == T.getNumElements() * 2,
  1432. "Can only double or half the number of elements in a shuffle!");
  1433. T.doubleLanes();
  1434. } else if (Elts.size() < T.getNumElements()) {
  1435. assert_with_loc(
  1436. Elts.size() == T.getNumElements() / 2,
  1437. "Can only double or half the number of elements in a shuffle!");
  1438. T.halveLanes();
  1439. }
  1440. return std::make_pair(T, S);
  1441. }
  1442. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) {
  1443. assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument");
  1444. std::pair<Type, std::string> A =
  1445. emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
  1446. assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument");
  1447. Type T = Intr.getBaseType();
  1448. assert_with_loc(T.isVector(), "dup() used but default type is scalar!");
  1449. std::string S = "(" + T.str() + ") {";
  1450. for (unsigned I = 0; I < T.getNumElements(); ++I) {
  1451. if (I != 0)
  1452. S += ", ";
  1453. S += A.second;
  1454. }
  1455. S += "}";
  1456. return std::make_pair(T, S);
  1457. }
  1458. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDupTyped(DagInit *DI) {
  1459. assert_with_loc(DI->getNumArgs() == 2, "dup_typed() expects two arguments");
  1460. std::pair<Type, std::string> B =
  1461. emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
  1462. assert_with_loc(B.first.isScalar(),
  1463. "dup_typed() requires a scalar as the second argument");
  1464. Type T;
  1465. // If the type argument is a constant string, construct the type directly.
  1466. if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0))) {
  1467. T = Type::fromTypedefName(SI->getAsUnquotedString());
  1468. assert_with_loc(!T.isVoid(), "Unknown typedef");
  1469. } else
  1470. T = emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))).first;
  1471. assert_with_loc(T.isVector(), "dup_typed() used but target type is scalar!");
  1472. std::string S = "(" + T.str() + ") {";
  1473. for (unsigned I = 0; I < T.getNumElements(); ++I) {
  1474. if (I != 0)
  1475. S += ", ";
  1476. S += B.second;
  1477. }
  1478. S += "}";
  1479. return std::make_pair(T, S);
  1480. }
  1481. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) {
  1482. assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments");
  1483. std::pair<Type, std::string> A =
  1484. emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
  1485. std::pair<Type, std::string> B =
  1486. emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
  1487. assert_with_loc(B.first.isScalar(),
  1488. "splat() requires a scalar int as the second argument");
  1489. std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second;
  1490. for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) {
  1491. S += ", " + B.second;
  1492. }
  1493. S += ")";
  1494. return std::make_pair(Intr.getBaseType(), S);
  1495. }
  1496. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) {
  1497. assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments");
  1498. std::pair<Type, std::string> A =
  1499. emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
  1500. assert_with_loc(!A.first.isVoid(),
  1501. "Argument to save_temp() must have non-void type!");
  1502. std::string N = std::string(DI->getArgNameStr(0));
  1503. assert_with_loc(!N.empty(),
  1504. "save_temp() expects a name as the first argument");
  1505. assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(),
  1506. "Variable already defined!");
  1507. Intr.Variables[N] = Variable(A.first, N + Intr.VariablePostfix);
  1508. std::string S =
  1509. A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second;
  1510. return std::make_pair(Type::getVoid(), S);
  1511. }
  1512. std::pair<Type, std::string>
  1513. Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) {
  1514. std::string S = Intr.Name;
  1515. assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!");
  1516. std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
  1517. std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
  1518. size_t Idx = S.find(ToReplace);
  1519. assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!");
  1520. S.replace(Idx, ToReplace.size(), ReplaceWith);
  1521. return std::make_pair(Type::getVoid(), S);
  1522. }
  1523. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){
  1524. std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
  1525. std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
  1526. return std::make_pair(Type::fromTypedefName(Ty), Value);
  1527. }
  1528. std::pair<Type, std::string>
  1529. Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) {
  1530. if (!ArgName.empty()) {
  1531. assert_with_loc(!Arg->isComplete(),
  1532. "Arguments must either be DAGs or names, not both!");
  1533. assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(),
  1534. "Variable not defined!");
  1535. Variable &V = Intr.Variables[ArgName];
  1536. return std::make_pair(V.getType(), V.getName());
  1537. }
  1538. assert(Arg && "Neither ArgName nor Arg?!");
  1539. DagInit *DI = dyn_cast<DagInit>(Arg);
  1540. assert_with_loc(DI, "Arguments must either be DAGs or names!");
  1541. return emitDag(DI);
  1542. }
  1543. std::string Intrinsic::generate() {
  1544. // Avoid duplicated code for big and little endian
  1545. if (isBigEndianSafe()) {
  1546. generateImpl(false, "", "");
  1547. return OS.str();
  1548. }
  1549. // Little endian intrinsics are simple and don't require any argument
  1550. // swapping.
  1551. OS << "#ifdef __LITTLE_ENDIAN__\n";
  1552. generateImpl(false, "", "");
  1553. OS << "#else\n";
  1554. // Big endian intrinsics are more complex. The user intended these
  1555. // intrinsics to operate on a vector "as-if" loaded by (V)LDR,
  1556. // but we load as-if (V)LD1. So we should swap all arguments and
  1557. // swap the return value too.
  1558. //
  1559. // If we call sub-intrinsics, we should call a version that does
  1560. // not re-swap the arguments!
  1561. generateImpl(true, "", "__noswap_");
  1562. // If we're needed early, create a non-swapping variant for
  1563. // big-endian.
  1564. if (NeededEarly) {
  1565. generateImpl(false, "__noswap_", "__noswap_");
  1566. }
  1567. OS << "#endif\n\n";
  1568. return OS.str();
  1569. }
  1570. void Intrinsic::generateImpl(bool ReverseArguments,
  1571. StringRef NamePrefix, StringRef CallPrefix) {
  1572. CurrentRecord = R;
  1573. // If we call a macro, our local variables may be corrupted due to
  1574. // lack of proper lexical scoping. So, add a globally unique postfix
  1575. // to every variable.
  1576. //
  1577. // indexBody() should have set up the Dependencies set by now.
  1578. for (auto *I : Dependencies)
  1579. if (I->UseMacro) {
  1580. VariablePostfix = "_" + utostr(Emitter.getUniqueNumber());
  1581. break;
  1582. }
  1583. initVariables();
  1584. emitPrototype(NamePrefix);
  1585. if (IsUnavailable) {
  1586. OS << " __attribute__((unavailable));";
  1587. } else {
  1588. emitOpeningBrace();
  1589. // Emit return variable declaration first as to not trigger
  1590. // -Wdeclaration-after-statement.
  1591. emitReturnVarDecl();
  1592. emitShadowedArgs();
  1593. if (ReverseArguments)
  1594. emitArgumentReversal();
  1595. emitBody(CallPrefix);
  1596. if (ReverseArguments)
  1597. emitReturnReversal();
  1598. emitReturn();
  1599. emitClosingBrace();
  1600. }
  1601. OS << "\n";
  1602. CurrentRecord = nullptr;
  1603. }
  1604. void Intrinsic::indexBody() {
  1605. CurrentRecord = R;
  1606. initVariables();
  1607. // Emit return variable declaration first as to not trigger
  1608. // -Wdeclaration-after-statement.
  1609. emitReturnVarDecl();
  1610. emitBody("");
  1611. OS.str("");
  1612. CurrentRecord = nullptr;
  1613. }
  1614. //===----------------------------------------------------------------------===//
  1615. // NeonEmitter implementation
  1616. //===----------------------------------------------------------------------===//
  1617. Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types,
  1618. Optional<std::string> MangledName) {
  1619. // First, look up the name in the intrinsic map.
  1620. assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(),
  1621. ("Intrinsic '" + Name + "' not found!").str());
  1622. auto &V = IntrinsicMap.find(Name.str())->second;
  1623. std::vector<Intrinsic *> GoodVec;
  1624. // Create a string to print if we end up failing.
  1625. std::string ErrMsg = "looking up intrinsic '" + Name.str() + "(";
  1626. for (unsigned I = 0; I < Types.size(); ++I) {
  1627. if (I != 0)
  1628. ErrMsg += ", ";
  1629. ErrMsg += Types[I].str();
  1630. }
  1631. ErrMsg += ")'\n";
  1632. ErrMsg += "Available overloads:\n";
  1633. // Now, look through each intrinsic implementation and see if the types are
  1634. // compatible.
  1635. for (auto &I : V) {
  1636. ErrMsg += " - " + I.getReturnType().str() + " " + I.getMangledName();
  1637. ErrMsg += "(";
  1638. for (unsigned A = 0; A < I.getNumParams(); ++A) {
  1639. if (A != 0)
  1640. ErrMsg += ", ";
  1641. ErrMsg += I.getParamType(A).str();
  1642. }
  1643. ErrMsg += ")\n";
  1644. if (MangledName && MangledName != I.getMangledName(true))
  1645. continue;
  1646. if (I.getNumParams() != Types.size())
  1647. continue;
  1648. unsigned ArgNum = 0;
  1649. bool MatchingArgumentTypes = llvm::all_of(Types, [&](const auto &Type) {
  1650. return Type == I.getParamType(ArgNum++);
  1651. });
  1652. if (MatchingArgumentTypes)
  1653. GoodVec.push_back(&I);
  1654. }
  1655. assert_with_loc(!GoodVec.empty(),
  1656. "No compatible intrinsic found - " + ErrMsg);
  1657. assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg);
  1658. return *GoodVec.front();
  1659. }
  1660. void NeonEmitter::createIntrinsic(Record *R,
  1661. SmallVectorImpl<Intrinsic *> &Out) {
  1662. std::string Name = std::string(R->getValueAsString("Name"));
  1663. std::string Proto = std::string(R->getValueAsString("Prototype"));
  1664. std::string Types = std::string(R->getValueAsString("Types"));
  1665. Record *OperationRec = R->getValueAsDef("Operation");
  1666. bool BigEndianSafe = R->getValueAsBit("BigEndianSafe");
  1667. std::string Guard = std::string(R->getValueAsString("ArchGuard"));
  1668. bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
  1669. std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith"));
  1670. // Set the global current record. This allows assert_with_loc to produce
  1671. // decent location information even when highly nested.
  1672. CurrentRecord = R;
  1673. ListInit *Body = OperationRec->getValueAsListInit("Ops");
  1674. std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types);
  1675. ClassKind CK = ClassNone;
  1676. if (R->getSuperClasses().size() >= 2)
  1677. CK = ClassMap[R->getSuperClasses()[1].first];
  1678. std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
  1679. if (!CartesianProductWith.empty()) {
  1680. std::vector<TypeSpec> ProductTypeSpecs = TypeSpec::fromTypeSpecs(CartesianProductWith);
  1681. for (auto TS : TypeSpecs) {
  1682. Type DefaultT(TS, ".");
  1683. for (auto SrcTS : ProductTypeSpecs) {
  1684. Type DefaultSrcT(SrcTS, ".");
  1685. if (TS == SrcTS ||
  1686. DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
  1687. continue;
  1688. NewTypeSpecs.push_back(std::make_pair(TS, SrcTS));
  1689. }
  1690. }
  1691. } else {
  1692. for (auto TS : TypeSpecs) {
  1693. NewTypeSpecs.push_back(std::make_pair(TS, TS));
  1694. }
  1695. }
  1696. llvm::sort(NewTypeSpecs);
  1697. NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()),
  1698. NewTypeSpecs.end());
  1699. auto &Entry = IntrinsicMap[Name];
  1700. for (auto &I : NewTypeSpecs) {
  1701. Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this,
  1702. Guard, IsUnavailable, BigEndianSafe);
  1703. Out.push_back(&Entry.back());
  1704. }
  1705. CurrentRecord = nullptr;
  1706. }
  1707. /// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def
  1708. /// declaration of builtins, checking for unique builtin declarations.
  1709. void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
  1710. SmallVectorImpl<Intrinsic *> &Defs) {
  1711. OS << "#ifdef GET_NEON_BUILTINS\n";
  1712. // We only want to emit a builtin once, and we want to emit them in
  1713. // alphabetical order, so use a std::set.
  1714. std::set<std::string> Builtins;
  1715. for (auto *Def : Defs) {
  1716. if (Def->hasBody())
  1717. continue;
  1718. std::string S = "BUILTIN(__builtin_neon_" + Def->getMangledName() + ", \"";
  1719. S += Def->getBuiltinTypeStr();
  1720. S += "\", \"n\")";
  1721. Builtins.insert(S);
  1722. }
  1723. for (auto &S : Builtins)
  1724. OS << S << "\n";
  1725. OS << "#endif\n\n";
  1726. }
  1727. /// Generate the ARM and AArch64 overloaded type checking code for
  1728. /// SemaChecking.cpp, checking for unique builtin declarations.
  1729. void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
  1730. SmallVectorImpl<Intrinsic *> &Defs) {
  1731. OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
  1732. // We record each overload check line before emitting because subsequent Inst
  1733. // definitions may extend the number of permitted types (i.e. augment the
  1734. // Mask). Use std::map to avoid sorting the table by hash number.
  1735. struct OverloadInfo {
  1736. uint64_t Mask;
  1737. int PtrArgNum;
  1738. bool HasConstPtr;
  1739. OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {}
  1740. };
  1741. std::map<std::string, OverloadInfo> OverloadMap;
  1742. for (auto *Def : Defs) {
  1743. // If the def has a body (that is, it has Operation DAGs), it won't call
  1744. // __builtin_neon_* so we don't need to generate a definition for it.
  1745. if (Def->hasBody())
  1746. continue;
  1747. // Functions which have a scalar argument cannot be overloaded, no need to
  1748. // check them if we are emitting the type checking code.
  1749. if (Def->protoHasScalar())
  1750. continue;
  1751. uint64_t Mask = 0ULL;
  1752. Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum();
  1753. // Check if the function has a pointer or const pointer argument.
  1754. int PtrArgNum = -1;
  1755. bool HasConstPtr = false;
  1756. for (unsigned I = 0; I < Def->getNumParams(); ++I) {
  1757. const auto &Type = Def->getParamType(I);
  1758. if (Type.isPointer()) {
  1759. PtrArgNum = I;
  1760. HasConstPtr = Type.isConstPointer();
  1761. }
  1762. }
  1763. // For sret builtins, adjust the pointer argument index.
  1764. if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1)
  1765. PtrArgNum += 1;
  1766. std::string Name = Def->getName();
  1767. // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
  1768. // and vst1_lane intrinsics. Using a pointer to the vector element
  1769. // type with one of those operations causes codegen to select an aligned
  1770. // load/store instruction. If you want an unaligned operation,
  1771. // the pointer argument needs to have less alignment than element type,
  1772. // so just accept any pointer type.
  1773. if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") {
  1774. PtrArgNum = -1;
  1775. HasConstPtr = false;
  1776. }
  1777. if (Mask) {
  1778. std::string Name = Def->getMangledName();
  1779. OverloadMap.insert(std::make_pair(Name, OverloadInfo()));
  1780. OverloadInfo &OI = OverloadMap[Name];
  1781. OI.Mask |= Mask;
  1782. OI.PtrArgNum |= PtrArgNum;
  1783. OI.HasConstPtr = HasConstPtr;
  1784. }
  1785. }
  1786. for (auto &I : OverloadMap) {
  1787. OverloadInfo &OI = I.second;
  1788. OS << "case NEON::BI__builtin_neon_" << I.first << ": ";
  1789. OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL";
  1790. if (OI.PtrArgNum >= 0)
  1791. OS << "; PtrArgNum = " << OI.PtrArgNum;
  1792. if (OI.HasConstPtr)
  1793. OS << "; HasConstPtr = true";
  1794. OS << "; break;\n";
  1795. }
  1796. OS << "#endif\n\n";
  1797. }
  1798. void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
  1799. SmallVectorImpl<Intrinsic *> &Defs) {
  1800. OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
  1801. std::set<std::string> Emitted;
  1802. for (auto *Def : Defs) {
  1803. if (Def->hasBody())
  1804. continue;
  1805. // Functions which do not have an immediate do not need to have range
  1806. // checking code emitted.
  1807. if (!Def->hasImmediate())
  1808. continue;
  1809. if (Emitted.find(Def->getMangledName()) != Emitted.end())
  1810. continue;
  1811. std::string LowerBound, UpperBound;
  1812. Record *R = Def->getRecord();
  1813. if (R->getValueAsBit("isVXAR")) {
  1814. //VXAR takes an immediate in the range [0, 63]
  1815. LowerBound = "0";
  1816. UpperBound = "63";
  1817. } else if (R->getValueAsBit("isVCVT_N")) {
  1818. // VCVT between floating- and fixed-point values takes an immediate
  1819. // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16.
  1820. LowerBound = "1";
  1821. if (Def->getBaseType().getElementSizeInBits() == 16 ||
  1822. Def->getName().find('h') != std::string::npos)
  1823. // VCVTh operating on FP16 intrinsics in range [1, 16)
  1824. UpperBound = "15";
  1825. else if (Def->getBaseType().getElementSizeInBits() == 32)
  1826. UpperBound = "31";
  1827. else
  1828. UpperBound = "63";
  1829. } else if (R->getValueAsBit("isScalarShift")) {
  1830. // Right shifts have an 'r' in the name, left shifts do not. Convert
  1831. // instructions have the same bounds and right shifts.
  1832. if (Def->getName().find('r') != std::string::npos ||
  1833. Def->getName().find("cvt") != std::string::npos)
  1834. LowerBound = "1";
  1835. UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1);
  1836. } else if (R->getValueAsBit("isShift")) {
  1837. // Builtins which are overloaded by type will need to have their upper
  1838. // bound computed at Sema time based on the type constant.
  1839. // Right shifts have an 'r' in the name, left shifts do not.
  1840. if (Def->getName().find('r') != std::string::npos)
  1841. LowerBound = "1";
  1842. UpperBound = "RFT(TV, true)";
  1843. } else if (Def->getClassKind(true) == ClassB) {
  1844. // ClassB intrinsics have a type (and hence lane number) that is only
  1845. // known at runtime.
  1846. if (R->getValueAsBit("isLaneQ"))
  1847. UpperBound = "RFT(TV, false, true)";
  1848. else
  1849. UpperBound = "RFT(TV, false, false)";
  1850. } else {
  1851. // The immediate generally refers to a lane in the preceding argument.
  1852. assert(Def->getImmediateIdx() > 0);
  1853. Type T = Def->getParamType(Def->getImmediateIdx() - 1);
  1854. UpperBound = utostr(T.getNumElements() - 1);
  1855. }
  1856. // Calculate the index of the immediate that should be range checked.
  1857. unsigned Idx = Def->getNumParams();
  1858. if (Def->hasImmediate())
  1859. Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx());
  1860. OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": "
  1861. << "i = " << Idx << ";";
  1862. if (!LowerBound.empty())
  1863. OS << " l = " << LowerBound << ";";
  1864. if (!UpperBound.empty())
  1865. OS << " u = " << UpperBound << ";";
  1866. OS << " break;\n";
  1867. Emitted.insert(Def->getMangledName());
  1868. }
  1869. OS << "#endif\n\n";
  1870. }
  1871. /// runHeader - Emit a file with sections defining:
  1872. /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
  1873. /// 2. the SemaChecking code for the type overload checking.
  1874. /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
  1875. void NeonEmitter::runHeader(raw_ostream &OS) {
  1876. std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  1877. SmallVector<Intrinsic *, 128> Defs;
  1878. for (auto *R : RV)
  1879. createIntrinsic(R, Defs);
  1880. // Generate shared BuiltinsXXX.def
  1881. genBuiltinsDef(OS, Defs);
  1882. // Generate ARM overloaded type checking code for SemaChecking.cpp
  1883. genOverloadTypeCheckCode(OS, Defs);
  1884. // Generate ARM range checking code for shift/lane immediates.
  1885. genIntrinsicRangeCheckCode(OS, Defs);
  1886. }
  1887. static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) {
  1888. std::string TypedefTypes(types);
  1889. std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);
  1890. // Emit vector typedefs.
  1891. bool InIfdef = false;
  1892. for (auto &TS : TDTypeVec) {
  1893. bool IsA64 = false;
  1894. Type T(TS, ".");
  1895. if (T.isDouble())
  1896. IsA64 = true;
  1897. if (InIfdef && !IsA64) {
  1898. OS << "#endif\n";
  1899. InIfdef = false;
  1900. }
  1901. if (!InIfdef && IsA64) {
  1902. OS << "#ifdef __aarch64__\n";
  1903. InIfdef = true;
  1904. }
  1905. if (T.isPoly())
  1906. OS << "typedef __attribute__((neon_polyvector_type(";
  1907. else
  1908. OS << "typedef __attribute__((neon_vector_type(";
  1909. Type T2 = T;
  1910. T2.makeScalar();
  1911. OS << T.getNumElements() << "))) ";
  1912. OS << T2.str();
  1913. OS << " " << T.str() << ";\n";
  1914. }
  1915. if (InIfdef)
  1916. OS << "#endif\n";
  1917. OS << "\n";
  1918. // Emit struct typedefs.
  1919. InIfdef = false;
  1920. for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
  1921. for (auto &TS : TDTypeVec) {
  1922. bool IsA64 = false;
  1923. Type T(TS, ".");
  1924. if (T.isDouble())
  1925. IsA64 = true;
  1926. if (InIfdef && !IsA64) {
  1927. OS << "#endif\n";
  1928. InIfdef = false;
  1929. }
  1930. if (!InIfdef && IsA64) {
  1931. OS << "#ifdef __aarch64__\n";
  1932. InIfdef = true;
  1933. }
  1934. const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0};
  1935. Type VT(TS, Mods);
  1936. OS << "typedef struct " << VT.str() << " {\n";
  1937. OS << " " << T.str() << " val";
  1938. OS << "[" << NumMembers << "]";
  1939. OS << ";\n} ";
  1940. OS << VT.str() << ";\n";
  1941. OS << "\n";
  1942. }
  1943. }
  1944. if (InIfdef)
  1945. OS << "#endif\n";
  1946. }
  1947. /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
  1948. /// is comprised of type definitions and function declarations.
  1949. void NeonEmitter::run(raw_ostream &OS) {
  1950. OS << "/*===---- arm_neon.h - ARM Neon intrinsics "
  1951. "------------------------------"
  1952. "---===\n"
  1953. " *\n"
  1954. " * Permission is hereby granted, free of charge, to any person "
  1955. "obtaining "
  1956. "a copy\n"
  1957. " * of this software and associated documentation files (the "
  1958. "\"Software\"),"
  1959. " to deal\n"
  1960. " * in the Software without restriction, including without limitation "
  1961. "the "
  1962. "rights\n"
  1963. " * to use, copy, modify, merge, publish, distribute, sublicense, "
  1964. "and/or sell\n"
  1965. " * copies of the Software, and to permit persons to whom the Software "
  1966. "is\n"
  1967. " * furnished to do so, subject to the following conditions:\n"
  1968. " *\n"
  1969. " * The above copyright notice and this permission notice shall be "
  1970. "included in\n"
  1971. " * all copies or substantial portions of the Software.\n"
  1972. " *\n"
  1973. " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
  1974. "EXPRESS OR\n"
  1975. " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
  1976. "MERCHANTABILITY,\n"
  1977. " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
  1978. "SHALL THE\n"
  1979. " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
  1980. "OTHER\n"
  1981. " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
  1982. "ARISING FROM,\n"
  1983. " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
  1984. "DEALINGS IN\n"
  1985. " * THE SOFTWARE.\n"
  1986. " *\n"
  1987. " *===-----------------------------------------------------------------"
  1988. "---"
  1989. "---===\n"
  1990. " */\n\n";
  1991. OS << "#ifndef __ARM_NEON_H\n";
  1992. OS << "#define __ARM_NEON_H\n\n";
  1993. OS << "#ifndef __ARM_FP\n";
  1994. OS << "#error \"NEON intrinsics not available with the soft-float ABI. "
  1995. "Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n";
  1996. OS << "#else\n\n";
  1997. OS << "#if !defined(__ARM_NEON)\n";
  1998. OS << "#error \"NEON support not enabled\"\n";
  1999. OS << "#else\n\n";
  2000. OS << "#include <stdint.h>\n\n";
  2001. OS << "#ifdef __ARM_FEATURE_BF16\n";
  2002. OS << "#include <arm_bf16.h>\n";
  2003. OS << "typedef __bf16 bfloat16_t;\n";
  2004. OS << "#endif\n\n";
  2005. // Emit NEON-specific scalar typedefs.
  2006. OS << "typedef float float32_t;\n";
  2007. OS << "typedef __fp16 float16_t;\n";
  2008. OS << "#ifdef __aarch64__\n";
  2009. OS << "typedef double float64_t;\n";
  2010. OS << "#endif\n\n";
  2011. // For now, signedness of polynomial types depends on target
  2012. OS << "#ifdef __aarch64__\n";
  2013. OS << "typedef uint8_t poly8_t;\n";
  2014. OS << "typedef uint16_t poly16_t;\n";
  2015. OS << "typedef uint64_t poly64_t;\n";
  2016. OS << "typedef __uint128_t poly128_t;\n";
  2017. OS << "#else\n";
  2018. OS << "typedef int8_t poly8_t;\n";
  2019. OS << "typedef int16_t poly16_t;\n";
  2020. OS << "typedef int64_t poly64_t;\n";
  2021. OS << "#endif\n";
  2022. emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl", OS);
  2023. OS << "#ifdef __ARM_FEATURE_BF16\n";
  2024. emitNeonTypeDefs("bQb", OS);
  2025. OS << "#endif\n\n";
  2026. OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
  2027. "__nodebug__))\n\n";
  2028. SmallVector<Intrinsic *, 128> Defs;
  2029. std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  2030. for (auto *R : RV)
  2031. createIntrinsic(R, Defs);
  2032. for (auto *I : Defs)
  2033. I->indexBody();
  2034. llvm::stable_sort(Defs, llvm::deref<std::less<>>());
  2035. // Only emit a def when its requirements have been met.
  2036. // FIXME: This loop could be made faster, but it's fast enough for now.
  2037. bool MadeProgress = true;
  2038. std::string InGuard;
  2039. while (!Defs.empty() && MadeProgress) {
  2040. MadeProgress = false;
  2041. for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
  2042. I != Defs.end(); /*No step*/) {
  2043. bool DependenciesSatisfied = true;
  2044. for (auto *II : (*I)->getDependencies()) {
  2045. if (llvm::is_contained(Defs, II))
  2046. DependenciesSatisfied = false;
  2047. }
  2048. if (!DependenciesSatisfied) {
  2049. // Try the next one.
  2050. ++I;
  2051. continue;
  2052. }
  2053. // Emit #endif/#if pair if needed.
  2054. if ((*I)->getGuard() != InGuard) {
  2055. if (!InGuard.empty())
  2056. OS << "#endif\n";
  2057. InGuard = (*I)->getGuard();
  2058. if (!InGuard.empty())
  2059. OS << "#if " << InGuard << "\n";
  2060. }
  2061. // Actually generate the intrinsic code.
  2062. OS << (*I)->generate();
  2063. MadeProgress = true;
  2064. I = Defs.erase(I);
  2065. }
  2066. }
  2067. assert(Defs.empty() && "Some requirements were not satisfied!");
  2068. if (!InGuard.empty())
  2069. OS << "#endif\n";
  2070. OS << "\n";
  2071. OS << "#undef __ai\n\n";
  2072. OS << "#endif /* if !defined(__ARM_NEON) */\n";
  2073. OS << "#endif /* ifndef __ARM_FP */\n";
  2074. OS << "#endif /* __ARM_NEON_H */\n";
  2075. }
  2076. /// run - Read the records in arm_fp16.td and output arm_fp16.h. arm_fp16.h
  2077. /// is comprised of type definitions and function declarations.
  2078. void NeonEmitter::runFP16(raw_ostream &OS) {
  2079. OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics "
  2080. "------------------------------"
  2081. "---===\n"
  2082. " *\n"
  2083. " * Permission is hereby granted, free of charge, to any person "
  2084. "obtaining a copy\n"
  2085. " * of this software and associated documentation files (the "
  2086. "\"Software\"), to deal\n"
  2087. " * in the Software without restriction, including without limitation "
  2088. "the rights\n"
  2089. " * to use, copy, modify, merge, publish, distribute, sublicense, "
  2090. "and/or sell\n"
  2091. " * copies of the Software, and to permit persons to whom the Software "
  2092. "is\n"
  2093. " * furnished to do so, subject to the following conditions:\n"
  2094. " *\n"
  2095. " * The above copyright notice and this permission notice shall be "
  2096. "included in\n"
  2097. " * all copies or substantial portions of the Software.\n"
  2098. " *\n"
  2099. " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
  2100. "EXPRESS OR\n"
  2101. " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
  2102. "MERCHANTABILITY,\n"
  2103. " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
  2104. "SHALL THE\n"
  2105. " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
  2106. "OTHER\n"
  2107. " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
  2108. "ARISING FROM,\n"
  2109. " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
  2110. "DEALINGS IN\n"
  2111. " * THE SOFTWARE.\n"
  2112. " *\n"
  2113. " *===-----------------------------------------------------------------"
  2114. "---"
  2115. "---===\n"
  2116. " */\n\n";
  2117. OS << "#ifndef __ARM_FP16_H\n";
  2118. OS << "#define __ARM_FP16_H\n\n";
  2119. OS << "#include <stdint.h>\n\n";
  2120. OS << "typedef __fp16 float16_t;\n";
  2121. OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
  2122. "__nodebug__))\n\n";
  2123. SmallVector<Intrinsic *, 128> Defs;
  2124. std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  2125. for (auto *R : RV)
  2126. createIntrinsic(R, Defs);
  2127. for (auto *I : Defs)
  2128. I->indexBody();
  2129. llvm::stable_sort(Defs, llvm::deref<std::less<>>());
  2130. // Only emit a def when its requirements have been met.
  2131. // FIXME: This loop could be made faster, but it's fast enough for now.
  2132. bool MadeProgress = true;
  2133. std::string InGuard;
  2134. while (!Defs.empty() && MadeProgress) {
  2135. MadeProgress = false;
  2136. for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
  2137. I != Defs.end(); /*No step*/) {
  2138. bool DependenciesSatisfied = true;
  2139. for (auto *II : (*I)->getDependencies()) {
  2140. if (llvm::is_contained(Defs, II))
  2141. DependenciesSatisfied = false;
  2142. }
  2143. if (!DependenciesSatisfied) {
  2144. // Try the next one.
  2145. ++I;
  2146. continue;
  2147. }
  2148. // Emit #endif/#if pair if needed.
  2149. if ((*I)->getGuard() != InGuard) {
  2150. if (!InGuard.empty())
  2151. OS << "#endif\n";
  2152. InGuard = (*I)->getGuard();
  2153. if (!InGuard.empty())
  2154. OS << "#if " << InGuard << "\n";
  2155. }
  2156. // Actually generate the intrinsic code.
  2157. OS << (*I)->generate();
  2158. MadeProgress = true;
  2159. I = Defs.erase(I);
  2160. }
  2161. }
  2162. assert(Defs.empty() && "Some requirements were not satisfied!");
  2163. if (!InGuard.empty())
  2164. OS << "#endif\n";
  2165. OS << "\n";
  2166. OS << "#undef __ai\n\n";
  2167. OS << "#endif /* __ARM_FP16_H */\n";
  2168. }
  2169. void NeonEmitter::runBF16(raw_ostream &OS) {
  2170. OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics "
  2171. "-----------------------------------===\n"
  2172. " *\n"
  2173. " *\n"
  2174. " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
  2175. "Exceptions.\n"
  2176. " * See https://llvm.org/LICENSE.txt for license information.\n"
  2177. " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
  2178. " *\n"
  2179. " *===-----------------------------------------------------------------"
  2180. "------===\n"
  2181. " */\n\n";
  2182. OS << "#ifndef __ARM_BF16_H\n";
  2183. OS << "#define __ARM_BF16_H\n\n";
  2184. OS << "typedef __bf16 bfloat16_t;\n";
  2185. OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
  2186. "__nodebug__))\n\n";
  2187. SmallVector<Intrinsic *, 128> Defs;
  2188. std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  2189. for (auto *R : RV)
  2190. createIntrinsic(R, Defs);
  2191. for (auto *I : Defs)
  2192. I->indexBody();
  2193. llvm::stable_sort(Defs, llvm::deref<std::less<>>());
  2194. // Only emit a def when its requirements have been met.
  2195. // FIXME: This loop could be made faster, but it's fast enough for now.
  2196. bool MadeProgress = true;
  2197. std::string InGuard;
  2198. while (!Defs.empty() && MadeProgress) {
  2199. MadeProgress = false;
  2200. for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
  2201. I != Defs.end(); /*No step*/) {
  2202. bool DependenciesSatisfied = true;
  2203. for (auto *II : (*I)->getDependencies()) {
  2204. if (llvm::is_contained(Defs, II))
  2205. DependenciesSatisfied = false;
  2206. }
  2207. if (!DependenciesSatisfied) {
  2208. // Try the next one.
  2209. ++I;
  2210. continue;
  2211. }
  2212. // Emit #endif/#if pair if needed.
  2213. if ((*I)->getGuard() != InGuard) {
  2214. if (!InGuard.empty())
  2215. OS << "#endif\n";
  2216. InGuard = (*I)->getGuard();
  2217. if (!InGuard.empty())
  2218. OS << "#if " << InGuard << "\n";
  2219. }
  2220. // Actually generate the intrinsic code.
  2221. OS << (*I)->generate();
  2222. MadeProgress = true;
  2223. I = Defs.erase(I);
  2224. }
  2225. }
  2226. assert(Defs.empty() && "Some requirements were not satisfied!");
  2227. if (!InGuard.empty())
  2228. OS << "#endif\n";
  2229. OS << "\n";
  2230. OS << "#undef __ai\n\n";
  2231. OS << "#endif\n";
  2232. }
  2233. void clang::EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
  2234. NeonEmitter(Records).run(OS);
  2235. }
  2236. void clang::EmitFP16(RecordKeeper &Records, raw_ostream &OS) {
  2237. NeonEmitter(Records).runFP16(OS);
  2238. }
  2239. void clang::EmitBF16(RecordKeeper &Records, raw_ostream &OS) {
  2240. NeonEmitter(Records).runBF16(OS);
  2241. }
  2242. void clang::EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
  2243. NeonEmitter(Records).runHeader(OS);
  2244. }
  2245. void clang::EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
  2246. llvm_unreachable("Neon test generation no longer implemented!");
  2247. }