extractExternal.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497
  1. /*
  2. * extractExternal.cpp
  3. */
  4. //===----------------------------------------------------------------------===//
  5. //
  6. // The LLVM Compiler Infrastructure
  7. //
  8. // This file is dual licensed under the MIT and the University of Illinois Open
  9. // Source Licenses. See LICENSE.txt for details.
  10. //
  11. //===----------------------------------------------------------------------===//
#include <stdlib.h>
#include <string.h>

#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <strstream>
  19. /* Given a set of n object files h ('external' object files) and a set of m
  20. object files o ('internal' object files),
  21. 1. Determines r, the subset of h that o depends on, directly or indirectly
  22. 2. Removes the files in h - r from the file system
  23. 3. For each external symbol defined in some file in r, rename it in r U o
  24. by prefixing it with "__kmp_external_"
  25. Usage:
  26. hide.exe <n> <filenames for h> <filenames for o>
  27. Thus, the prefixed symbols become hidden in the sense that they now have a special
  28. prefix.
  29. */
  30. using namespace std;
  31. void stop(char* errorMsg) {
  32. printf("%s\n", errorMsg);
  33. exit(1);
  34. }
// An entry in the symbol table of a .OBJ file.
// rstream's operator>> fills the first 18 bytes of this object straight from
// the file and hideSymbols writes the same 18 bytes back, so the members must
// keep this exact order and these exact types.
class Symbol {
public:
// 8-byte name field; converted to and from text by StringTable::decode / encode
__int64 name;
// not examined by this tool; copied through unchanged
unsigned value;
// sectionNum == 0 marks an extern symbol as undefined in this file;
// type is not examined by this tool
unsigned short sectionNum, type;
// storageClass == 2 denotes an extern symbol;
// nAux is the number of 18-byte auxiliary entries that follow this entry
char storageClass, nAux;
};
  43. class _rstream : public istrstream {
  44. private:
  45. const char *buf;
  46. protected:
  47. _rstream(pair<const char*, streamsize> p):istrstream(p.first,p.second),buf(p.first){}
  48. ~_rstream() {
  49. delete[]buf;
  50. }
  51. };
  52. /* A stream encapuslating the content of a file or the content of a string, overriding the
  53. >> operator to read various integer types in binary form, as well as a symbol table
  54. entry.
  55. */
  56. class rstream : public _rstream {
  57. private:
  58. template<class T>
  59. inline rstream& doRead(T &x) {
  60. read((char*)&x, sizeof(T));
  61. return *this;
  62. }
  63. static pair<const char*, streamsize> getBuf(const char *fileName) {
  64. ifstream raw(fileName,ios::binary | ios::in);
  65. if(!raw.is_open())
  66. stop("rstream.getBuf: Error opening file");
  67. raw.seekg(0,ios::end);
  68. streampos fileSize = raw.tellg();
  69. if(fileSize < 0)
  70. stop("rstream.getBuf: Error reading file");
  71. char *buf = new char[fileSize];
  72. raw.seekg(0,ios::beg);
  73. raw.read(buf, fileSize);
  74. return pair<const char*, streamsize>(buf,fileSize);
  75. }
  76. public:
  77. // construct from a string
  78. rstream(const char *buf,streamsize size):_rstream(pair<const char*,streamsize>(buf, size)){}
  79. /* construct from a file whole content is fully read once to initialize the content of
  80. this stream
  81. */
  82. rstream(const char *fileName):_rstream(getBuf(fileName)){}
  83. rstream& operator>>(int &x) {
  84. return doRead(x);
  85. }
  86. rstream& operator>>(unsigned &x) {
  87. return doRead(x);
  88. }
  89. rstream& operator>>(short &x) {
  90. return doRead(x);
  91. }
  92. rstream& operator>>(unsigned short &x) {
  93. return doRead(x);
  94. }
  95. rstream& operator>>(Symbol &e) {
  96. read((char*)&e, 18);
  97. return *this;
  98. }
  99. };
  100. // string table in a .OBJ file
  101. class StringTable {
  102. private:
  103. map<string, unsigned> directory;
  104. size_t length;
  105. char *data;
  106. // make <directory> from <length> bytes in <data>
  107. void makeDirectory(void) {
  108. unsigned i = 4;
  109. while(i < length) {
  110. string s = string(data + i);
  111. directory.insert(make_pair(s, i));
  112. i += s.size() + 1;
  113. }
  114. }
  115. // initialize <length> and <data> with contents specified by the arguments
  116. void init(const char *_data) {
  117. unsigned _length = *(unsigned*)_data;
  118. if(_length < sizeof(unsigned) || _length != *(unsigned*)_data)
  119. stop("StringTable.init: Invalid symbol table");
  120. if(_data[_length - 1]) {
  121. // to prevent runaway strings, make sure the data ends with a zero
  122. data = new char[length = _length + 1];
  123. data[_length] = 0;
  124. } else {
  125. data = new char[length = _length];
  126. }
  127. *(unsigned*)data = length;
  128. KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned),
  129. length - sizeof(unsigned));
  130. makeDirectory();
  131. }
  132. public:
  133. StringTable(rstream &f) {
  134. /* Construct string table by reading from f.
  135. */
  136. streampos s;
  137. unsigned strSize;
  138. char *strData;
  139. s = f.tellg();
  140. f>>strSize;
  141. if(strSize < sizeof(unsigned))
  142. stop("StringTable: Invalid string table");
  143. strData = new char[strSize];
  144. *(unsigned*)strData = strSize;
  145. // read the raw data into <strData>
  146. f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned));
  147. s = f.tellg() - s;
  148. if(s < strSize)
  149. stop("StringTable: Unexpected EOF");
  150. init(strData);
  151. delete[]strData;
  152. }
  153. StringTable(const set<string> &strings) {
  154. /* Construct string table from given strings.
  155. */
  156. char *p;
  157. set<string>::const_iterator it;
  158. size_t s;
  159. // count required size for data
  160. for(length = sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
  161. size_t l = (*it).size();
  162. if(l > (unsigned) 0xFFFFFFFF)
  163. stop("StringTable: String too long");
  164. if(l > 8) {
  165. length += l + 1;
  166. if(length > (unsigned) 0xFFFFFFFF)
  167. stop("StringTable: Symbol table too long");
  168. }
  169. }
  170. data = new char[length];
  171. *(unsigned*)data = length;
  172. // populate data and directory
  173. for(p = data + sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
  174. const string &str = *it;
  175. size_t l = str.size();
  176. if(l > 8) {
  177. directory.insert(make_pair(str, p - data));
  178. KMP_MEMCPY(p, str.c_str(), l);
  179. p[l] = 0;
  180. p += l + 1;
  181. }
  182. }
  183. }
  184. ~StringTable() {
  185. delete[] data;
  186. }
  187. /* Returns encoding for given string based on this string table.
  188. Error if string length is greater than 8 but string is not in
  189. the string table--returns 0.
  190. */
  191. __int64 encode(const string &str) {
  192. __int64 r;
  193. if(str.size() <= 8) {
  194. // encoded directly
  195. ((char*)&r)[7] = 0;
  196. KMP_STRNCPY_S((char*)&r, sizeof(r), str.c_str(), 8);
  197. return r;
  198. } else {
  199. // represented as index into table
  200. map<string,unsigned>::const_iterator it = directory.find(str);
  201. if(it == directory.end())
  202. stop("StringTable::encode: String now found in string table");
  203. ((unsigned*)&r)[0] = 0;
  204. ((unsigned*)&r)[1] = (*it).second;
  205. return r;
  206. }
  207. }
  208. /* Returns string represented by x based on this string table.
  209. Error if x references an invalid position in the table--returns
  210. the empty string.
  211. */
  212. string decode(__int64 x) const {
  213. if(*(unsigned*)&x == 0) {
  214. // represented as index into table
  215. unsigned &p = ((unsigned*)&x)[1];
  216. if(p >= length)
  217. stop("StringTable::decode: Invalid string table lookup");
  218. return string(data + p);
  219. } else {
  220. // encoded directly
  221. char *p = (char*)&x;
  222. int i;
  223. for(i = 0; i < 8 && p[i]; ++i);
  224. return string(p, i);
  225. }
  226. }
  227. void write(ostream &os) {
  228. os.write(data, length);
  229. }
  230. };
  231. /* for the named object file, determines the set of defined symbols and the set of undefined external symbols
  232. and writes them to <defined> and <undefined> respectively
  233. */
  234. void computeExternalSymbols(const char *fileName, set<string> *defined, set<string> *undefined){
  235. streampos fileSize;
  236. size_t strTabStart;
  237. unsigned symTabStart, symNEntries;
  238. rstream f(fileName);
  239. f.seekg(0,ios::end);
  240. fileSize = f.tellg();
  241. f.seekg(8);
  242. f >> symTabStart >> symNEntries;
  243. // seek to the string table
  244. f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
  245. if(f.eof()) {
  246. printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart = %u, symNEntries = %u\n",
  247. fileName, (unsigned long) fileSize, symTabStart, symNEntries);
  248. stop("computeExternalSymbols: Unexpected EOF 1");
  249. }
  250. StringTable stringTable(f); // read the string table
  251. if(f.tellg() != fileSize)
  252. stop("computeExternalSymbols: Unexpected data after string table");
  253. f.clear();
  254. f.seekg(symTabStart); // seek to the symbol table
  255. defined->clear(); undefined->clear();
  256. for(int i = 0; i < symNEntries; ++i) {
  257. // process each entry
  258. Symbol e;
  259. if(f.eof())
  260. stop("computeExternalSymbols: Unexpected EOF 2");
  261. f>>e;
  262. if(f.fail())
  263. stop("computeExternalSymbols: File read error");
  264. if(e.nAux) { // auxiliary entry: skip
  265. f.seekg(e.nAux * 18, ios::cur);
  266. i += e.nAux;
  267. }
  268. // if symbol is extern and defined in the current file, insert it
  269. if(e.storageClass == 2)
  270. if(e.sectionNum)
  271. defined->insert(stringTable.decode(e.name));
  272. else
  273. undefined->insert(stringTable.decode(e.name));
  274. }
  275. }
  276. /* For each occurrence of an external symbol in the object file named by
  277. by <fileName> that is a member of <hide>, renames it by prefixing
  278. with "__kmp_external_", writing back the file in-place
  279. */
  280. void hideSymbols(char *fileName, const set<string> &hide) {
  281. static const string prefix("__kmp_external_");
  282. set<string> strings; // set of all occurring symbols, appropriately prefixed
  283. streampos fileSize;
  284. size_t strTabStart;
  285. unsigned symTabStart, symNEntries;
  286. int i;
  287. rstream in(fileName);
  288. in.seekg(0,ios::end);
  289. fileSize = in.tellg();
  290. in.seekg(8);
  291. in >> symTabStart >> symNEntries;
  292. in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
  293. if(in.eof())
  294. stop("hideSymbols: Unexpected EOF");
  295. StringTable stringTableOld(in); // read original string table
  296. if(in.tellg() != fileSize)
  297. stop("hideSymbols: Unexpected data after string table");
  298. // compute set of occurring strings with prefix added
  299. for(i = 0; i < symNEntries; ++i) {
  300. Symbol e;
  301. in.seekg(symTabStart + i * 18);
  302. if(in.eof())
  303. stop("hideSymbols: Unexpected EOF");
  304. in >> e;
  305. if(in.fail())
  306. stop("hideSymbols: File read error");
  307. if(e.nAux)
  308. i += e.nAux;
  309. const string &s = stringTableOld.decode(e.name);
  310. // if symbol is extern and found in <hide>, prefix and insert into strings,
  311. // otherwise, just insert into strings without prefix
  312. strings.insert( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
  313. prefix + s : s);
  314. }
  315. ofstream out(fileName, ios::trunc | ios::out | ios::binary);
  316. if(!out.is_open())
  317. stop("hideSymbols: Error opening output file");
  318. // make new string table from string set
  319. StringTable stringTableNew = StringTable(strings);
  320. // copy input file to output file up to just before the symbol table
  321. in.seekg(0);
  322. char *buf = new char[symTabStart];
  323. in.read(buf, symTabStart);
  324. out.write(buf, symTabStart);
  325. delete []buf;
  326. // copy input symbol table to output symbol table with name translation
  327. for(i = 0; i < symNEntries; ++i) {
  328. Symbol e;
  329. in.seekg(symTabStart + i*18);
  330. if(in.eof())
  331. stop("hideSymbols: Unexpected EOF");
  332. in >> e;
  333. if(in.fail())
  334. stop("hideSymbols: File read error");
  335. const string &s = stringTableOld.decode(e.name);
  336. out.seekp(symTabStart + i*18);
  337. e.name = stringTableNew.encode( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
  338. prefix + s : s);
  339. out.write((char*)&e, 18);
  340. if(out.fail())
  341. stop("hideSymbols: File write error");
  342. if(e.nAux) {
  343. // copy auxiliary symbol table entries
  344. int nAux = e.nAux;
  345. for(int j = 1; j <= nAux; ++j) {
  346. in >> e;
  347. out.seekp(symTabStart + (i + j) * 18);
  348. out.write((char*)&e, 18);
  349. }
  350. i += nAux;
  351. }
  352. }
  353. // output string table
  354. stringTableNew.write(out);
  355. }
  356. // returns true iff <a> and <b> have no common element
  357. template <class T>
  358. bool isDisjoint(const set<T> &a, const set<T> &b) {
  359. set<T>::const_iterator ita, itb;
  360. for(ita = a.begin(), itb = b.begin(); ita != a.end() && itb != b.end();) {
  361. const T &ta = *ita, &tb = *itb;
  362. if(ta < tb)
  363. ++ita;
  364. else if (tb < ta)
  365. ++itb;
  366. else
  367. return false;
  368. }
  369. return true;
  370. }
  371. /* precondition: <defined> and <undefined> are arrays with <nTotal> elements where
  372. <nTotal> >= <nExternal>. The first <nExternal> elements correspond to the external object
  373. files and the rest correspond to the internal object files.
  374. postcondition: file x is said to depend on file y if undefined[x] and defined[y] are not
  375. disjoint. Returns the transitive closure of the set of internal object files, as a set of
  376. file indexes, under the 'depends on' relation, minus the set of internal object files.
  377. */
  378. set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined, set<string> *undefined) {
  379. set<int> *required = new set<int>;
  380. set<int> fresh[2];
  381. int i, cur = 0;
  382. bool changed;
  383. for(i = nTotal - 1; i >= nExternal; --i)
  384. fresh[cur].insert(i);
  385. do {
  386. changed = false;
  387. for(set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end(); ++it) {
  388. set<string> &s = undefined[*it];
  389. for(i = 0; i < nExternal; ++i) {
  390. if(required->find(i) == required->end()) {
  391. if(!isDisjoint(defined[i], s)) {
  392. // found a new qualifying element
  393. required->insert(i);
  394. fresh[1 - cur].insert(i);
  395. changed = true;
  396. }
  397. }
  398. }
  399. }
  400. fresh[cur].clear();
  401. cur = 1 - cur;
  402. } while(changed);
  403. return required;
  404. }
  405. int main(int argc, char **argv) {
  406. int nExternal, nInternal, i;
  407. set<string> *defined, *undefined;
  408. set<int>::iterator it;
  409. if(argc < 3)
  410. stop("Please specify a positive integer followed by a list of object filenames");
  411. nExternal = atoi(argv[1]);
  412. if(nExternal <= 0)
  413. stop("Please specify a positive integer followed by a list of object filenames");
  414. if(nExternal + 2 > argc)
  415. stop("Too few external objects");
  416. nInternal = argc - nExternal - 2;
  417. defined = new set<string>[argc - 2];
  418. undefined = new set<string>[argc - 2];
  419. // determine the set of defined and undefined external symbols
  420. for(i = 2; i < argc; ++i)
  421. computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2);
  422. // determine the set of required external files
  423. set<int> *requiredExternal = findRequiredExternal(nExternal, argc - 2, defined, undefined);
  424. set<string> hide;
  425. /* determine the set of symbols to hide--namely defined external symbols of the
  426. required external files
  427. */
  428. for(it = requiredExternal->begin(); it != requiredExternal->end(); ++it) {
  429. int idx = *it;
  430. set<string>::iterator it2;
  431. /* We have to insert one element at a time instead of inserting a range because
  432. the insert member function taking a range doesn't exist on Windows* OS, at least
  433. at the time of this writing.
  434. */
  435. for(it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2)
  436. hide.insert(*it2);
  437. }
  438. /* process the external files--removing those that are not required and hiding
  439. the appropriate symbols in the others
  440. */
  441. for(i = 0; i < nExternal; ++i)
  442. if(requiredExternal->find(i) != requiredExternal->end())
  443. hideSymbols(argv[2 + i], hide);
  444. else
  445. remove(argv[2 + i]);
  446. // hide the appropriate symbols in the internal files
  447. for(i = nExternal + 2; i < argc; ++i)
  448. hideSymbols(argv[i], hide);
  449. return 0;
  450. }