123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497 |
- /*
- * extractExternal.cpp
- */
- //===----------------------------------------------------------------------===//
- //
- // The LLVM Compiler Infrastructure
- //
- // This file is dual licensed under the MIT and the University of Illinois Open
- // Source Licenses. See LICENSE.txt for details.
- //
- //===----------------------------------------------------------------------===//
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <strstream>
#include <utility>
- /* Given a set of n object files h ('external' object files) and a set of m
- object files o ('internal' object files),
- 1. Determines r, the subset of h that o depends on, directly or indirectly
- 2. Removes the files in h - r from the file system
- 3. For each external symbol defined in some file in r, rename it in r U o
- by prefixing it with "__kmp_external_"
- Usage:
- hide.exe <n> <filenames for h> <filenames for o>
- Thus, the prefixed symbols become hidden in the sense that they now have a special
- prefix.
- */
- using namespace std;
/* Reports a fatal error and terminates the program.
   Prints <errorMsg> followed by a newline on standard output and exits with
   status 1; control never returns to the caller.
   The message is taken as a pointer to const so that string literals, which
   is what every caller in this file passes, convert without a cast.
*/
void stop(const char *errorMsg) {
    printf("%s\n", errorMsg);
    exit(1);
}
/* An entry in the symbol table of a .OBJ file.
   rstream fills the first 18 bytes of this object straight from the file, so
   the members must keep this order and these sizes (8 + 4 + 2 + 2 + 1 + 1).
*/
class Symbol {
public:
    // 8-byte name field; StringTable::decode/encode translate it from/to a string.
    // long long is the standard spelling of the 64-bit integer type.
    long long name;
    unsigned value;
    // sectionNum == 0 marks a symbol that is not defined in this file
    unsigned short sectionNum, type;
    // storageClass == 2 marks an external symbol; nAux is the number of
    // auxiliary entries that follow this one.  Both are unsigned bytes: as
    // plain (possibly signed) char a count above 127 would turn negative when
    // it is used to skip over the auxiliary entries.
    unsigned char storageClass, nAux;
};
/* Input stream over a character buffer that the stream owns.
   The constructor takes a (pointer, size) pair, reads from exactly that
   range, and the destructor releases the pointer with delete[].  Both are
   protected, so the class is only usable as a base.
*/
class _rstream : public std::istrstream {
private:
    const char *storage; // start of the buffer released on destruction
protected:
    _rstream(std::pair<const char*, std::streamsize> source)
        : std::istrstream(source.first, source.second),
          storage(source.first) {
    }
    ~_rstream() {
        delete[] storage;
    }
};
- /* A stream encapuslating the content of a file or the content of a string, overriding the
- >> operator to read various integer types in binary form, as well as a symbol table
- entry.
- */
- class rstream : public _rstream {
- private:
- template<class T>
- inline rstream& doRead(T &x) {
- read((char*)&x, sizeof(T));
- return *this;
- }
- static pair<const char*, streamsize> getBuf(const char *fileName) {
- ifstream raw(fileName,ios::binary | ios::in);
- if(!raw.is_open())
- stop("rstream.getBuf: Error opening file");
- raw.seekg(0,ios::end);
- streampos fileSize = raw.tellg();
- if(fileSize < 0)
- stop("rstream.getBuf: Error reading file");
- char *buf = new char[fileSize];
- raw.seekg(0,ios::beg);
- raw.read(buf, fileSize);
- return pair<const char*, streamsize>(buf,fileSize);
- }
- public:
- // construct from a string
- rstream(const char *buf,streamsize size):_rstream(pair<const char*,streamsize>(buf, size)){}
- /* construct from a file whole content is fully read once to initialize the content of
- this stream
- */
- rstream(const char *fileName):_rstream(getBuf(fileName)){}
- rstream& operator>>(int &x) {
- return doRead(x);
- }
- rstream& operator>>(unsigned &x) {
- return doRead(x);
- }
- rstream& operator>>(short &x) {
- return doRead(x);
- }
- rstream& operator>>(unsigned short &x) {
- return doRead(x);
- }
- rstream& operator>>(Symbol &e) {
- read((char*)&e, 18);
- return *this;
- }
- };
- // string table in a .OBJ file
- class StringTable {
- private:
- map<string, unsigned> directory;
- size_t length;
- char *data;
- // make <directory> from <length> bytes in <data>
- void makeDirectory(void) {
- unsigned i = 4;
- while(i < length) {
- string s = string(data + i);
- directory.insert(make_pair(s, i));
- i += s.size() + 1;
- }
- }
- // initialize <length> and <data> with contents specified by the arguments
- void init(const char *_data) {
- unsigned _length = *(unsigned*)_data;
- if(_length < sizeof(unsigned) || _length != *(unsigned*)_data)
- stop("StringTable.init: Invalid symbol table");
- if(_data[_length - 1]) {
- // to prevent runaway strings, make sure the data ends with a zero
- data = new char[length = _length + 1];
- data[_length] = 0;
- } else {
- data = new char[length = _length];
- }
- *(unsigned*)data = length;
- KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned),
- length - sizeof(unsigned));
- makeDirectory();
- }
- public:
- StringTable(rstream &f) {
- /* Construct string table by reading from f.
- */
- streampos s;
- unsigned strSize;
- char *strData;
- s = f.tellg();
- f>>strSize;
- if(strSize < sizeof(unsigned))
- stop("StringTable: Invalid string table");
- strData = new char[strSize];
- *(unsigned*)strData = strSize;
- // read the raw data into <strData>
- f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned));
- s = f.tellg() - s;
- if(s < strSize)
- stop("StringTable: Unexpected EOF");
- init(strData);
- delete[]strData;
- }
- StringTable(const set<string> &strings) {
- /* Construct string table from given strings.
- */
- char *p;
- set<string>::const_iterator it;
- size_t s;
- // count required size for data
- for(length = sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
- size_t l = (*it).size();
- if(l > (unsigned) 0xFFFFFFFF)
- stop("StringTable: String too long");
- if(l > 8) {
- length += l + 1;
- if(length > (unsigned) 0xFFFFFFFF)
- stop("StringTable: Symbol table too long");
- }
- }
- data = new char[length];
- *(unsigned*)data = length;
- // populate data and directory
- for(p = data + sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
- const string &str = *it;
- size_t l = str.size();
- if(l > 8) {
- directory.insert(make_pair(str, p - data));
- KMP_MEMCPY(p, str.c_str(), l);
- p[l] = 0;
- p += l + 1;
- }
- }
- }
- ~StringTable() {
- delete[] data;
- }
- /* Returns encoding for given string based on this string table.
- Error if string length is greater than 8 but string is not in
- the string table--returns 0.
- */
- __int64 encode(const string &str) {
- __int64 r;
- if(str.size() <= 8) {
- // encoded directly
- ((char*)&r)[7] = 0;
- KMP_STRNCPY_S((char*)&r, sizeof(r), str.c_str(), 8);
- return r;
- } else {
- // represented as index into table
- map<string,unsigned>::const_iterator it = directory.find(str);
- if(it == directory.end())
- stop("StringTable::encode: String now found in string table");
- ((unsigned*)&r)[0] = 0;
- ((unsigned*)&r)[1] = (*it).second;
- return r;
- }
- }
- /* Returns string represented by x based on this string table.
- Error if x references an invalid position in the table--returns
- the empty string.
- */
- string decode(__int64 x) const {
- if(*(unsigned*)&x == 0) {
- // represented as index into table
- unsigned &p = ((unsigned*)&x)[1];
- if(p >= length)
- stop("StringTable::decode: Invalid string table lookup");
- return string(data + p);
- } else {
- // encoded directly
- char *p = (char*)&x;
- int i;
- for(i = 0; i < 8 && p[i]; ++i);
- return string(p, i);
- }
- }
- void write(ostream &os) {
- os.write(data, length);
- }
- };
- /* for the named object file, determines the set of defined symbols and the set of undefined external symbols
- and writes them to <defined> and <undefined> respectively
- */
- void computeExternalSymbols(const char *fileName, set<string> *defined, set<string> *undefined){
- streampos fileSize;
- size_t strTabStart;
- unsigned symTabStart, symNEntries;
- rstream f(fileName);
- f.seekg(0,ios::end);
- fileSize = f.tellg();
- f.seekg(8);
- f >> symTabStart >> symNEntries;
- // seek to the string table
- f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
- if(f.eof()) {
- printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart = %u, symNEntries = %u\n",
- fileName, (unsigned long) fileSize, symTabStart, symNEntries);
- stop("computeExternalSymbols: Unexpected EOF 1");
- }
- StringTable stringTable(f); // read the string table
- if(f.tellg() != fileSize)
- stop("computeExternalSymbols: Unexpected data after string table");
- f.clear();
- f.seekg(symTabStart); // seek to the symbol table
- defined->clear(); undefined->clear();
- for(int i = 0; i < symNEntries; ++i) {
- // process each entry
- Symbol e;
- if(f.eof())
- stop("computeExternalSymbols: Unexpected EOF 2");
- f>>e;
- if(f.fail())
- stop("computeExternalSymbols: File read error");
- if(e.nAux) { // auxiliary entry: skip
- f.seekg(e.nAux * 18, ios::cur);
- i += e.nAux;
- }
- // if symbol is extern and defined in the current file, insert it
- if(e.storageClass == 2)
- if(e.sectionNum)
- defined->insert(stringTable.decode(e.name));
- else
- undefined->insert(stringTable.decode(e.name));
- }
- }
- /* For each occurrence of an external symbol in the object file named by
- by <fileName> that is a member of <hide>, renames it by prefixing
- with "__kmp_external_", writing back the file in-place
- */
- void hideSymbols(char *fileName, const set<string> &hide) {
- static const string prefix("__kmp_external_");
- set<string> strings; // set of all occurring symbols, appropriately prefixed
- streampos fileSize;
- size_t strTabStart;
- unsigned symTabStart, symNEntries;
- int i;
- rstream in(fileName);
- in.seekg(0,ios::end);
- fileSize = in.tellg();
- in.seekg(8);
- in >> symTabStart >> symNEntries;
- in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
- if(in.eof())
- stop("hideSymbols: Unexpected EOF");
- StringTable stringTableOld(in); // read original string table
- if(in.tellg() != fileSize)
- stop("hideSymbols: Unexpected data after string table");
- // compute set of occurring strings with prefix added
- for(i = 0; i < symNEntries; ++i) {
- Symbol e;
- in.seekg(symTabStart + i * 18);
- if(in.eof())
- stop("hideSymbols: Unexpected EOF");
- in >> e;
- if(in.fail())
- stop("hideSymbols: File read error");
- if(e.nAux)
- i += e.nAux;
- const string &s = stringTableOld.decode(e.name);
- // if symbol is extern and found in <hide>, prefix and insert into strings,
- // otherwise, just insert into strings without prefix
- strings.insert( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
- prefix + s : s);
- }
- ofstream out(fileName, ios::trunc | ios::out | ios::binary);
- if(!out.is_open())
- stop("hideSymbols: Error opening output file");
- // make new string table from string set
- StringTable stringTableNew = StringTable(strings);
- // copy input file to output file up to just before the symbol table
- in.seekg(0);
- char *buf = new char[symTabStart];
- in.read(buf, symTabStart);
- out.write(buf, symTabStart);
- delete []buf;
- // copy input symbol table to output symbol table with name translation
- for(i = 0; i < symNEntries; ++i) {
- Symbol e;
- in.seekg(symTabStart + i*18);
- if(in.eof())
- stop("hideSymbols: Unexpected EOF");
- in >> e;
- if(in.fail())
- stop("hideSymbols: File read error");
- const string &s = stringTableOld.decode(e.name);
- out.seekp(symTabStart + i*18);
- e.name = stringTableNew.encode( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
- prefix + s : s);
- out.write((char*)&e, 18);
- if(out.fail())
- stop("hideSymbols: File write error");
- if(e.nAux) {
- // copy auxiliary symbol table entries
- int nAux = e.nAux;
- for(int j = 1; j <= nAux; ++j) {
- in >> e;
- out.seekp(symTabStart + (i + j) * 18);
- out.write((char*)&e, 18);
- }
- i += nAux;
- }
- }
- // output string table
- stringTableNew.write(out);
- }
/* returns true iff <a> and <b> have no common element.
   Walks both sets in their sorted order in a single pass, advancing whichever side
   currently holds the smaller element; two elements are treated as equal when neither
   is less than the other.
*/
template <class T>
bool isDisjoint(const std::set<T> &a, const std::set<T> &b) {
    // 'typename' is required: const_iterator is a name that depends on T
    typename std::set<T>::const_iterator ita = a.begin();
    typename std::set<T>::const_iterator itb = b.begin();
    while(ita != a.end() && itb != b.end()) {
        const T &ta = *ita, &tb = *itb;
        if(ta < tb)
            ++ita;
        else if(tb < ta)
            ++itb;
        else
            return false;
    }
    return true;
}
- /* precondition: <defined> and <undefined> are arrays with <nTotal> elements where
- <nTotal> >= <nExternal>. The first <nExternal> elements correspond to the external object
- files and the rest correspond to the internal object files.
- postcondition: file x is said to depend on file y if undefined[x] and defined[y] are not
- disjoint. Returns the transitive closure of the set of internal object files, as a set of
- file indexes, under the 'depends on' relation, minus the set of internal object files.
- */
- set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined, set<string> *undefined) {
- set<int> *required = new set<int>;
- set<int> fresh[2];
- int i, cur = 0;
- bool changed;
- for(i = nTotal - 1; i >= nExternal; --i)
- fresh[cur].insert(i);
- do {
- changed = false;
- for(set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end(); ++it) {
- set<string> &s = undefined[*it];
- for(i = 0; i < nExternal; ++i) {
- if(required->find(i) == required->end()) {
- if(!isDisjoint(defined[i], s)) {
- // found a new qualifying element
- required->insert(i);
- fresh[1 - cur].insert(i);
- changed = true;
- }
- }
- }
- }
- fresh[cur].clear();
- cur = 1 - cur;
- } while(changed);
- return required;
- }
- int main(int argc, char **argv) {
- int nExternal, nInternal, i;
- set<string> *defined, *undefined;
- set<int>::iterator it;
- if(argc < 3)
- stop("Please specify a positive integer followed by a list of object filenames");
- nExternal = atoi(argv[1]);
- if(nExternal <= 0)
- stop("Please specify a positive integer followed by a list of object filenames");
- if(nExternal + 2 > argc)
- stop("Too few external objects");
- nInternal = argc - nExternal - 2;
- defined = new set<string>[argc - 2];
- undefined = new set<string>[argc - 2];
- // determine the set of defined and undefined external symbols
- for(i = 2; i < argc; ++i)
- computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2);
- // determine the set of required external files
- set<int> *requiredExternal = findRequiredExternal(nExternal, argc - 2, defined, undefined);
- set<string> hide;
- /* determine the set of symbols to hide--namely defined external symbols of the
- required external files
- */
- for(it = requiredExternal->begin(); it != requiredExternal->end(); ++it) {
- int idx = *it;
- set<string>::iterator it2;
- /* We have to insert one element at a time instead of inserting a range because
- the insert member function taking a range doesn't exist on Windows* OS, at least
- at the time of this writing.
- */
- for(it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2)
- hide.insert(*it2);
- }
- /* process the external files--removing those that are not required and hiding
- the appropriate symbols in the others
- */
- for(i = 0; i < nExternal; ++i)
- if(requiredExternal->find(i) != requiredExternal->end())
- hideSymbols(argv[2 + i], hide);
- else
- remove(argv[2 + i]);
- // hide the appropriate symbols in the internal files
- for(i = nExternal + 2; i < argc; ++i)
- hideSymbols(argv[i], hide);
- return 0;
- }
|