importer.pxi 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584
  1. import marshal
  2. import sys
  3. from _codecs import utf_8_decode, utf_8_encode
  4. from _frozen_importlib import _call_with_frames_removed, spec_from_loader, BuiltinImporter
  5. from _frozen_importlib_external import _os, _path_isfile, _path_isabs, path_sep, _path_join, _path_split
  6. from _io import FileIO
  7. import __res as __resource
  8. _b = lambda x: x if isinstance(x, bytes) else utf_8_encode(x)[0]
  9. _s = lambda x: x if isinstance(x, str) else utf_8_decode(x)[0]
# Names of the environment variables, config keys and resource keys used in this file.
env_entry_point = b'Y_PYTHON_ENTRY_POINT'
env_source_root = b'Y_PYTHON_SOURCE_ROOT'
cfg_source_root = b'arcadia-source-root'  # Key looked up in pyvenv.cfg by _init_venv().
env_extended_source_search = b'Y_PYTHON_EXTENDED_SOURCE_SEARCH'
res_ya_ide_venv = b'YA_IDE_VENV'
# Fall back to a placeholder name when sys.executable is empty.
executable = sys.executable or 'Y_PYTHON'
# Make the resource module importable under the name 'run_import_hook' as well.
sys.modules['run_import_hook'] = __resource
# This is the prefix in contrib/tools/python3/src/Lib/ya.make.
py_prefix = b'py/'
py_prefix_len = len(py_prefix)
# Content of the 'YA_IDE_VENV' builtin resource; it is only ever tested for truthiness in this file.
YA_IDE_VENV = __resource.find(res_ya_ide_venv)
# Truthy when the environment variable is set to a non-empty value or the YA_IDE_VENV resource is present.
Y_PYTHON_EXTENDED_SOURCE_SEARCH = _os.environ.get(env_extended_source_search) or YA_IDE_VENV
  22. def _init_venv():
  23. if not _path_isabs(executable):
  24. raise RuntimeError('path in sys.executable is not absolute: {}'.format(executable))
  25. # Creative copy-paste from site.py
  26. exe_dir, _ = _path_split(executable)
  27. site_prefix, _ = _path_split(exe_dir)
  28. libpath = _path_join(site_prefix, 'lib',
  29. 'python%d.%d' % sys.version_info[:2],
  30. 'site-packages')
  31. sys.path.insert(0, libpath)
  32. # emulate site.venv()
  33. sys.prefix = site_prefix
  34. sys.exec_prefix = site_prefix
  35. conf_basename = 'pyvenv.cfg'
  36. candidate_confs = [
  37. conffile for conffile in (
  38. _path_join(exe_dir, conf_basename),
  39. _path_join(site_prefix, conf_basename)
  40. )
  41. if _path_isfile(conffile)
  42. ]
  43. if not candidate_confs:
  44. raise RuntimeError('{} not found'.format(conf_basename))
  45. virtual_conf = candidate_confs[0]
  46. with FileIO(virtual_conf, 'r') as f:
  47. for line in f:
  48. if b'=' in line:
  49. key, _, value = line.partition(b'=')
  50. key = key.strip().lower()
  51. value = value.strip()
  52. if key == cfg_source_root:
  53. return value
  54. raise RuntimeError('{} key not found in {}'.format(cfg_source_root, virtual_conf))
  55. def _get_source_root():
  56. env_value = _os.environ.get(env_source_root)
  57. if env_value or not YA_IDE_VENV:
  58. return env_value
  59. return _init_venv()
  60. Y_PYTHON_SOURCE_ROOT = _get_source_root()
  61. def _print(*xs):
  62. """
  63. This is helpful for debugging, since automatic bytes to str conversion is
  64. not available yet. It is also possible to debug with GDB by breaking on
  65. __Pyx_AddTraceback (with Python GDB pretty printers enabled).
  66. """
  67. parts = []
  68. for s in xs:
  69. if not isinstance(s, (bytes, str)):
  70. s = str(s)
  71. parts.append(_s(s))
  72. sys.stderr.write(' '.join(parts) + '\n')
  73. def file_bytes(path):
  74. # 'open' is not avaiable yet.
  75. with FileIO(path, 'r') as f:
  76. return f.read()
  77. def iter_keys(prefix):
  78. l = len(prefix)
  79. for idx in range(__resource.count()):
  80. key = __resource.key_by_index(idx)
  81. if key.startswith(prefix):
  82. yield key, key[l:]
  83. def iter_py_modules(with_keys=False):
  84. for key, path in iter_keys(b'resfs/file/' + py_prefix):
  85. if path.endswith(b'.py'): # It may also end with '.pyc'.
  86. mod = _s(path[:-3].replace(b'/', b'.'))
  87. if with_keys:
  88. yield key, mod
  89. else:
  90. yield mod
  91. def iter_prefixes(s):
  92. i = s.find('.')
  93. while i >= 0:
  94. yield s[:i]
  95. i = s.find('.', i + 1)
  96. def resfs_resolve(path):
  97. """
  98. Return the absolute path of a root-relative path if it exists.
  99. """
  100. path = _b(path)
  101. if Y_PYTHON_SOURCE_ROOT:
  102. if not path.startswith(Y_PYTHON_SOURCE_ROOT):
  103. path = _b(path_sep).join((Y_PYTHON_SOURCE_ROOT, path))
  104. if _path_isfile(path):
  105. return path
  106. def resfs_src(key, resfs_file=False):
  107. """
  108. Return the root-relative file path of a resource key.
  109. """
  110. if resfs_file:
  111. key = b'resfs/file/' + _b(key)
  112. return __resource.find(b'resfs/src/' + _b(key))
  113. def resfs_read(path, builtin=None):
  114. """
  115. Return the bytes of the resource file at path, or None.
  116. If builtin is True, do not look for it on the filesystem.
  117. If builtin is False, do not look in the builtin resources.
  118. """
  119. if builtin is not True:
  120. arcpath = resfs_src(path, resfs_file=True)
  121. if arcpath:
  122. fspath = resfs_resolve(arcpath)
  123. if fspath:
  124. return file_bytes(fspath)
  125. if builtin is not False:
  126. return __resource.find(b'resfs/file/' + _b(path))
  127. def resfs_files(prefix=b''):
  128. """
  129. List builtin resource file paths.
  130. """
  131. return [key[11:] for key, _ in iter_keys(b'resfs/file/' + _b(prefix))]
  132. def mod_path(mod):
  133. """
  134. Return the resfs path to the source code of the module with the given name.
  135. """
  136. return py_prefix + _b(mod).replace(b'.', b'/') + b'.py'
  137. class ResourceImporter(object):
  138. """ A meta_path importer that loads code from built-in resources.
  139. """
  140. def __init__(self):
  141. self.memory = set(iter_py_modules()) # Set of importable module names.
  142. self.source_map = {} # Map from file names to module names.
  143. self._source_name = {} # Map from original to altered module names.
  144. self._package_prefix = ''
  145. if Y_PYTHON_SOURCE_ROOT and Y_PYTHON_EXTENDED_SOURCE_SEARCH:
  146. self.arcadia_source_finder = ArcadiaSourceFinder(_s(Y_PYTHON_SOURCE_ROOT))
  147. else:
  148. self.arcadia_source_finder = None
  149. for p in list(self.memory) + list(sys.builtin_module_names):
  150. for pp in iter_prefixes(p):
  151. k = pp + '.__init__'
  152. if k not in self.memory:
  153. self.memory.add(k)
  154. def for_package(self, name):
  155. import copy
  156. importer = copy.copy(self)
  157. importer._package_prefix = name + '.'
  158. return importer
  159. def _find_mod_path(self, fullname):
  160. """Find arcadia relative path by module name"""
  161. relpath = resfs_src(mod_path(fullname), resfs_file=True)
  162. if relpath or not self.arcadia_source_finder:
  163. return relpath
  164. return self.arcadia_source_finder.get_module_path(fullname)
  165. def find_spec(self, fullname, path=None, target=None):
  166. try:
  167. is_package = self.is_package(fullname)
  168. except ImportError:
  169. return None
  170. return spec_from_loader(fullname, self, is_package=is_package)
  171. def find_module(self, fullname, path=None):
  172. """For backward compatibility."""
  173. spec = self.find_spec(fullname, path)
  174. return spec.loader if spec is not None else None
  175. def create_module(self, spec):
  176. """Use default semantics for module creation."""
  177. def exec_module(self, module):
  178. code = self.get_code(module.__name__)
  179. module.__file__ = code.co_filename
  180. if self.is_package(module.__name__):
  181. module.__path__= [executable + path_sep + module.__name__.replace('.', path_sep)]
  182. # exec(code, module.__dict__)
  183. _call_with_frames_removed(exec, code, module.__dict__)
  184. # PEP-302 extension 1 of 3: data loader.
  185. def get_data(self, path):
  186. path = _b(path)
  187. abspath = resfs_resolve(path)
  188. if abspath:
  189. return file_bytes(abspath)
  190. path = path.replace(_b('\\'), _b('/'))
  191. data = resfs_read(path, builtin=True)
  192. if data is None:
  193. raise IOError(path) # Y_PYTHON_ENTRY_POINT=:resource_files
  194. return data
  195. # PEP-302 extension 2 of 3: get __file__ without importing.
  196. def get_filename(self, fullname):
  197. modname = fullname
  198. if self.is_package(fullname):
  199. fullname += '.__init__'
  200. relpath = self._find_mod_path(fullname)
  201. if isinstance(relpath, bytes):
  202. relpath = _s(relpath)
  203. return relpath or modname
  204. # PEP-302 extension 3 of 3: packaging introspection.
  205. # Used by `linecache` (while printing tracebacks) unless module filename
  206. # exists on the filesystem.
  207. def get_source(self, fullname):
  208. fullname = self._source_name.get(fullname) or fullname
  209. if self.is_package(fullname):
  210. fullname += '.__init__'
  211. relpath = self.get_filename(fullname)
  212. if relpath:
  213. abspath = resfs_resolve(relpath)
  214. if abspath:
  215. return _s(file_bytes(abspath))
  216. data = resfs_read(mod_path(fullname))
  217. return _s(data) if data else ''
  218. def get_code(self, fullname):
  219. modname = fullname
  220. if self.is_package(fullname):
  221. fullname += '.__init__'
  222. path = mod_path(fullname)
  223. relpath = self._find_mod_path(fullname)
  224. if relpath:
  225. abspath = resfs_resolve(relpath)
  226. if abspath:
  227. data = file_bytes(abspath)
  228. return compile(data, _s(abspath), 'exec', dont_inherit=True)
  229. yapyc_path = path + b'.yapyc3'
  230. yapyc_data = resfs_read(yapyc_path, builtin=True)
  231. if yapyc_data:
  232. return marshal.loads(yapyc_data)
  233. else:
  234. py_data = resfs_read(path, builtin=True)
  235. if py_data:
  236. return compile(py_data, _s(relpath), 'exec', dont_inherit=True)
  237. else:
  238. # This covers packages with no __init__.py in resources.
  239. return compile('', modname, 'exec', dont_inherit=True)
  240. def is_package(self, fullname):
  241. if fullname in self.memory:
  242. return False
  243. if fullname + '.__init__' in self.memory:
  244. return True
  245. if self.arcadia_source_finder:
  246. return self.arcadia_source_finder.is_package(fullname)
  247. raise ImportError(fullname)
  248. # Extension for contrib/python/coverage.
  249. def file_source(self, filename):
  250. """
  251. Return the key of the module source by its resource path.
  252. """
  253. if not self.source_map:
  254. for key, mod in iter_py_modules(with_keys=True):
  255. path = self.get_filename(mod)
  256. self.source_map[path] = key
  257. if filename in self.source_map:
  258. return self.source_map[filename]
  259. if resfs_read(filename, builtin=True) is not None:
  260. return b'resfs/file/' + _b(filename)
  261. return b''
  262. # Extension for pkgutil.iter_modules.
  263. def iter_modules(self, prefix=''):
  264. import re
  265. rx = re.compile(re.escape(self._package_prefix) + r'([^.]+)(\.__init__)?$')
  266. for p in self.memory:
  267. m = rx.match(p)
  268. if m:
  269. yield prefix + m.group(1), m.group(2) is not None
  270. if self.arcadia_source_finder:
  271. for m in self.arcadia_source_finder.iter_modules(self._package_prefix, prefix):
  272. yield m
  273. def get_resource_reader(self, fullname):
  274. try:
  275. if not self.is_package(fullname):
  276. return None
  277. except ImportError:
  278. return None
  279. return _ResfsResourceReader(self, fullname)
  280. class _ResfsResourceReader:
  281. def __init__(self, importer, fullname):
  282. self.importer = importer
  283. self.fullname = fullname
  284. import os
  285. self.prefix = "{}/".format(os.path.dirname(self.importer.get_filename(self.fullname)))
  286. def open_resource(self, resource):
  287. path = f'{self.prefix}{resource}'
  288. from io import BytesIO
  289. try:
  290. return BytesIO(self.importer.get_data(path))
  291. except OSError:
  292. raise FileNotFoundError(path)
  293. def resource_path(self, resource):
  294. # All resources are in the binary file, so there is no path to the file.
  295. # Raising FileNotFoundError tells the higher level API to extract the
  296. # binary data and create a temporary file.
  297. raise FileNotFoundError
  298. def is_resource(self, name):
  299. path = f'{self.prefix}{name}'
  300. try:
  301. self.importer.get_data(path)
  302. except OSError:
  303. return False
  304. return True
  305. def contents(self):
  306. subdirs_seen = set()
  307. for key in resfs_files(self.prefix):
  308. relative = key[len(self.prefix):]
  309. res_or_subdir, *other = relative.split(b'/')
  310. if not other:
  311. yield _s(res_or_subdir)
  312. elif res_or_subdir not in subdirs_seen:
  313. subdirs_seen.add(res_or_subdir)
  314. yield _s(res_or_subdir)
  315. class BuiltinSubmoduleImporter(BuiltinImporter):
  316. @classmethod
  317. def find_spec(cls, fullname, path=None, target=None):
  318. if path is not None:
  319. return super().find_spec(fullname, None, target)
  320. else:
  321. return None
  322. class ArcadiaSourceFinder:
  323. """
  324. Search modules and packages in arcadia source tree.
  325. See https://wiki.yandex-team.ru/devtools/extended-python-source-search/ for details
  326. """
  327. NAMESPACE_PREFIX = b'py/namespace/'
  328. PY_EXT = '.py'
  329. YA_MAKE = 'ya.make'
  330. S_IFDIR = 0o040000
  331. def __init__(self, source_root):
  332. self.source_root = source_root
  333. self.module_path_cache = {'': set()}
  334. for key, dirty_path in iter_keys(self.NAMESPACE_PREFIX):
  335. # dirty_path contains unique prefix to prevent repeatable keys in the resource storage
  336. path = dirty_path.split(b'/', 1)[1]
  337. namespaces = __resource.find(key).split(b':')
  338. for n in namespaces:
  339. package_name = _s(n.rstrip(b'.'))
  340. self.module_path_cache.setdefault(package_name, set()).add(_s(path))
  341. # Fill parents with default empty path set if parent doesn't exist in the cache yet
  342. while package_name:
  343. package_name = package_name.rpartition('.')[0]
  344. if package_name in self.module_path_cache:
  345. break
  346. self.module_path_cache.setdefault(package_name, set())
  347. for package_name in self.module_path_cache.keys():
  348. self._add_parent_dirs(package_name, visited=set())
  349. def get_module_path(self, fullname):
  350. """
  351. Find file path for module 'fullname'.
  352. For packages caller pass fullname as 'package.__init__'.
  353. Return None if nothing is found.
  354. """
  355. try:
  356. if not self.is_package(fullname):
  357. return _b(self._cache_module_path(fullname))
  358. except ImportError:
  359. pass
  360. def is_package(self, fullname):
  361. """Check if fullname is a package. Raise ImportError if fullname is not found"""
  362. path = self._cache_module_path(fullname)
  363. if isinstance(path, set):
  364. return True
  365. if isinstance(path, str):
  366. return False
  367. raise ImportError(fullname)
  368. def iter_modules(self, package_prefix, prefix):
  369. paths = self._cache_module_path(package_prefix.rstrip('.'))
  370. if paths is not None:
  371. # Note: it's ok to yield duplicates because pkgutil discards them
  372. # Yield from cache
  373. import re
  374. rx = re.compile(re.escape(package_prefix) + r'([^.]+)$')
  375. # Save result to temporary list to prevent 'RuntimeError: dictionary changed size during iteration'
  376. found = []
  377. for mod, path in self.module_path_cache.items():
  378. if path is not None:
  379. m = rx.match(mod)
  380. if m:
  381. found.append((prefix + m.group(1), self.is_package(mod)))
  382. for cm in found:
  383. yield cm
  384. # Yield from file system
  385. for path in paths:
  386. abs_path = _path_join(self.source_root, path)
  387. for dir_item in _os.listdir(abs_path):
  388. if self._path_is_simple_dir(_path_join(abs_path, dir_item)):
  389. yield prefix + dir_item, True
  390. elif dir_item.endswith(self.PY_EXT) and _path_isfile(_path_join(abs_path, dir_item)):
  391. yield prefix + dir_item[:-len(self.PY_EXT)], False
  392. def _isdir(self, path):
  393. """ Unlike _path_isdir() this function don't follow symlink """
  394. try:
  395. stat_info = _os.lstat(path)
  396. except OSError:
  397. return False
  398. return (stat_info.st_mode & 0o170000) == self.S_IFDIR
  399. def _path_is_simple_dir(self, abs_path):
  400. """
  401. Check if path is a directory but doesn't contain ya.make file.
  402. We don't want to steal directory from nested project and treat it as a package
  403. """
  404. return self._isdir(abs_path) and not _path_isfile(_path_join(abs_path, self.YA_MAKE))
  405. def _find_module_in_paths(self, find_package_only, paths, module):
  406. """Auxiliary method. See _cache_module_path() for details"""
  407. if paths:
  408. package_paths = set()
  409. for path in paths:
  410. rel_path = _path_join(path, module)
  411. if not find_package_only:
  412. # Check if file_path is a module
  413. module_path = rel_path + self.PY_EXT
  414. if _path_isfile(_path_join(self.source_root, module_path)):
  415. return module_path
  416. # Check if file_path is a package
  417. if self._path_is_simple_dir(_path_join(self.source_root, rel_path)):
  418. package_paths.add(rel_path)
  419. if package_paths:
  420. return package_paths
  421. def _cache_module_path(self, fullname, find_package_only=False):
  422. """
  423. Find module path or package directory paths and save result in the cache
  424. find_package_only=True - don't try to find module
  425. Returns:
  426. List of relative package paths - for a package
  427. Relative module path - for a module
  428. None - module or package is not found
  429. """
  430. if fullname not in self.module_path_cache:
  431. parent, _, tail = fullname.rpartition('.')
  432. parent_paths = self._cache_module_path(parent, find_package_only=True)
  433. self.module_path_cache[fullname] = self._find_module_in_paths(find_package_only, parent_paths, tail)
  434. return self.module_path_cache[fullname]
  435. def _add_parent_dirs(self, package_name, visited):
  436. if not package_name or package_name in visited:
  437. return
  438. visited.add(package_name)
  439. parent, _, tail = package_name.rpartition('.')
  440. self._add_parent_dirs(parent, visited)
  441. paths = self.module_path_cache[package_name]
  442. for parent_path in self.module_path_cache[parent]:
  443. rel_path = _path_join(parent_path, tail)
  444. if self._path_is_simple_dir(_path_join(self.source_root, rel_path)):
  445. paths.add(rel_path)
  446. def excepthook(*args, **kws):
  447. # traceback module cannot be imported at module level, because interpreter
  448. # is not fully initialized yet
  449. import traceback
  450. return traceback.print_exception(*args, **kws)
# The single importer instance that is registered with the import system further down.
importer = ResourceImporter()
  452. def executable_path_hook(path):
  453. if path == executable:
  454. return importer
  455. if path.startswith(executable + path_sep):
  456. return importer.for_package(path[len(executable + path_sep):].replace(path_sep, '.'))
  457. raise ImportError(path)
# Hook the importer into the import system.
if YA_IDE_VENV:
    # With the IDE venv resource present the resource importer is appended,
    # i.e. finders and path entries that are already registered are tried first.
    sys.meta_path.append(importer)
    sys.meta_path.append(BuiltinSubmoduleImporter)
    if executable not in sys.path:
        sys.path.append(executable)
    sys.path_hooks.append(executable_path_hook)
else:
    # Otherwise the resource importer goes first. The two insert(0, ...) calls
    # leave `importer` ahead of BuiltinSubmoduleImporter.
    sys.meta_path.insert(0, BuiltinSubmoduleImporter)
    sys.meta_path.insert(0, importer)
    if executable not in sys.path:
        sys.path.insert(0, executable)
    sys.path_hooks.insert(0, executable_path_hook)

# NOTE(review): the original indentation was lost in extraction; this line is
# assumed to be top-level (executed in both branches) - confirm against the
# upstream file.
sys.path_importer_cache[executable] = importer

# Indicator that modules and resources are built-in rather than on the file system.
sys.is_standalone_binary = True
sys.frozen = True

# Set of names of importable modules.
sys.extra_modules = importer.memory

# Use custom implementation of traceback printer.
# Built-in printer (PyTraceBack_Print) does not support custom module loaders
sys.excepthook = excepthook
  476. # Use custom implementation of traceback printer.
  477. # Built-in printer (PyTraceBack_Print) does not support custom module loaders
  478. sys.excepthook = excepthook