importer.pxi 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609
  1. import marshal
  2. import sys
  3. from _codecs import utf_8_decode, utf_8_encode
  4. from _frozen_importlib import _call_with_frames_removed, spec_from_loader, BuiltinImporter
  5. from _frozen_importlib_external import _os, _path_isfile, _path_isabs, path_sep, _path_join, _path_split
  6. from _io import FileIO
  7. import __res as __resource
  8. _b = lambda x: x if isinstance(x, bytes) else utf_8_encode(x)[0]
  9. _s = lambda x: x if isinstance(x, str) else utf_8_decode(x)[0]
  10. env_entry_point = b'Y_PYTHON_ENTRY_POINT'
  11. env_source_root = b'Y_PYTHON_SOURCE_ROOT'
  12. cfg_source_root = b'arcadia-source-root'
  13. env_extended_source_search = b'Y_PYTHON_EXTENDED_SOURCE_SEARCH'
  14. res_ya_ide_venv = b'YA_IDE_VENV'
  15. executable = sys.executable or 'Y_PYTHON'
  16. sys.modules['run_import_hook'] = __resource
  17. # This is the prefix in contrib/tools/python3/src/Lib/ya.make.
  18. py_prefix = b'py/'
  19. py_prefix_len = len(py_prefix)
  20. YA_IDE_VENV = __resource.find(res_ya_ide_venv)
  21. Y_PYTHON_EXTENDED_SOURCE_SEARCH = _os.environ.get(env_extended_source_search) or YA_IDE_VENV
  22. def _init_venv():
  23. if not _path_isabs(executable):
  24. raise RuntimeError('path in sys.executable is not absolute: {}'.format(executable))
  25. # Creative copy-paste from site.py
  26. exe_dir, _ = _path_split(executable)
  27. site_prefix, _ = _path_split(exe_dir)
  28. libpath = _path_join(site_prefix, 'lib',
  29. 'python%d.%d' % sys.version_info[:2],
  30. 'site-packages')
  31. sys.path.insert(0, libpath)
  32. # emulate site.venv()
  33. sys.prefix = site_prefix
  34. sys.exec_prefix = site_prefix
  35. conf_basename = 'pyvenv.cfg'
  36. candidate_confs = [
  37. conffile for conffile in (
  38. _path_join(exe_dir, conf_basename),
  39. _path_join(site_prefix, conf_basename)
  40. )
  41. if _path_isfile(conffile)
  42. ]
  43. if not candidate_confs:
  44. raise RuntimeError('{} not found'.format(conf_basename))
  45. virtual_conf = candidate_confs[0]
  46. with FileIO(virtual_conf, 'r') as f:
  47. for line in f:
  48. if b'=' in line:
  49. key, _, value = line.partition(b'=')
  50. key = key.strip().lower()
  51. value = value.strip()
  52. if key == cfg_source_root:
  53. return value
  54. raise RuntimeError('{} key not found in {}'.format(cfg_source_root, virtual_conf))
  55. def _get_source_root():
  56. env_value = _os.environ.get(env_source_root)
  57. if env_value or not YA_IDE_VENV:
  58. return env_value
  59. return _init_venv()
  60. Y_PYTHON_SOURCE_ROOT = _get_source_root()
  61. def _print(*xs):
  62. """
  63. This is helpful for debugging, since automatic bytes to str conversion is
  64. not available yet. It is also possible to debug with GDB by breaking on
  65. __Pyx_AddTraceback (with Python GDB pretty printers enabled).
  66. """
  67. parts = []
  68. for s in xs:
  69. if not isinstance(s, (bytes, str)):
  70. s = str(s)
  71. parts.append(_s(s))
  72. sys.stderr.write(' '.join(parts) + '\n')
  73. def file_bytes(path):
  74. # 'open' is not avaiable yet.
  75. with FileIO(path, 'r') as f:
  76. return f.read()
  77. def iter_keys(prefix):
  78. l = len(prefix)
  79. for idx in range(__resource.count()):
  80. key = __resource.key_by_index(idx)
  81. if key.startswith(prefix):
  82. yield key, key[l:]
  83. def iter_py_modules(with_keys=False):
  84. for key, path in iter_keys(b'resfs/file/' + py_prefix):
  85. if path.endswith(b'.py'): # It may also end with '.pyc'.
  86. mod = _s(path[:-3].replace(b'/', b'.'))
  87. if with_keys:
  88. yield key, mod
  89. else:
  90. yield mod
  91. def py_src_key(filename):
  92. return py_prefix + _b(filename)
  93. def iter_prefixes(s):
  94. i = s.find('.')
  95. while i >= 0:
  96. yield s[:i]
  97. i = s.find('.', i + 1)
  98. def resfs_resolve(path):
  99. """
  100. Return the absolute path of a root-relative path if it exists.
  101. """
  102. path = _b(path)
  103. if Y_PYTHON_SOURCE_ROOT:
  104. if not path.startswith(Y_PYTHON_SOURCE_ROOT):
  105. path = _b(path_sep).join((Y_PYTHON_SOURCE_ROOT, path))
  106. if _path_isfile(path):
  107. return path
  108. def resfs_src(key, resfs_file=False):
  109. """
  110. Return the root-relative file path of a resource key.
  111. """
  112. if resfs_file:
  113. key = b'resfs/file/' + _b(key)
  114. return __resource.find(b'resfs/src/' + _b(key))
  115. def resfs_read(path, builtin=None):
  116. """
  117. Return the bytes of the resource file at path, or None.
  118. If builtin is True, do not look for it on the filesystem.
  119. If builtin is False, do not look in the builtin resources.
  120. """
  121. if builtin is not True:
  122. arcpath = resfs_src(path, resfs_file=True)
  123. if arcpath:
  124. fspath = resfs_resolve(arcpath)
  125. if fspath:
  126. return file_bytes(fspath)
  127. if builtin is not False:
  128. return __resource.find(b'resfs/file/' + _b(path))
  129. def resfs_files(prefix=b''):
  130. """
  131. List builtin resource file paths.
  132. """
  133. return [key[11:] for key, _ in iter_keys(b'resfs/file/' + _b(prefix))]
  134. def mod_path(mod):
  135. """
  136. Return the resfs path to the source code of the module with the given name.
  137. """
  138. return py_prefix + _b(mod).replace(b'.', b'/') + b'.py'
  139. class ResourceImporter(object):
  140. """ A meta_path importer that loads code from built-in resources.
  141. """
  142. def __init__(self):
  143. self.memory = set(iter_py_modules()) # Set of importable module names.
  144. self.source_map = {} # Map from file names to module names.
  145. self._source_name = {} # Map from original to altered module names.
  146. self._package_prefix = ''
  147. if Y_PYTHON_SOURCE_ROOT and Y_PYTHON_EXTENDED_SOURCE_SEARCH:
  148. self.arcadia_source_finder = ArcadiaSourceFinder(_s(Y_PYTHON_SOURCE_ROOT))
  149. else:
  150. self.arcadia_source_finder = None
  151. for p in list(self.memory) + list(sys.builtin_module_names):
  152. for pp in iter_prefixes(p):
  153. k = pp + '.__init__'
  154. if k not in self.memory:
  155. self.memory.add(k)
  156. def for_package(self, name):
  157. import copy
  158. importer = copy.copy(self)
  159. importer._package_prefix = name + '.'
  160. return importer
  161. def _find_mod_path(self, fullname):
  162. """Find arcadia relative path by module name"""
  163. relpath = resfs_src(mod_path(fullname), resfs_file=True)
  164. if relpath or not self.arcadia_source_finder:
  165. return relpath
  166. return self.arcadia_source_finder.get_module_path(fullname)
  167. def find_spec(self, fullname, path=None, target=None):
  168. try:
  169. is_package = self.is_package(fullname)
  170. except ImportError:
  171. return None
  172. return spec_from_loader(fullname, self, is_package=is_package)
  173. def find_module(self, fullname, path=None):
  174. """For backward compatibility."""
  175. spec = self.find_spec(fullname, path)
  176. return spec.loader if spec is not None else None
  177. def create_module(self, spec):
  178. """Use default semantics for module creation."""
  179. def exec_module(self, module):
  180. code = self.get_code(module.__name__)
  181. module.__file__ = code.co_filename
  182. if self.is_package(module.__name__):
  183. module.__path__= [executable + path_sep + module.__name__.replace('.', path_sep)]
  184. # exec(code, module.__dict__)
  185. _call_with_frames_removed(exec, code, module.__dict__)
  186. # PEP-302 extension 1 of 3: data loader.
  187. def get_data(self, path):
  188. path = _b(path)
  189. abspath = resfs_resolve(path)
  190. if abspath:
  191. return file_bytes(abspath)
  192. path = path.replace(_b('\\'), _b('/'))
  193. data = resfs_read(path, builtin=True)
  194. if data is None:
  195. raise IOError(path) # Y_PYTHON_ENTRY_POINT=:resource_files
  196. return data
  197. # PEP-302 extension 2 of 3: get __file__ without importing.
  198. def get_filename(self, fullname):
  199. modname = fullname
  200. if self.is_package(fullname):
  201. fullname += '.__init__'
  202. relpath = self._find_mod_path(fullname)
  203. if isinstance(relpath, bytes):
  204. relpath = _s(relpath)
  205. return relpath or modname
  206. # PEP-302 extension 3 of 3: packaging introspection.
  207. # Used by `linecache` (while printing tracebacks) unless module filename
  208. # exists on the filesystem.
  209. def get_source(self, fullname):
  210. fullname = self._source_name.get(fullname) or fullname
  211. if self.is_package(fullname):
  212. fullname += '.__init__'
  213. relpath = self.get_filename(fullname)
  214. if relpath:
  215. abspath = resfs_resolve(relpath)
  216. if abspath:
  217. return _s(file_bytes(abspath))
  218. data = resfs_read(mod_path(fullname))
  219. return _s(data) if data else ''
  220. def get_code(self, fullname):
  221. modname = fullname
  222. if self.is_package(fullname):
  223. fullname += '.__init__'
  224. path = mod_path(fullname)
  225. relpath = self._find_mod_path(fullname)
  226. if relpath:
  227. abspath = resfs_resolve(relpath)
  228. if abspath:
  229. data = file_bytes(abspath)
  230. return compile(data, _s(abspath), 'exec', dont_inherit=True)
  231. yapyc_path = path + b'.yapyc3'
  232. yapyc_data = resfs_read(yapyc_path, builtin=True)
  233. if yapyc_data:
  234. return marshal.loads(yapyc_data)
  235. else:
  236. py_data = resfs_read(path, builtin=True)
  237. if py_data:
  238. return compile(py_data, _s(relpath), 'exec', dont_inherit=True)
  239. else:
  240. # This covers packages with no __init__.py in resources.
  241. return compile('', modname, 'exec', dont_inherit=True)
  242. def is_package(self, fullname):
  243. if fullname in self.memory:
  244. return False
  245. if fullname + '.__init__' in self.memory:
  246. return True
  247. if self.arcadia_source_finder:
  248. return self.arcadia_source_finder.is_package(fullname)
  249. raise ImportError(fullname)
  250. # Extension for contrib/python/coverage.
  251. def file_source(self, filename):
  252. """
  253. Return the key of the module source by its resource path.
  254. """
  255. if not self.source_map:
  256. for key, mod in iter_py_modules(with_keys=True):
  257. path = self.get_filename(mod)
  258. self.source_map[path] = key
  259. if filename in self.source_map:
  260. return self.source_map[filename]
  261. if resfs_read(filename, builtin=True) is not None:
  262. return b'resfs/file/' + _b(filename)
  263. return b''
  264. # Extension for pkgutil.iter_modules.
  265. def iter_modules(self, prefix=''):
  266. import re
  267. rx = re.compile(re.escape(self._package_prefix) + r'([^.]+)(\.__init__)?$')
  268. for p in self.memory:
  269. m = rx.match(p)
  270. if m:
  271. yield prefix + m.group(1), m.group(2) is not None
  272. if self.arcadia_source_finder:
  273. for m in self.arcadia_source_finder.iter_modules(self._package_prefix, prefix):
  274. yield m
  275. def get_resource_reader(self, fullname):
  276. try:
  277. if not self.is_package(fullname):
  278. return None
  279. except ImportError:
  280. return None
  281. return _ResfsResourceReader(self, fullname)
  282. class _ResfsResourceReader:
  283. def __init__(self, importer, fullname):
  284. self.importer = importer
  285. self.fullname = fullname
  286. import os
  287. self.prefix = "{}/".format(os.path.dirname(self.importer.get_filename(self.fullname)))
  288. def open_resource(self, resource):
  289. path = f'{self.prefix}{resource}'
  290. from io import BytesIO
  291. try:
  292. return BytesIO(self.importer.get_data(path))
  293. except OSError:
  294. raise FileNotFoundError(path)
  295. def resource_path(self, resource):
  296. # All resources are in the binary file, so there is no path to the file.
  297. # Raising FileNotFoundError tells the higher level API to extract the
  298. # binary data and create a temporary file.
  299. raise FileNotFoundError
  300. def is_resource(self, name):
  301. path = f'{self.prefix}{name}'
  302. try:
  303. self.importer.get_data(path)
  304. except OSError:
  305. return False
  306. return True
  307. def contents(self):
  308. subdirs_seen = set()
  309. for key in resfs_files(self.prefix):
  310. relative = key[len(self.prefix):]
  311. res_or_subdir, *other = relative.split(b'/')
  312. if not other:
  313. yield _s(res_or_subdir)
  314. elif res_or_subdir not in subdirs_seen:
  315. subdirs_seen.add(res_or_subdir)
  316. yield _s(res_or_subdir)
  317. class BuiltinSubmoduleImporter(BuiltinImporter):
  318. @classmethod
  319. def find_spec(cls, fullname, path=None, target=None):
  320. if path is not None:
  321. return super().find_spec(fullname, None, target)
  322. else:
  323. return None
  324. class ArcadiaSourceFinder:
  325. """
  326. Search modules and packages in arcadia source tree.
  327. See https://wiki.yandex-team.ru/devtools/extended-python-source-search/ for details
  328. """
  329. NAMESPACE_PREFIX = b'py/namespace/'
  330. PY_EXT = '.py'
  331. YA_MAKE = 'ya.make'
  332. S_IFDIR = 0o040000
  333. def __init__(self, source_root):
  334. self.source_root = source_root
  335. self.module_path_cache = {'': set()}
  336. for key, dirty_path in iter_keys(self.NAMESPACE_PREFIX):
  337. # dirty_path contains unique prefix to prevent repeatable keys in the resource storage
  338. path = dirty_path.split(b'/', 1)[1]
  339. namespaces = __resource.find(key).split(b':')
  340. for n in namespaces:
  341. package_name = _s(n.rstrip(b'.'))
  342. self.module_path_cache.setdefault(package_name, set()).add(_s(path))
  343. # Fill parents with default empty path set if parent doesn't exist in the cache yet
  344. while package_name:
  345. package_name = package_name.rpartition('.')[0]
  346. if package_name in self.module_path_cache:
  347. break
  348. self.module_path_cache.setdefault(package_name, set())
  349. for package_name in self.module_path_cache.keys():
  350. self._add_parent_dirs(package_name, visited=set())
  351. def get_module_path(self, fullname):
  352. """
  353. Find file path for module 'fullname'.
  354. For packages caller pass fullname as 'package.__init__'.
  355. Return None if nothing is found.
  356. """
  357. try:
  358. if not self.is_package(fullname):
  359. return _b(self._cache_module_path(fullname))
  360. except ImportError:
  361. pass
  362. def is_package(self, fullname):
  363. """Check if fullname is a package. Raise ImportError if fullname is not found"""
  364. path = self._cache_module_path(fullname)
  365. if isinstance(path, set):
  366. return True
  367. if isinstance(path, str):
  368. return False
  369. raise ImportError(fullname)
  370. def iter_modules(self, package_prefix, prefix):
  371. paths = self._cache_module_path(package_prefix.rstrip('.'))
  372. if paths is not None:
  373. # Note: it's ok to yield duplicates because pkgutil discards them
  374. # Yield from cache
  375. import re
  376. rx = re.compile(re.escape(package_prefix) + r'([^.]+)$')
  377. # Save result to temporary list to prevent 'RuntimeError: dictionary changed size during iteration'
  378. found = []
  379. for mod, path in self.module_path_cache.items():
  380. if path is not None:
  381. m = rx.match(mod)
  382. if m:
  383. found.append((prefix + m.group(1), self.is_package(mod)))
  384. for cm in found:
  385. yield cm
  386. # Yield from file system
  387. for path in paths:
  388. abs_path = _path_join(self.source_root, path)
  389. for dir_item in _os.listdir(abs_path):
  390. if self._path_is_simple_dir(_path_join(abs_path, dir_item)):
  391. yield prefix + dir_item, True
  392. elif dir_item.endswith(self.PY_EXT) and _path_isfile(_path_join(abs_path, dir_item)):
  393. yield prefix + dir_item[:-len(self.PY_EXT)], False
  394. def _isdir(self, path):
  395. """ Unlike _path_isdir() this function don't follow symlink """
  396. try:
  397. stat_info = _os.lstat(path)
  398. except OSError:
  399. return False
  400. return (stat_info.st_mode & 0o170000) == self.S_IFDIR
  401. def _path_is_simple_dir(self, abs_path):
  402. """
  403. Check if path is a directory but doesn't contain ya.make file.
  404. We don't want to steal directory from nested project and treat it as a package
  405. """
  406. return self._isdir(abs_path) and not _path_isfile(_path_join(abs_path, self.YA_MAKE))
  407. def _find_module_in_paths(self, find_package_only, paths, module):
  408. """Auxiliary method. See _cache_module_path() for details"""
  409. if paths:
  410. package_paths = set()
  411. for path in paths:
  412. rel_path = _path_join(path, module)
  413. if not find_package_only:
  414. # Check if file_path is a module
  415. module_path = rel_path + self.PY_EXT
  416. if _path_isfile(_path_join(self.source_root, module_path)):
  417. return module_path
  418. # Check if file_path is a package
  419. if self._path_is_simple_dir(_path_join(self.source_root, rel_path)):
  420. package_paths.add(rel_path)
  421. if package_paths:
  422. return package_paths
  423. def _cache_module_path(self, fullname, find_package_only=False):
  424. """
  425. Find module path or package directory paths and save result in the cache
  426. find_package_only=True - don't try to find module
  427. Returns:
  428. List of relative package paths - for a package
  429. Relative module path - for a module
  430. None - module or package is not found
  431. """
  432. if fullname not in self.module_path_cache:
  433. parent, _, tail = fullname.rpartition('.')
  434. parent_paths = self._cache_module_path(parent, find_package_only=True)
  435. self.module_path_cache[fullname] = self._find_module_in_paths(find_package_only, parent_paths, tail)
  436. return self.module_path_cache[fullname]
  437. def _add_parent_dirs(self, package_name, visited):
  438. if not package_name or package_name in visited:
  439. return
  440. visited.add(package_name)
  441. parent, _, tail = package_name.rpartition('.')
  442. self._add_parent_dirs(parent, visited)
  443. paths = self.module_path_cache[package_name]
  444. for parent_path in self.module_path_cache[parent]:
  445. rel_path = _path_join(parent_path, tail)
  446. if self._path_is_simple_dir(_path_join(self.source_root, rel_path)):
  447. paths.add(rel_path)
  448. def excepthook(*args, **kws):
  449. # traceback module cannot be imported at module level, because interpreter
  450. # is not fully initialized yet
  451. import traceback
  452. return traceback.print_exception(*args, **kws)
  453. importer = ResourceImporter()
  454. def executable_path_hook(path):
  455. if path == executable:
  456. return importer
  457. if path.startswith(executable + path_sep):
  458. return importer.for_package(path[len(executable + path_sep):].replace(path_sep, '.'))
  459. raise ImportError(path)
  460. def get_path0():
  461. """
  462. An incomplete and simplified version of _PyPathConfig_ComputeSysPath0.
  463. We need this to somewhat properly emulate the behaviour of a normal python interpreter
  464. when using ya ide venv.
  465. """
  466. if not sys.argv:
  467. return
  468. argv0 = sys.argv[0]
  469. have_module_arg = argv0 == '-m'
  470. if have_module_arg:
  471. return _os.getcwd()
  472. if YA_IDE_VENV:
  473. sys.meta_path.append(importer)
  474. sys.meta_path.append(BuiltinSubmoduleImporter)
  475. if executable not in sys.path:
  476. sys.path.append(executable)
  477. path0 = get_path0()
  478. if path0 is not None:
  479. sys.path.insert(0, path0)
  480. sys.path_hooks.append(executable_path_hook)
  481. else:
  482. sys.meta_path.insert(0, BuiltinSubmoduleImporter)
  483. sys.meta_path.insert(0, importer)
  484. if executable not in sys.path:
  485. sys.path.insert(0, executable)
  486. sys.path_hooks.insert(0, executable_path_hook)
  487. sys.path_importer_cache[executable] = importer
  488. # Indicator that modules and resources are built-in rather than on the file system.
  489. sys.is_standalone_binary = True
  490. sys.frozen = True
  491. # Set of names of importable modules.
  492. sys.extra_modules = importer.memory
  493. # Use custom implementation of traceback printer.
  494. # Built-in printer (PyTraceBack_Print) does not support custom module loaders
  495. sys.excepthook = excepthook