importer.pxi 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658
  1. import marshal
  2. import sys
  3. from _codecs import utf_8_decode, utf_8_encode
  4. from _frozen_importlib import _call_with_frames_removed, spec_from_loader, BuiltinImporter
  5. from _frozen_importlib_external import _os, _path_isfile, _path_isabs, path_sep, _path_join, _path_split
  6. from _io import FileIO
  7. import __res as __resource
# Coerce to bytes: bytes pass through unchanged, anything else is UTF-8 encoded.
_b = lambda x: x if isinstance(x, bytes) else utf_8_encode(x)[0]
# Coerce to str: str passes through unchanged, anything else is UTF-8 decoded.
_s = lambda x: x if isinstance(x, str) else utf_8_decode(x)[0]
# Names of environment variables, a pyvenv.cfg key and a resource key (all bytes).
env_entry_point = b'Y_PYTHON_ENTRY_POINT'
env_source_root = b'Y_PYTHON_SOURCE_ROOT'
cfg_source_root = b'arcadia-source-root'
env_extended_source_search = b'Y_PYTHON_EXTENDED_SOURCE_SEARCH'
res_ya_ide_venv = b'YA_IDE_VENV'
# Fall back to a placeholder name when sys.executable is empty.
executable = sys.executable or 'Y_PYTHON'
# Make the resource module importable under the name 'run_import_hook'.
sys.modules['run_import_hook'] = __resource
# This is the prefix in contrib/tools/python3/src/Lib/ya.make.
py_prefix = b'py/'
py_prefix_len = len(py_prefix)
# Value of the YA_IDE_VENV resource; its truthiness switches the venv-specific code paths below.
YA_IDE_VENV = __resource.find(res_ya_ide_venv)
# Extended source search is on when the environment variable is set or YA_IDE_VENV is present.
Y_PYTHON_EXTENDED_SOURCE_SEARCH = _os.environ.get(env_extended_source_search) or YA_IDE_VENV
  22. def _init_venv():
  23. if not _path_isabs(executable):
  24. raise RuntimeError(f'path in sys.executable is not absolute: {executable}')
  25. # Creative copy-paste from site.py
  26. exe_dir, _ = _path_split(executable)
  27. site_prefix, _ = _path_split(exe_dir)
  28. libpath = _path_join(site_prefix, 'lib',
  29. 'python%d.%d' % sys.version_info[:2],
  30. 'site-packages')
  31. sys.path.insert(0, libpath)
  32. # emulate site.venv()
  33. sys.prefix = site_prefix
  34. sys.exec_prefix = site_prefix
  35. conf_basename = 'pyvenv.cfg'
  36. candidate_confs = [
  37. conffile for conffile in (
  38. _path_join(exe_dir, conf_basename),
  39. _path_join(site_prefix, conf_basename)
  40. )
  41. if _path_isfile(conffile)
  42. ]
  43. if not candidate_confs:
  44. raise RuntimeError(f'{conf_basename} not found')
  45. virtual_conf = candidate_confs[0]
  46. with FileIO(virtual_conf, 'r') as f:
  47. for line in f:
  48. if b'=' in line:
  49. key, _, value = line.partition(b'=')
  50. key = key.strip().lower()
  51. value = value.strip()
  52. if key == cfg_source_root:
  53. return value
  54. raise RuntimeError(f'{cfg_source_root} key not found in {virtual_conf}')
  55. def _get_source_root():
  56. env_value = _os.environ.get(env_source_root)
  57. if env_value or not YA_IDE_VENV:
  58. return env_value
  59. return _init_venv()
# Resolved once at import time; falsy when no source root is configured.
Y_PYTHON_SOURCE_ROOT = _get_source_root()
  61. def _print(*xs):
  62. """
  63. This is helpful for debugging, since automatic bytes to str conversion is
  64. not available yet. It is also possible to debug with GDB by breaking on
  65. __Pyx_AddTraceback (with Python GDB pretty printers enabled).
  66. """
  67. parts = []
  68. for s in xs:
  69. if not isinstance(s, (bytes, str)):
  70. s = str(s)
  71. parts.append(_s(s))
  72. sys.stderr.write(' '.join(parts) + '\n')
  73. def file_bytes(path):
  74. # 'open' is not avaiable yet.
  75. with FileIO(path, 'r') as f:
  76. return f.read()
  77. def iter_keys(prefix):
  78. l = len(prefix)
  79. for idx in range(__resource.count()):
  80. key = __resource.key_by_index(idx)
  81. if key.startswith(prefix):
  82. yield key, key[l:]
  83. def iter_py_modules(with_keys=False):
  84. for key, path in iter_keys(b'resfs/file/' + py_prefix):
  85. if path.endswith(b'.py'): # It may also end with '.pyc'.
  86. mod = _s(path[:-3].replace(b'/', b'.'))
  87. if with_keys:
  88. yield key, mod
  89. else:
  90. yield mod
  91. def py_src_key(filename):
  92. return py_prefix + _b(filename)
  93. def iter_prefixes(s):
  94. i = s.find('.')
  95. while i >= 0:
  96. yield s[:i]
  97. i = s.find('.', i + 1)
  98. def resfs_resolve(path):
  99. """
  100. Return the absolute path of a root-relative path if it exists.
  101. """
  102. path = _b(path)
  103. if Y_PYTHON_SOURCE_ROOT:
  104. if not path.startswith(Y_PYTHON_SOURCE_ROOT):
  105. path = _b(path_sep).join((Y_PYTHON_SOURCE_ROOT, path))
  106. if _path_isfile(path):
  107. return path
  108. def resfs_src(key, resfs_file=False):
  109. """
  110. Return the root-relative file path of a resource key.
  111. """
  112. if resfs_file:
  113. key = b'resfs/file/' + _b(key)
  114. return __resource.find(b'resfs/src/' + _b(key))
  115. def resfs_read(path, builtin=None):
  116. """
  117. Return the bytes of the resource file at path, or None.
  118. If builtin is True, do not look for it on the filesystem.
  119. If builtin is False, do not look in the builtin resources.
  120. """
  121. if builtin is not True:
  122. arcpath = resfs_src(path, resfs_file=True)
  123. if arcpath:
  124. fspath = resfs_resolve(arcpath)
  125. if fspath:
  126. return file_bytes(fspath)
  127. if builtin is not False:
  128. return __resource.find(b'resfs/file/' + _b(path))
  129. def resfs_files(prefix=b''):
  130. """
  131. List builtin resource file paths.
  132. """
  133. return [key[11:] for key, _ in iter_keys(b'resfs/file/' + _b(prefix))]
  134. def mod_path(mod):
  135. """
  136. Return the resfs path to the source code of the module with the given name.
  137. """
  138. return py_prefix + _b(mod).replace(b'.', b'/') + b'.py'
  139. class ResourceImporter:
  140. """ A meta_path importer that loads code from built-in resources.
  141. """
  142. def __init__(self):
  143. self.memory = set(iter_py_modules()) # Set of importable module names.
  144. self.source_map = {} # Map from file names to module names.
  145. self._source_name = {} # Map from original to altered module names.
  146. self._package_prefix = ''
  147. self._before_import_callback = None
  148. self._after_import_callback = None
  149. if Y_PYTHON_SOURCE_ROOT and Y_PYTHON_EXTENDED_SOURCE_SEARCH:
  150. self.arcadia_source_finder = ArcadiaSourceFinder(_s(Y_PYTHON_SOURCE_ROOT))
  151. else:
  152. self.arcadia_source_finder = None
  153. for p in list(self.memory) + list(sys.builtin_module_names):
  154. for pp in iter_prefixes(p):
  155. k = pp + '.__init__'
  156. if k not in self.memory:
  157. self.memory.add(k)
  158. def set_callbacks(self, before_import=None, after_import=None):
  159. """Callable[[module], None]"""
  160. self._before_import_callback= before_import
  161. self._after_import_callback = after_import
  162. def for_package(self, name):
  163. import copy
  164. importer = copy.copy(self)
  165. importer._package_prefix = name + '.'
  166. return importer
  167. def _find_mod_path(self, fullname):
  168. """Find arcadia relative path by module name"""
  169. relpath = resfs_src(mod_path(fullname), resfs_file=True)
  170. if relpath or not self.arcadia_source_finder:
  171. return relpath
  172. return self.arcadia_source_finder.get_module_path(fullname)
  173. def find_spec(self, fullname, path=None, target=None):
  174. # Поддежка переопределения стандартного distutils из пакетом из setuptools
  175. if fullname.startswith("distutils."):
  176. setuptools_path = f"{path_sep}setuptools{path_sep}_distutils"
  177. if path and len(path) > 0 and setuptools_path in path[0]:
  178. import importlib
  179. import importlib.abc
  180. setuptools_name = "setuptools._distutils.{}".format(fullname.removeprefix("distutils."))
  181. is_package = self.is_package(setuptools_name)
  182. if is_package:
  183. source = self.get_source(f"{setuptools_name}.__init__")
  184. relpath = self._find_mod_path(f"{setuptools_name}.__init__")
  185. else:
  186. source = self.get_source(setuptools_name)
  187. relpath = self._find_mod_path(setuptools_name)
  188. class DistutilsLoader(importlib.abc.Loader):
  189. def exec_module(self, module):
  190. code = compile(source, _s(relpath), 'exec', dont_inherit=True)
  191. module.__file__ = code.co_filename
  192. if is_package:
  193. module.__path__= [executable + path_sep + setuptools_name.replace('.', path_sep)]
  194. _call_with_frames_removed(exec, code, module.__dict__)
  195. return spec_from_loader(fullname, DistutilsLoader(), is_package=is_package)
  196. try:
  197. is_package = self.is_package(fullname)
  198. except ImportError:
  199. return None
  200. return spec_from_loader(fullname, self, is_package=is_package)
  201. def find_module(self, fullname, path=None):
  202. """For backward compatibility."""
  203. spec = self.find_spec(fullname, path)
  204. return spec.loader if spec is not None else None
  205. def create_module(self, spec):
  206. """Use default semantics for module creation."""
  207. def exec_module(self, module):
  208. code = self.get_code(module.__name__)
  209. module.__file__ = code.co_filename
  210. if self.is_package(module.__name__):
  211. module.__path__= [executable + path_sep + module.__name__.replace('.', path_sep)]
  212. # exec(code, module.__dict__)
  213. # __name__ and __file__ could be overwritten after execution
  214. # So these two things are needed if wee want to be consistent at some point
  215. initial_modname = module.__name__
  216. initial_filename = module.__file__
  217. if self._before_import_callback:
  218. self._before_import_callback(initial_modname, initial_filename)
  219. # “Zero-cost” exceptions are implemented.
  220. # The cost of try statements is almost eliminated when no exception is raised
  221. try:
  222. _call_with_frames_removed(exec, code, module.__dict__)
  223. finally:
  224. if self._after_import_callback:
  225. self._after_import_callback(initial_modname, initial_filename)
  226. # PEP-302 extension 1 of 3: data loader.
  227. def get_data(self, path):
  228. path = _b(path)
  229. abspath = resfs_resolve(path)
  230. if abspath:
  231. return file_bytes(abspath)
  232. path = path.replace(_b('\\'), _b('/'))
  233. data = resfs_read(path, builtin=True)
  234. if data is None:
  235. raise OSError(path) # Y_PYTHON_ENTRY_POINT=:resource_files
  236. return data
  237. # PEP-302 extension 2 of 3: get __file__ without importing.
  238. def get_filename(self, fullname):
  239. modname = fullname
  240. if self.is_package(fullname):
  241. fullname += '.__init__'
  242. relpath = self._find_mod_path(fullname)
  243. if isinstance(relpath, bytes):
  244. relpath = _s(relpath)
  245. return relpath or modname
  246. # PEP-302 extension 3 of 3: packaging introspection.
  247. # Used by `linecache` (while printing tracebacks) unless module filename
  248. # exists on the filesystem.
  249. def get_source(self, fullname):
  250. fullname = self._source_name.get(fullname) or fullname
  251. if self.is_package(fullname):
  252. fullname += '.__init__'
  253. relpath = self.get_filename(fullname)
  254. if relpath:
  255. abspath = resfs_resolve(relpath)
  256. if abspath:
  257. return _s(file_bytes(abspath))
  258. data = resfs_read(mod_path(fullname))
  259. return _s(data) if data else ''
  260. def get_code(self, fullname):
  261. modname = fullname
  262. if self.is_package(fullname):
  263. fullname += '.__init__'
  264. path = mod_path(fullname)
  265. relpath = self._find_mod_path(fullname)
  266. if relpath:
  267. abspath = resfs_resolve(relpath)
  268. if abspath:
  269. data = file_bytes(abspath)
  270. return compile(data, _s(abspath), 'exec', dont_inherit=True)
  271. yapyc_path = path + b'.yapyc3'
  272. yapyc_data = resfs_read(yapyc_path, builtin=True)
  273. if yapyc_data:
  274. return marshal.loads(yapyc_data)
  275. else:
  276. py_data = resfs_read(path, builtin=True)
  277. if py_data:
  278. return compile(py_data, _s(relpath), 'exec', dont_inherit=True)
  279. else:
  280. # This covers packages with no __init__.py in resources.
  281. return compile('', modname, 'exec', dont_inherit=True)
  282. def is_package(self, fullname):
  283. if fullname in self.memory:
  284. return False
  285. if fullname + '.__init__' in self.memory:
  286. return True
  287. if self.arcadia_source_finder:
  288. return self.arcadia_source_finder.is_package(fullname)
  289. raise ImportError(fullname)
  290. # Extension for contrib/python/coverage.
  291. def file_source(self, filename):
  292. """
  293. Return the key of the module source by its resource path.
  294. """
  295. if not self.source_map:
  296. for key, mod in iter_py_modules(with_keys=True):
  297. path = self.get_filename(mod)
  298. self.source_map[path] = key
  299. if filename in self.source_map:
  300. return self.source_map[filename]
  301. if resfs_read(filename, builtin=True) is not None:
  302. return b'resfs/file/' + _b(filename)
  303. return b''
  304. # Extension for pkgutil.iter_modules.
  305. def iter_modules(self, prefix=''):
  306. import re
  307. rx = re.compile(re.escape(self._package_prefix) + r'([^.]+)(\.__init__)?$')
  308. for p in self.memory:
  309. m = rx.match(p)
  310. if m:
  311. yield prefix + m.group(1), m.group(2) is not None
  312. if self.arcadia_source_finder:
  313. for m in self.arcadia_source_finder.iter_modules(self._package_prefix, prefix):
  314. yield m
  315. def get_resource_reader(self, fullname):
  316. import os
  317. path = os.path.dirname(self.get_filename(fullname))
  318. return _ResfsResourceReader(self, path)
  319. class _ResfsResourceReader:
  320. def __init__(self, importer, path):
  321. self.importer = importer
  322. self.path = path
  323. def open_resource(self, resource):
  324. path = f'{self.path}/{resource}'
  325. from io import BytesIO
  326. try:
  327. return BytesIO(self.importer.get_data(path))
  328. except OSError:
  329. raise FileNotFoundError(path)
  330. def resource_path(self, resource):
  331. # All resources are in the binary file, so there is no path to the file.
  332. # Raising FileNotFoundError tells the higher level API to extract the
  333. # binary data and create a temporary file.
  334. raise FileNotFoundError
  335. def is_resource(self, name):
  336. path = f'{self.path}/{name}'
  337. try:
  338. self.importer.get_data(path)
  339. except OSError:
  340. return False
  341. return True
  342. def contents(self):
  343. subdirs_seen = set()
  344. len_path = len(self.path) + 1 # path + /
  345. for key in resfs_files(f"{self.path}/"):
  346. relative = key[len_path:]
  347. res_or_subdir, *other = relative.split(b'/')
  348. if not other:
  349. yield _s(res_or_subdir)
  350. elif res_or_subdir not in subdirs_seen:
  351. subdirs_seen.add(res_or_subdir)
  352. yield _s(res_or_subdir)
  353. def files(self):
  354. import sitecustomize
  355. return sitecustomize.ArcadiaResourceContainer(f"resfs/file/{self.path}/")
  356. class BuiltinSubmoduleImporter(BuiltinImporter):
  357. @classmethod
  358. def find_spec(cls, fullname, path=None, target=None):
  359. if path is not None:
  360. return super().find_spec(fullname, None, target)
  361. else:
  362. return None
  363. class ArcadiaSourceFinder:
  364. """
  365. Search modules and packages in arcadia source tree.
  366. See https://wiki.yandex-team.ru/devtools/extended-python-source-search/ for details
  367. """
  368. NAMESPACE_PREFIX = b'py/namespace/'
  369. PY_EXT = '.py'
  370. YA_MAKE = 'ya.make'
  371. S_IFDIR = 0o040000
  372. def __init__(self, source_root):
  373. self.source_root = source_root
  374. self.module_path_cache = {'': set()}
  375. for key, dirty_path in iter_keys(self.NAMESPACE_PREFIX):
  376. # dirty_path contains unique prefix to prevent repeatable keys in the resource storage
  377. path = dirty_path.split(b'/', 1)[1]
  378. namespaces = __resource.find(key).split(b':')
  379. for n in namespaces:
  380. package_name = _s(n.rstrip(b'.'))
  381. self.module_path_cache.setdefault(package_name, set()).add(_s(path))
  382. # Fill parents with default empty path set if parent doesn't exist in the cache yet
  383. while package_name:
  384. package_name = package_name.rpartition('.')[0]
  385. if package_name in self.module_path_cache:
  386. break
  387. self.module_path_cache.setdefault(package_name, set())
  388. for package_name in self.module_path_cache.keys():
  389. self._add_parent_dirs(package_name, visited=set())
  390. def get_module_path(self, fullname):
  391. """
  392. Find file path for module 'fullname'.
  393. For packages caller pass fullname as 'package.__init__'.
  394. Return None if nothing is found.
  395. """
  396. try:
  397. if not self.is_package(fullname):
  398. return _b(self._cache_module_path(fullname))
  399. except ImportError:
  400. pass
  401. def is_package(self, fullname):
  402. """Check if fullname is a package. Raise ImportError if fullname is not found"""
  403. path = self._cache_module_path(fullname)
  404. if isinstance(path, set):
  405. return True
  406. if isinstance(path, str):
  407. return False
  408. raise ImportError(fullname)
  409. def iter_modules(self, package_prefix, prefix):
  410. paths = self._cache_module_path(package_prefix.rstrip('.'))
  411. if paths is not None:
  412. # Note: it's ok to yield duplicates because pkgutil discards them
  413. # Yield from cache
  414. import re
  415. rx = re.compile(re.escape(package_prefix) + r'([^.]+)$')
  416. # Save result to temporary list to prevent 'RuntimeError: dictionary changed size during iteration'
  417. found = []
  418. for mod, path in self.module_path_cache.items():
  419. if path is not None:
  420. m = rx.match(mod)
  421. if m:
  422. found.append((prefix + m.group(1), self.is_package(mod)))
  423. yield from found
  424. # Yield from file system
  425. for path in paths:
  426. abs_path = _path_join(self.source_root, path)
  427. for dir_item in _os.listdir(abs_path):
  428. if self._path_is_simple_dir(_path_join(abs_path, dir_item)):
  429. yield prefix + dir_item, True
  430. elif dir_item.endswith(self.PY_EXT) and _path_isfile(_path_join(abs_path, dir_item)):
  431. yield prefix + dir_item[:-len(self.PY_EXT)], False
  432. def _isdir(self, path):
  433. """ Unlike _path_isdir() this function don't follow symlink """
  434. try:
  435. stat_info = _os.lstat(path)
  436. except OSError:
  437. return False
  438. return (stat_info.st_mode & 0o170000) == self.S_IFDIR
  439. def _path_is_simple_dir(self, abs_path):
  440. """
  441. Check if path is a directory but doesn't contain ya.make file.
  442. We don't want to steal directory from nested project and treat it as a package
  443. """
  444. return self._isdir(abs_path) and not _path_isfile(_path_join(abs_path, self.YA_MAKE))
  445. def _find_module_in_paths(self, find_package_only, paths, module):
  446. """Auxiliary method. See _cache_module_path() for details"""
  447. if paths:
  448. package_paths = set()
  449. for path in paths:
  450. rel_path = _path_join(path, module)
  451. if not find_package_only:
  452. # Check if file_path is a module
  453. module_path = rel_path + self.PY_EXT
  454. if _path_isfile(_path_join(self.source_root, module_path)):
  455. return module_path
  456. # Check if file_path is a package
  457. if self._path_is_simple_dir(_path_join(self.source_root, rel_path)):
  458. package_paths.add(rel_path)
  459. if package_paths:
  460. return package_paths
  461. def _cache_module_path(self, fullname, find_package_only=False):
  462. """
  463. Find module path or package directory paths and save result in the cache
  464. find_package_only=True - don't try to find module
  465. Returns:
  466. List of relative package paths - for a package
  467. Relative module path - for a module
  468. None - module or package is not found
  469. """
  470. if fullname not in self.module_path_cache:
  471. parent, _, tail = fullname.rpartition('.')
  472. parent_paths = self._cache_module_path(parent, find_package_only=True)
  473. self.module_path_cache[fullname] = self._find_module_in_paths(find_package_only, parent_paths, tail)
  474. return self.module_path_cache[fullname]
  475. def _add_parent_dirs(self, package_name, visited):
  476. if not package_name or package_name in visited:
  477. return
  478. visited.add(package_name)
  479. parent, _, tail = package_name.rpartition('.')
  480. self._add_parent_dirs(parent, visited)
  481. paths = self.module_path_cache[package_name]
  482. for parent_path in self.module_path_cache[parent]:
  483. rel_path = _path_join(parent_path, tail)
  484. if self._path_is_simple_dir(_path_join(self.source_root, rel_path)):
  485. paths.add(rel_path)
  486. def excepthook(*args, **kws):
  487. # traceback module cannot be imported at module level, because interpreter
  488. # is not fully initialized yet
  489. import traceback
  490. return traceback.print_exception(*args, **kws)
# The single importer instance that is registered with the import system below.
importer = ResourceImporter()
  492. def executable_path_hook(path):
  493. if path == executable:
  494. return importer
  495. if path.startswith(executable + path_sep):
  496. return importer.for_package(path[len(executable + path_sep):].replace(path_sep, '.'))
  497. raise ImportError(path)
  498. def get_path0():
  499. """
  500. An incomplete and simplified version of _PyPathConfig_ComputeSysPath0.
  501. We need this to somewhat properly emulate the behaviour of a normal python interpreter
  502. when using ya ide venv.
  503. """
  504. if not sys.argv:
  505. return
  506. argv0 = sys.argv[0]
  507. have_module_arg = argv0 == '-m'
  508. if have_module_arg:
  509. return _os.getcwd()
# Register the importer with the import system.
if YA_IDE_VENV:
    # With YA_IDE_VENV set, the importers, the executable path entry and the
    # path hook are appended, i.e. they come after whatever is already registered.
    sys.meta_path.append(importer)
    sys.meta_path.append(BuiltinSubmoduleImporter)
    if executable not in sys.path:
        sys.path.append(executable)
    path0 = get_path0()
    if path0 is not None:
        sys.path.insert(0, path0)
    sys.path_hooks.append(executable_path_hook)
else:
    # Otherwise everything is inserted at the front; after the two inserts
    # the order is: importer, BuiltinSubmoduleImporter, previous entries.
    sys.meta_path.insert(0, BuiltinSubmoduleImporter)
    sys.meta_path.insert(0, importer)
    if executable not in sys.path:
        sys.path.insert(0, executable)
    sys.path_hooks.insert(0, executable_path_hook)

# Pre-seed the path importer cache for the executable entry with the importer itself.
sys.path_importer_cache[executable] = importer

# Indicator that modules and resources are built-in rather than on the file system.
sys.is_standalone_binary = True
sys.frozen = True

# Set of names of importable modules.
sys.extra_modules = importer.memory

# Use custom implementation of traceback printer.
# Built-in printer (PyTraceBack_Print) does not support custom module loaders
sys.excepthook = excepthook