loaders.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605
  1. # Copyright 2012-2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License"). You
  4. # may not use this file except in compliance with the License. A copy of
  5. # the License is located at
  6. #
  7. # http://aws.amazon.com/apache2.0/
  8. #
  9. # or in the "license" file accompanying this file. This file is
  10. # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
  11. # ANY KIND, either express or implied. See the License for the specific
  12. # language governing permissions and limitations under the License.
  13. """Module for loading various model files.
  14. This module provides the classes that are used to load models used
  15. by botocore. This can include:
  16. * Service models (e.g. the model for EC2, S3, DynamoDB, etc.)
  17. * Service model extras which customize the service models
  18. * Other models associated with a service (pagination, waiters)
  19. * Non service-specific config (Endpoint data, retry config)
  20. Loading a model is broken down into several steps:
  21. * Determining the path to load
  22. * Search the data_path for files to load
  23. * The mechanics of loading the file
  24. * Searching for extras and applying them to the loaded file
  25. The mechanics of loading the file are kept as a separate step so that
  26. other, faster loading mechanisms besides the default JSON loader can be used.
  27. The Search Path
  28. ===============
  29. Similar to how the PATH environment variable is to finding executables
  30. and the PYTHONPATH environment variable is to finding python modules
  31. to import, the botocore loaders have the concept of a data path exposed
  32. through AWS_DATA_PATH.
  33. This enables end users to provide additional search paths where we
  34. will attempt to load models outside of the models we ship with
  35. botocore. When you create a ``Loader``, there are two paths
  36. automatically added to the model search path:
  37. * <botocore root>/data/
  38. * ~/.aws/models
  39. The first value is the path where all the model files shipped with
  40. botocore are located.
  41. The second path is so that users can just drop new model files in
  42. ``~/.aws/models`` without having to mess around with the AWS_DATA_PATH.
  43. The AWS_DATA_PATH uses the platform specific path separator to
  44. separate entries (typically ``:`` on linux and ``;`` on windows).
  45. Directory Layout
  46. ================
  47. The Loader expects a particular directory layout. In order for any
  48. directory specified in AWS_DATA_PATH to be considered, it must have
  49. this structure for service models::
  50. <root>
  51. |
  52. |-- servicename1
  53. | |-- 2012-10-25
  54. | |-- service-2.json
  55. |-- ec2
  56. | |-- 2014-01-01
  57. | | |-- paginators-1.json
  58. | | |-- service-2.json
  59. | | |-- waiters-2.json
  60. | |-- 2015-03-01
  61. | |-- paginators-1.json
  62. | |-- service-2.json
  63. | |-- waiters-2.json
  64. | |-- service-2.sdk-extras.json
  65. That is:
  66. * The root directory contains sub directories that are the name
  67. of the services.
  68. * Within each service directory, there's a sub directory for each
  69. available API version.
  70. * Within each API version, there are model specific files, including
  71. (but not limited to): service-2.json, waiters-2.json, paginators-1.json
  72. The ``-1`` and ``-2`` suffix at the end of the model files denote which version
  73. schema is used within the model. Even though this information is available in
  74. the ``version`` key within the model, this version is also part of the filename
  75. so that code does not need to load the JSON model in order to determine which
  76. version to use.
  77. The ``sdk-extras`` and similar files represent extra data that needs to be
  78. applied to the model after it is loaded. Data in these files might represent
  79. information that doesn't quite fit in the original models, but is still needed
  80. for the sdk. For instance, additional operation parameters might be added here
  81. which don't represent the actual service api.
  82. """
  83. import collections
  84. import logging
  85. import os
  86. from botocore import BOTOCORE_ROOT
  87. from botocore.compat import HAS_GZIP, OrderedDict, json, six
  88. from botocore.exceptions import DataNotFoundError, UnknownServiceError
  89. from botocore.utils import deep_merge
  90. from library.python import resource
  91. _JSON_OPEN_METHODS = {
  92. '.json': open,
  93. }
  94. if HAS_GZIP:
  95. from gzip import open as gzip_open
  96. _JSON_OPEN_METHODS['.json.gz'] = gzip_open
  97. logger = logging.getLogger(__name__)
  98. def instance_cache(func):
  99. """Cache the result of a method on a per instance basis.
  100. This is not a general purpose caching decorator. In order
  101. for this to be used, it must be used on methods on an
  102. instance, and that instance *must* provide a
  103. ``self._cache`` dictionary.
  104. """
  105. def _wrapper(self, *args, **kwargs):
  106. key = (func.__name__,) + args
  107. for pair in sorted(kwargs.items()):
  108. key += pair
  109. if key in self._cache:
  110. return self._cache[key]
  111. data = func(self, *args, **kwargs)
  112. self._cache[key] = data
  113. return data
  114. return _wrapper
  115. class JSONFileLoader:
  116. """Loader JSON files.
  117. This class can load the default format of models, which is a JSON file.
  118. """
  119. def exists(self, file_path):
  120. """Checks if the file exists.
  121. :type file_path: str
  122. :param file_path: The full path to the file to load without
  123. the '.json' extension.
  124. :return: True if file path exists, False otherwise.
  125. """
  126. for ext in _JSON_OPEN_METHODS:
  127. if os.path.isfile(file_path + ext):
  128. return True
  129. return False
  130. def _load_file(self, full_path, open_method):
  131. if not os.path.isfile(full_path):
  132. return
  133. # By default the file will be opened with locale encoding on Python 3.
  134. # We specify "utf8" here to ensure the correct behavior.
  135. with open_method(full_path, 'rb') as fp:
  136. payload = fp.read().decode('utf-8')
  137. logger.debug("Loading JSON file: %s", full_path)
  138. return json.loads(payload, object_pairs_hook=OrderedDict)
  139. def load_file(self, file_path):
  140. """Attempt to load the file path.
  141. :type file_path: str
  142. :param file_path: The full path to the file to load without
  143. the '.json' extension.
  144. :return: The loaded data if it exists, otherwise None.
  145. """
  146. for ext, open_method in _JSON_OPEN_METHODS.items():
  147. data = self._load_file(file_path + ext, open_method)
  148. if data is not None:
  149. return data
  150. return None
  151. # SQS-119
  152. class HybridJsonLoader(JSONFileLoader):
  153. type_data_cache = collections.defaultdict(lambda: collections.defaultdict(set))
  154. arcadia_resources_path = (
  155. 'contrib/python/awscli/awscli/data/',
  156. 'contrib/python/boto3/py3/boto3/data/',
  157. 'contrib/python/botocore/py3/botocore/data/',
  158. )
  159. @classmethod
  160. def collect_service_data(cls):
  161. if cls.type_data_cache:
  162. return
  163. for res in resource.resfs_files():
  164. res = six.ensure_str(res)
  165. if res.startswith(cls.arcadia_resources_path):
  166. splitted_path = res.split('/data/')[1].split('/')
  167. if len(splitted_path) >= 3:
  168. service_name, version, type_name = splitted_path[:3]
  169. type_name = type_name.replace('.json.gz', '').replace('.json', '')
  170. cls.type_data_cache[type_name][service_name].add(version)
  171. @classmethod
  172. def read_from_resources(cls, file_path):
  173. for ext in _JSON_OPEN_METHODS:
  174. for prefix in cls.arcadia_resources_path:
  175. path = f'{prefix}{file_path}{ext}'
  176. data = resource.resfs_read(path)
  177. if data:
  178. return path, ext, data
  179. return
  180. def exists(self, file_path):
  181. if self.read_from_resources(file_path):
  182. return True
  183. return super(HybridJsonLoader, self).exists(file_path)
  184. def _load_resource(self, full_path, ext, data):
  185. # By default the file will be opened with locale encoding on Python 3.
  186. # We specify "utf8" here to ensure the correct behavior.
  187. if ext == ".json":
  188. payload = data.decode('utf-8')
  189. elif ext == ".json.gz":
  190. import io
  191. with gzip_open(io.BytesIO(data)) as fp:
  192. payload = fp.read().decode('utf-8')
  193. else:
  194. raise ValueError(f"Unknown extension {ext}")
  195. logger.debug("Loading JSON file: %s", full_path)
  196. return json.loads(payload, object_pairs_hook=OrderedDict)
  197. def load_file(self, file_path):
  198. if load_args := self.read_from_resources(file_path):
  199. return self._load_resource(*load_args)
  200. return super(HybridJsonLoader, self).load_file(file_path)
  201. def create_loader(search_path_string=None):
  202. """Create a Loader class.
  203. This factory function creates a loader given a search string path.
  204. :type search_string_path: str
  205. :param search_string_path: The AWS_DATA_PATH value. A string
  206. of data path values separated by the ``os.path.pathsep`` value,
  207. which is typically ``:`` on POSIX platforms and ``;`` on
  208. windows.
  209. :return: A ``Loader`` instance.
  210. """
  211. if search_path_string is None:
  212. return Loader()
  213. paths = []
  214. extra_paths = search_path_string.split(os.pathsep)
  215. for path in extra_paths:
  216. path = os.path.expanduser(os.path.expandvars(path))
  217. paths.append(path)
  218. return Loader(extra_search_paths=paths)
  219. class Loader:
  220. """Find and load data models.
  221. This class will handle searching for and loading data models.
  222. The main method used here is ``load_service_model``, which is a
  223. convenience method over ``load_data`` and ``determine_latest_version``.
  224. """
  225. FILE_LOADER_CLASS = HybridJsonLoader
  226. # The included models in botocore/data/ that we ship with botocore.
  227. BUILTIN_DATA_PATH = os.path.join(BOTOCORE_ROOT, 'data')
  228. # For convenience we automatically add ~/.aws/models to the data path.
  229. CUSTOMER_DATA_PATH = os.path.join(
  230. os.path.expanduser('~'), '.aws', 'models'
  231. )
  232. BUILTIN_EXTRAS_TYPES = ['sdk']
  233. def __init__(
  234. self,
  235. extra_search_paths=None,
  236. file_loader=None,
  237. cache=None,
  238. include_default_search_paths=True,
  239. include_default_extras=True,
  240. ):
  241. self._cache = {}
  242. if file_loader is None:
  243. file_loader = self.FILE_LOADER_CLASS()
  244. self.file_loader = file_loader
  245. if extra_search_paths is not None:
  246. self._search_paths = extra_search_paths
  247. else:
  248. self._search_paths = []
  249. if include_default_search_paths:
  250. self._search_paths.extend(
  251. [self.CUSTOMER_DATA_PATH, self.BUILTIN_DATA_PATH]
  252. )
  253. self._extras_types = []
  254. if include_default_extras:
  255. self._extras_types.extend(self.BUILTIN_EXTRAS_TYPES)
  256. self._extras_processor = ExtrasProcessor()
  257. @property
  258. def search_paths(self):
  259. return self._search_paths
  260. @property
  261. def extras_types(self):
  262. return self._extras_types
  263. @instance_cache
  264. def list_available_services(self, type_name):
  265. """List all known services.
  266. This will traverse the search path and look for all known
  267. services.
  268. :type type_name: str
  269. :param type_name: The type of the service (service-2,
  270. paginators-1, waiters-2, etc). This is needed because
  271. the list of available services depends on the service
  272. type. For example, the latest API version available for
  273. a resource-1.json file may not be the latest API version
  274. available for a services-2.json file.
  275. :return: A list of all services. The list of services will
  276. be sorted.
  277. """
  278. services = set()
  279. for possible_path in self._potential_locations():
  280. # Any directory in the search path is potentially a service.
  281. # We'll collect any initial list of potential services,
  282. # but we'll then need to further process these directories
  283. # by searching for the corresponding type_name in each
  284. # potential directory.
  285. possible_services = [
  286. d
  287. for d in os.listdir(possible_path)
  288. if os.path.isdir(os.path.join(possible_path, d))
  289. ]
  290. for service_name in possible_services:
  291. full_dirname = os.path.join(possible_path, service_name)
  292. api_versions = os.listdir(full_dirname)
  293. for api_version in api_versions:
  294. full_load_path = os.path.join(
  295. full_dirname, api_version, type_name
  296. )
  297. if self.file_loader.exists(full_load_path):
  298. services.add(service_name)
  299. break
  300. # SQS-119
  301. HybridJsonLoader.collect_service_data()
  302. services = services.union(HybridJsonLoader.type_data_cache[type_name].keys())
  303. return sorted(services)
  304. @instance_cache
  305. def determine_latest_version(self, service_name, type_name):
  306. """Find the latest API version available for a service.
  307. :type service_name: str
  308. :param service_name: The name of the service.
  309. :type type_name: str
  310. :param type_name: The type of the service (service-2,
  311. paginators-1, waiters-2, etc). This is needed because
  312. the latest API version available can depend on the service
  313. type. For example, the latest API version available for
  314. a resource-1.json file may not be the latest API version
  315. available for a services-2.json file.
  316. :rtype: str
  317. :return: The latest API version. If the service does not exist
  318. or does not have any available API data, then a
  319. ``DataNotFoundError`` exception will be raised.
  320. """
  321. return max(self.list_api_versions(service_name, type_name))
  322. @instance_cache
  323. def list_api_versions(self, service_name, type_name):
  324. """List all API versions available for a particular service type
  325. :type service_name: str
  326. :param service_name: The name of the service
  327. :type type_name: str
  328. :param type_name: The type name for the service (i.e service-2,
  329. paginators-1, etc.)
  330. :rtype: list
  331. :return: A list of API version strings in sorted order.
  332. """
  333. known_api_versions = set()
  334. for possible_path in self._potential_locations(
  335. service_name, must_exist=True, is_dir=True
  336. ):
  337. for dirname in os.listdir(possible_path):
  338. full_path = os.path.join(possible_path, dirname, type_name)
  339. # Only add to the known_api_versions if the directory
  340. # contains a service-2, paginators-1, etc. file corresponding
  341. # to the type_name passed in.
  342. if self.file_loader.exists(full_path):
  343. known_api_versions.add(dirname)
  344. # SQS-119
  345. HybridJsonLoader.collect_service_data()
  346. known_api_versions = known_api_versions.union(HybridJsonLoader.type_data_cache[type_name][service_name])
  347. if not known_api_versions:
  348. raise DataNotFoundError(data_path=service_name)
  349. return sorted(known_api_versions)
  350. @instance_cache
  351. def load_service_model(self, service_name, type_name, api_version=None):
  352. """Load a botocore service model
  353. This is the main method for loading botocore models (e.g. a service
  354. model, pagination configs, waiter configs, etc.).
  355. :type service_name: str
  356. :param service_name: The name of the service (e.g ``ec2``, ``s3``).
  357. :type type_name: str
  358. :param type_name: The model type. Valid types include, but are not
  359. limited to: ``service-2``, ``paginators-1``, ``waiters-2``.
  360. :type api_version: str
  361. :param api_version: The API version to load. If this is not
  362. provided, then the latest API version will be used.
  363. :type load_extras: bool
  364. :param load_extras: Whether or not to load the tool extras which
  365. contain additional data to be added to the model.
  366. :raises: UnknownServiceError if there is no known service with
  367. the provided service_name.
  368. :raises: DataNotFoundError if no data could be found for the
  369. service_name/type_name/api_version.
  370. :return: The loaded data, as a python type (e.g. dict, list, etc).
  371. """
  372. # Wrapper around the load_data. This will calculate the path
  373. # to call load_data with.
  374. known_services = self.list_available_services(type_name)
  375. if service_name not in known_services:
  376. raise UnknownServiceError(
  377. service_name=service_name,
  378. known_service_names=', '.join(sorted(known_services)),
  379. )
  380. if api_version is None:
  381. api_version = self.determine_latest_version(
  382. service_name, type_name
  383. )
  384. full_path = os.path.join(service_name, api_version, type_name)
  385. model = self.load_data(full_path)
  386. # Load in all the extras
  387. extras_data = self._find_extras(service_name, type_name, api_version)
  388. self._extras_processor.process(model, extras_data)
  389. return model
  390. def _find_extras(self, service_name, type_name, api_version):
  391. """Creates an iterator over all the extras data."""
  392. for extras_type in self.extras_types:
  393. extras_name = f'{type_name}.{extras_type}-extras'
  394. full_path = os.path.join(service_name, api_version, extras_name)
  395. try:
  396. yield self.load_data(full_path)
  397. except DataNotFoundError:
  398. pass
  399. @instance_cache
  400. def load_data_with_path(self, name):
  401. """Same as ``load_data`` but returns file path as second return value.
  402. :type name: str
  403. :param name: The data path, i.e ``ec2/2015-03-01/service-2``.
  404. :return: Tuple of the loaded data and the path to the data file
  405. where the data was loaded from. If no data could be found then a
  406. DataNotFoundError is raised.
  407. """
  408. for possible_path in self._potential_locations(name):
  409. found = self.file_loader.load_file(possible_path)
  410. if found is not None:
  411. return found, possible_path
  412. # SQS-119
  413. found_by_arcadia_loader = self.file_loader.load_file(name)
  414. if found_by_arcadia_loader is not None:
  415. return found_by_arcadia_loader, None
  416. # We didn't find anything that matched on any path.
  417. raise DataNotFoundError(data_path=name)
  418. def load_data(self, name):
  419. """Load data given a data path.
  420. This is a low level method that will search through the various
  421. search paths until it's able to load a value. This is typically
  422. only needed to load *non* model files (such as _endpoints and
  423. _retry). If you need to load model files, you should prefer
  424. ``load_service_model``. Use ``load_data_with_path`` to get the
  425. data path of the data file as second return value.
  426. :type name: str
  427. :param name: The data path, i.e ``ec2/2015-03-01/service-2``.
  428. :return: The loaded data. If no data could be found then
  429. a DataNotFoundError is raised.
  430. """
  431. data, _ = self.load_data_with_path(name)
  432. return data
  433. def _potential_locations(self, name=None, must_exist=False, is_dir=False):
  434. # Will give an iterator over the full path of potential locations
  435. # according to the search path.
  436. for path in self.search_paths:
  437. if os.path.isdir(path):
  438. full_path = path
  439. if name is not None:
  440. full_path = os.path.join(path, name)
  441. if not must_exist:
  442. yield full_path
  443. else:
  444. if is_dir and os.path.isdir(full_path):
  445. yield full_path
  446. elif os.path.exists(full_path):
  447. yield full_path
  448. def is_builtin_path(self, path):
  449. """Whether a given path is within the package's data directory.
  450. This method can be used together with load_data_with_path(name)
  451. to determine if data has been loaded from a file bundled with the
  452. package, as opposed to a file in a separate location.
  453. :type path: str
  454. :param path: The file path to check.
  455. :return: Whether the given path is within the package's data directory.
  456. """
  457. if path is None:
  458. return True
  459. path = os.path.expanduser(os.path.expandvars(path))
  460. return path.startswith(self.BUILTIN_DATA_PATH)
  461. class ExtrasProcessor:
  462. """Processes data from extras files into service models."""
  463. def process(self, original_model, extra_models):
  464. """Processes data from a list of loaded extras files into a model
  465. :type original_model: dict
  466. :param original_model: The service model to load all the extras into.
  467. :type extra_models: iterable of dict
  468. :param extra_models: A list of loaded extras models.
  469. """
  470. for extras in extra_models:
  471. self._process(original_model, extras)
  472. def _process(self, model, extra_model):
  473. """Process a single extras model into a service model."""
  474. if 'merge' in extra_model:
  475. deep_merge(model, extra_model['merge'])