# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import copy
import os
import tempfile
import time
import shutil
import glob

from botocore.exceptions import ClientError

from __tests__ import StreamWithError
from __tests__ import FileSizeProvider
from __tests__ import RecordingSubscriber
from __tests__ import RecordingOSUtils
from __tests__ import NonSeekableWriter
from __tests__ import BaseGeneralInterfaceTest
from __tests__ import skip_if_windows
from __tests__ import skip_if_using_serial_implementation
from s3transfer.compat import six
from s3transfer.compat import SOCKET_ERROR
from s3transfer.exceptions import RetriesExceededError
from s3transfer.manager import TransferManager
from s3transfer.manager import TransferConfig
from s3transfer.download import GetObjectTask
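

# BaseDownloadTest wires the shared general-interface test machinery to
# TransferManager.download(): it stubs HeadObject/GetObject responses for a
# small in-memory object and provides a temporary directory to download into.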
class BaseDownloadTest(BaseGeneralInterfaceTest):
    def setUp(self):
        super(BaseDownloadTest, self).setUp()
        self.config = TransferConfig(max_request_concurrency=1)
        self._manager = TransferManager(self.client, self.config)

        # Create a temporary directory to write to
        self.tempdir = tempfile.mkdtemp()
        self.filename = os.path.join(self.tempdir, 'myfile')

        # Initialize some default arguments
        self.bucket = 'mybucket'
        self.key = 'mykey'
        self.extra_args = {}
        self.subscribers = []

        # Create a stream to read from
        self.content = b'my content'
        self.stream = six.BytesIO(self.content)

    def tearDown(self):
        super(BaseDownloadTest, self).tearDown()
        shutil.rmtree(self.tempdir)

    @property
    def manager(self):
        return self._manager

    @property
    def method(self):
        return self.manager.download

    def create_call_kwargs(self):
        return {
            'bucket': self.bucket,
            'key': self.key,
            'fileobj': self.filename
        }

    def create_invalid_extra_args(self):
        return {
            'Foo': 'bar'
        }

    def create_stubbed_responses(self):
        # We want to make sure the beginning of the stream is always used
        # in case this gets called twice.
        self.stream.seek(0)
        return [
            {
                'method': 'head_object',
                'service_response': {
                    'ContentLength': len(self.content)
                }
            },
            {
                'method': 'get_object',
                'service_response': {
                    'Body': self.stream
                }
            }
        ]

    def create_expected_progress_callback_info(self):
        # Note that the last read is of the empty sentinel indicating
        # that the stream is done.
        return [
            {'bytes_transferred': 10}
        ]

    def add_head_object_response(self, expected_params=None):
        head_response = self.create_stubbed_responses()[0]
        if expected_params:
            head_response['expected_params'] = expected_params
        self.stubber.add_response(**head_response)

    def add_successful_get_object_responses(
            self, expected_params=None, expected_ranges=None):
        # Add all get_object responses needed to complete the download.
        # Should account for both ranged and nonranged downloads.
        for i, stubbed_response in enumerate(
                self.create_stubbed_responses()[1:]):
            if expected_params:
                stubbed_response['expected_params'] = copy.deepcopy(
                    expected_params)
                if expected_ranges:
                    stubbed_response['expected_params'][
                        'Range'] = expected_ranges[i]
            self.stubber.add_response(**stubbed_response)

    def add_n_retryable_get_object_responses(self, n, num_reads=0):
        for _ in range(n):
            self.stubber.add_response(
                method='get_object',
                service_response={
                    'Body': StreamWithError(
                        copy.deepcopy(self.stream), SOCKET_ERROR, num_reads)
                }
            )

    def test_download_temporary_file_does_not_exist(self):
        self.add_head_object_response()
        self.add_successful_get_object_responses()

        future = self.manager.download(**self.create_call_kwargs())
        future.result()
        # Make sure the file exists
        self.assertTrue(os.path.exists(self.filename))
        # Make sure the random temporary file does not exist
        possible_matches = glob.glob('%s*' % self.filename + os.extsep)
        self.assertEqual(possible_matches, [])
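
    # The next three tests cover downloading into things other than a
    # filename: an already-open file object, a seekable in-memory buffer,
    # and a non-seekable writer.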
    def test_download_for_fileobj(self):
        self.add_head_object_response()
        self.add_successful_get_object_responses()

        with open(self.filename, 'wb') as f:
            future = self.manager.download(
                self.bucket, self.key, f, self.extra_args)
            future.result()

        # Ensure that the contents are correct
        with open(self.filename, 'rb') as f:
            self.assertEqual(self.content, f.read())

    def test_download_for_seekable_filelike_obj(self):
        self.add_head_object_response()
        self.add_successful_get_object_responses()

        # Create a file-like object to test. In this case, it is a BytesIO
        # object.
        bytes_io = six.BytesIO()

        future = self.manager.download(
            self.bucket, self.key, bytes_io, self.extra_args)
        future.result()

        # Ensure that the contents are correct
        bytes_io.seek(0)
        self.assertEqual(self.content, bytes_io.read())

    def test_download_for_nonseekable_filelike_obj(self):
        self.add_head_object_response()
        self.add_successful_get_object_responses()

        with open(self.filename, 'wb') as f:
            future = self.manager.download(
                self.bucket, self.key, NonSeekableWriter(f), self.extra_args)
            future.result()

        # Ensure that the contents are correct
        with open(self.filename, 'rb') as f:
            self.assertEqual(self.content, f.read())

    def test_download_cleanup_on_failure(self):
        self.add_head_object_response()

        # Throw an error on the download
        self.stubber.add_client_error('get_object')
        future = self.manager.download(**self.create_call_kwargs())

        with self.assertRaises(ClientError):
            future.result()
        # Make sure the actual file and the temporary file do not exist
        # by globbing for the file and any of its extensions
        possible_matches = glob.glob('%s*' % self.filename)
        self.assertEqual(possible_matches, [])
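
    # Downloading to a path whose parent directory does not exist should
    # surface the underlying IOError through the returned future.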
    def test_download_with_nonexistent_directory(self):
        self.add_head_object_response()
        self.add_successful_get_object_responses()

        call_kwargs = self.create_call_kwargs()
        call_kwargs['fileobj'] = os.path.join(
            self.tempdir, 'missing-directory', 'myfile')
        future = self.manager.download(**call_kwargs)
        with self.assertRaises(IOError):
            future.result()

    def test_retries_and_succeeds(self):
        self.add_head_object_response()
        # Insert a response that will trigger a retry.
        self.add_n_retryable_get_object_responses(1)
        # Add the normal responses to simulate the download proceeding
        # as normal after the retry.
        self.add_successful_get_object_responses()

        future = self.manager.download(**self.create_call_kwargs())
        future.result()

        # The retry should have been consumed and the process should have
        # continued using the successful responses.
        self.stubber.assert_no_pending_responses()
        with open(self.filename, 'rb') as f:
            self.assertEqual(self.content, f.read())

    def test_retry_failure(self):
        self.add_head_object_response()

        max_retries = 3
        self.config.num_download_attempts = max_retries
        self._manager = TransferManager(self.client, self.config)
        # Add responses that fill up the maximum number of retries.
        self.add_n_retryable_get_object_responses(max_retries)

        future = self.manager.download(**self.create_call_kwargs())

        # A retry exceeded error should have happened.
        with self.assertRaises(RetriesExceededError):
            future.result()

        # All of the retries should have been used up.
        self.stubber.assert_no_pending_responses()

    def test_retry_rewinds_callbacks(self):
        self.add_head_object_response()
        # Insert a response that will trigger a retry after one read of the
        # stream has been made.
        self.add_n_retryable_get_object_responses(1, num_reads=1)
        # Add the normal responses to simulate the download proceeding
        # as normal after the retry.
        self.add_successful_get_object_responses()

        recorder_subscriber = RecordingSubscriber()
        # Set the streaming chunk size to a value smaller than the data we
        # provide to it in order to exercise rewinds of callbacks.
        self.config.io_chunksize = 3

        future = self.manager.download(
            subscribers=[recorder_subscriber], **self.create_call_kwargs())
        future.result()

        # Ensure that there are no remaining responses and that the contents
        # are correct.
        self.stubber.assert_no_pending_responses()
        with open(self.filename, 'rb') as f:
            self.assertEqual(self.content, f.read())

        # Assert that the number of bytes seen is equal to the length of
        # the downloaded content.
        self.assertEqual(
            recorder_subscriber.calculate_bytes_seen(), len(self.content))

        # Also ensure that the second progress invocation was negative three
        # because a retry happened on the second read of the stream and we
        # know that the chunk size for each read is 3.
        progress_byte_amts = [
            call['bytes_transferred'] for call in
            recorder_subscriber.on_progress_calls
        ]
        self.assertEqual(-3, progress_byte_amts[1])
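
    # FileSizeProvider is a subscriber that supplies the object's size up
    # front; with the size already known, the manager can skip the
    # HeadObject call entirely.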
    def test_can_provide_file_size(self):
        self.add_successful_get_object_responses()

        call_kwargs = self.create_call_kwargs()
        call_kwargs['subscribers'] = [FileSizeProvider(len(self.content))]

        future = self.manager.download(**call_kwargs)
        future.result()

        # The HeadObject should not have happened and the file should have
        # been downloaded successfully.
        self.stubber.assert_no_pending_responses()
        with open(self.filename, 'rb') as f:
            self.assertEqual(self.content, f.read())

    def test_uses_provided_osutil(self):
        osutil = RecordingOSUtils()
        # Use the recording os utility for the transfer manager
        self._manager = TransferManager(self.client, self.config, osutil)

        self.add_head_object_response()
        self.add_successful_get_object_responses()

        future = self.manager.download(**self.create_call_kwargs())
        future.result()

        # The osutil should have had its open() method invoked when opening
        # a temporary file and its rename_file() method invoked when the
        # temporary file was moved to its final location.
        self.assertEqual(len(osutil.open_records), 1)
        self.assertEqual(len(osutil.rename_records), 1)

    @skip_if_windows('Windows does not support UNIX special files')
    @skip_if_using_serial_implementation(
        'A separate thread is needed to read from the fifo')
    def test_download_for_fifo_file(self):
        self.add_head_object_response()
        self.add_successful_get_object_responses()

        # Create the fifo file
        os.mkfifo(self.filename)

        future = self.manager.download(
            self.bucket, self.key, self.filename, self.extra_args)

        # The call to open a fifo will block until there is both a reader
        # and a writer, so we need to open it for reading after we've
        # started the transfer.
        with open(self.filename, 'rb') as fifo:
            future.result()
            self.assertEqual(fifo.read(), self.content)
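
    # TransferManager methods do not support S3 Object Lambda access point
    # ARNs; passing one as the bucket should raise a ValueError before any
    # request is made.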
    def test_raise_exception_on_s3_object_lambda_resource(self):
        s3_object_lambda_arn = (
            'arn:aws:s3-object-lambda:us-west-2:123456789012:'
            'accesspoint:my-accesspoint'
        )
        with self.assertRaisesRegexp(ValueError, 'methods do not support'):
            self.manager.download(
                s3_object_lambda_arn, self.key, self.filename, self.extra_args)


class TestNonRangedDownload(BaseDownloadTest):
    # TODO: If you want to add tests outside of this test class and still
    # subclass from BaseDownloadTest you need to set ``__test__ = True``. If
    # you do not, your tests will not get picked up by the test runner! This
    # needs to be done until we find a better way to ignore running test
    # cases from the general test base class, which we do not want run.
    __test__ = True

    def test_download(self):
        self.extra_args['RequestPayer'] = 'requester'
        expected_params = {
            'Bucket': self.bucket,
            'Key': self.key,
            'RequestPayer': 'requester'
        }
        self.add_head_object_response(expected_params)
        self.add_successful_get_object_responses(expected_params)
        future = self.manager.download(
            self.bucket, self.key, self.filename, self.extra_args)
        future.result()

        # Ensure that the contents are correct
        with open(self.filename, 'rb') as f:
            self.assertEqual(self.content, f.read())
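
    # Every argument the manager advertises as allowed for downloads should
    # correspond to an actual input member of the GetObject operation.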
    def test_allowed_download_params_are_valid(self):
        op_model = self.client.meta.service_model.operation_model('GetObject')
        for allowed_download_arg in self._manager.ALLOWED_DOWNLOAD_ARGS:
            self.assertIn(allowed_download_arg, op_model.input_shape.members)

    def test_download_empty_object(self):
        self.content = b''
        self.stream = six.BytesIO(self.content)
        self.add_head_object_response()
        self.add_successful_get_object_responses()

        future = self.manager.download(
            self.bucket, self.key, self.filename, self.extra_args)
        future.result()

        # Ensure that the empty file exists
        with open(self.filename, 'rb') as f:
            self.assertEqual(b'', f.read())

    def test_uses_bandwidth_limiter(self):
        self.content = b'a' * 1024 * 1024
        self.stream = six.BytesIO(self.content)
        self.config = TransferConfig(
            max_request_concurrency=1, max_bandwidth=len(self.content) / 2)
        self._manager = TransferManager(self.client, self.config)

        self.add_head_object_response()
        self.add_successful_get_object_responses()

        start = time.time()
        future = self.manager.download(
            self.bucket, self.key, self.filename, self.extra_args)
        future.result()
        # This is just a smoke test to make sure the limiter is being used;
        # it is not meant to check the limiter's exactness. The maximum
        # bandwidth is set to len(content)/2 per second, so the download
        # should be noticeably slower. Ideally it would take more than two
        # seconds, but because bandwidth tracking at the very start of a
        # transfer is not entirely accurate, we allow some flexibility by
        # only requiring half of the theoretical time.
        self.assertGreaterEqual(time.time() - start, 1)

        # Ensure that the contents are correct
        with open(self.filename, 'rb') as f:
            self.assertEqual(self.content, f.read())


class TestRangedDownload(BaseDownloadTest):
    # TODO: If you want to add tests outside of this test class and still
    # subclass from BaseDownloadTest you need to set ``__test__ = True``. If
    # you do not, your tests will not get picked up by the test runner! This
    # needs to be done until we find a better way to ignore running test
    # cases from the general test base class, which we do not want run.
    __test__ = True

    def setUp(self):
        super(TestRangedDownload, self).setUp()
        self.config = TransferConfig(
            max_request_concurrency=1, multipart_threshold=1,
            multipart_chunksize=4)
        self._manager = TransferManager(self.client, self.config)
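
    # With multipart_threshold=1 and multipart_chunksize=4, the 10-byte
    # object is fetched with three ranged GetObject calls (4 + 4 + 2 bytes),
    # which is what the stubbed responses below model.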
    def create_stubbed_responses(self):
        return [
            {
                'method': 'head_object',
                'service_response': {
                    'ContentLength': len(self.content)
                }
            },
            {
                'method': 'get_object',
                'service_response': {
                    'Body': six.BytesIO(self.content[0:4])
                }
            },
            {
                'method': 'get_object',
                'service_response': {
                    'Body': six.BytesIO(self.content[4:8])
                }
            },
            {
                'method': 'get_object',
                'service_response': {
                    'Body': six.BytesIO(self.content[8:])
                }
            }
        ]

    def create_expected_progress_callback_info(self):
        return [
            {'bytes_transferred': 4},
            {'bytes_transferred': 4},
            {'bytes_transferred': 2},
        ]

    def test_download(self):
        self.extra_args['RequestPayer'] = 'requester'
        expected_params = {
            'Bucket': self.bucket,
            'Key': self.key,
            'RequestPayer': 'requester'
        }
        expected_ranges = ['bytes=0-3', 'bytes=4-7', 'bytes=8-']
        self.add_head_object_response(expected_params)
        self.add_successful_get_object_responses(
            expected_params, expected_ranges)

        future = self.manager.download(
            self.bucket, self.key, self.filename, self.extra_args)
        future.result()

        # Ensure that the contents are correct
        with open(self.filename, 'rb') as f:
            self.assertEqual(self.content, f.read())