| # Copyright (c) 2006-2012 Mitch Garnaat http://garnaat.org/ |
| # Copyright (c) 2011, Nexenta Systems Inc. |
| # Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved |
| # |
| # Permission is hereby granted, free of charge, to any person obtaining a |
| # copy of this software and associated documentation files (the |
| # "Software"), to deal in the Software without restriction, including |
| # without limitation the rights to use, copy, modify, merge, publish, dis- |
| # tribute, sublicense, and/or sell copies of the Software, and to permit |
| # persons to whom the Software is furnished to do so, subject to the fol- |
| # lowing conditions: |
| # |
| # The above copyright notice and this permission notice shall be included |
| # in all copies or substantial portions of the Software. |
| # |
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- |
| # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT |
| # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| # IN THE SOFTWARE. |
| |
| import mimetypes |
| import os |
| import re |
| import rfc822 |
| import StringIO |
| import base64 |
| import binascii |
| import math |
| import urllib |
| import boto.utils |
| from boto.exception import BotoClientError |
| from boto.provider import Provider |
| from boto.s3.keyfile import KeyFile |
| from boto.s3.user import User |
| from boto import UserAgent |
| from boto.utils import compute_md5 |
| try: |
| from hashlib import md5 |
| except ImportError: |
| from md5 import md5 |
| |
| |
| class Key(object): |
| """ |
| Represents a key (object) in an S3 bucket. |
| |
| :ivar bucket: The parent :class:`boto.s3.bucket.Bucket`. |
| :ivar name: The name of this Key object. |
| :ivar metadata: A dictionary containing user metadata that you |
| wish to store with the object or that has been retrieved from |
| an existing object. |
| :ivar cache_control: The value of the `Cache-Control` HTTP header. |
| :ivar content_type: The value of the `Content-Type` HTTP header. |
| :ivar content_encoding: The value of the `Content-Encoding` HTTP header. |
| :ivar content_disposition: The value of the `Content-Disposition` HTTP |
| header. |
| :ivar content_language: The value of the `Content-Language` HTTP header. |
| :ivar etag: The `etag` associated with this object. |
| :ivar last_modified: The string timestamp representing the last |
| time this object was modified in S3. |
| :ivar owner: The ID of the owner of this object. |
| :ivar storage_class: The storage class of the object. Currently, one of: |
| STANDARD | REDUCED_REDUNDANCY | GLACIER |
| :ivar md5: The MD5 hash of the contents of the object. |
| :ivar size: The size, in bytes, of the object. |
| :ivar version_id: The version ID of this object, if it is a versioned |
| object. |
| :ivar encrypted: Whether the object is encrypted while at rest on |
| the server. |
| """ |
| |
| DefaultContentType = 'application/octet-stream' |
| |
| RestoreBody = """<?xml version="1.0" encoding="UTF-8"?> |
| <RestoreRequest xmlns="http://s3.amazonaws.com/doc/2006-03-01"> |
| <Days>%s</Days> |
| </RestoreRequest>""" |
| |
| |
| BufferSize = 8192 |
| |
| # The object metadata fields a user can set, other than custom metadata |
| # fields (i.e., those beginning with a provider-specific prefix like |
| # x-amz-meta). |
| base_user_settable_fields = set(["cache-control", "content-disposition", |
| "content-encoding", "content-language", |
| "content-md5", "content-type"]) |
| _underscore_base_user_settable_fields = set() |
| for f in base_user_settable_fields: |
        _underscore_base_user_settable_fields.add(f.replace('-', '_'))

| def __init__(self, bucket=None, name=None): |
| self.bucket = bucket |
| self.name = name |
| self.metadata = {} |
| self.cache_control = None |
| self.content_type = self.DefaultContentType |
| self.content_encoding = None |
| self.content_disposition = None |
| self.content_language = None |
| self.filename = None |
| self.etag = None |
| self.is_latest = False |
| self.last_modified = None |
| self.owner = None |
| self.storage_class = 'STANDARD' |
| self.md5 = None |
| self.base64md5 = None |
| self.path = None |
| self.resp = None |
| self.mode = None |
| self.size = None |
| self.version_id = None |
| self.source_version_id = None |
| self.delete_marker = False |
| self.encrypted = None |
        # If a restore of this object is in progress, this attribute will
        # be True; once the restore completes, it will be False. Otherwise
        # it will be None. When the restore is complete (ongoing_restore ==
        # False), expiry_date will be populated with the expiry date of the
        # restored object.
| self.ongoing_restore = None |
| self.expiry_date = None |
| |
| def __repr__(self): |
| if self.bucket: |
| return '<Key: %s,%s>' % (self.bucket.name, self.name) |
| else: |
| return '<Key: None,%s>' % self.name |
| |
| def __getattr__(self, name): |
| if name == 'key': |
| return self.name |
| else: |
| raise AttributeError |
| |
| def __setattr__(self, name, value): |
| if name == 'key': |
| self.__dict__['name'] = value |
| else: |
| self.__dict__[name] = value |
| |
| def __iter__(self): |
| return self |
| |
| @property |
| def provider(self): |
| provider = None |
| if self.bucket and self.bucket.connection: |
| provider = self.bucket.connection.provider |
| return provider |
| |
| def get_md5_from_hexdigest(self, md5_hexdigest): |
| """ |
        A utility function to create the 2-tuple (md5hexdigest, base64md5)
        from a precalculated md5_hexdigest.
| """ |
| digest = binascii.unhexlify(md5_hexdigest) |
| base64md5 = base64.encodestring(digest) |
| if base64md5[-1] == '\n': |
| base64md5 = base64md5[0:-1] |
| return (md5_hexdigest, base64md5) |
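
    # A minimal usage sketch (names are illustrative): given an MD5
    # hexdigest computed elsewhere, this produces the pair expected by
    # methods that accept a precomputed ``md5`` argument:
    #
    #   k = Key(bucket, 'myobject')   # hypothetical bucket and key name
    #   hex_md5 = '9e107d9d372bb6826bd81d3542a419d6'
    #   md5_tuple = k.get_md5_from_hexdigest(hex_md5)
    #   # md5_tuple == (hex digest, base64-encoded binary digest)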
| |
| def handle_encryption_headers(self, resp): |
| provider = self.bucket.connection.provider |
| if provider.server_side_encryption_header: |
| self.encrypted = resp.getheader(provider.server_side_encryption_header, None) |
| else: |
| self.encrypted = None |
| |
| def handle_version_headers(self, resp, force=False): |
| provider = self.bucket.connection.provider |
| # If the Key object already has a version_id attribute value, it |
| # means that it represents an explicit version and the user is |
| # doing a get_contents_*(version_id=<foo>) to retrieve another |
| # version of the Key. In that case, we don't really want to |
| # overwrite the version_id in this Key object. Comprende? |
| if self.version_id is None or force: |
| self.version_id = resp.getheader(provider.version_id, None) |
| self.source_version_id = resp.getheader(provider.copy_source_version_id, |
| None) |
| if resp.getheader(provider.delete_marker, 'false') == 'true': |
| self.delete_marker = True |
| else: |
| self.delete_marker = False |
| |
| def handle_restore_headers(self, response): |
| header = response.getheader('x-amz-restore') |
| if header is None: |
| return |
| parts = header.split(',', 1) |
| for part in parts: |
| key, val = [i.strip() for i in part.split('=')] |
| val = val.replace('"', '') |
| if key == 'ongoing-request': |
                self.ongoing_restore = (val.lower() == 'true')
| elif key == 'expiry-date': |
| self.expiry_date = val |
| |
| def open_read(self, headers=None, query_args='', |
| override_num_retries=None, response_headers=None): |
| """ |
| Open this key for reading |
| |
| :type headers: dict |
| :param headers: Headers to pass in the web request |
| |
        :type query_args: string
        :param query_args: Arguments to pass in the query string
            (e.g., 'torrent')

        :type override_num_retries: int
        :param override_num_retries: If not None, this will override the
            configured num_retries parameter for the underlying GET.
| |
| :type response_headers: dict |
| :param response_headers: A dictionary containing HTTP |
| headers/values that will override any headers associated |
| with the stored object in the response. See |
| http://goo.gl/EWOPb for details. |
| """ |
        if self.resp is None:
| self.mode = 'r' |
| |
| provider = self.bucket.connection.provider |
| self.resp = self.bucket.connection.make_request( |
| 'GET', self.bucket.name, self.name, headers, |
| query_args=query_args, |
| override_num_retries=override_num_retries) |
            if self.resp.status < 200 or self.resp.status > 299:
| body = self.resp.read() |
| raise provider.storage_response_error(self.resp.status, |
| self.resp.reason, body) |
| response_headers = self.resp.msg |
| self.metadata = boto.utils.get_aws_metadata(response_headers, |
| provider) |
| for name, value in response_headers.items(): |
| # To get correct size for Range GETs, use Content-Range |
| # header if one was returned. If not, use Content-Length |
| # header. |
| if (name.lower() == 'content-length' and |
| 'Content-Range' not in response_headers): |
| self.size = int(value) |
| elif name.lower() == 'content-range': |
| end_range = re.sub('.*/(.*)', '\\1', value) |
| self.size = int(end_range) |
| elif name.lower() == 'etag': |
| self.etag = value |
| elif name.lower() == 'content-type': |
| self.content_type = value |
| elif name.lower() == 'content-encoding': |
| self.content_encoding = value |
| elif name.lower() == 'content-language': |
| self.content_language = value |
| elif name.lower() == 'last-modified': |
| self.last_modified = value |
| elif name.lower() == 'cache-control': |
| self.cache_control = value |
| elif name.lower() == 'content-disposition': |
| self.content_disposition = value |
| self.handle_version_headers(self.resp) |
| self.handle_encryption_headers(self.resp) |
| |
| def open_write(self, headers=None, override_num_retries=None): |
| """ |
| Open this key for writing. |
| Not yet implemented |
| |
| :type headers: dict |
| :param headers: Headers to pass in the write request |
| |
        :type override_num_retries: int
        :param override_num_retries: If not None, this will override the
            configured num_retries parameter for the underlying PUT.
| """ |
| raise BotoClientError('Not Implemented') |
| |
| def open(self, mode='r', headers=None, query_args=None, |
| override_num_retries=None): |
| if mode == 'r': |
| self.mode = 'r' |
| self.open_read(headers=headers, query_args=query_args, |
| override_num_retries=override_num_retries) |
| elif mode == 'w': |
| self.mode = 'w' |
| self.open_write(headers=headers, |
| override_num_retries=override_num_retries) |
| else: |
| raise BotoClientError('Invalid mode: %s' % mode) |
| |
| closed = False |
| |
| def close(self, fast=False): |
| """ |
| Close this key. |
| |
| :type fast: bool |
| :param fast: True if you want the connection to be closed without first |
| reading the content. This should only be used in cases where subsequent |
| calls don't need to return the content from the open HTTP connection. |
| Note: As explained at |
| http://docs.python.org/2/library/httplib.html#httplib.HTTPConnection.getresponse, |
| callers must read the whole response before sending a new request to the |
| server. Calling Key.close(fast=True) and making a subsequent request to |
| the server will work because boto will get an httplib exception and |
| close/reopen the connection. |
| |
| """ |
| if self.resp and not fast: |
| self.resp.read() |
| self.resp = None |
| self.mode = None |
| self.closed = True |
| |
| def next(self): |
| """ |
| By providing a next method, the key object supports use as an iterator. |
| For example, you can now say: |
| |
| for bytes in key: |
| write bytes to a file or whatever |
| |
| All of the HTTP connection stuff is handled for you. |
| """ |
| self.open_read() |
| data = self.resp.read(self.BufferSize) |
| if not data: |
| self.close() |
| raise StopIteration |
| return data |
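
    # A minimal streaming sketch (assumes an existing key in a bucket the
    # caller can read; names are illustrative):
    #
    #   key = bucket.get_key('large-object')
    #   with open('large-object.out', 'wb') as out:
    #       for chunk in key:          # reads BufferSize bytes at a time
    #           out.write(chunk)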
| |
| def read(self, size=0): |
| self.open_read() |
| if size == 0: |
| data = self.resp.read() |
| else: |
| data = self.resp.read(size) |
| if not data: |
| self.close() |
| return data |
| |
| def change_storage_class(self, new_storage_class, dst_bucket=None, |
| validate_dst_bucket=True): |
| """ |
        Change the storage class of an existing key.
        Depending on whether a different destination bucket is supplied
        or not, this will either move the item within the bucket,
        preserving all metadata and ACL info while changing the storage
        class, or it will copy the item to the provided destination
        bucket, also preserving metadata and ACL info.
| |
| :type new_storage_class: string |
| :param new_storage_class: The new storage class for the Key. |
| Possible values are: |
| * STANDARD |
| * REDUCED_REDUNDANCY |
| |
| :type dst_bucket: string |
| :param dst_bucket: The name of a destination bucket. If not |
| provided the current bucket of the key will be used. |
| |
| :type validate_dst_bucket: bool |
| :param validate_dst_bucket: If True, will validate the dst_bucket |
| by using an extra list request. |
| """ |
| if new_storage_class == 'STANDARD': |
| return self.copy(self.bucket.name, self.name, |
| reduced_redundancy=False, preserve_acl=True, |
| validate_dst_bucket=validate_dst_bucket) |
| elif new_storage_class == 'REDUCED_REDUNDANCY': |
| return self.copy(self.bucket.name, self.name, |
| reduced_redundancy=True, preserve_acl=True, |
| validate_dst_bucket=validate_dst_bucket) |
| else: |
| raise BotoClientError('Invalid storage class: %s' % |
| new_storage_class) |
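
    # A minimal usage sketch (assumes the key already exists; names are
    # illustrative). Note that this performs a server-side copy of the
    # object onto itself:
    #
    #   key = bucket.get_key('myobject')
    #   key.change_storage_class('REDUCED_REDUNDANCY')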
| |
| def copy(self, dst_bucket, dst_key, metadata=None, |
| reduced_redundancy=False, preserve_acl=False, |
| encrypt_key=False, validate_dst_bucket=True): |
| """ |
| Copy this Key to another bucket. |
| |
| :type dst_bucket: string |
| :param dst_bucket: The name of the destination bucket |
| |
| :type dst_key: string |
| :param dst_key: The name of the destination key |
| |
| :type metadata: dict |
| :param metadata: Metadata to be associated with new key. If |
| metadata is supplied, it will replace the metadata of the |
| source key being copied. If no metadata is supplied, the |
| source key's metadata will be copied to the new key. |
| |
| :type reduced_redundancy: bool |
| :param reduced_redundancy: If True, this will force the |
| storage class of the new Key to be REDUCED_REDUNDANCY |
| regardless of the storage class of the key being copied. |
            The Reduced Redundancy Storage (RRS) feature of S3
            provides lower redundancy at lower storage cost.
| |
| :type preserve_acl: bool |
| :param preserve_acl: If True, the ACL from the source key will |
| be copied to the destination key. If False, the |
| destination key will have the default ACL. Note that |
| preserving the ACL in the new key object will require two |
| additional API calls to S3, one to retrieve the current |
| ACL and one to set that ACL on the new object. If you |
| don't care about the ACL, a value of False will be |
| significantly more efficient. |
| |
| :type encrypt_key: bool |
| :param encrypt_key: If True, the new copy of the object will |
| be encrypted on the server-side by S3 and will be stored |
| in an encrypted form while at rest in S3. |
| |
| :type validate_dst_bucket: bool |
| :param validate_dst_bucket: If True, will validate the dst_bucket |
| by using an extra list request. |
| |
| :rtype: :class:`boto.s3.key.Key` or subclass |
| :returns: An instance of the newly created key object |
| """ |
| dst_bucket = self.bucket.connection.lookup(dst_bucket, |
| validate_dst_bucket) |
| if reduced_redundancy: |
| storage_class = 'REDUCED_REDUNDANCY' |
| else: |
| storage_class = self.storage_class |
| return dst_bucket.copy_key(dst_key, self.bucket.name, |
| self.name, metadata, |
| storage_class=storage_class, |
| preserve_acl=preserve_acl, |
| encrypt_key=encrypt_key) |
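
    # A minimal usage sketch (assumes both buckets exist and are
    # accessible to the caller; names are illustrative):
    #
    #   src_key = bucket.get_key('reports/2012.csv')
    #   new_key = src_key.copy('backup-bucket', 'reports/2012.csv',
    #                          preserve_acl=True)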
| |
| def startElement(self, name, attrs, connection): |
| if name == 'Owner': |
| self.owner = User(self) |
| return self.owner |
| else: |
| return None |
| |
| def endElement(self, name, value, connection): |
| if name == 'Key': |
| self.name = value |
| elif name == 'ETag': |
| self.etag = value |
| elif name == 'IsLatest': |
| if value == 'true': |
| self.is_latest = True |
| else: |
| self.is_latest = False |
| elif name == 'LastModified': |
| self.last_modified = value |
| elif name == 'Size': |
| self.size = int(value) |
| elif name == 'StorageClass': |
| self.storage_class = value |
| elif name == 'Owner': |
| pass |
| elif name == 'VersionId': |
| self.version_id = value |
| else: |
| setattr(self, name, value) |
| |
| def exists(self): |
| """ |
| Returns True if the key exists |
| |
| :rtype: bool |
| :return: Whether the key exists on S3 |
| """ |
| return bool(self.bucket.lookup(self.name)) |
| |
| def delete(self): |
| """ |
| Delete this key from S3 |
| """ |
| return self.bucket.delete_key(self.name, version_id=self.version_id) |
| |
| def get_metadata(self, name): |
| return self.metadata.get(name) |
| |
| def set_metadata(self, name, value): |
| self.metadata[name] = value |
| |
| def update_metadata(self, d): |
| self.metadata.update(d) |
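
    # A minimal usage sketch: custom metadata must be set *before* the
    # object is uploaded, since it is sent with the PUT request (names
    # are illustrative):
    #
    #   key = Key(bucket, 'myobject')
    #   key.set_metadata('project', 'apollo')
    #   key.set_contents_from_string('some data')
    #   # After a later GET/HEAD of the key,
    #   # key.get_metadata('project') -> 'apollo'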
| |
| # convenience methods for setting/getting ACL |
| def set_acl(self, acl_str, headers=None): |
        if self.bucket is not None:
            self.bucket.set_acl(acl_str, self.name, headers=headers)

    def get_acl(self, headers=None):
        if self.bucket is not None:
            return self.bucket.get_acl(self.name, headers=headers)

    def get_xml_acl(self, headers=None):
        if self.bucket is not None:
            return self.bucket.get_xml_acl(self.name, headers=headers)

    def set_xml_acl(self, acl_str, headers=None):
        if self.bucket is not None:
            return self.bucket.set_xml_acl(acl_str, self.name, headers=headers)
| |
| def set_canned_acl(self, acl_str, headers=None): |
| return self.bucket.set_canned_acl(acl_str, self.name, headers) |
| |
| def get_redirect(self): |
| """Return the redirect location configured for this key. |
| |
| If no redirect is configured (via set_redirect), then None |
| will be returned. |
| |
| """ |
| response = self.bucket.connection.make_request( |
| 'HEAD', self.bucket.name, self.name) |
| if response.status == 200: |
| return response.getheader('x-amz-website-redirect-location') |
| else: |
| raise self.provider.storage_response_error( |
| response.status, response.reason, response.read()) |
| |
| def set_redirect(self, redirect_location): |
| """Configure this key to redirect to another location. |
| |
| When the bucket associated with this key is accessed from the website |
| endpoint, a 301 redirect will be issued to the specified |
| `redirect_location`. |
| |
| :type redirect_location: string |
        :param redirect_location: The location to redirect to.
| |
| """ |
| headers = {'x-amz-website-redirect-location': redirect_location} |
| response = self.bucket.connection.make_request('PUT', self.bucket.name, |
| self.name, headers) |
| if response.status == 200: |
| return True |
| else: |
| raise self.provider.storage_response_error( |
| response.status, response.reason, response.read()) |
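
    # A minimal usage sketch (assumes the bucket is configured as a
    # website; names are illustrative):
    #
    #   key = bucket.get_key('old-page.html')
    #   key.set_redirect('http://example.com/new-page.html')
    #   key.get_redirect()  # -> 'http://example.com/new-page.html'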
| |
| def make_public(self, headers=None): |
| return self.bucket.set_canned_acl('public-read', self.name, headers) |
| |
| def generate_url(self, expires_in, method='GET', headers=None, |
| query_auth=True, force_http=False, response_headers=None, |
| expires_in_absolute=False, version_id=None, |
| policy=None, reduced_redundancy=False, encrypt_key=False): |
| """ |
| Generate a URL to access this key. |
| |
| :type expires_in: int |
| :param expires_in: How long the url is valid for, in seconds |
| |
| :type method: string |
| :param method: The method to use for retrieving the file |
| (default is GET) |
| |
| :type headers: dict |
| :param headers: Any headers to pass along in the request |
| |
        :type query_auth: bool
        :param query_auth: If True, signed query string authentication
            parameters are included in the generated URL; if False, an
            unsigned URL is generated.
| |
| :type force_http: bool |
| :param force_http: If True, http will be used instead of https. |
| |
| :type response_headers: dict |
| :param response_headers: A dictionary containing HTTP |
| headers/values that will override any headers associated |
| with the stored object in the response. See |
| http://goo.gl/EWOPb for details. |
| |
        :type expires_in_absolute: bool
        :param expires_in_absolute: If True, expires_in is interpreted as
            an absolute expiration time (seconds since the epoch) rather
            than an offset from the current time.
| |
| :type version_id: string |
| :param version_id: The version_id of the object to GET. If specified |
| this overrides any value in the key. |
| |
| :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| :param policy: A canned ACL policy that will be applied to the |
| new key in S3. |
| |
| :type reduced_redundancy: bool |
| :param reduced_redundancy: If True, this will set the storage |
            class of the new Key to be REDUCED_REDUNDANCY. The Reduced
            Redundancy Storage (RRS) feature of S3 provides lower
            redundancy at lower storage cost.
| |
| :type encrypt_key: bool |
| :param encrypt_key: If True, the new copy of the object will |
| be encrypted on the server-side by S3 and will be stored |
| in an encrypted form while at rest in S3. |
| |
| :rtype: string |
| :return: The URL to access the key |
| """ |
| provider = self.bucket.connection.provider |
| version_id = version_id or self.version_id |
| if headers is None: |
| headers = {} |
| else: |
| headers = headers.copy() |
| |
| # add headers accordingly (usually PUT case) |
| if policy: |
| headers[provider.acl_header] = policy |
| if reduced_redundancy: |
| self.storage_class = 'REDUCED_REDUNDANCY' |
| if provider.storage_class_header: |
| headers[provider.storage_class_header] = self.storage_class |
| if encrypt_key: |
| headers[provider.server_side_encryption_header] = 'AES256' |
| headers = boto.utils.merge_meta(headers, self.metadata, provider) |
| |
| return self.bucket.connection.generate_url(expires_in, method, |
| self.bucket.name, self.name, |
| headers, query_auth, |
| force_http, |
| response_headers, |
| expires_in_absolute, |
| version_id) |
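
    # A minimal usage sketch (assumes a configured connection; names are
    # illustrative). The resulting URL embeds query string authentication
    # and can be handed to clients that have no AWS credentials:
    #
    #   key = bucket.get_key('private/report.pdf')
    #   url = key.generate_url(3600)     # signed URL, valid for one hour
    #   plain = key.generate_url(0, query_auth=False, force_http=True)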
| |
| def send_file(self, fp, headers=None, cb=None, num_cb=10, |
| query_args=None, chunked_transfer=False, size=None): |
| """ |
| Upload a file to a key into a bucket on S3. |
| |
| :type fp: file |
        :param fp: The file pointer to upload. The file pointer must
            point at the offset from which you wish to upload;
            i.e., if uploading the full file, it should point at the
            start of the file. Normally when a file is opened for
            reading, the fp will point at the first byte. See the
            size parameter below for more info.
| |
| :type headers: dict |
| :param headers: The headers to pass along with the PUT request |
| |
| :type cb: function |
| :param cb: a callback function that will be called to report |
| progress on the upload. The callback should accept two |
| integer parameters, the first representing the number of |
| bytes that have been successfully transmitted to S3 and |
            the second representing the total size of the object to
            be transmitted.
| |
| :type num_cb: int |
| :param num_cb: (optional) If a callback is specified with the |
| cb parameter this parameter determines the granularity of |
| the callback by defining the maximum number of times the |
| callback will be called during the file |
| transfer. Providing a negative integer will cause your |
| callback to be called with each buffer read. |
| |
| :type size: int |
        :param size: (optional) The maximum number of bytes to read
            from the file pointer (fp). This is useful when uploading
            a file in multiple parts where you are splitting the file
            up into different ranges to be uploaded. If not specified,
            the default behaviour is to read all bytes from the file
            pointer. Fewer bytes may be available.
| """ |
| provider = self.bucket.connection.provider |
| try: |
| spos = fp.tell() |
| except IOError: |
| spos = None |
| self.read_from_stream = False |
| |
| def sender(http_conn, method, path, data, headers): |
| # This function is called repeatedly for temporary retries |
| # so we must be sure the file pointer is pointing at the |
| # start of the data. |
| if spos is not None and spos != fp.tell(): |
| fp.seek(spos) |
| elif spos is None and self.read_from_stream: |
| # if seek is not supported, and we've read from this |
| # stream already, then we need to abort retries to |
| # avoid setting bad data. |
| raise provider.storage_data_error( |
| 'Cannot retry failed request. fp does not support seeking.') |
| |
| http_conn.putrequest(method, path) |
| for key in headers: |
| http_conn.putheader(key, headers[key]) |
| http_conn.endheaders() |
| |
| # Calculate all MD5 checksums on the fly, if not already computed |
| if not self.base64md5: |
| m = md5() |
| else: |
| m = None |
| |
| save_debug = self.bucket.connection.debug |
| self.bucket.connection.debug = 0 |
| # If the debuglevel < 4 we don't want to show connection |
| # payload, so turn off HTTP connection-level debug output (to |
| # be restored below). |
| # Use the getattr approach to allow this to work in AppEngine. |
| if getattr(http_conn, 'debuglevel', 0) < 4: |
| http_conn.set_debuglevel(0) |
| |
| data_len = 0 |
| if cb: |
| if size: |
| cb_size = size |
| elif self.size: |
| cb_size = self.size |
| else: |
| cb_size = 0 |
| if chunked_transfer and cb_size == 0: |
| # For chunked Transfer, we call the cb for every 1MB |
| # of data transferred, except when we know size. |
| cb_count = (1024 * 1024) / self.BufferSize |
| elif num_cb > 1: |
| cb_count = int(math.ceil(cb_size / self.BufferSize / (num_cb - 1.0))) |
| elif num_cb < 0: |
| cb_count = -1 |
| else: |
| cb_count = 0 |
| i = 0 |
| cb(data_len, cb_size) |
| |
| bytes_togo = size |
| if bytes_togo and bytes_togo < self.BufferSize: |
| chunk = fp.read(bytes_togo) |
| else: |
| chunk = fp.read(self.BufferSize) |
| if spos is None: |
| # read at least something from a non-seekable fp. |
| self.read_from_stream = True |
| while chunk: |
| chunk_len = len(chunk) |
| data_len += chunk_len |
| if chunked_transfer: |
| http_conn.send('%x;\r\n' % chunk_len) |
| http_conn.send(chunk) |
| http_conn.send('\r\n') |
| else: |
| http_conn.send(chunk) |
| if m: |
| m.update(chunk) |
| if bytes_togo: |
| bytes_togo -= chunk_len |
| if bytes_togo <= 0: |
| break |
| if cb: |
| i += 1 |
| if i == cb_count or cb_count == -1: |
| cb(data_len, cb_size) |
| i = 0 |
| if bytes_togo and bytes_togo < self.BufferSize: |
| chunk = fp.read(bytes_togo) |
| else: |
| chunk = fp.read(self.BufferSize) |
| |
| self.size = data_len |
| |
| if m: |
| # Use the chunked trailer for the digest |
| hd = m.hexdigest() |
| self.md5, self.base64md5 = self.get_md5_from_hexdigest(hd) |
| |
| if chunked_transfer: |
| http_conn.send('0\r\n') |
| # http_conn.send("Content-MD5: %s\r\n" % self.base64md5) |
| http_conn.send('\r\n') |
| |
| if cb and (cb_count <= 1 or i > 0) and data_len > 0: |
| cb(data_len, cb_size) |
| |
| http_conn.set_debuglevel(save_debug) |
| self.bucket.connection.debug = save_debug |
| response = http_conn.getresponse() |
| body = response.read() |
| if ((response.status == 500 or response.status == 503 or |
| response.getheader('location')) and not chunked_transfer): |
| # we'll try again. |
| return response |
| elif response.status >= 200 and response.status <= 299: |
| self.etag = response.getheader('etag') |
| if self.etag != '"%s"' % self.md5: |
| raise provider.storage_data_error( |
| 'ETag from S3 did not match computed MD5') |
| return response |
| else: |
| raise provider.storage_response_error( |
| response.status, response.reason, body) |
| |
| if not headers: |
| headers = {} |
| else: |
| headers = headers.copy() |
| headers['User-Agent'] = UserAgent |
| if self.storage_class != 'STANDARD': |
| headers[provider.storage_class_header] = self.storage_class |
        if 'Content-Encoding' in headers:
            self.content_encoding = headers['Content-Encoding']
        if 'Content-Language' in headers:
            self.content_language = headers['Content-Language']
| if 'Content-Type' in headers: |
| # Some use cases need to suppress sending of the Content-Type |
| # header and depend on the receiving server to set the content |
| # type. This can be achieved by setting headers['Content-Type'] |
| # to None when calling this method. |
| if headers['Content-Type'] is None: |
| # Delete null Content-Type value to skip sending that header. |
| del headers['Content-Type'] |
| else: |
| self.content_type = headers['Content-Type'] |
| elif self.path: |
| self.content_type = mimetypes.guess_type(self.path)[0] |
            if self.content_type is None:
| self.content_type = self.DefaultContentType |
| headers['Content-Type'] = self.content_type |
| else: |
| headers['Content-Type'] = self.content_type |
| if self.base64md5: |
| headers['Content-MD5'] = self.base64md5 |
| if chunked_transfer: |
| headers['Transfer-Encoding'] = 'chunked' |
| #if not self.base64md5: |
| # headers['Trailer'] = "Content-MD5" |
| else: |
| headers['Content-Length'] = str(self.size) |
| headers['Expect'] = '100-Continue' |
| headers = boto.utils.merge_meta(headers, self.metadata, provider) |
| resp = self.bucket.connection.make_request('PUT', self.bucket.name, |
| self.name, headers, |
| sender=sender, |
| query_args=query_args) |
| self.handle_version_headers(resp, force=True) |
| |
| def compute_md5(self, fp, size=None): |
| """ |
| :type fp: file |
| :param fp: File pointer to the file to MD5 hash. The file |
| pointer will be reset to the same position before the |
| method returns. |
| |
| :type size: int |
        :param size: (optional) The maximum number of bytes to read
            from the file pointer (fp). This is useful when uploading
            a file in multiple parts where the file is being split
            in place into different parts. Fewer bytes may be available.
| |
| :rtype: tuple |
| :return: A tuple containing the hex digest version of the MD5 |
| hash as the first element and the base64 encoded version |
| of the plain digest as the second element. |
| """ |
| tup = compute_md5(fp, size=size) |
| # Returned values are MD5 hash, base64 encoded MD5 hash, and data size. |
| # The internal implementation of compute_md5() needs to return the |
| # data size but we don't want to return that value to the external |
| # caller because it changes the class interface (i.e. it might |
| # break some code) so we consume the third tuple value here and |
| # return the remainder of the tuple to the caller, thereby preserving |
| # the existing interface. |
| self.size = tup[2] |
| return tup[0:2] |
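
    # A minimal usage sketch (names are illustrative): precomputing the
    # MD5 lets send_file set the Content-MD5 header without a second
    # pass over the file:
    #
    #   fp = open('/tmp/data.bin', 'rb')
    #   md5_tuple = key.compute_md5(fp)   # fp position is restored after
    #   key.set_contents_from_file(fp, md5=md5_tuple)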
| |
| def set_contents_from_stream(self, fp, headers=None, replace=True, |
| cb=None, num_cb=10, policy=None, |
| reduced_redundancy=False, query_args=None, |
| size=None): |
| """ |
| Store an object using the name of the Key object as the key in |
| cloud and the contents of the data stream pointed to by 'fp' as |
| the contents. |
| |
        The stream object is not seekable and the total size is not known.
        This has the implication that we can't specify the
        Content-Length and Content-MD5 in the header. So for huge
        uploads, the delay in calculating MD5 is avoided, but with the
        penalty of being unable to verify the integrity of the uploaded
        data.
| |
| :type fp: file |
| :param fp: the file whose contents are to be uploaded |
| |
| :type headers: dict |
| :param headers: additional HTTP headers to be sent with the |
| PUT request. |
| |
| :type replace: bool |
| :param replace: If this parameter is False, the method will first check |
| to see if an object exists in the bucket with the same key. If it |
| does, it won't overwrite it. The default value is True which will |
| overwrite the object. |
| |
| :type cb: function |
| :param cb: a callback function that will be called to report |
| progress on the upload. The callback should accept two integer |
| parameters, the first representing the number of bytes that have |
| been successfully transmitted to GS and the second representing the |
| total number of bytes that need to be transmitted. |
| |
| :type num_cb: int |
| :param num_cb: (optional) If a callback is specified with the |
| cb parameter, this parameter determines the granularity of |
| the callback by defining the maximum number of times the |
| callback will be called during the file transfer. |
| |
| :type policy: :class:`boto.gs.acl.CannedACLStrings` |
| :param policy: A canned ACL policy that will be applied to the new key |
| in GS. |
| |
| :type reduced_redundancy: bool |
| :param reduced_redundancy: If True, this will set the storage |
| class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| Redundancy Storage (RRS) feature of S3, provides lower |
| redundancy at lower storage cost. |
| |
| :type size: int |
        :param size: (optional) The maximum number of bytes to read from
            the file pointer (fp). This is useful when uploading a
            file in multiple parts where you are splitting the file up
            into different ranges to be uploaded. If not specified,
            the default behaviour is to read all bytes from the file
            pointer. Fewer bytes may be available.
| """ |
| |
| provider = self.bucket.connection.provider |
| if not provider.supports_chunked_transfer(): |
| raise BotoClientError('%s does not support chunked transfer' |
| % provider.get_provider_name()) |
| |
| # Name of the Object should be specified explicitly for Streams. |
        if not self.name:
| raise BotoClientError('Cannot determine the destination ' |
| 'object name for the given stream') |
| |
| if headers is None: |
| headers = {} |
| if policy: |
| headers[provider.acl_header] = policy |
| |
| if reduced_redundancy: |
| self.storage_class = 'REDUCED_REDUNDANCY' |
| if provider.storage_class_header: |
| headers[provider.storage_class_header] = self.storage_class |
| |
        if self.bucket is not None:
| if not replace: |
| if self.bucket.lookup(self.name): |
| return |
| self.send_file(fp, headers, cb, num_cb, query_args, |
| chunked_transfer=True, size=size) |
| |
| def set_contents_from_file(self, fp, headers=None, replace=True, |
| cb=None, num_cb=10, policy=None, md5=None, |
| reduced_redundancy=False, query_args=None, |
| encrypt_key=False, size=None, rewind=False): |
| """ |
| Store an object in S3 using the name of the Key object as the |
| key in S3 and the contents of the file pointed to by 'fp' as the |
| contents. The data is read from 'fp' from its current position until |
| 'size' bytes have been read or EOF. |
| |
| :type fp: file |
| :param fp: the file whose contents to upload |
| |
| :type headers: dict |
| :param headers: Additional HTTP headers that will be sent with |
| the PUT request. |
| |
| :type replace: bool |
| :param replace: If this parameter is False, the method will |
| first check to see if an object exists in the bucket with |
| the same key. If it does, it won't overwrite it. The |
| default value is True which will overwrite the object. |
| |
| :type cb: function |
        :param cb: a callback function that will be called to report
            progress on the upload. The callback should accept two
            integer parameters, the first representing the number of
            bytes that have been successfully transmitted to S3 and
            the second representing the total size of the object to
            be transmitted.

        :type num_cb: int
| :param num_cb: (optional) If a callback is specified with the |
| cb parameter this parameter determines the granularity of |
| the callback by defining the maximum number of times the |
| callback will be called during the file transfer. |
| |
| :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| :param policy: A canned ACL policy that will be applied to the |
| new key in S3. |
| |
| :type md5: A tuple containing the hexdigest version of the MD5 |
| checksum of the file as the first element and the |
| Base64-encoded version of the plain checksum as the second |
| element. This is the same format returned by the |
| compute_md5 method. |
| :param md5: If you need to compute the MD5 for any reason |
| prior to upload, it's silly to have to do it twice so this |
| param, if present, will be used as the MD5 values of the |
| file. Otherwise, the checksum will be computed. |
| |
| :type reduced_redundancy: bool |
| :param reduced_redundancy: If True, this will set the storage |
| class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| Redundancy Storage (RRS) feature of S3, provides lower |
| redundancy at lower storage cost. |
| |
| :type encrypt_key: bool |
| :param encrypt_key: If True, the new copy of the object will |
| be encrypted on the server-side by S3 and will be stored |
| in an encrypted form while at rest in S3. |
| |
| :type size: int |
        :param size: (optional) The maximum number of bytes to read
            from the file pointer (fp). This is useful when uploading
            a file in multiple parts where you are splitting the file
            up into different ranges to be uploaded. If not specified,
            the default behaviour is to read all bytes from the file
            pointer. Fewer bytes may be available.
| |
| :type rewind: bool |
| :param rewind: (optional) If True, the file pointer (fp) will |
| be rewound to the start before any bytes are read from |
| it. The default behaviour is False which reads from the |
| current position of the file pointer (fp). |
| |
| :rtype: int |
| :return: The number of bytes written to the key. |
| """ |
| provider = self.bucket.connection.provider |
| headers = headers or {} |
| if policy: |
| headers[provider.acl_header] = policy |
| if encrypt_key: |
| headers[provider.server_side_encryption_header] = 'AES256' |
| |
| if rewind: |
| # caller requests reading from beginning of fp. |
| fp.seek(0, os.SEEK_SET) |
| else: |
| # The following seek/tell/seek logic is intended |
| # to detect applications using the older interface to |
| # set_contents_from_file(), which automatically rewound the |
| # file each time the Key was reused. This changed with commit |
| # 14ee2d03f4665fe20d19a85286f78d39d924237e, to support uploads |
| # split into multiple parts and uploaded in parallel, and at |
| # the time of that commit this check was added because otherwise |
| # older programs would get a success status and upload an empty |
            # object. Unfortunately, it's very inefficient for fp's implemented
| # by KeyFile (used, for example, by gsutil when copying between |
| # providers). So, we skip the check for the KeyFile case. |
| # TODO: At some point consider removing this seek/tell/seek |
| # logic, after enough time has passed that it's unlikely any |
| # programs remain that assume the older auto-rewind interface. |
| if not isinstance(fp, KeyFile): |
| spos = fp.tell() |
| fp.seek(0, os.SEEK_END) |
| if fp.tell() == spos: |
| fp.seek(0, os.SEEK_SET) |
| if fp.tell() != spos: |
| # Raise an exception as this is likely a programming |
| # error whereby there is data before the fp but nothing |
| # after it. |
| fp.seek(spos) |
| raise AttributeError('fp is at EOF. Use rewind option ' |
| 'or seek() to data start.') |
| # seek back to the correct position. |
| fp.seek(spos) |
| |
| if reduced_redundancy: |
| self.storage_class = 'REDUCED_REDUNDANCY' |
| if provider.storage_class_header: |
| headers[provider.storage_class_header] = self.storage_class |
                # TODO - What if provider doesn't support reduced redundancy?
| # What if different providers provide different classes? |
| if hasattr(fp, 'name'): |
| self.path = fp.name |
        if self.bucket is not None:
            if not md5 and provider.supports_chunked_transfer():
                # Defer the MD5 calculation so it happens on the fly;
                # we don't know anything about the size yet.
| chunked_transfer = True |
| self.size = None |
| else: |
| chunked_transfer = False |
| if isinstance(fp, KeyFile): |
| # Avoid EOF seek for KeyFile case as it's very inefficient. |
| key = fp.getkey() |
| size = key.size - fp.tell() |
| self.size = size |
| # At present both GCS and S3 use MD5 for the etag for |
| # non-multipart-uploaded objects. If the etag is 32 hex |
| # chars use it as an MD5, to avoid having to read the file |
| # twice while transferring. |
| if (re.match('^"[a-fA-F0-9]{32}"$', key.etag)): |
| etag = key.etag.strip('"') |
| md5 = (etag, base64.b64encode(binascii.unhexlify(etag))) |
                if not md5:
                    # compute_md5() also sets self.size to the actual
                    # number of bytes read while computing the md5.
| md5 = self.compute_md5(fp, size) |
| # adjust size if required |
| size = self.size |
| elif size: |
| self.size = size |
                else:
                    # If md5 is provided, we still need the size, so
                    # calculate it based on the bytes to end of content.
| spos = fp.tell() |
| fp.seek(0, os.SEEK_END) |
| self.size = fp.tell() - spos |
| fp.seek(spos) |
| size = self.size |
| self.md5 = md5[0] |
| self.base64md5 = md5[1] |
| |
            if self.name is None:
| self.name = self.md5 |
| if not replace: |
| if self.bucket.lookup(self.name): |
| return |
| |
| self.send_file(fp, headers=headers, cb=cb, num_cb=num_cb, |
| query_args=query_args, |
| chunked_transfer=chunked_transfer, size=size) |
| # return number of bytes written. |
| return self.size |
| |
| def set_contents_from_filename(self, filename, headers=None, replace=True, |
| cb=None, num_cb=10, policy=None, md5=None, |
| reduced_redundancy=False, |
| encrypt_key=False): |
| """ |
| Store an object in S3 using the name of the Key object as the |
| key in S3 and the contents of the file named by 'filename'. |
| See set_contents_from_file method for details about the |
| parameters. |
| |
| :type filename: string |
| :param filename: The name of the file that you want to put onto S3 |
| |
| :type headers: dict |
| :param headers: Additional headers to pass along with the |
| request to AWS. |
| |
| :type replace: bool |
        :param replace: If True, replaces the contents of the key
            if it already exists.
| |
| :type cb: function |
| :param cb: a callback function that will be called to report |
| progress on the upload. The callback should accept two |
| integer parameters, the first representing the number of |
| bytes that have been successfully transmitted to S3 and |
            the second representing the total size of the object to
            be transmitted.

        :type num_cb: int
| :param num_cb: (optional) If a callback is specified with the |
| cb parameter this parameter determines the granularity of |
| the callback by defining the maximum number of times the |
| callback will be called during the file transfer. |
| |
| :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| :param policy: A canned ACL policy that will be applied to the |
| new key in S3. |
| |
| :type md5: A tuple containing the hexdigest version of the MD5 |
| checksum of the file as the first element and the |
| Base64-encoded version of the plain checksum as the second |
| element. This is the same format returned by the |
| compute_md5 method. |
| :param md5: If you need to compute the MD5 for any reason |
| prior to upload, it's silly to have to do it twice so this |
| param, if present, will be used as the MD5 values of the |
| file. Otherwise, the checksum will be computed. |
| |
| :type reduced_redundancy: bool |
        :param reduced_redundancy: If True, this will set the storage
            class of the new Key to be REDUCED_REDUNDANCY. The Reduced
            Redundancy Storage (RRS) feature of S3 provides lower
            redundancy at lower storage cost.

        :type encrypt_key: bool
        :param encrypt_key: If True, the new copy of the object
            will be encrypted on the server-side by S3 and will be
            stored in an encrypted form while at rest in S3.
| """ |
| fp = open(filename, 'rb') |
| try: |
| self.set_contents_from_file(fp, headers, replace, cb, num_cb, |
| policy, md5, reduced_redundancy, |
| encrypt_key=encrypt_key) |
| finally: |
| fp.close() |
| |
| def set_contents_from_string(self, s, headers=None, replace=True, |
| cb=None, num_cb=10, policy=None, md5=None, |
| reduced_redundancy=False, |
| encrypt_key=False): |
| """ |
| Store an object in S3 using the name of the Key object as the |
| key in S3 and the string 's' as the contents. |
| See set_contents_from_file method for details about the |
| parameters. |
| |
| :type headers: dict |
| :param headers: Additional headers to pass along with the |
| request to AWS. |
| |
| :type replace: bool |
        :param replace: If True, replaces the contents of the key if
            it already exists.
| |
| :type cb: function |
| :param cb: a callback function that will be called to report |
| progress on the upload. The callback should accept two |
| integer parameters, the first representing the number of |
| bytes that have been successfully transmitted to S3 and |
            the second representing the total size of the object to
            be transmitted.

        :type num_cb: int
| :param num_cb: (optional) If a callback is specified with the |
| cb parameter this parameter determines the granularity of |
| the callback by defining the maximum number of times the |
| callback will be called during the file transfer. |
| |
| :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| :param policy: A canned ACL policy that will be applied to the |
| new key in S3. |
| |
| :type md5: A tuple containing the hexdigest version of the MD5 |
| checksum of the file as the first element and the |
| Base64-encoded version of the plain checksum as the second |
| element. This is the same format returned by the |
| compute_md5 method. |
| :param md5: If you need to compute the MD5 for any reason |
| prior to upload, it's silly to have to do it twice so this |
| param, if present, will be used as the MD5 values of the |
| file. Otherwise, the checksum will be computed. |
| |
| :type reduced_redundancy: bool |
| :param reduced_redundancy: If True, this will set the storage |
            class of the new Key to be REDUCED_REDUNDANCY. The Reduced
            Redundancy Storage (RRS) feature of S3 provides lower
            redundancy at lower storage cost.
| |
| :type encrypt_key: bool |
| :param encrypt_key: If True, the new copy of the object will |
| be encrypted on the server-side by S3 and will be stored |
| in an encrypted form while at rest in S3. |
| """ |
| if isinstance(s, unicode): |
| s = s.encode("utf-8") |
| fp = StringIO.StringIO(s) |
| r = self.set_contents_from_file(fp, headers, replace, cb, num_cb, |
| policy, md5, reduced_redundancy, |
| encrypt_key=encrypt_key) |
| fp.close() |
| return r |
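
    # A minimal round-trip sketch (assumes a writable bucket; names are
    # illustrative):
    #
    #   key = Key(bucket, 'greeting.txt')
    #   key.set_contents_from_string('hello world')
    #   key.get_contents_as_string()   # -> 'hello world'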
| |
| def get_file(self, fp, headers=None, cb=None, num_cb=10, |
| torrent=False, version_id=None, override_num_retries=None, |
| response_headers=None): |
| """ |
| Retrieves a file from an S3 Key |
| |
| :type fp: file |
| :param fp: File pointer to put the data into |
| |
        :type headers: dict
        :param headers: headers to send when retrieving the file
| |
| :type cb: function |
        :param cb: a callback function that will be called to report
            progress on the download. The callback should accept two
            integer parameters, the first representing the number of
            bytes that have been successfully transmitted from S3 and
            the second representing the total size of the object to
            be transmitted.

        :type num_cb: int
| :param num_cb: (optional) If a callback is specified with the |
| cb parameter this parameter determines the granularity of |
| the callback by defining the maximum number of times the |
| callback will be called during the file transfer. |
| |
| :type torrent: bool |
| :param torrent: Flag for whether to get a torrent for the file |
| |
| :type override_num_retries: int |
        :param override_num_retries: If not None, this will override the
            configured num_retries parameter for the underlying GET.
| |
| :type response_headers: dict |
| :param response_headers: A dictionary containing HTTP |
| headers/values that will override any headers associated |
| with the stored object in the response. See |
| http://goo.gl/EWOPb for details. |
| """ |
| self._get_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb, |
| torrent=torrent, version_id=version_id, |
| override_num_retries=override_num_retries, |
| response_headers=response_headers, |
| query_args=None) |
| |
| def _get_file_internal(self, fp, headers=None, cb=None, num_cb=10, |
| torrent=False, version_id=None, override_num_retries=None, |
| response_headers=None, query_args=None): |
| if headers is None: |
| headers = {} |
| save_debug = self.bucket.connection.debug |
| if self.bucket.connection.debug == 1: |
| self.bucket.connection.debug = 0 |
| |
| query_args = query_args or [] |
| if torrent: |
| query_args.append('torrent') |
| m = None |
| else: |
| m = md5() |
| # If a version_id is passed in, use that. If not, check to see |
| # if the Key object has an explicit version_id and, if so, use that. |
| # Otherwise, don't pass a version_id query param. |
| if version_id is None: |
| version_id = self.version_id |
| if version_id: |
| query_args.append('versionId=%s' % version_id) |
| if response_headers: |
| for key in response_headers: |
| query_args.append('%s=%s' % (key, urllib.quote(response_headers[key]))) |
| query_args = '&'.join(query_args) |
| self.open('r', headers, query_args=query_args, |
| override_num_retries=override_num_retries) |
| |
| data_len = 0 |
| if cb: |
| if self.size is None: |
| cb_size = 0 |
| else: |
| cb_size = self.size |
| if self.size is None and num_cb != -1: |
| # If size is not available due to chunked transfer for example, |
| # we'll call the cb for every 1MB of data transferred. |
| cb_count = (1024 * 1024) / self.BufferSize |
| elif num_cb > 1: |
| cb_count = int(math.ceil(cb_size/self.BufferSize/(num_cb-1.0))) |
| elif num_cb < 0: |
| cb_count = -1 |
| else: |
| cb_count = 0 |
| i = 0 |
| cb(data_len, cb_size) |
| for bytes in self: |
| fp.write(bytes) |
| data_len += len(bytes) |
| if m: |
| m.update(bytes) |
| if cb: |
| if cb_size > 0 and data_len >= cb_size: |
| break |
| i += 1 |
| if i == cb_count or cb_count == -1: |
| cb(data_len, cb_size) |
| i = 0 |
| if cb and (cb_count <= 1 or i > 0) and data_len > 0: |
| cb(data_len, cb_size) |
| if m: |
| self.md5 = m.hexdigest() |
| if self.size is None and not torrent and "Range" not in headers: |
| self.size = data_len |
| self.close() |
| self.bucket.connection.debug = save_debug |
| |
| def get_torrent_file(self, fp, headers=None, cb=None, num_cb=10): |
| """ |
        Get a torrent file (see get_file)
| |
| :type fp: file |
| :param fp: The file pointer of where to put the torrent |
| |
| :type headers: dict |
| :param headers: Headers to be passed |
| |
| :type cb: function |
        :param cb: a callback function that will be called to report
            progress on the download. The callback should accept two
            integer parameters, the first representing the number of
            bytes that have been successfully transmitted from S3 and
            the second representing the total size of the object to
            be transmitted.

        :type num_cb: int
| :param num_cb: (optional) If a callback is specified with the |
| cb parameter this parameter determines the granularity of |
| the callback by defining the maximum number of times the |
| callback will be called during the file transfer. |
| |
| """ |
| return self.get_file(fp, headers, cb, num_cb, torrent=True) |
| |
| def get_contents_to_file(self, fp, headers=None, |
| cb=None, num_cb=10, |
| torrent=False, |
| version_id=None, |
| res_download_handler=None, |
| response_headers=None): |
| """ |
| Retrieve an object from S3 using the name of the Key object as the |
| key in S3. Write the contents of the object to the file pointed |
| to by 'fp'. |
| |
        :type fp: file
        :param fp: File pointer to which the object's contents will
            be written.
| |
| :type headers: dict |
| :param headers: additional HTTP headers that will be sent with |
| the GET request. |
| |
| :type cb: function |
        :param cb: a callback function that will be called to report
            progress on the download. The callback should accept two
            integer parameters, the first representing the number of
            bytes that have been successfully transmitted from S3 and
            the second representing the total size of the object to
            be transmitted.

        :type num_cb: int
| :param num_cb: (optional) If a callback is specified with the |
| cb parameter this parameter determines the granularity of |
| the callback by defining the maximum number of times the |
| callback will be called during the file transfer. |
| |
| :type torrent: bool |
| :param torrent: If True, returns the contents of a torrent |
| file as a string. |
| |
        :type res_download_handler: ResumableDownloadHandler
| :param res_download_handler: If provided, this handler will |
| perform the download. |
| |
| :type response_headers: dict |
| :param response_headers: A dictionary containing HTTP |
| headers/values that will override any headers associated |
| with the stored object in the response. See |
| http://goo.gl/EWOPb for details. |
| """ |
        if self.bucket is not None:
| if res_download_handler: |
| res_download_handler.get_file(self, fp, headers, cb, num_cb, |
| torrent=torrent, |
| version_id=version_id) |
| else: |
| self.get_file(fp, headers, cb, num_cb, torrent=torrent, |
| version_id=version_id, |
| response_headers=response_headers) |
| |
| def get_contents_to_filename(self, filename, headers=None, |
| cb=None, num_cb=10, |
| torrent=False, |
| version_id=None, |
| res_download_handler=None, |
| response_headers=None): |
| """ |
| Retrieve an object from S3 using the name of the Key object as the |
| key in S3. Store contents of the object to a file named by 'filename'. |
| See get_contents_to_file method for details about the |
| parameters. |
| |
| :type filename: string |
| :param filename: The filename of where to put the file contents |
| |
| :type headers: dict |
| :param headers: Any additional headers to send in the request |
| |
| :type cb: function |
        :param cb: a callback function that will be called to report
            progress on the download. The callback should accept two
            integer parameters, the first representing the number of
            bytes that have been successfully transmitted from S3 and
            the second representing the total size of the object to
            be transmitted.

        :type num_cb: int
| :param num_cb: (optional) If a callback is specified with the |
| cb parameter this parameter determines the granularity of |
| the callback by defining the maximum number of times the |
| callback will be called during the file transfer. |
| |
| :type torrent: bool |
| :param torrent: If True, returns the contents of a torrent file |
| as a string. |
| |
        :type res_download_handler: ResumableDownloadHandler
| :param res_download_handler: If provided, this handler will |
| perform the download. |
| |
| :type response_headers: dict |
| :param response_headers: A dictionary containing HTTP |
| headers/values that will override any headers associated |
| with the stored object in the response. See |
| http://goo.gl/EWOPb for details. |
| """ |
| fp = open(filename, 'wb') |
| try: |
| self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent, |
| version_id=version_id, |
| res_download_handler=res_download_handler, |
| response_headers=response_headers) |
| except Exception: |
| os.remove(filename) |
| raise |
| finally: |
| fp.close() |
| # if last_modified date was sent from s3, try to set file's timestamp |
        if self.last_modified is not None:
| try: |
| modified_tuple = rfc822.parsedate_tz(self.last_modified) |
| modified_stamp = int(rfc822.mktime_tz(modified_tuple)) |
| os.utime(fp.name, (modified_stamp, modified_stamp)) |
| except Exception: |
| pass |
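
    # A minimal usage sketch (assumes the key exists; names are
    # illustrative). The local file's mtime is set from the S3
    # Last-Modified header when available:
    #
    #   key = bucket.get_key('photos/cat.jpg')
    #   key.get_contents_to_filename('/tmp/cat.jpg')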
| |
| def get_contents_as_string(self, headers=None, |
| cb=None, num_cb=10, |
| torrent=False, |
| version_id=None, |
| response_headers=None): |
| """ |
| Retrieve an object from S3 using the name of the Key object as the |
| key in S3. Return the contents of the object as a string. |
| See get_contents_to_file method for details about the |
| parameters. |
| |
| :type headers: dict |
| :param headers: Any additional headers to send in the request |
| |
| :type cb: function |
        :param cb: a callback function that will be called to report
            progress on the download. The callback should accept two
            integer parameters, the first representing the number of
            bytes that have been successfully transmitted from S3 and
            the second representing the total size of the object to
            be transmitted.

        :type num_cb: int
| :param num_cb: (optional) If a callback is specified with the |
| cb parameter this parameter determines the granularity of |
| the callback by defining the maximum number of times the |
| callback will be called during the file transfer. |
| |
| :type torrent: bool |
| :param torrent: If True, returns the contents of a torrent file |
| as a string. |
| |
| :type response_headers: dict |
| :param response_headers: A dictionary containing HTTP |
| headers/values that will override any headers associated |
| with the stored object in the response. See |
| http://goo.gl/EWOPb for details. |
| |
| :rtype: string |
| :returns: The contents of the file as a string |
| """ |
| fp = StringIO.StringIO() |
| self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent, |
| version_id=version_id, |
| response_headers=response_headers) |
| return fp.getvalue() |
| |
| def add_email_grant(self, permission, email_address, headers=None): |
| """ |
| Convenience method that provides a quick way to add an email grant |
| to a key. This method retrieves the current ACL, creates a new |
| grant based on the parameters passed in, adds that grant to the ACL |
| and then PUT's the new ACL back to S3. |
| |
| :type permission: string |
| :param permission: The permission being granted. Should be one of: |
| (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL). |
| |
| :type email_address: string |
        :param email_address: The email address associated with the AWS
            account you are granting the permission to.
| |
| """ |
| policy = self.get_acl(headers=headers) |
| policy.acl.add_email_grant(permission, email_address) |
| self.set_acl(policy, headers=headers) |
| |
| def add_user_grant(self, permission, user_id, headers=None, |
| display_name=None): |
| """ |
| Convenience method that provides a quick way to add a canonical |
| user grant to a key. This method retrieves the current ACL, |
| creates a new grant based on the parameters passed in, adds that |
| grant to the ACL and then PUT's the new ACL back to S3. |
| |
| :type permission: string |
| :param permission: The permission being granted. Should be one of: |
| (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL). |
| |
| :type user_id: string |
        :param user_id: The canonical user id associated with the AWS
            account you are granting the permission to.

        :type display_name: string
        :param display_name: An optional string containing the user's
            Display Name. Only required on Walrus.
| """ |
| policy = self.get_acl(headers=headers) |
| policy.acl.add_user_grant(permission, user_id, |
| display_name=display_name) |
| self.set_acl(policy, headers=headers) |
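
    # A minimal usage sketch (the canonical user id below is a made-up
    # placeholder):
    #
    #   key.add_email_grant('READ', 'user@example.com')
    #   key.add_user_grant('FULL_CONTROL', 'abc123-canonical-user-id')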
| |
| def _normalize_metadata(self, metadata): |
        if isinstance(metadata, set):
| norm_metadata = set() |
| for k in metadata: |
| norm_metadata.add(k.lower()) |
| else: |
| norm_metadata = {} |
| for k in metadata: |
| norm_metadata[k.lower()] = metadata[k] |
| return norm_metadata |
| |
| def _get_remote_metadata(self, headers=None): |
| """ |
| Extracts metadata from existing URI into a dict, so we can |
| overwrite/delete from it to form the new set of metadata to apply to a |
| key. |
| """ |
| metadata = {} |
| for underscore_name in self._underscore_base_user_settable_fields: |
| if hasattr(self, underscore_name): |
| value = getattr(self, underscore_name) |
| if value: |
| # Generate HTTP field name corresponding to "_" named field. |
| field_name = underscore_name.replace('_', '-') |
| metadata[field_name.lower()] = value |
| # self.metadata contains custom metadata, which are all user-settable. |
| prefix = self.provider.metadata_prefix |
| for underscore_name in self.metadata: |
| field_name = underscore_name.replace('_', '-') |
| metadata['%s%s' % (prefix, field_name.lower())] = ( |
| self.metadata[underscore_name]) |
| return metadata |
| |
| def set_remote_metadata(self, metadata_plus, metadata_minus, preserve_acl, |
| headers=None): |
| metadata_plus = self._normalize_metadata(metadata_plus) |
| metadata_minus = self._normalize_metadata(metadata_minus) |
| metadata = self._get_remote_metadata() |
| metadata.update(metadata_plus) |
| for h in metadata_minus: |
| if h in metadata: |
| del metadata[h] |
| src_bucket = self.bucket |
| # Boto prepends the meta prefix when adding headers, so strip prefix in |
| # metadata before sending back in to copy_key() call. |
| rewritten_metadata = {} |
| for h in metadata: |
| if (h.startswith('x-goog-meta-') or h.startswith('x-amz-meta-')): |
| rewritten_h = (h.replace('x-goog-meta-', '') |
| .replace('x-amz-meta-', '')) |
| else: |
| rewritten_h = h |
| rewritten_metadata[rewritten_h] = metadata[h] |
| metadata = rewritten_metadata |
| src_bucket.copy_key(self.name, self.bucket.name, self.name, |
| metadata=metadata, preserve_acl=preserve_acl, |
| headers=headers) |
| |
| def restore(self, days, headers=None): |
| """Restore an object from an archive. |
| |
| :type days: int |
| :param days: The lifetime of the restored object (must |
| be at least 1 day). If the object is already restored |
| then this parameter can be used to readjust the lifetime |
| of the restored object. In this case, the days |
| param is with respect to the initial time of the request. |
| If the object has not been restored, this param is with |
| respect to the completion time of the request. |
| |
| """ |
| response = self.bucket.connection.make_request( |
| 'POST', self.bucket.name, self.name, |
| data=self.RestoreBody % days, |
| headers=headers, query_args='restore') |
| if response.status not in (200, 202): |
| provider = self.bucket.connection.provider |
| raise provider.storage_response_error(response.status, |
| response.reason, |
| response.read()) |
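
    # A minimal usage sketch (assumes the object was transitioned to
    # Glacier by a lifecycle rule; names are illustrative):
    #
    #   key = bucket.get_key('archive/2010.tar')
    #   key.restore(days=7)                        # initiate the restore
    #   key = bucket.get_key('archive/2010.tar')   # refresh restore state
    #   key.ongoing_restore   # True while the restore is in progress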