How to upload a file to a directory in an S3 bucket using boto

I want to copy a file to an S3 bucket using Python.

Example: I have a bucket named "test", and inside the bucket I have two folders named "dump" and "input". Now I want to copy a file from a local directory to the S3 "dump" folder using Python... Can anyone help me?


Note: this answer uses boto. See the other answer that uses boto3, which is newer.

Try this...

import boto
import boto.s3
import sys
from boto.s3.key import Key

AWS_ACCESS_KEY_ID = ''
AWS_SECRET_ACCESS_KEY = ''

bucket_name = AWS_ACCESS_KEY_ID.lower() + '-dump'
conn = boto.connect_s3(AWS_ACCESS_KEY_ID,
                       AWS_SECRET_ACCESS_KEY)

bucket = conn.create_bucket(bucket_name,
                            location=boto.s3.connection.Location.DEFAULT)

testfile = "replace this with an actual filename"
print 'Uploading %s to Amazon S3 bucket %s' % \
    (testfile, bucket_name)

def percent_cb(complete, total):
    sys.stdout.write('.')
    sys.stdout.flush()

k = Key(bucket)
k.key = 'my test file'
k.set_contents_from_filename(testfile,
                             cb=percent_cb, num_cb=10)

[UPDATE] I am not a pythonist, so thanks for the heads-up about the import statements. Also, I would not recommend placing credentials inside your own source code. If you are running this inside AWS, use IAM credentials with instance profiles (http://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use_switch-role-ec2_instance-profiles.html), and to keep the same behaviour in your Dev/Test environment, use something like Hologram from AdRoll (https://github.com/AdRoll/hologram).
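
For illustration, here is a minimal boto3 sketch of that credential-free style, assuming the keys come from the environment (instance profile, environment variables, or ~/.aws/credentials) rather than from the source; the bucket and folder names are taken from the question:

import boto3

# no credentials in the source: boto3 falls back to the instance profile,
# environment variables, or ~/.aws/credentials automatically
s3 = boto3.client('s3')
s3.upload_file('local_file.txt', 'test', 'dump/local_file.txt')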

I used this, and it is very simple to implement.

import tinys3


conn = tinys3.Connection('S3_ACCESS_KEY','S3_SECRET_KEY',tls=True)


f = open('some_file.zip','rb')
conn.upload('some_file.zip',f,'my_bucket')

https://www.smore.com/labs/tinys3/
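
To land the file in the question's "dump" folder, the destination key can simply include that prefix; a small sketch using the same placeholder keys, with the bucket name from the question:

import tinys3

conn = tinys3.Connection('S3_ACCESS_KEY', 'S3_SECRET_KEY', tls=True)
# the 'dump/' prefix of the key acts as the folder inside the bucket
with open('some_file.zip', 'rb') as f:
    conn.upload('dump/some_file.zip', f, 'test')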

No need to make it that complicated:

s3_connection = boto.connect_s3()
bucket = s3_connection.get_bucket('your bucket name')
key = boto.s3.key.Key(bucket, 'some_file.zip')
with open('some_file.zip', 'rb') as f:  # binary mode, since a zip is not text
    key.send_file(f)

from boto3.s3.transfer import S3Transfer
import boto3
# have all the variables populated which are required below
client = boto3.client('s3', aws_access_key_id=access_key, aws_secret_access_key=secret_key)
transfer = S3Transfer(client)
transfer.upload_file(filepath, bucket_name, folder_name + "/" + filename)
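
If you also need to set object metadata during the upload, S3Transfer.upload_file accepts an extra_args dict that is forwarded to S3; a sketch with hypothetical file and bucket names:

from boto3.s3.transfer import S3Transfer
import boto3

client = boto3.client('s3')  # credentials resolved from the environment
transfer = S3Transfer(client)
# extra_args forwards S3 parameters such as ContentType or ACL
transfer.upload_file('report.pdf', 'test', 'dump/report.pdf',
                     extra_args={'ContentType': 'application/pdf'})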

This will also work:

import os
import boto
import boto.s3.connection
from boto.s3.key import Key

try:
    conn = boto.s3.connect_to_region('us-east-1',
        aws_access_key_id='AWS-Access-Key',
        aws_secret_access_key='AWS-Secrete-Key',
        # host='s3-website-us-east-1.amazonaws.com',
        # is_secure=True,  # uncomment if you are not using ssl
        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
    )

    bucket = conn.get_bucket('YourBucketName')
    key_name = 'FileToUpload'
    path = 'images/holiday'  # directory under which the file should get uploaded
    full_key_name = os.path.join(path, key_name)
    k = bucket.new_key(full_key_name)
    k.set_contents_from_filename(key_name)

except Exception, e:
    print str(e)
    print "error"

import boto
from boto.s3.key import Key

AWS_ACCESS_KEY_ID = ''
AWS_SECRET_ACCESS_KEY = ''
END_POINT = ''                          # eg. us-east-1
S3_HOST = ''                            # eg. s3.us-east-1.amazonaws.com
BUCKET_NAME = 'test'
FILENAME = 'upload.txt'
UPLOADED_FILENAME = 'dumps/upload.txt'
# include folders in file path. If it doesn't exist, it will be created

s3 = boto.s3.connect_to_region(END_POINT,
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    host=S3_HOST)

bucket = s3.get_bucket(BUCKET_NAME)
k = Key(bucket)
k.key = UPLOADED_FILENAME
k.set_contents_from_filename(FILENAME)

import boto3

s3 = boto3.resource('s3')
BUCKET = "test"

s3.Bucket(BUCKET).upload_file("your/local/file", "dump/file")
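
Note that S3 has no real directories: the "dump/" prefix of the key is all that makes the file appear inside a folder. A small sketch, using the question's bucket and folder names, of how to confirm the object landed under that prefix:

import boto3

s3 = boto3.resource('s3')
# "folders" in S3 are just key prefixes; listing the dump/ prefix
# shows the uploaded object without any explicit folder having been created
for obj in s3.Bucket("test").objects.filter(Prefix="dump/"):
    print(obj.key)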

# xmlstr comes from the author's own context: the XML string to upload
xmlstr = etree.tostring(listings, encoding='utf8', method='xml')
conn = boto.connect_s3(
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_key,
    # host='<bucketName>.s3.amazonaws.com',
    host='bycket.s3.amazonaws.com',
    # is_secure=False,  # uncomment if you are not using ssl
    calling_format=boto.s3.connection.OrdinaryCallingFormat(),
)
conn.auth_region_name = 'us-west-1'

bucket = conn.get_bucket('resources', validate=False)
key = bucket.get_key('filename.txt')
key.set_contents_from_string("SAMPLE TEXT")
key.set_canned_acl('public-read')

This is a three-liner. Just follow the instructions in the boto3 documentation.

import boto3
s3 = boto3.resource(service_name = 's3')
s3.meta.client.upload_file(Filename = 'C:/foo/bar/baz.filetype', Bucket = 'yourbucketname', Key = 'baz.filetype')

Some important arguments are:

Parameters:

  • Filename (str) -- The path to the file to upload.
  • Bucket (str) -- The name of the bucket to upload to.
  • Key (str) -- The name you want to assign to your file in the S3 bucket. This could be the same as the file's name or a different name of your choice, but the filetype should remain the same.

    Note: I assume that you have saved your credentials in the ~/.aws folder, as suggested in the best configuration practices in the boto3 documentation.

  • An example of uploading a whole folder follows below (the accompanying screenshot of the resulting S3 folder is omitted):

    import boto
    import boto.s3
    import boto.s3.connection
    import os.path
    import sys

    # Fill in info on data to upload
    # destination bucket name
    bucket_name = 'willie20181121'
    # source directory
    sourceDir = '/home/willie/Desktop/x/'  # Linux path
    # destination directory name (on s3)
    destDir = '/test1/'  # S3 path

    # max size in bytes before uploading in parts. between 1 and 5 GB recommended
    MAX_SIZE = 20 * 1000 * 1000
    # size of parts when uploading in parts
    PART_SIZE = 6 * 1000 * 1000

    access_key = 'MPBVAQ*******IT****'
    secret_key = '11t63yDV***********HgUcgMOSN*****'

    conn = boto.connect_s3(
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        host='******.org.tw',
        is_secure=False,               # uncomment if you are not using ssl
        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
    )
    bucket = conn.create_bucket(bucket_name,
        location=boto.s3.connection.Location.DEFAULT)

    uploadFileNames = []
    for (sourceDir, dirname, filename) in os.walk(sourceDir):
        uploadFileNames.extend(filename)
        break

    def percent_cb(complete, total):
        sys.stdout.write('.')
        sys.stdout.flush()

    for filename in uploadFileNames:
        sourcepath = os.path.join(sourceDir + filename)
        destpath = os.path.join(destDir, filename)
        print('Uploading %s to Amazon S3 bucket %s' %
              (sourcepath, bucket_name))

        filesize = os.path.getsize(sourcepath)
        if filesize > MAX_SIZE:
            print("multipart upload")
            mp = bucket.initiate_multipart_upload(destpath)
            fp = open(sourcepath, 'rb')
            fp_num = 0
            while fp.tell() < filesize:
                fp_num += 1
                print("uploading part %i" % fp_num)
                mp.upload_part_from_file(fp, fp_num, cb=percent_cb, num_cb=10, size=PART_SIZE)

            mp.complete_upload()

        else:
            print("singlepart upload")
            k = boto.s3.key.Key(bucket)
            k.key = destpath
            k.set_contents_from_filename(sourcepath,
                cb=percent_cb, num_cb=10)

    PS: see this URL for more reference.
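
For comparison, here is a rough boto3 equivalent of the folder upload above; this is a sketch assuming the same flat source directory, with the bucket and folder names from the question (boto3's upload_file switches to multipart automatically for large files, so the manual part-size bookkeeping disappears):

    import os
    import boto3

    source_dir = '/home/willie/Desktop/x/'  # local directory to upload
    bucket_name = 'test'
    dest_prefix = 'dump/'  # target "folder" inside the bucket

    s3 = boto3.client('s3')  # credentials resolved from the environment
    for name in os.listdir(source_dir):
        path = os.path.join(source_dir, name)
        if os.path.isfile(path):
            # upload_file handles multipart uploads transparently
            s3.upload_file(path, bucket_name, dest_prefix + name)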

Upload a file to S3 within a session with credentials.

    import boto3

    session = boto3.Session(
        aws_access_key_id='AWS_ACCESS_KEY_ID',
        aws_secret_access_key='AWS_SECRET_ACCESS_KEY',
    )
    s3 = session.resource('s3')
    # Filename - File to upload
    # Bucket - Bucket to upload to (the top level directory under AWS S3)
    # Key - S3 object name (can contain subdirectories). If not specified then file_name is used
    s3.meta.client.upload_file(Filename='input_file_path', Bucket='bucket_name', Key='s3_output_key')
    

Using boto3

    import logging
    import boto3
    from botocore.exceptions import ClientError

    def upload_file(file_name, bucket, object_name=None):
        """Upload a file to an S3 bucket

        :param file_name: File to upload
        :param bucket: Bucket to upload to
        :param object_name: S3 object name. If not specified then file_name is used
        :return: True if file was uploaded, else False
        """

        # If S3 object_name was not specified, use file_name
        if object_name is None:
            object_name = file_name

        # Upload the file
        s3_client = boto3.client('s3')
        try:
            response = s3_client.upload_file(file_name, bucket, object_name)
        except ClientError as e:
            logging.error(e)
            return False
        return True
    

For more info: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3-uploading-files.html
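
Tying this back to the original question, a call like the following would place the local file under the bucket's dump folder (names taken from the question):

    upload_file('upload.txt', 'test', 'dump/upload.txt')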

For what it's worth, here is something a bit more ordered:

    import boto3
    from pprint import pprint
    from botocore.exceptions import NoCredentialsError

    class S3(object):
        BUCKET = "test"
        connection = None

        def __init__(self):
            try:
                # get_s3_credentials is the author's helper for loading stored credentials
                vars = get_s3_credentials("aws")
                # pass the credentials as keyword arguments; positionally they
                # would be read as region_name / api_version
                self.connection = boto3.resource('s3',
                    aws_access_key_id='aws_access_key_id',
                    aws_secret_access_key='aws_secret_access_key')
            except Exception as error:
                print(error)
                self.connection = None

        def upload_file(self, file_to_upload_path, file_name):
            if file_to_upload_path is None or file_name is None:
                return False
            try:
                pprint(file_to_upload_path)
                file_name = "your-folder-inside-s3/{0}".format(file_name)
                self.connection.Bucket(self.BUCKET).upload_file(file_to_upload_path,
                                                                file_name)
                print("Upload Successful")
                return True

            except FileNotFoundError:
                print("The file was not found")
                return False

            except NoCredentialsError:
                print("Credentials not available")
                return False

There are three important variables here: the BUCKET constant, file_to_upload_path, and file_name:

BUCKET: is the name of your S3 bucket

file_to_upload_path: must be the path of the file you want to upload

file_name: is the resulting file name and path in your bucket (this is where you add folders or whatever)

There are many ways to do it, but you can reuse this code in another script like this:

    import S3

    def some_function():
        S3.S3().upload_file(path_to_file, final_file_name)
    

You should also set the content type, to avoid file-access issues.

    import os
    import boto3

    image = 'fly.png'
    s3_filestore_path = 'images/fly.png'
    filename, file_extension = os.path.splitext(image)
    content_type_dict = {".png": "image/png", ".html": "text/html",
                         ".css": "text/css", ".js": "application/javascript",
                         ".jpg": "image/jpeg", ".gif": "image/gif",
                         ".jpeg": "image/jpeg"}

    content_type = content_type_dict[file_extension]
    s3 = boto3.client('s3', config=boto3.session.Config(signature_version='s3v4'),
                      region_name='ap-south-1',
                      aws_access_key_id=S3_KEY,
                      aws_secret_access_key=S3_SECRET)
    # pass the file's bytes, not its name, as the object body
    s3.put_object(Body=open(image, 'rb'), Bucket=S3_BUCKET, Key=s3_filestore_path,
                  ContentType=content_type)
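
As an aside, instead of a hand-rolled dict, the standard library's mimetypes module can guess the content type; a minimal sketch:

    import mimetypes

    # guess_type returns (type, encoding); fall back to a generic binary type
    content_type = mimetypes.guess_type('fly.png')[0] or 'application/octet-stream'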
    

If you have the AWS Command Line Interface installed on your system, you can make use of Python's subprocess library. For example:

    import subprocess

    def copy_file_to_s3(source: str, target: str, bucket: str):
        subprocess.run(["aws", "s3", "cp", source, f"s3://{bucket}/{target}"])
    

Similarly, you can use that logic for all kinds of AWS client operations such as downloading or listing files. It is also possible to get return values, so there is no need to import boto3 at all. I guess it was not intended to be used this way, but in practice I find it quite convenient. This way you also get the status of the upload displayed in your console, for example:

    Completed 3.5 GiB/3.5 GiB (242.8 MiB/s) with 1 file(s) remaining
    

In order to tailor the approach to your needs, I recommend having a look at the subprocess reference and the AWS CLI reference.
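
To actually pick up the exit status and output mentioned above, here is a sketch that reuses the same copy_file_to_s3 signature (capture_output requires Python 3.7+):

    import subprocess

    def copy_file_to_s3(source: str, target: str, bucket: str) -> bool:
        # capture stdout/stderr and report whether the CLI call succeeded
        result = subprocess.run(
            ["aws", "s3", "cp", source, f"s3://{bucket}/{target}"],
            capture_output=True, text=True,
        )
        if result.returncode != 0:
            print(result.stderr)
        return result.returncode == 0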

Note: this is a copy of my answer to a similar question.

A lot of the existing answers here are pretty complex. A simpler approach is to use cloudpathlib, which wraps boto3.

First, be sure to be authenticated properly with an ~/.aws/credentials file or environment variables set.
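
For instance, a minimal sketch of the environment-variable route (placeholder values; boto3, and therefore cloudpathlib, picks these up automatically):

    import os

    # one way to provide credentials without a ~/.aws/credentials file
    os.environ["AWS_ACCESS_KEY_ID"] = "your-access-key-id"
    os.environ["AWS_SECRET_ACCESS_KEY"] = "your-secret-access-key"

Setting the variables in the shell before launching Python works just as well.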

Here is how you would upload a file:

    from pathlib import Path
    from cloudpathlib import CloudPath

    # write a local file that we will upload:
    Path("test_file.txt").write_text("hello")
    #> 5

    # upload that file to S3
    CloudPath("s3://mybucket/testsfile.txt").upload_from("test_file.txt")
    #> S3Path('s3://mybucket/testsfile.txt')

    # read it back from s3
    CloudPath("s3://mybucket/testsfile.txt").read_text()
    #> 'hello'
    

Note that you could also write to the cloud path directly using the normal write_text, write_bytes, or open methods as well.
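
A quick sketch of that direct-write route, using the same hypothetical bucket as above:

    from cloudpathlib import CloudPath

    # write to the cloud path without a local intermediate file
    CloudPath("s3://mybucket/direct.txt").write_text("hello direct")

    # or stream through a file handle
    with CloudPath("s3://mybucket/streamed.txt").open("w") as f:
        f.write("hello stream")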

I modified your example slightly, dropping some imports and the progress callback, to get what I needed for a boto example.

    import boto.s3
    from boto.s3.key import Key
    
    
    AWS_ACCESS_KEY_ID = 'your-access-key-id'
    AWS_SECRET_ACCESS_KEY = 'your-secret-access-key'
    
    
    bucket_name = AWS_ACCESS_KEY_ID.lower() + '-form13'
    conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    bucket = conn.create_bucket(bucket_name, location=boto.s3.connection.Location.DEFAULT)
    filename = 'embedding.csv'
    
    
    k = Key(bucket)
    k.key = filename
    k.set_contents_from_filename(filename)
    

And here is a boto3 example as well:

    import boto3
    
    
    ACCESS_KEY = 'your-access-key'
    SECRET_KEY = 'your-secret-key'
    
    
    file_name = 'embedding.csv'
    object_name = file_name
    bucket_name = ACCESS_KEY.lower() + '-form13'
    
    
    s3 = boto3.client('s3', aws_access_key_id=ACCESS_KEY, aws_secret_access_key=SECRET_KEY)
    s3.create_bucket(Bucket=bucket_name)
    s3.upload_file(file_name, bucket_name, object_name)