You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
101 lines
4.4 KiB
101 lines
4.4 KiB
# Licensed under GNU General Public License v3.0 |
|
# |
|
# This code will delete everything from an S3 bucket beneath a certain prefix. |
|
# "Prefix" is what AWS calls a "directory," or a "folder" when it is in S3. This |
|
# is the right term. It's not really a directory. |
|
# |
|
# To use: pass --bucket and --prefix on the command line (run with --help for all options).
|
# |
|
# WARNING: There are a couple things that this does not handle: |
|
# 1. If you have object versions turned on, and any objects have more than one version |
|
# this does not remove all versions of the objects. It just removes the CURRENT version. |
|
# 2. If you have A LOT of objects in the bucket, and the AWS credentials (e.g., an assumed |
|
# role) expire while this runs, it will die. It will have deleted as much as it could, |
|
# so starting it up again will just keep going. |
|
# 3. You cannot empty an entire bucket with this script: --prefix is required and a prefix
#    that starts with '/' (including '/' itself) is rejected. Deleting page by page is
#    probably not the most efficient way to empty a whole bucket anyway.
|
|
|
import botocore |
|
import boto3 |
|
import argparse |
|
import sys |
|
|
|
# Module-level settings. The __main__ block overwrites both from the command
# line before any deletion starts; emptyFolder() reads them at call time.

# When True, pages are listed and counted but no delete request is sent.
debug: bool = False

# Number of keys requested per list_objects_v2 page, and therefore the number
# of keys sent in each delete_objects call. 1000 is the AWS maximum.
pageSize: int = 1000
|
|
|
class initArgs(object):
    """Command-line argument parser for the prefix-deletion script.

    Recognised options:
      --bucket    (required) name of the bucket to delete from
      --prefix    (required) key prefix; callers expect it to end in '/'
      --pagesize  (optional, default 1000) objects handled per API call
      --debug     (flag) list and count objects without deleting them

    The parsed namespace is stored on ``self.args`` and returned by
    ``get_args()``. Range and format checks on the values are left to the
    caller; this class only parses.
    """

    def __init__(self, argv=None):
        """Build the parser and parse ``argv`` immediately.

        :param argv: List of argument strings to parse. When None (the
            default) argparse falls back to ``sys.argv[1:]``, which is what
            the script entry point relies on. Passing an explicit list lets
            other code and tests drive the parser without touching sys.argv.
        """
        self.parser = argparse.ArgumentParser()
        self.parser.add_argument(
            "--bucket", type=str, help="Bucket name", required=True)
        self.parser.add_argument(
            "--prefix", type=str, help="Prefix inside the bucket. Must end in /", required=True)
        self.parser.add_argument(
            "--pagesize", type=int,
            help="Number of objects to delete per API call, max 1000. Default 1000.",
            default=1000, required=False)
        self.parser.add_argument(
            "--debug", action='store_true', help="don't actually do anything")
        # parse_args(None) is documented to read sys.argv[1:].
        self.args = self.parser.parse_args(argv)

    def get_args(self):
        """Return the argparse.Namespace produced in ``__init__``."""
        return self.args
|
|
|
def emptyFolder(bucketname: str, prefix: str):
    """Delete every object in a bucket whose key starts with ``prefix``.

    E.g., emptyFolder("mybucket", "temp/") deletes all objects under
    s3://mybucket/temp/. Note that the match is a plain string prefix, so
    "temp" (no trailing slash) would also match keys such as "temporary/x".

    Objects are listed with the list_objects_v2 paginator using the
    module-level ``pageSize`` and each page is removed with one
    delete_objects call. When the module-level ``debug`` flag is true the
    pages are listed and counted but nothing is deleted.

    Only the current version of each object is removed.

    :param bucketname: Name of the bucket
    :param prefix: Prefix of objects to delete. Specify None to delete all objects
    :return: Nothing. Deletes objects.
    :raises Exception: if the client/paginator cannot be created, if a
        delete_objects call fails or returns a non-200 status, or if S3
        reports per-object errors for a page.
    """
    list_kwargs = {
        'Bucket': bucketname,
        'PaginationConfig': {'PageSize': pageSize},
    }
    # boto3 rejects Prefix=None during parameter validation, so "no prefix"
    # has to be expressed by leaving the argument out altogether.
    if prefix is not None:
        list_kwargs['Prefix'] = prefix

    try:
        session = boto3.Session()
        s3 = session.client('s3')
        paginator = s3.get_paginator('list_objects_v2')
        iterator = paginator.paginate(**list_kwargs)
    except botocore.exceptions.ClientError as e:
        raise Exception("boto3 client error in paginate: " + str(e)) from e
    except Exception as e:
        raise Exception("Unexpected error in paginate: " + str(e)) from e

    # The paginator is lazy: the list requests are issued while iterating, so
    # listing errors surface from this loop rather than from the try above.
    for page, s3Page in enumerate(iterator, start=1):
        objectList = [{'Key': obj.get('Key')} for obj in s3Page.get('Contents', [])]
        print(f"Deleting {len(objectList)} objects, page {page}")

        # Nothing to send for an empty page, and debug mode is a dry run.
        if not objectList or debug:
            continue

        try:
            # Quiet mode makes S3 report only the keys that failed, which is
            # all that is inspected below.
            response = s3.delete_objects(
                Bucket=bucketname,
                Delete={'Objects': objectList, 'Quiet': True})
        except botocore.exceptions.ClientError as e:
            raise Exception("boto3 client error in delete_objects: " + str(e)) from e
        except Exception as e:
            raise Exception("Unexpected error in delete_objects: " + str(e)) from e

        status = response.get('ResponseMetadata', {}).get('HTTPStatusCode')
        if status != 200:
            # Formatted, not concatenated: the status code is an int.
            raise Exception(f"Unexpected HTTP response: {status}")

        # A 200 response can still carry per-object failures.
        errors = response.get('Errors')
        if errors:
            first = errors[0]
            raise Exception(
                f"delete_objects reported {len(errors)} failed object(s) on page {page}; "
                f"first: {first.get('Key')}: {first.get('Code')} {first.get('Message')}")
|
if __name__ == "__main__":
    # Script entry point: parse the command line, validate it, publish the
    # run settings to the module-level globals that emptyFolder() reads, and
    # then perform the deletion.
    args = initArgs().get_args()

    if not args.prefix.endswith('/'):
        print(f"ERROR: prefix doesn't end in a slash: \"{args.prefix}\"", file=sys.stderr)
        sys.exit(1)

    # This also rejects a bare "/" prefix, so no separate check for it is
    # needed (or reachable) after this point.
    if args.prefix.startswith('/'):
        print(f"ERROR: prefix must NOT start with a slash: \"{args.prefix}\"", file=sys.stderr)
        sys.exit(1)

    # 1000 itself is allowed: it is the AWS per-call maximum and the default.
    if not 0 < args.pagesize <= 1000:
        print(f"ERROR: pagesize must be > 0 and must be <= 1000: pagesize=\"{args.pagesize}\"",
              file=sys.stderr)
        sys.exit(1)

    # Module scope here, so these rebind the globals used by emptyFolder().
    debug = args.debug
    pageSize = args.pagesize

    emptyFolder(bucketname=args.bucket, prefix=args.prefix)