These are just bash and python scripts that I wrote to do AWS chores conveniently.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

60 lines
3.4 KiB

-- https://github.com/duo-labs/cloudtrail-partitioner/issues/14
-- aws s3 ls s3://BUCKETNAME/AWSLogs/o-ORGNAME/ | cut -c 32-50 | tr / , | xargs | tr -d ' '
-- To use this:
-- 1. Find all the comments below that tell you "Edit the next line" and do what they say.
-- 2. Go to Amazon Athena and copy paste this SQL into the SQL box and execute it
-- 3. You'll now have a table named `orgtrail`. Do a quick search to verify it:
-- SELECT * from `orgtrail` limit 5;
CREATE EXTERNAL TABLE orgtrail (
`eventtime` string,
`eventsource` string,
`eventname` string,
`awsregion` string,
`sourceipaddress` string,
`useragent` string,
`errorcode` string,
`errormessage` string,
`requestparameters` string,
`responseelements` string,
`additionaleventdata` string,
`requestid` string,
`eventid` string,
`eventtype` string,
`apiversion` string,
`readonly` string,
`recipientaccountid` string,
`serviceeventdetails` string,
`sharedeventid` string,
`vpcendpointid` string,
`eventversion` string,
`resources` array<struct<arn:string,accountid:string,type:string>>,
`useridentity` struct<type:string,principalid:string,arn:string,accountid:string,invokedby:string,accesskeyid:string,username:string,sessioncontext:struct<attributes:struct<mfaauthenticated:string,creationdate:string>,sessionissuer:struct<type:string,principalid:string,arn:string,accountid:string,username:string>>>
)
COMMENT 'CloudTrail table with projection for future partitions'
PARTITIONED BY (accountId string, region string, date string)
ROW FORMAT SERDE 'com.amazon.emr.hive.serde.CloudTrailSerde'
STORED AS INPUTFORMAT 'com.amazon.emr.cloudtrail.CloudTrailInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-- Edit the next line to change BUCKETNAME and o-ORGNAME to your bucket name and your Org ID
LOCATION 's3://BUCKETNAME/AWSLogs/o-ORGNAME'
TBLPROPERTIES(
"projection.enabled" = "true",
"projection.date.type" = "date",
-- Edit the next line if you want to make queries more or less efficient. Setting the date range
-- to a more recent date means records from earlier dates will be inaccessible, but it will speed
-- up querying. Set it to the oldest date you need to access.
"projection.date.range" = "2021/01/01,NOW",
"projection.date.format" = "yyyy/MM/dd",
"projection.date.interval" = "1" ,
"projection.accountid.type" = "enum",
-- Edit the next line to insert each 12-digit AWS account ID that you are logging for. Yes, if you
-- get a new AWS account, you have to destroy the table definition, add that ID to this list, and
-- recreate the table. It's clunky, but it works.
"projection.accountid.values" = "111111111111,222222222222,333333333333",
"projection.region.type" = "enum",
-- Edit the next line to have the regions you care about
"projection.region.values" = "ap-northeast-1,ap-northeast-2,ap-northeast-3,ap-south-1,ap-southeast-1,ap-southeast-2,ca-central-1,eu-central-1,eu-north-1,eu-west-1,eu-west-2,eu-west-3,sa-east-1,us-east-1,us-east-2,us-west-1,us-west-2",
-- Edit the next line to change BUCKETNAME and o-ORGNAME to your bucket name and your Org ID
"storage.location.template" = "s3://BUCKETNAME/AWSLogs/o-ORGNAME/${accountid}/CloudTrail/${region}/${date}"
);