You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
60 lines
3.4 KiB
60 lines
3.4 KiB
-- https://github.com/duo-labs/cloudtrail-partitioner/issues/14 |
|
-- aws s3 ls s3://BUCKETNAME/AWSLogs/o-ORGNAME/ | cut -c 32-50 | tr / , | xargs | tr -d ' ' |
|
-- To use this: |
|
-- 1. Find all the comments below that tell you "Edit the next line" and do what they say. |
|
-- 2. Go to Amazon Athena and copy paste this SQL into the SQL box and execute it |
|
-- 3. You'll now have a table named `orgtrail`. Do a quick search to verify it: |
|
-- SELECT * from `orgtrail` limit 5; |
|
|
|
CREATE EXTERNAL TABLE orgtrail ( |
|
`eventtime` string, |
|
`eventsource` string, |
|
`eventname` string, |
|
`awsregion` string, |
|
`sourceipaddress` string, |
|
`useragent` string, |
|
`errorcode` string, |
|
`errormessage` string, |
|
`requestparameters` string, |
|
`responseelements` string, |
|
`additionaleventdata` string, |
|
`requestid` string, |
|
`eventid` string, |
|
`eventtype` string, |
|
`apiversion` string, |
|
`readonly` string, |
|
`recipientaccountid` string, |
|
`serviceeventdetails` string, |
|
`sharedeventid` string, |
|
`vpcendpointid` string, |
|
`eventversion` string, |
|
`resources` array<struct<arn:string,accountid:string,type:string>>, |
|
`useridentity` struct<type:string,principalid:string,arn:string,accountid:string,invokedby:string,accesskeyid:string,username:string,sessioncontext:struct<attributes:struct<mfaauthenticated:string,creationdate:string>,sessionissuer:struct<type:string,principalid:string,arn:string,accountid:string,username:string>>> |
|
) |
|
COMMENT 'CloudTrail table with projection for future partitions' |
|
PARTITIONED BY (accountId string, region string, date string) |
|
ROW FORMAT SERDE 'com.amazon.emr.hive.serde.CloudTrailSerde' |
|
STORED AS INPUTFORMAT 'com.amazon.emr.cloudtrail.CloudTrailInputFormat' |
|
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' |
|
-- Edit the next line to change BUCKETNAME and o-ORGNAME to your bucket name and your Org ID |
|
LOCATION 's3://BUCKETNAME/AWSLogs/o-ORGNAME' |
|
TBLPROPERTIES( |
|
"projection.enabled" = "true", |
|
"projection.date.type" = "date", |
|
-- Edit the next line if you want to make queries more or less efficient. Setting the date range |
|
-- to a more recent date means records from earlier dates will be inaccessible, but it will speed |
|
-- up querying. Set it to the oldest date you need to access. |
|
"projection.date.range" = "2021/01/01,NOW", |
|
"projection.date.format" = "yyyy/MM/dd", |
|
"projection.date.interval" = "1" , |
|
"projection.accountid.type" = "enum", |
|
-- Edit the next line to insert each 12-digit AWS account ID that you are logging for. Yes, if you |
|
-- get a new AWS account, you have to destroy the table definition, add that ID to this list, and |
|
-- recreate the table. It's clunky, but it works. |
|
"projection.accountid.values" = "111111111111,222222222222,333333333333", |
|
"projection.region.type" = "enum", |
|
-- Edit the next line to have the regions you care about |
|
"projection.region.values" = "ap-northeast-1,ap-northeast-2,ap-northeast-3,ap-south-1,ap-southeast-1,ap-southeast-2,ca-central-1,eu-central-1,eu-north-1,eu-west-1,eu-west-2,eu-west-3,sa-east-1,us-east-1,us-east-2,us-west-1,us-west-2", |
|
-- Edit the next line to change BUCKETNAME and o-ORGNAME to your bucket name and your Org ID |
|
"storage.location.template" = "s3://BUCKETNAME/AWSLogs/o-ORGNAME/${accountid}/CloudTrail/${region}/${date}" |
|
);
|
|
|