parent
f052a56b66
commit
992b2dbc57
1 changed files with 48 additions and 33 deletions
@ -1,45 +1,60 @@ |
||||
-- https://github.com/duo-labs/cloudtrail-partitioner/issues/14 |
||||
-- aws --profile foo s3 ls s3://cloudtrail-bucket/AWSLogs/o-orgid/ | cut -c 32-50 | tr / , | xargs | tr -d ' ' |
||||
-- aws s3 ls s3://BUCKETNAME/AWSLogs/o-ORGNAME/ | cut -c 32-50 | tr / , | xargs | tr -d ' ' |
||||
-- To use this: |
||||
-- 1. Find all the comments below that tell you "Edit the next line" and do what they say. |
||||
-- 2. Go to Amazon Athena and copy paste this SQL into the SQL box and execute it |
||||
-- 3. You'll now have a table named `orgtrail`. Do a quick search to verify it: |
||||
-- SELECT * from `orgtrail` limit 5; |
||||
|
||||
CREATE EXTERNAL TABLE orgtrail ( |
||||
`eventversion` string, |
||||
`useridentity` struct<type:string,principalid:string,arn:string,accountid:string,invokedby:string,accesskeyid:string,username:string,sessioncontext:struct<attributes:struct<mfaauthenticated:string,creationdate:string>,sessionissuer:struct<type:string,principalid:string,arn:string,accountid:string,username:string>>>, |
||||
`eventtime` string, |
||||
`eventsource` string, |
||||
`eventname` string, |
||||
`awsregion` string, |
||||
`sourceipaddress` string, |
||||
`useragent` string, |
||||
`errorcode` string, |
||||
`errormessage` string, |
||||
`requestparameters` string, |
||||
`responseelements` string, |
||||
`additionaleventdata` string, |
||||
`requestid` string, |
||||
`eventid` string, |
||||
`resources` array<struct<arn:string,accountid:string,type:string>>, |
||||
`eventtype` string, |
||||
`apiversion` string, |
||||
`readonly` string, |
||||
`recipientaccountid` string, |
||||
`serviceeventdetails` string, |
||||
`sharedeventid` string, |
||||
`vpcendpointid` string) |
||||
`eventtime` string, |
||||
`eventsource` string, |
||||
`eventname` string, |
||||
`awsregion` string, |
||||
`sourceipaddress` string, |
||||
`useragent` string, |
||||
`errorcode` string, |
||||
`errormessage` string, |
||||
`requestparameters` string, |
||||
`responseelements` string, |
||||
`additionaleventdata` string, |
||||
`requestid` string, |
||||
`eventid` string, |
||||
`eventtype` string, |
||||
`apiversion` string, |
||||
`readonly` string, |
||||
`recipientaccountid` string, |
||||
`serviceeventdetails` string, |
||||
`sharedeventid` string, |
||||
`vpcendpointid` string, |
||||
`eventversion` string, |
||||
`resources` array<struct<arn:string,accountid:string,type:string>>, |
||||
`useridentity` struct<type:string,principalid:string,arn:string,accountid:string,invokedby:string,accesskeyid:string,username:string,sessioncontext:struct<attributes:struct<mfaauthenticated:string,creationdate:string>,sessionissuer:struct<type:string,principalid:string,arn:string,accountid:string,username:string>>> |
||||
) |
||||
COMMENT 'CloudTrail table with projection for future partitions' |
||||
PARTITIONED BY (accountId string, region string, date string) |
||||
ROW FORMAT SERDE 'com.amazon.emr.hive.serde.CloudTrailSerde' |
||||
STORED AS INPUTFORMAT 'com.amazon.emr.cloudtrail.CloudTrailInputFormat' |
||||
ROW FORMAT SERDE 'com.amazon.emr.hive.serde.CloudTrailSerde' |
||||
STORED AS INPUTFORMAT 'com.amazon.emr.cloudtrail.CloudTrailInputFormat' |
||||
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' |
||||
-- Edit the next line to change BUCKETNAME and o-ORGNAME to your bucket name and your Org ID |
||||
LOCATION 's3://BUCKETNAME/AWSLogs/o-ORGNAME' |
||||
TBLPROPERTIES( |
||||
"projection.enabled" = "true", |
||||
"projection.date.type" = "date", |
||||
"projection.date.range" = "2021/01/01,NOW", |
||||
"projection.date.format" = "yyyy/MM/dd", |
||||
"projection.date.interval" = "1" , |
||||
"projection.enabled" = "true", |
||||
"projection.date.type" = "date", |
||||
-- Edit the next line if you want to make queries more or less efficient. Setting the date range |
||||
-- to a more recent date means records from earlier dates will be inaccessible, but it will speed |
||||
-- up querying. Set it to the oldest date you need to access. |
||||
"projection.date.range" = "2021/01/01,NOW", |
||||
"projection.date.format" = "yyyy/MM/dd", |
||||
"projection.date.interval" = "1" , |
||||
"projection.accountid.type" = "enum", |
||||
-- Edit the next line to insert each 12-digit AWS account ID that you are logging for. Yes, if you |
||||
-- get a new AWS account, you have to destroy the table definition, add that ID to this list, and |
||||
-- recreate the table. It's clunky, but it works. |
||||
"projection.accountid.values" = "111111111111,222222222222,333333333333", |
||||
"projection.region.type" = "enum", |
||||
"projection.region.values" = "ap-northeast-1,ap-northeast-2,ap-northeast-3,ap-south-1,ap-southeast-1,ap-southeast-2,ca-central-1,eu-central-1,eu-north-1,eu-west-1,eu-west-2,eu-west-3,sa-east-1,us-east-1,us-east-2,us-west-1,us-west-2", |
||||
"projection.region.type" = "enum", |
||||
-- Edit the next line to have the regions you care about |
||||
"projection.region.values" = "ap-northeast-1,ap-northeast-2,ap-northeast-3,ap-south-1,ap-southeast-1,ap-southeast-2,ca-central-1,eu-central-1,eu-north-1,eu-west-1,eu-west-2,eu-west-3,sa-east-1,us-east-1,us-east-2,us-west-1,us-west-2", |
||||
-- Edit the next line to change BUCKETNAME and o-ORGNAME to your bucket name and your Org ID |
||||
"storage.location.template" = "s3://BUCKETNAME/AWSLogs/o-ORGNAME/${accountid}/CloudTrail/${region}/${date}" |
||||
); |
||||
|
Loading…
Reference in new issue