Added lots of comments to say how to use it.

master
Paco Hope 4 months ago
parent f052a56b66
commit 992b2dbc57
  1. 81
      create-athena-org-table.sql

@ -1,45 +1,60 @@
-- https://github.com/duo-labs/cloudtrail-partitioner/issues/14
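-- Helper: list the account IDs that have logs in the bucket as one comma-separated string, ready to
-- paste into "projection.accountid.values" below (drop the trailing comma). Add `--profile NAME`
-- after `aws` if you need a named AWS CLI profile.
-- aws s3 ls s3://BUCKETNAME/AWSLogs/o-ORGNAME/ | cut -c 32-50 | tr / , | xargs | tr -d ' '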
-- To use this:
-- 1. Find all the comments below that tell you "Edit the next line" and do what they say.
-- 2. Go to Amazon Athena and copy paste this SQL into the SQL box and execute it
-- 3. You'll now have a table named `orgtrail`. Do a quick search to verify it:
-- SELECT * from `orgtrail` limit 5;
-- Defines the external table `orgtrail` over organization-wide CloudTrail logs in S3.
-- Partitions (account, region, date) are described with Athena partition projection in
-- TBLPROPERTIES, and the S3 path for each partition comes from "storage.location.template".
-- Each column, SerDe clause and table property is declared exactly once; repeating any of
-- them makes the statement invalid.
CREATE EXTERNAL TABLE orgtrail (
    `eventtime` string,
    `eventsource` string,
    `eventname` string,
    `awsregion` string,
    `sourceipaddress` string,
    `useragent` string,
    `errorcode` string,
    `errormessage` string,
    `requestparameters` string,
    `responseelements` string,
    `additionaleventdata` string,
    `requestid` string,
    `eventid` string,
    `eventtype` string,
    `apiversion` string,
    `readonly` string,
    `recipientaccountid` string,
    `serviceeventdetails` string,
    `sharedeventid` string,
    `vpcendpointid` string,
    `eventversion` string,
    `resources` array<struct<arn:string,accountid:string,type:string>>,
    `useridentity` struct<type:string,principalid:string,arn:string,accountid:string,invokedby:string,accesskeyid:string,username:string,sessioncontext:struct<attributes:struct<mfaauthenticated:string,creationdate:string>,sessionissuer:struct<type:string,principalid:string,arn:string,accountid:string,username:string>>>
)
COMMENT 'CloudTrail table with projection for future partitions'
-- Partition column names must match the "projection.<name>.*" keys and the ${...} placeholders in
-- "storage.location.template" below. `date` is a reserved word in Athena DDL, so it is quoted.
PARTITIONED BY (`accountid` string, `region` string, `date` string)
ROW FORMAT SERDE 'com.amazon.emr.hive.serde.CloudTrailSerde'
STORED AS INPUTFORMAT 'com.amazon.emr.cloudtrail.CloudTrailInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-- Edit the next line to change BUCKETNAME and o-ORGNAME to your bucket name and your Org ID
LOCATION 's3://BUCKETNAME/AWSLogs/o-ORGNAME'
TBLPROPERTIES(
    "projection.enabled" = "true",
    "projection.date.type" = "date",
    -- Edit the next line if you want to make queries more or less efficient. Setting the date range
    -- to a more recent date means records from earlier dates will be inaccessible, but it will speed
    -- up querying. Set it to the oldest date you need to access.
    "projection.date.range" = "2021/01/01,NOW",
    "projection.date.format" = "yyyy/MM/dd",
    "projection.date.interval" = "1",
    "projection.accountid.type" = "enum",
    -- Edit the next line to insert each 12-digit AWS account ID that you are logging for. Yes, if you
    -- get a new AWS account, you have to destroy the table definition, add that ID to this list, and
    -- recreate the table. It's clunky, but it works.
    "projection.accountid.values" = "111111111111,222222222222,333333333333",
    "projection.region.type" = "enum",
    -- Edit the next line to have the regions you care about
    "projection.region.values" = "ap-northeast-1,ap-northeast-2,ap-northeast-3,ap-south-1,ap-southeast-1,ap-southeast-2,ca-central-1,eu-central-1,eu-north-1,eu-west-1,eu-west-2,eu-west-3,sa-east-1,us-east-1,us-east-2,us-west-1,us-west-2",
    -- Edit the next line to change BUCKETNAME and o-ORGNAME to your bucket name and your Org ID
    "storage.location.template" = "s3://BUCKETNAME/AWSLogs/o-ORGNAME/${accountid}/CloudTrail/${region}/${date}"
);

Loading…
Cancel
Save