# CloudFormation template header.
# The format version is quoted so that YAML loaders keep it as a string
# instead of resolving the bare scalar to a date.
AWSTemplateFormatVersion: "2010-09-09"
Description: Create an Athena database for the Cumulus project

# Stack inputs. Only DatabasePrefix carries a default; every other parameter
# has to be supplied when the stack is created.
Parameters:
  # Leading component of the names of the S3 buckets created by this stack.
  BucketPrefix:
    Type: 'String'
    Description: 'Prefix for Cumulus bucket names (the main output bucket will look like {BucketPrefix}-{AWS::AccountId}-{AWS::Region}, the others like {BucketPrefix}-{purpose}-{AWS::AccountId}-{AWS::Region})'
  # Used to name the Glue security configuration, Glue database, Glue crawler
  # and Athena workgroup.
  DatabasePrefix:
    Type: 'String'
    Description: 'Prefix for Athena-related AWS resources (they will look like {DatabasePrefix}-{service related suffix}). It can be left as default unless you need to run multiple instances of this stack.'
    Default: 'cumulus-deid'
  # Path segment inserted between the output bucket and each table folder in
  # the crawler targets.
  EtlSubdir:
    Type: 'String'
    Description: 'Subdirectory on the Cumulus ETL output bucket where files will be placed. This should match the path you give when running Cumulus ETL. Using a subdirectory is recommended to allow for test runs of Cumulus ETL in different subdirectories and general future-proofing.'
  # Shared by the bucket default encryption, the Glue security configuration
  # and the Athena result encryption.
  KMSMasterKeyID:
    Type: 'String'
    Description: 'KMS key ARN for Cumulus buckets'
  # Principal granted access in the output and PHI bucket policies.
  UploadRoleArn:
    Type: 'String'
    Description: 'ARN for role that is running Cumulus ETL and thus uploading files to S3'

Resources:
  ####################################################
  # S3 Buckets for raw Cumulus output
  ####################################################

  # De-identified ETL output bucket: fully private, with SSE-KMS as the
  # default encryption for new objects.
  S3Bucket:
    Type: AWS::S3::Bucket
    Properties:
      # All four public-access guards are switched on.
      PublicAccessBlockConfiguration:
        RestrictPublicBuckets: True
        IgnorePublicAcls: True
        BlockPublicPolicy: True
        BlockPublicAcls: True
      # Default encryption uses the KMS key passed in as a stack parameter.
      BucketEncryption:
        ServerSideEncryptionConfiguration:
          - ServerSideEncryptionByDefault:
              SSEAlgorithm: aws:kms
              KMSMasterKeyID:
                Ref: KMSMasterKeyID
      # Unlike the other buckets, this name has no "purpose" segment.
      BucketName:
        Fn::Sub: "${BucketPrefix}-${AWS::AccountId}-${AWS::Region}"

  # Bucket policy for the de-identified output bucket:
  #   1. deny every PutObject request that does not declare SSE-KMS encryption
  #   2. allow the ETL upload role to write and delete objects
  #   3. allow the ETL upload role to list the bucket
  # NOTE(review): s3:GetObject is not granted here; confirm the upload role
  # gets read access elsewhere if it needs to read back its own output.
  S3BucketPolicy:
    Type: AWS::S3::BucketPolicy
    Properties:
      Bucket: !Ref S3Bucket
      PolicyDocument:
        # Quoted so YAML tooling keeps the policy version a string, not a date.
        Version: "2012-10-17"
        Statement:
          - Sid: AWSBucketRequiresEncryption
            Effect: Deny
            Principal: "*"
            Action: s3:PutObject
            # ${AWS::Partition} keeps the ARN valid outside the standard "aws"
            # partition (e.g. aws-us-gov, aws-cn).
            Resource: !Sub "arn:${AWS::Partition}:s3:::${S3Bucket}/*"
            Condition:
              StringNotEquals:
                s3:x-amz-server-side-encryption: aws:kms
          - Sid: AWSBucketAllowUploads
            Effect: Allow
            Principal:
              AWS: !Ref UploadRoleArn
            Action:
              - s3:DeleteObject
              - s3:PutObject
            Resource: !Sub "arn:${AWS::Partition}:s3:::${S3Bucket}/*"
          - Sid: AWSBucketAllowListing
            Effect: Allow
            Principal:
              AWS: !Ref UploadRoleArn
            Action:
              - s3:ListBucket
            # ListBucket is a bucket-level action, so the ARN has no "/*".
            Resource: !Sub "arn:${AWS::Partition}:s3:::${S3Bucket}"

  # PHI output bucket: fully private, with SSE-KMS as the default encryption
  # for new objects.
  PHIBucket:
    Type: AWS::S3::Bucket
    Properties:
      # All four public-access guards are switched on.
      PublicAccessBlockConfiguration:
        RestrictPublicBuckets: True
        IgnorePublicAcls: True
        BlockPublicPolicy: True
        BlockPublicAcls: True
      # Default encryption uses the KMS key passed in as a stack parameter.
      BucketEncryption:
        ServerSideEncryptionConfiguration:
          - ServerSideEncryptionByDefault:
              SSEAlgorithm: aws:kms
              KMSMasterKeyID:
                Ref: KMSMasterKeyID
      BucketName:
        Fn::Sub: "${BucketPrefix}-phi-${AWS::AccountId}-${AWS::Region}"

  # Bucket policy for the PHI bucket:
  #   1. deny every PutObject request that does not declare SSE-KMS encryption
  #   2. allow the ETL upload role to read and write objects
  # NOTE(review): no s3:ListBucket or s3:DeleteObject is granted on this
  # bucket; confirm that is intentional.
  PHIBucketPolicy:
    Type: AWS::S3::BucketPolicy
    Properties:
      Bucket: !Ref PHIBucket
      PolicyDocument:
        # Quoted so YAML tooling keeps the policy version a string, not a date.
        Version: "2012-10-17"
        Statement:
          - Sid: AWSBucketRequiresEncryption
            Effect: Deny
            Principal: "*"
            Action: s3:PutObject
            # ${AWS::Partition} keeps the ARN valid outside the standard "aws"
            # partition (e.g. aws-us-gov, aws-cn).
            Resource: !Sub "arn:${AWS::Partition}:s3:::${PHIBucket}/*"
            Condition:
              StringNotEquals:
                s3:x-amz-server-side-encryption: aws:kms
          - Sid: AWSBucketAllowAccess
            Effect: Allow
            Principal:
              AWS: !Ref UploadRoleArn
            Action:
              - s3:GetObject
              - s3:PutObject
            Resource: !Sub "arn:${AWS::Partition}:s3:::${PHIBucket}/*"

  ####################################################
  # Glue database & tables for raw Cumulus data
  ####################################################

  # Glue security configuration selecting SSE-KMS, with the stack's KMS key,
  # for S3 encryption. The crawler below references it.
  GlueSecurity:
    Type: AWS::Glue::SecurityConfiguration
    Properties:
      Name:
        Fn::Sub: "${DatabasePrefix}-kms"
      EncryptionConfiguration:
        S3Encryptions:
          - S3EncryptionMode: SSE-KMS
            KmsKeyArn:
              Ref: KMSMasterKeyID

  # IAM role assumed by the Glue service when the crawler runs. It combines
  # the AWS-managed Glue service policy with object-level read/write access to
  # the de-identified output bucket.
  # NOTE(review): no kms:* permissions are granted here even though the bucket
  # and the crawler security configuration use a KMS key; confirm the key
  # policy gives this role access.
  CrawlerRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        # Quoted so YAML tooling keeps the policy version a string, not a date.
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - glue.amazonaws.com
            Action:
              - sts:AssumeRole
      ManagedPolicyArns:
        # ${AWS::Partition} keeps the ARN valid outside the standard "aws"
        # partition (e.g. aws-us-gov, aws-cn).
        - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSGlueServiceRole"
      Policies:
        - PolicyName: S3BucketAccessPolicy
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - s3:GetObject
                  - s3:PutObject
                Resource: !Sub "arn:${AWS::Partition}:s3:::${S3Bucket}/*"

  # Glue database that the crawler populates.
  GlueDB:
    Type: AWS::Glue::Database
    Properties:
      CatalogId:
        Ref: AWS::AccountId
      DatabaseInput:
        # Build "<DatabasePrefix>_db", then split on '-' and re-join with '_'
        # so the final name has no hyphens (they are awkward in SQL).
        Name:
          Fn::Join:
            - '_'
            - Fn::Split:
                - '-'
                - Fn::Sub: "${DatabasePrefix}_db"

  # Crawler that registers the Delta Lake tables found under
  # s3://<output bucket>/<EtlSubdir>/ in the Glue database.
  GlueCrawler:
    Type: AWS::Glue::Crawler
    Properties:
      Name: !Sub "${DatabasePrefix}-crawler"
      Role: !GetAtt CrawlerRole.Arn
      DatabaseName: !Ref GlueDB
      CrawlerSecurityConfiguration: !Ref GlueSecurity
      Schedule:
        # Runs monthly so newly-added fields are picked up automatically:
        # 08:00 on the 1st of each month (Glue evaluates cron schedules in UTC).
        ScheduleExpression: "cron(0 8 1 * ? *)"
      SchemaChangePolicy:
        UpdateBehavior: UPDATE_IN_DATABASE
        DeleteBehavior: DEPRECATE_IN_DATABASE
      RecrawlPolicy:
        RecrawlBehavior: CRAWL_EVERYTHING
      Targets:
        DeltaTargets:
          # Table paths must be listed one by one (wildcards do not seem to
          # work), and AWS caps each DeltaTables entry at ten paths, so the
          # tables are spread over several entries.
          - WriteManifest: False
            CreateNativeDeltaTable: True
            DeltaTables:
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/allergyintolerance"
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/condition"
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/device"
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/diagnosticreport"
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/documentreference"
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/encounter"
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/episodeofcare"
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/immunization"
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/location"
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/medication"
          - WriteManifest: False
            CreateNativeDeltaTable: True
            DeltaTables:
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/medicationdispense"
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/medicationrequest"
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/observation"
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/organization"
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/patient"
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/practitioner"
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/practitionerrole"
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/procedure"
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/servicerequest"
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/specimen"
          # ETL bookkeeping tables.
          - WriteManifest: False
            CreateNativeDeltaTable: True
            DeltaTables:
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/etl__completion"
              - !Sub "s3://${S3Bucket}/${EtlSubdir}/etl__completion_encounters"

  ####################################################
  # Athena queries and where to store them
  ####################################################

  # Bucket that receives Athena query results: fully private, with SSE-KMS as
  # the default encryption for new objects.
  AthenaBucket:
    Type: AWS::S3::Bucket
    Properties:
      # All four public-access guards are switched on.
      PublicAccessBlockConfiguration:
        RestrictPublicBuckets: True
        IgnorePublicAcls: True
        BlockPublicPolicy: True
        BlockPublicAcls: True
      # Default encryption uses the KMS key passed in as a stack parameter.
      BucketEncryption:
        ServerSideEncryptionConfiguration:
          - ServerSideEncryptionByDefault:
              SSEAlgorithm: aws:kms
              KMSMasterKeyID:
                Ref: KMSMasterKeyID
      BucketName:
        Fn::Sub: "${BucketPrefix}-athena-${AWS::AccountId}-${AWS::Region}"

  # Bucket policy for the Athena results bucket. Its single statement denies
  # any PutObject request that does not declare SSE-KMS encryption.
  AthenaBucketPolicy:
    Type: AWS::S3::BucketPolicy
    Properties:
      Bucket: !Ref AthenaBucket
      PolicyDocument:
        # Quoted so YAML tooling keeps the policy version a string, not a date.
        Version: "2012-10-17"
        Statement:
          # We don't expect non-Athena uploads to this bucket, but as a
          # safeguard against misconfiguration, enforce encryption on all
          # incoming data.
          - Sid: AWSBucketRequiresEncryption
            Effect: Deny
            Principal: "*"
            Action: s3:PutObject
            # ${AWS::Partition} keeps the ARN valid outside the standard "aws"
            # partition (e.g. aws-us-gov, aws-cn).
            Resource: !Sub "arn:${AWS::Partition}:s3:::${AthenaBucket}/*"
            Condition:
              StringNotEquals:
                s3:x-amz-server-side-encryption: aws:kms

  # Athena workgroup named after DatabasePrefix. Query results are written,
  # SSE-KMS encrypted, to the Athena results bucket.
  AthenaWorkGroup:
    Type: AWS::Athena::WorkGroup
    Properties:
      Name: !Ref DatabasePrefix
      State: ENABLED
      WorkGroupConfiguration:
        EngineVersion:
          SelectedEngineVersion: "Athena engine version 3"
        ResultConfiguration:
          OutputLocation: !Sub "s3://${AthenaBucket}/"
          EncryptionConfiguration:
            KmsKey: !Ref KMSMasterKeyID
            EncryptionOption: SSE_KMS
        PublishCloudWatchMetricsEnabled: True
        # Workgroup settings take precedence over client-side settings.
        EnforceWorkGroupConfiguration: True

# Values exported by the stack: a reference to each of the three buckets.
Outputs:
  BucketName:
    Value:
      Ref: S3Bucket
    Description: Cumulus de-identified output bucket ID
  PhiBucketName:
    Value:
      Ref: PHIBucket
    Description: Cumulus PHI output bucket ID
  AthenaBucketName:
    Value:
      Ref: AthenaBucket
    Description: Cumulus Athena results bucket ID
