Skip to main content

Privacera Documentation

Secure S3 via Boto3 in Databricks notebook

This section describes how to use the AWS SDK for Python (Boto3) with PrivaceraCloud to enforce access control on AWS S3 file data by routing S3 requests through a Privacera Dataserver proxy.

These examples are intended to be run in a Databricks notebook.

Prerequisites

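Make sure you have the following ready, since the example below uses each of them: a Privacera access key and secret key, the Privacera Dataserver endpoint URL, and the name and region of the S3 bucket you want to access.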

Create and run the program

  1. Install the AWS Boto3 libraries:

    pip install boto3
  2. Import the required libraries:

    import boto3
  3. Access the AWS S3 files:

    def check_s3_file_exists(bucket, key, access_key, secret_key, endpoint_url, dataserver_cert, region_name):
      """Try to fetch an S3 object through the given endpoint and report success.

      Builds a Boto3 S3 resource from the supplied credentials, endpoint and
      region, reads the whole object, decodes it as UTF-8 and prints it.

      Args:
        bucket: Name of the S3 bucket.
        key: Object key inside the bucket.
        access_key: Access key passed to Boto3 as ``aws_access_key_id``.
        secret_key: Secret key passed to Boto3 as ``aws_secret_access_key``.
        endpoint_url: Endpoint the S3 requests are sent to.
        dataserver_cert: Accepted for interface compatibility; not used here.
        region_name: Region name passed to Boto3.

      Returns:
        True if the object was fetched, decoded and printed; False if any
        ``Exception`` was raised (the error is printed, not re-raised).
      """
      # Credentials are taken from the caller as-is. Any ${...} template
      # placeholders must be substituted (as quoted strings) at the call site.
      try:
        s3 = boto3.resource(
          service_name='s3',
          aws_access_key_id=access_key,
          aws_secret_access_key=secret_key,
          endpoint_url=endpoint_url,
          region_name=region_name,
        )
        print(s3.Object(bucket_name=bucket, key=key).get()['Body'].read().decode('utf-8'))
      except Exception as e:
        # Deliberate best-effort: report the failure and signal it via the
        # return value. KeyboardInterrupt/SystemExit are not Exception
        # subclasses, so they still propagate.
        print("Got error: {}".format(e))
        return False
      return True
      
    def read_s3_file(bucket, key, access_key, secret_key, endpoint_url, dataserver_cert, region_name):
      """Read an S3 object with a Boto3 client and print its contents.

      Builds a Boto3 S3 client from the supplied credentials, endpoint and
      region, calls ``get_object``, decodes the body as UTF-8 and prints it.

      Args:
        bucket: Name of the S3 bucket.
        key: Object key inside the bucket.
        access_key: Access key passed to Boto3 as ``aws_access_key_id``.
        secret_key: Secret key passed to Boto3 as ``aws_secret_access_key``.
        endpoint_url: Endpoint the S3 requests are sent to.
        dataserver_cert: Accepted for interface compatibility; not used here.
        region_name: Region name passed to Boto3.

      Returns:
        True if the object was read, decoded and printed; False if any
        ``Exception`` was raised (the error is printed, not re-raised).
      """
      try:
        s3 = boto3.client(
          service_name='s3',
          aws_access_key_id=access_key,
          aws_secret_access_key=secret_key,
          endpoint_url=endpoint_url,
          region_name=region_name,
        )
        obj = s3.get_object(Bucket=bucket, Key=key)
        print(obj['Body'].read().decode('utf-8'))
      except Exception as e:
        # Deliberate best-effort: report the failure and signal it via the
        # return value. No `return` inside `finally`, so KeyboardInterrupt
        # and SystemExit are no longer silently discarded.
        print("Got error: {}".format(e))
        return False
      return True
      
    # Connection settings (SaaS). The ${...} values look like template
    # placeholders for your Privacera credentials - substitute real values
    # before running.
    endpoint_url = "https://ds.privaceracloud.com"
    region_name = "us-east-1"
    access_key = "${privacera_access_key}"
    secret_key = "${privacera_secret_key}"
    dataserver_cert = ""

    # Object to fetch: bucket name and key within that bucket.
    bucket = "infraqa-test"
    readFilePath = "file data/data/format=txt/sample/sample_small.txt"

    print(f"got file===== {readFilePath} ============= bucket= {bucket}")

    # The result is kept in `status` (True on success, False on failure).
    status = check_s3_file_exists(
      bucket=bucket,
      key=readFilePath,
      access_key=access_key,
      secret_key=secret_key,
      endpoint_url=endpoint_url,
      dataserver_cert=dataserver_cert,
      region_name=region_name,
    )