解析 Amazon S3 事件以获得一般理解



我想进一步了解与 AWS Lambda 交互时的 AWS“事件”。在下面的代码中,一个 .csv 文件被上传到 AWS S3;S3 生成一个事件,下面的 Lambda 函数解析该 .csv 文件并将其内容写入 DynamoDB。该代码从事件中取出源存储桶名称和对象键(key)。我想知道如何访问存储桶名称和对象键以外的信息。

我假设事件解析是从第 14 行开始的,即 `for record in event['Records']:` 这一行。

import csv
import os
import tempfile
import boto3
# Module-level AWS handles, created once at import and used by lambda_handler.
dynamodb = boto3.resource('dynamodb')
# DynamoDB table that receives the rows parsed from each uploaded CSV.
table = dynamodb.Table('Movies')
# S3 client used to download the object named in the event record.
s3 = boto3.client('s3')

def lambda_handler(event, context):
    """Load every CSV object referenced by an S3 event into the DynamoDB table.

    For each entry in ``event['Records']`` the object is downloaded to a
    temporary directory, parsed with ``read_csv`` and written to ``table``
    through a batch writer.

    Args:
        event: S3 event notification payload. Each record must provide
            ``record['s3']['bucket']['name']`` and
            ``record['s3']['object']['key']``.
        context: Lambda context object (unused).

    Raises:
        KeyError: if the event or a record lacks the fields listed above.
    """
    # Imported locally so this handler does not depend on a change to the
    # file's import header.
    from urllib.parse import unquote_plus

    for record in event['Records']:
        source_bucket = record['s3']['bucket']['name']
        # S3 URL-encodes object keys in event notifications (a space arrives
        # as '+'), so decode before using the key against the S3 API.
        key = unquote_plus(record['s3']['object']['key'])

        with tempfile.TemporaryDirectory() as tmpdir:
            # Use only the last path component: a key such as "dir/a.csv"
            # would otherwise point into a subdirectory that does not exist
            # inside tmpdir. Fall back to a fixed name for keys ending in '/'.
            local_name = os.path.basename(key) or 'object.csv'
            download_path = os.path.join(tmpdir, local_name)
            s3.download_file(source_bucket, key, download_path)
            # The original passed the undefined name `download_file` here.
            items = read_csv(download_path)

        # read_csv returns a fully materialised list, so the temporary file
        # is no longer needed while writing.
        with table.batch_writer() as batch:
            for item in items:
                batch.put_item(Item=item)

def read_csv(file):
    """Parse a movie CSV file into a list of item dicts.

    Each row must contain the columns Year, Title, Length, Subject, Actor,
    Actress, Director, Popularity, Awards and Image. Empty ``Meta`` values
    are dropped; an empty Length becomes 0 and Awards is True only when the
    cell is exactly ``'Yes'``.

    Args:
        file: Path of the CSV file to read.

    Returns:
        A list of dicts shaped ``{'Meta': {...}, 'Year': int, 'Title': str | None}``.

    Raises:
        KeyError: if a required column is missing.
        ValueError: if Year (or a non-empty Length) is not an integer.
    """
    items = []
    # newline='' is what the csv module asks for: it lets the reader see
    # newlines embedded in quoted fields exactly as they were written.
    with open(file, newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            year = int(row['Year'])
            title = row['Title'] or None
            meta = {
                'Length': int(row['Length'] or 0),
                'Subject': row['Subject'] or None,
                'Actor': row['Actor'] or None,
                'Actress': row['Actress'] or None,
                'Director': row['Director'] or None,
                'Popularity': row['Popularity'] or None,
                'Awards': row['Awards'] == 'Yes',
                'Image': row['Image'] or None,
            }
            items.append({
                # Omit attributes that have no value instead of storing None.
                'Meta': {k: v for k, v in meta.items() if v is not None},
                'Year': year,
                'Title': title,
            })
    return items

谢谢

如果您询问可以从事件消息中提取哪些其他信息,则事件消息结构在以下 S3 文档中定义:https://docs.aws.amazon.com/AmazonS3/latest/dev/notification-content-structure.html

为方便起见,下面复制了结构。

{  
"Records":[  
{  
"eventVersion":"2.2",
"eventSource":"aws:s3",
"awsRegion":"us-west-2",
"eventTime":The time, in ISO-8601 format, for example, 1970-01-01T00:00:00.000Z, when Amazon S3 finished processing the request,
"eventName":"event-type",
"userIdentity":{  
"principalId":"Amazon-customer-ID-of-the-user-who-caused-the-event"
},
"requestParameters":{  
"sourceIPAddress":"ip-address-where-request-came-from"
},
"responseElements":{  
"x-amz-request-id":"Amazon S3 generated request ID",
"x-amz-id-2":"Amazon S3 host that processed the request"
},
"s3":{  
"s3SchemaVersion":"1.0",
"configurationId":"ID found in the bucket notification configuration",
"bucket":{  
"name":"bucket-name",
"ownerIdentity":{  
"principalId":"Amazon-customer-ID-of-the-bucket-owner"
},
"arn":"bucket-ARN"
},
"object":{  
"key":"object-key",
"size":object-size,
"eTag":"object eTag",
"versionId":"object version if bucket is versioning-enabled, otherwise null",
"sequencer": "a string representation of a hexadecimal value used to determine event sequence, 
only used with PUTs and DELETEs"
}
},
"glacierEventData": {
"restoreEventData": {
"lifecycleRestorationExpiryTime": "The time, in ISO-8601 format, for example, 1970-01-01T00:00:00.000Z, of Restore Expiry",
"lifecycleRestoreStorageClass": "Source storage class for restore"
}
}
}
]
}

最新更新