I have confirmed the following policy and trust relationship in account-b.
Trust relationship:
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "AWS": "arn:aws:sts::account-b:assumed-role/WorkDocs_API_Developer/workdocs_session"
      },
      "Action": "sts:AssumeRole",
      "Condition": {}
    }
  ]
}
And the attached policy:
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Sid": "VisualEditor0",
      "Effect": "Allow",
      "Action": [
        "workdocs:GetDocumentPath",
        "workdocs:GetCurrentUser",
        "workdocs:CreateNotificationSubscription",
        "workdocs:DescribeAvailableDirectories",
        "workdocs:UpdateFolder",
        "workdocs:CheckAlias",
        "workdocs:DownloadDocumentVersion",
        "workdocs:GetResources",
        "workdocs:DescribeActivities",
        "workdocs:DescribeRootFolders",
        "workdocs:UpdateDocument",
        "workdocs:CreateFolder",
        "workdocs:GetFolder",
        "workdocs:InitiateDocumentVersionUpload",
        "workdocs:DescribeResourcePermissions",
        "workdocs:DescribeDocumentVersions",
        "workdocs:CreateLabels",
        "workdocs:DescribeGroups",
        "workdocs:DescribeNotificationSubscriptions",
        "workdocs:DescribeFolderContents",
        "workdocs:AbortDocumentVersionUpload",
        "workdocs:DescribeComments",
        "workdocs:GetDocumentVersion",
        "workdocs:AddResourcePermissions",
        "workdocs:DescribeInstances",
        "workdocs:GetDocument",
        "workdocs:DescribeUsers",
        "workdocs:CreateComment",
        "workdocs:CreateCustomMetadata",
        "workdocs:UpdateDocumentVersion",
        "workdocs:GetFolderPath"
      ],
      "Resource": "*"
    }
  ]
}
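For reference, this policy can be sanity-checked against the failing action with the IAM policy simulator. A minimal sketch, assuming credentials in account-b that allow iam:SimulatePrincipalPolicy, and with the real account-b ID substituted into the placeholder ARN:

import boto3

# Hedged sketch: ask the IAM policy simulator whether the role's attached
# policies allow the action that is failing. Substitute the real account-b
# account ID into the ARN below.
iam = boto3.client('iam')
result = iam.simulate_principal_policy(
    PolicySourceArn='arn:aws:iam::account-b:role/WorkDocs_API_Developer',
    ActionNames=['workdocs:GetDocument'],
)
for r in result['EvaluationResults']:
    print(r['EvalActionName'], '->', r['EvalDecision'])  # expect "allowed"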
Here is my current code:
import os
import sys

# This adds the parent directory of bin so we can find the module
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
sys.path.append(parent_dir)
# This adds the venv lib/python3.6/site-packages/ to the search path
mod_path = os.path.abspath(parent_dir + "/lib/python" + str(sys.version_info[0]) + "." + str(sys.version_info[1]) + "/site-packages/")
sys.path.append(mod_path)

from base64 import b64decode
import boto3
from io import BytesIO
import datetime
from openpyxl import load_workbook
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine
import requests

workdocs_region = 'us-west-2'
services_region = 'us-east-1'
wd_role_arn = 'arn:aws:iam::account-b:role/WorkDocs_API_Developer'

def assume_role(wd_role_arn):
    # Assume the cross-account WorkDocs role and return its temporary credentials
    sts = boto3.client('sts')
    creds = sts.assume_role(RoleArn=wd_role_arn,
                            RoleSessionName='workdocs_session'
                            )['Credentials']
    return creds

def get_data():
    role = assume_role(wd_role_arn)
    client = boto3.client('workdocs',
                          aws_access_key_id=role['AccessKeyId'],
                          aws_secret_access_key=role['SecretAccessKey'],
                          aws_session_token=role['SessionToken'],
                          region_name=workdocs_region
                          )
    folder_id = os.environ['FOLDER_ID']
    doc_id = os.environ['DOC_ID']
    if not doc_id:
        # No document ID supplied, so search the folder for a matching file name
        documents = client.describe_folder_contents(FolderId=folder_id)
        file = os.environ['FILE_NAME'].replace(' ', '_')
        for d in documents['Documents']:
            if file in d['LatestVersionMetadata']['Name'].replace(' ', '_'):
                print(d['LatestVersionMetadata']['Id'])
                doc_id = d['LatestVersionMetadata']['Id']
    doc_meta = client.get_document(
        DocumentId=doc_id
    )
    latest_doc = client.get_document_version(
        DocumentId=doc_meta['Metadata']['Id'],
        VersionId=doc_meta['Metadata']['LatestVersionMetadata']['Id'],
        Fields='SOURCE'
    )
    document_url = latest_doc['Metadata']['Source']['ORIGINAL']
    document_name = latest_doc['Metadata']['Name']
    # Download the spreadsheet and load each sheet into a DataFrame
    r = requests.get(document_url)
    wb = load_workbook(filename=BytesIO(r.content))
    for s in wb.sheetnames:
        ws = wb[s]
        data = ws.values
        columns = next(data)[0:]
        columns = [item.lower().replace(' ', '_').strip() for item in columns]
        df = pd.DataFrame(data, columns=columns)
        df['snapshot_date'] = datetime.date.today()
        if os.environ['OUT_LOCATION'] in ['Redshift', 'redshift', 'rs']:
            redshift_load(df)
        elif os.environ['OUT_LOCATION'] in ['S3', 's3']:
            s3_load(df, s)

def redshift_load(df):
    rs = os.environ['REDSHIFT_INSTANCE']
    rs_port = os.environ['REDSHIFT_PORT']
    rs_db = os.environ['REDSHIFT_DB']
    rs_user = os.environ['REDSHIFT_USER']
    # Decrypt the KMS-encrypted password stored in the environment
    rs_password = boto3.client('kms', region_name=services_region).decrypt(CiphertextBlob=b64decode(os.environ['REDSHIFT_PASSWORD']))['Plaintext']
    engine = create_engine('postgresql://{}:{}@{}:{}/{}'.format(rs_user, rs_password.decode('utf-8'), rs, rs_port, rs_db))
    schema = os.environ['SCHEMA_NAME']
    table = os.environ['TABLE_NAME']
    df.to_sql(table, engine, schema, if_exists='append', index=False, method='multi')

def s3_load(df, sheet):
    session = boto3.Session()
    creds = session.get_credentials()
    client = boto3.client('s3',
                          aws_access_key_id=creds.access_key,
                          aws_secret_access_key=creds.secret_key,
                          aws_session_token=creds.token,
                          region_name=services_region,
                          )
    csv = df.to_csv(index=False)
    key = datetime.datetime.today().strftime('%Y/%m/%d') + '/' + sheet + '.csv'
    client.put_object(Bucket=os.environ['BUCKET'], Key=key, Body=csv)

def lambda_handler(event, context):
    get_data()
Can someone tell me why I am getting this error:
"errorMessage": "An error occurred (UnauthorizedResourceAccessException) when calling the GetDocument operation: Principal [arn:aws:sts::289497978546:assumed-role/WorkDocs_API_Developer/workdocs_session] is not allowed to execute [workdocs:GetDocument] on the resource"
I cannot figure it out for the life of me.
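One quick way to confirm which principal the WorkDocs calls actually run as (it should match the ARN in the error) is to ask STS with the assumed credentials. A minimal sketch, reusing assume_role() and wd_role_arn from the code above:

import boto3

# Hedged sketch: print the caller identity of the assumed-role session.
# assume_role() and wd_role_arn come from the code above.
creds = assume_role(wd_role_arn)
assumed_sts = boto3.client('sts',
                           aws_access_key_id=creds['AccessKeyId'],
                           aws_secret_access_key=creds['SecretAccessKey'],
                           aws_session_token=creds['SessionToken'])
# Expect arn:aws:sts::<account-b-id>:assumed-role/WorkDocs_API_Developer/workdocs_session
print(assumed_sts.get_caller_identity()['Arn'])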
The Lambda execution role in the account has the following.
Trust policy:
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Service": "lambda.amazonaws.com",
        "AWS": [
          "arn:aws:sts::account-b:assumed-role/WorkDocs_API_Developer/workdocs_session",
          "arn:aws:iam::account-b:role/WorkDocs_API_Developer",
          "arn:aws:sts::account-a:assumed-role/Lambda-WorkDocs/corpworkdocs_api_pull_infra"
        ]
      },
      "Action": "sts:AssumeRole"
    }
  ]
}
Resource policy:
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Action": [
        "workdocs:*"
      ],
      "Effect": "Allow",
      "Resource": [
        "*",
        "arn:aws:sts::account-b:assumed-role/WorkDocs_API_Developer/workdocs_session"
      ]
    }
  ]
}
Is there something my code isn't doing? Is there some kind of issue with the trust or resource policies?
The answer was that I was using the wrong ID. The existing code was pulling the ID out of LatestVersionMetadata, but that is the ID of the latest document version, not the actual document ID, which is why GetDocument rejected it with the misleading UnauthorizedResourceAccessException above.
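To make the difference concrete, here is a minimal sketch (with client and folder_id set up as in the code below) that prints both IDs for each folder entry:

# Hedged sketch: each entry returned by describe_folder_contents carries both
# the document's own ID and the latest version's ID, and they are different.
documents = client.describe_folder_contents(FolderId=folder_id)
for d in documents['Documents']:
    print('document id:', d['Id'])                           # what get_document expects
    print('version id: ', d['LatestVersionMetadata']['Id'])  # what the old code passed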
You have to go back to the document entry itself to get the correct ID. Here is the working code:
import os
import sys

# This adds the parent directory of bin so we can find the module
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
sys.path.append(parent_dir)
# This adds the venv lib/python3.6/site-packages/ to the search path
mod_path = os.path.abspath(parent_dir + "/lib/python" + str(sys.version_info[0]) + "." + str(sys.version_info[1]) + "/site-packages/")
sys.path.append(mod_path)

from base64 import b64decode
import boto3
from io import BytesIO
import datetime
from openpyxl import load_workbook
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine
import requests

workdocs_region = 'us-west-2'
services_region = 'us-east-1'
wd_role_arn = 'arn:aws:iam::account-b:role/WorkDocs_API_Developer'

def assume_role(wd_role_arn):
    # Assume the cross-account WorkDocs role and return its temporary credentials
    sts = boto3.client('sts')
    creds = sts.assume_role(RoleArn=wd_role_arn,
                            RoleSessionName='workdocs_session'
                            )['Credentials']
    return creds

def get_data():
    role = assume_role(wd_role_arn)
    client = boto3.client('workdocs',
                          aws_access_key_id=role['AccessKeyId'],
                          aws_secret_access_key=role['SecretAccessKey'],
                          aws_session_token=role['SessionToken'],
                          region_name=workdocs_region
                          )
    folder_id = os.environ['FOLDER_ID']
    doc_id = os.environ['DOC_ID']
    if not doc_id:
        # No document ID supplied, so search the folder for a matching file name
        documents = client.describe_folder_contents(FolderId=folder_id)
        file = os.environ['FILE_NAME'].replace(' ', '_')
        for d in documents['Documents']:
            if file in d['LatestVersionMetadata']['Name'].replace(' ', '_'):
                # The fix: use the entry's own Id (the document ID),
                # not LatestVersionMetadata['Id'] (the version ID)
                doc_id = d['Id']
    doc_meta = client.get_document(
        DocumentId=doc_id
    )
    latest_doc = client.get_document_version(
        DocumentId=doc_meta['Metadata']['Id'],
        VersionId=doc_meta['Metadata']['LatestVersionMetadata']['Id'],
        Fields='SOURCE'
    )
    document_url = latest_doc['Metadata']['Source']['ORIGINAL']
    document_name = latest_doc['Metadata']['Name']
    # Download the spreadsheet and load each sheet into a DataFrame
    r = requests.get(document_url)
    wb = load_workbook(filename=BytesIO(r.content))
    for s in wb.sheetnames:
        ws = wb[s]
        data = ws.values
        columns = next(data)[0:]
        columns = [item.lower().replace(' ', '_').strip() for item in columns]
        df = pd.DataFrame(data, columns=columns)
        df['snapshot_date'] = datetime.date.today()
        if os.environ['OUT_LOCATION'] in ['Redshift', 'redshift', 'rs']:
            redshift_load(df)
        elif os.environ['OUT_LOCATION'] in ['S3', 's3']:
            s3_load(df, s)

def redshift_load(df):
    rs = os.environ['REDSHIFT_INSTANCE']
    rs_port = os.environ['REDSHIFT_PORT']
    rs_db = os.environ['REDSHIFT_DB']
    rs_user = os.environ['REDSHIFT_USER']
    # Decrypt the KMS-encrypted password stored in the environment
    rs_password = boto3.client('kms', region_name=services_region).decrypt(CiphertextBlob=b64decode(os.environ['REDSHIFT_PASSWORD']))['Plaintext']
    engine = create_engine('postgresql://{}:{}@{}:{}/{}'.format(rs_user, rs_password.decode('utf-8'), rs, rs_port, rs_db))
    schema = os.environ['SCHEMA_NAME']
    table = os.environ['TABLE_NAME']
    df.to_sql(table, engine, schema, if_exists='append', index=False, method='multi')

def s3_load(df, sheet):
    session = boto3.Session()
    creds = session.get_credentials()
    client = boto3.client('s3',
                          aws_access_key_id=creds.access_key,
                          aws_secret_access_key=creds.secret_key,
                          aws_session_token=creds.token,
                          region_name=services_region,
                          )
    csv = df.to_csv(index=False)
    key = datetime.datetime.today().strftime('%Y/%m/%d') + '/' + sheet + '.csv'
    client.put_object(Bucket=os.environ['BUCKET'], Key=key, Body=csv)

def lambda_handler(event, context):
    get_data()
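One further note on the lookup: describe_folder_contents returns results in pages, so a folder with many documents may need more than the single call above. A sketch using the boto3 paginator for this operation (assuming the same client, folder_id, and file variables from get_data):

# Hedged sketch: paginate describe_folder_contents so the file-name match
# also finds documents beyond the first page of results.
paginator = client.get_paginator('describe_folder_contents')
for page in paginator.paginate(FolderId=folder_id):
    for d in page['Documents']:
        if file in d['LatestVersionMetadata']['Name'].replace(' ', '_'):
            doc_id = d['Id']  # the document ID, as in the fix above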