boto3 / aws-workdocs role-assumption error, and a Python Lambda execution error



I have verified the following policy and trust relationship in account-b.

Trust relationship:

{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "AWS": "arn:aws:sts::account-b:assumed-role/WorkDocs_API_Developer/workdocs_session"
      },
      "Action": "sts:AssumeRole",
      "Condition": {}
    }
  ]
}

And the attached policy:

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "VisualEditor0",
            "Effect": "Allow",
            "Action": [
                "workdocs:GetDocumentPath",
                "workdocs:GetCurrentUser",
                "workdocs:CreateNotificationSubscription",
                "workdocs:DescribeAvailableDirectories",
                "workdocs:UpdateFolder",
                "workdocs:CheckAlias",
                "workdocs:DownloadDocumentVersion",
                "workdocs:GetResources",
                "workdocs:DescribeActivities",
                "workdocs:DescribeRootFolders",
                "workdocs:UpdateDocument",
                "workdocs:CreateFolder",
                "workdocs:GetFolder",
                "workdocs:InitiateDocumentVersionUpload",
                "workdocs:DescribeResourcePermissions",
                "workdocs:DescribeDocumentVersions",
                "workdocs:CreateLabels",
                "workdocs:DescribeGroups",
                "workdocs:DescribeNotificationSubscriptions",
                "workdocs:DescribeFolderContents",
                "workdocs:AbortDocumentVersionUpload",
                "workdocs:DescribeComments",
                "workdocs:GetDocumentVersion",
                "workdocs:AddResourcePermissions",
                "workdocs:DescribeInstances",
                "workdocs:GetDocument",
                "workdocs:DescribeUsers",
                "workdocs:CreateComment",
                "workdocs:CreateCustomMetadata",
                "workdocs:UpdateDocumentVersion",
                "workdocs:GetFolderPath"
            ],
            "Resource": "*"
        }
    ]
}

Here is my current code:

import os
import sys
# This adds the parent directory of bin so we can find the module
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
sys.path.append(parent_dir)
# This adds the venv lib/pythonX.Y/site-packages/ directory to the search path
mod_path = os.path.abspath(parent_dir+"/lib/python"+str(sys.version_info[0])+"."+str(sys.version_info[1])+"/site-packages/")
sys.path.append(mod_path)
from base64 import b64decode
import boto3
from io import BytesIO
import datetime
from openpyxl import load_workbook
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine
import requests
workdocs_region = 'us-west-2'
services_region = 'us-east-1'
wd_role_arn = 'arn:aws:iam::account-b:role/WorkDocs_API_Developer'

def assume_role(wd_role_arn):
    sts = boto3.client('sts')
    creds = sts.assume_role(RoleArn=wd_role_arn,
                            RoleSessionName='workdocs_session'
                            )['Credentials']
    return creds
def get_data():
    role = assume_role(wd_role_arn) 
    client = boto3.client('workdocs',
                          aws_access_key_id=role['AccessKeyId'],
                          aws_secret_access_key=role['SecretAccessKey'],
                          aws_session_token=role['SessionToken'],
                          region_name=workdocs_region
                          )
    folder_id = os.environ['FOLDER_ID']
    doc_id = os.environ['DOC_ID']
    if not doc_id:
        documents = client.describe_folder_contents(FolderId=folder_id)
        file = os.environ['FILE_NAME'].replace(' ','_')
        for d in documents['Documents']:
            if file in d['LatestVersionMetadata']['Name'].replace(' ','_'):
                print(d['LatestVersionMetadata']['Id'])
                doc_id = d['LatestVersionMetadata']['Id']
    doc_meta = client.get_document(
                    DocumentId=doc_id
                    )
    latest_doc = client.get_document_version(
                    DocumentId=doc_meta['Metadata']['Id'],
                    VersionId=doc_meta['Metadata']['LatestVersionMetadata']['Id'],
                    Fields='SOURCE'
                    )
    document_url = latest_doc['Metadata']['Source']['ORIGINAL']
    document_name = latest_doc['Metadata']['Name']
    r = requests.get(document_url)
    wb = load_workbook(filename=BytesIO(r.content))
    for s in wb.sheetnames:
        ws = wb[s]
        data = ws.values
        columns = next(data)[0:]
        columns = [item.lower().replace(' ', '_').strip() for item in columns]
        df = pd.DataFrame(data, columns=columns)
        df['snapshot_date'] = datetime.date.today()
        if os.environ['OUT_LOCATION'] in ['Redshift', 'redshift', 'rs'] :
            redshift_load(df)
        elif os.environ['OUT_LOCATION'] in ['S3' , 's3'] :
            s3_load(df, s)
def redshift_load(df):
    rs = os.environ['REDSHIFT_INSTANCE']
    rs_port = os.environ['REDSHIFT_PORT']
    rs_db = os.environ['REDSHIFT_DB']
    rs_user = os.environ['REDSHIFT_USER']
    rs_password = boto3.client('kms', region_name=services_region).decrypt(CiphertextBlob=b64decode(os.environ['REDSHIFT_PASSWORD']))['Plaintext']
    engine = create_engine('postgresql://{}:{}@{}:{}/{}'.format(rs_user,rs_password.decode('utf-8'),rs,rs_port,rs_db))
    schema = os.environ['SCHEMA_NAME']
    table = os.environ['TABLE_NAME']
    df.to_sql(table, engine, schema, if_exists='append', index=False, method='multi')
def s3_load(df, sheet):
    session = boto3.Session()
    creds = session.get_credentials()
    client = boto3.client('s3',
                          aws_access_key_id=creds.access_key,
                          aws_secret_access_key=creds.secret_key,
                          aws_session_token=creds.token,
                          region_name=services_region,
                          )        
    csv = df.to_csv(index=False)
    key = datetime.datetime.today().strftime('%Y/%m/%d') + '/' + sheet + '.csv'
    client.put_object(Bucket=os.environ['BUCKET'], Key=key, Body=csv)

def lambda_handler(event, context):
    get_data()

Can anyone tell me why I am getting this error:

"errorMessage": "An error occurred (UnauthorizedResourceAccessException) when calling the GetDocument operation: Principal [arn:aws:sts::289497978546:assumed-role/WorkDocs_API_Developer/workdocs_session] is not allowed to execute [workdocs:GetDocument] on the resource"

I can't figure it out for the life of me.

The Lambda execution role in the account has the following:

Trust policy:

{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Service": "lambda.amazonaws.com",
        "AWS": [
          "arn:aws:sts::account-b:assumed-role/WorkDocs_API_Developer/workdocs_session",
          "arn:aws:iam::account-b:role/WorkDocs_API_Developer",
          "arn:aws:sts::account-a:assumed-role/Lambda-WorkDocs/corpworkdocs_api_pull_infra"
        ]
      },
      "Action": "sts:AssumeRole"
    }
  ]
}

Resource policy:

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Action": [
                "workdocs:*"
            ],
            "Effect": "Allow",
            "Resource": [
                "*",
                "arn:aws:sts::account-b:assumed-role/WorkDocs_API_Developer/workdocs_session"
            ]
        }
    ]
}

Is there something my code isn't doing? Or is there some kind of problem with the trust or resource policies?

The answer was that I was using the wrong Id: the existing code was pulling the Id from LatestVersionMetadata, which is not the actual document Id. I don't know exactly what the Id in that block is for (presumably the latest version itself), but it is not the document's.

You have to go back to the document itself to get the correct Id. Here is the full code:

import os
import sys
# This adds the parent directory of bin so we can find the module
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
sys.path.append(parent_dir)
# This adds the venv lib/pythonX.Y/site-packages/ directory to the search path
mod_path = os.path.abspath(parent_dir+"/lib/python"+str(sys.version_info[0])+"."+str(sys.version_info[1])+"/site-packages/")
sys.path.append(mod_path)
from base64 import b64decode
import boto3
from io import BytesIO
import datetime
from openpyxl import load_workbook
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine
import requests
workdocs_region = 'us-west-2'
services_region = 'us-east-1'
wd_role_arn = 'arn:aws:iam::account-b:role/WorkDocs_API_Developer'

def assume_role(wd_role_arn):
    sts = boto3.client('sts')
    creds = sts.assume_role(RoleArn=wd_role_arn,
                            RoleSessionName='workdocs_session'
                            )['Credentials']
    return creds
def get_data():
    role = assume_role(wd_role_arn) 
    client = boto3.client('workdocs',
                          aws_access_key_id=role['AccessKeyId'],
                          aws_secret_access_key=role['SecretAccessKey'],
                          aws_session_token=role['SessionToken'],
                          region_name=workdocs_region
                          )
    folder_id = os.environ['FOLDER_ID']
    doc_id = os.environ['DOC_ID']
    if not doc_id:
        documents = client.describe_folder_contents(FolderId=folder_id)
        file = os.environ['FILE_NAME'].replace(' ','_')
        for d in documents['Documents']:
            if file in d['LatestVersionMetadata']['Name'].replace(' ','_'):
                # the document Id lives on the Document entry itself,
                # not on its LatestVersionMetadata
                doc_id = d['Id']
    doc_meta = client.get_document(
                    DocumentId=doc_id
                    )
    latest_doc = client.get_document_version(
                    DocumentId=doc_meta['Metadata']['Id'],
                    VersionId=doc_meta['Metadata']['LatestVersionMetadata']['Id'],
                    Fields='SOURCE'
                    )
    document_url = latest_doc['Metadata']['Source']['ORIGINAL']
    document_name = latest_doc['Metadata']['Name']
    r = requests.get(document_url)
    wb = load_workbook(filename=BytesIO(r.content))
    for s in wb.sheetnames:
        ws = wb[s]
        data = ws.values
        columns = next(data)[0:]
        columns = [item.lower().replace(' ', '_').strip() for item in columns]
        df = pd.DataFrame(data, columns=columns)
        df['snapshot_date'] = datetime.date.today()
        if os.environ['OUT_LOCATION'] in ['Redshift', 'redshift', 'rs'] :
            redshift_load(df)
        elif os.environ['OUT_LOCATION'] in ['S3' , 's3'] :
            s3_load(df, s)
def redshift_load(df):
    rs = os.environ['REDSHIFT_INSTANCE']
    rs_port = os.environ['REDSHIFT_PORT']
    rs_db = os.environ['REDSHIFT_DB']
    rs_user = os.environ['REDSHIFT_USER']
    rs_password = boto3.client('kms', region_name=services_region).decrypt(CiphertextBlob=b64decode(os.environ['REDSHIFT_PASSWORD']))['Plaintext']
    engine = create_engine('postgresql://{}:{}@{}:{}/{}'.format(rs_user,rs_password.decode('utf-8'),rs,rs_port,rs_db))
    schema = os.environ['SCHEMA_NAME']
    table = os.environ['TABLE_NAME']
    df.to_sql(table, engine, schema, if_exists='append', index=False, method='multi')
def s3_load(df, sheet):
    session = boto3.Session()
    creds = session.get_credentials()
    client = boto3.client('s3',
                          aws_access_key_id=creds.access_key,
                          aws_secret_access_key=creds.secret_key,
                          aws_session_token=creds.token,
                          region_name=services_region,
                          )        
    csv = df.to_csv(index=False)
    key = datetime.datetime.today().strftime('%Y/%m/%d') + '/' + sheet + '.csv'
    client.put_object(Bucket=os.environ['BUCKET'], Key=key, Body=csv)

def lambda_handler(event, context):
    get_data()
