我在编写将文件从谷歌云存储加载或导出到谷歌大查询的python脚本时遇到问题。
#standardSQL
import json
import argparse
import time
import uuid
from google.cloud import bigquery
from google.cloud import storage
dataset = 'dataworks-356fa'
source = 'gs://dataworks-356fa-backups/pullnupload.json'
# def load_data_from_gcs(dataset, source):
# # load_data_from_gcs(dataworks-356fa, 'test10', gs://dataworks-356fa-backups/pullnupload.json):
# bigquery_client = bigquery.Client('dataworks-356fa')
# dataset = bigquery_client.dataset(FirebaseArchive)
# table = dataset.table(test10)
# job_name = str(uuid.uuid4())
#
# job = bigquery_client.load_table_from_storage(
# job_name, test10, 'gs://dataworks-356fa-backups/pullnupload.json')
#
# job.source_format = 'NEWLINE_DELIMITED_JSON'
# job.begin()
def load_data_from_gcs(dataset, test10, source ):
bigquery_client = bigquery.Client(dataset)
dataset = bigquery_client.dataset('FirebaseArchive')
table = dataset.table(test10)
job_name = str(uuid.uuid4())
job = bigquery_client.load_table_from_storage(
job_name, table, "gs://dataworks-356fa-backups/pullnupload.json")
job.source_format = 'NEWLINE_DELIMITED_JSON'
job.begin()
job.errors
到目前为止,这是我的代码。此文件将运行,但它不会将任何内容加载到 bigquery 中或返回错误消息。它运行然后返回正常的终端视图。
从你上一个的问题来看,你有wait_for_job
函数。您应该在打印错误之前使用它,如下所示:
def load_data_from_gcs(dataset, test10, source ):
bigquery_client = bigquery.Client(dataset)
dataset = bigquery_client.dataset('FirebaseArchive')
table = dataset.table(test10)
job_name = str(uuid.uuid4())
job = bigquery_client.load_table_from_storage(
job_name, table, "gs://dataworks-356fa-backups/pullnupload.json")
job.source_format = 'NEWLINE_DELIMITED_JSON'
job.begin()
wait_for_job(job)
print("state of job is: " + job.state)
print("errors: " + job.errors)
您还可以使用 IPython 手动运行每个步骤,并观察每行的结果。
请注意,job.state
必须先达到"完成"状态,然后才能查找错误。