BigQuery CSV file load failure

google.api_core.exceptions.BadRequest: 400 Error while reading data, error message: CSV table encountered too many errors, giving up. Rows: 1; errors: 1. Please look into the error stream for more details.

I am trying to run a Python script that loads data into a CSV, but I am getting this error. Can anyone explain this error to me?

import csv
#Imports the Google Cloud BigQuery client library
from google.cloud import bigquery
from google.cloud.bigquery import Dataset
from google.cloud.bigquery import Table
from google.cloud.bigquery import LoadJobConfig
from google.cloud.bigquery import SchemaField
filename = 'events.csv'
idNeeded=0
#Instantiates a client
bigquery_client = bigquery.Client()
#Runs a query from BigQuery
def runBigQueryQuery( query, filename, idNeeded ):
  if idNeeded == 1:  
    i = 1
    query_job = bigquery_client.query(query)
    results = query_job.result()
    with open (filename, 'w', newline='') as f: #Create CSV file
      write = csv.writer(f,dialect='excel',lineterminator='n')
      try:
        for row in results:
          print('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{} '.format(
              row.EventId,
              row.ScheduleId,
              row.Date,
              row.TimeFrom,
              row.Description,
              row.TimeTo,
              row.ResourceId,
              row.EmployeeId,
              row.MovementTypeId,
              row.Capacity,
              row.CanBook,
              row.NonMemberFlag,
              row.MemberAmount,
              row.NonMemberAmount,
              row.Attendance))
          write.writerow([i,
              row.EventId,
              row.ScheduleId,
              row.Date,
              row.TimeFrom,
              row.Description,
              row.TimeTo,
              row.ResourceId,
              row.EmployeeId,
              row.MovementTypeId,
              row.Capacity,
              row.CanBook,
              row.NonMemberFlag,
              row.MemberAmount,
              row.NonMemberAmount,
              row.Attendance]) #write Rows to CSV
          i = i + 1
      except AttributeError as error:
        print('An error occurred: {0}'.format(error))

  else:
    query_job = bigquery_client.query(query)
    results = query_job.result()
    with open (filename, 'w', newline='') as f: #Create CSV file
      write = csv.writer(f,dialect='excel',lineterminator='n')
      try:
        for row in results:
          print('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{} '.format(
              row.EventId,
              row.ScheduleId,
              row.Date,
              row.TimeFrom,
              row.Description,
              row.TimeTo,
              row.ResourceId,
              row.EmployeeId,
              row.MovementTypeId,
              row.Capacity,
              row.CanBook,
              row.NonMemberFlag,
              row.MemberAmount,
              row.NonMemberAmount,
              row.Attendance))
          write.writerow([row.EventId,
              row.ScheduleId,
              row.Date,
              row.TimeFrom,
              row.Description,
              row.TimeTo,
              row.ResourceId,
              row.EmployeeId,
              row.MovementTypeId,
              row.Capacity,
              row.CanBook,
              row.NonMemberFlag,
              row.MemberAmount,
              row.NonMemberAmount,
              row.Attendance]) #write Rows to CSV
      except AttributeError as error:
        print('An error occurred: {0}'.format(error))
  return
#Creates a dataset in BigQuery
def createDataset(datasetname):
  dataset_ref = bigquery_client.dataset(datasetname)
  dataset = Dataset(dataset_ref)
  dataset.location = 'US'
  dataset = bigquery_client.create_dataset(dataset)
  return
def getDataset(datasetname):
  dataset = bigquery_client.dataset(datasetname)
  return dataset
def createTable(tablename, global_dataset_ref):
  schema = [
    #Enter Schema here.
    # SchemaField('url', 'STRING', mode='required'),
    # SchemaField('views', 'INTEGER', mode='required')
  ]
  table_ref = global_dataset_ref.table(tablename)
  table = Table(table_ref, schema=schema)
  table = bigquery_client.create_table(table)
  assert table.table_id == tablename
  return
def getTable(tablename, global_dataset_ref):
  table_ref = global_dataset_ref.table(tablename)
  table = bigquery_client.get_table(table_ref)
  # print(table.table_id)
  print(table.schema)
  # print(table.description)
  # print(table.num_rows)
  return table

def getTableSchema(tablename, global_dataset_ref):
  table_ref = global_dataset_ref.table(tablename)
  table = bigquery_client.get_table(table_ref)
  schema = table.schema
  return schema
def loadDataFromCSV(tablename, global_dataset_ref, filename):
  schema = getTableSchema(tablename, global_dataset_ref)
  table_ref = global_dataset_ref.table(tablename)
  load_config = LoadJobConfig()
  load_config.source_format = bigquery.SourceFormat.CSV
  load_config.schema = schema
  load_config.autodetect = True
  load_config.allow_quoted_newlines = True
  with open (filename, 'rb') as readable:
      job = bigquery_client.load_table_from_file(readable, table_ref, location='US', job_config=load_config)
  job.result()
  print('Loaded {} rows into {}:{}.'.format(job.output_rows, global_dataset_ref, table_ref.table_id)) 
  return
# Testing
if __name__ == '__main__':
  datasetname = 'Data_Layer'
  tablename = 'Events'
  sqlquery = '''SELECT 
  NULL AS EventId,
  sc.scheduleid AS ScheduleId,
  NULL AS Description,
  sc.scheduledatefrom AS Date,
  sc.timestart AS TimeFrom,
  sc.timeduration AS TimeTo,
  r.resourceid AS ResourceId,
  sp.employeeid AS EmployeeId,
  NULL AS MovementTypeId,
  r.configheight AS Capacity,
  CASE
    WHEN st.schedulestatus IN (1,  3) THEN '1'
    ELSE '0'
  END CanBook,
  CASE
    WHEN sv.nonmembermayenroll = TRUE THEN '1'
    ELSE '0'
  END NonMemberFlag,
  COALESCE(ProgramPrice.pricemember,
    ServicePrice.pricemember,
    0) AS MemberAmount,
  COALESCE(ProgramPrice.pricenonmember,
    ServicePrice.pricenonmember,
    0) AS NonMemberAmount,
  'N/A' AS Attendance
FROM
  AloomaTest.SCSESSIONS s
LEFT JOIN
  AloomaTest.SCSESSION_PROVIDERS sp
ON
  sp.sessionid = s.sessionid
LEFT JOIN
  AloomaTest.SCSESSION_RESOURCES sr
ON
  sr.sessionid = s.sessionid
LEFT JOIN
  AloomaTest.SCSCHEDULES sc
ON
  sc.scheduleid = s.scheduleid
LEFT JOIN
  AloomaTest._SCSCHEDULESTATUS ST
ON
  ST.schedulestatus = sc.schedulestatus
LEFT JOIN
  AloomaTest.SCRESOURCES r
ON
  r.resourceid = sr.resourceid
LEFT JOIN
  AloomaTest.SCSERVICES sv
ON
  sv.serviceid = sc.serviceid
LEFT JOIN
  AloomaTest.SCPROGREG_SEMCOURSES semc
ON
  semc.serviceid = sc.serviceid
  AND semc.semesterid = sc.semesterid
LEFT JOIN
  AloomaTest.SCPROGREG_PRICES ProgramPrice
ON
  ProgramPrice.scheduleid = sc.scheduleid
LEFT JOIN
  AloomaTest.SCPROGREG_PRICES ServicePrice
ON
  ServicePrice.semcourseid = semc.semcourseid
WHERE
  COALESCE(ProgramPrice.feetypeid, 0) = 0
  AND COALESCE(ServicePrice.feetypeid, 0) = 0
  AND sc.scheduleid IN (31207, 25936, 5761094, 832794, 9825, 17912)
'''
  #createDataset(datasetname) #Successfully tested this code 2018-09-24
  global_dataset_ref = getDataset(datasetname) #Successfully tested this code 2018-09-24
  #createTable(tablename, global_dataset_ref) #Successfully tested this code 2018-09-24
  getTable(tablename, global_dataset_ref) #Successfully tested this code 2018-09-24
  runBigQueryQuery(sqlquery, filename, idNeeded) #Successfully tested this code 2018-09-24
  loadDataFromCSV(tablename, global_dataset_ref, filename) #Successfully tested this code 2018-09-24

Sample data

,25936,2009-06-01 18:30:00,1110,M1PO - M1 PT Full,60,,254,,,1,0,0,0,N/A
,17912,2009-04-22 06:15:00,375,Pil Ptnr - Pilates Partner,60,47,398,,10,1,1,0,0,N/A
,31207,2009-06-22 19:00:00,1140,D390-2 - 1 1/2 Hour Massage,90,107,548,,20,0,0,0,0,N/A
,5761094,2018-10-05 00:00:00,1140,Fr 7:00-9:00p Adult Paddle Mixer,120,583,2349,,20,0,1,20,50,N/A
,5761094,2018-10-05 00:00:00,1140,Fr 7:00-9:00p Adult Paddle Mixer,120,591,2349,,20,0,1,20,50,N/A
,5761094,2018-10-05 00:00:00,1140,Fr 7:00-9:00p Adult Paddle Mixer,120,585,2349,,20,0,1,20,50,N/A
,5761094,2018-10-05 00:00:00,1140,Fr 7:00-9:00p Adult Paddle Mixer,120,584,2349,,20,0,1,20,50,N/A
,832794,2012-02-21 14:30:00,870,Comp Member One/One,60,,2963,,,1,0,0,0,N/A

The error message indicates that your CSV contains only one row, so you are most likely missing the newlines when you produce it. In your script the culprit is lineterminator='n': the csv module writes the literal letter n between records instead of a newline character ('\n'), so the whole file ends up on a single line and BigQuery gives up at row 1.
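A minimal sketch of the fix, with a couple of hypothetical rows standing in for the BigQuery results (only the line terminator changes; you could also drop the argument entirely, since the excel dialect defaults to '\r\n'):

import csv

# Hypothetical rows standing in for the query results.
rows = [
    [1, 25936, '2009-06-01 18:30:00', 1110],
    [2, 17912, '2009-04-22 06:15:00', 375],
]

with open('events.csv', 'w', newline='') as f:
    # '\n' (with the backslash) is a newline; 'n' writes the letter n.
    writer = csv.writer(f, dialect='excel', lineterminator='\n')
    for row in rows:
        writer.writerow(row)

# Sanity check: the file should contain one line per row written.
with open('events.csv') as f:
    print(sum(1 for _ in f))  # expect 2

With a real terminator each writerow call ends its record with a line break, the load job sees one record per line, and the "Rows: 1; errors: 1" failure should go away.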

Latest Update