Teradata TPT加载脚本性能



我希望有人能帮助我改进Teradata TPT加载脚本。我使用下面的脚本将一个3GB分隔的CSV文件加载到Teradata中。该文件位于我的本地笔记本电脑硬盘上。加载该文件大约需要30分钟,这相当长。要加载的行总数约为3000万。关于绩效改进的任何建议。

DEFINE JOB LOAD_TD_FROM_CSV
DESCRIPTION 'Load Teradata table from CSV File'
(
DEFINE SCHEMA FOOD_TPT   /*Define Table schema*/
DESCRIPTION 'FOOD_TPT'
(
col1 VARCHAR(20),
col2 VARCHAR(100),
col3 VARCHAR(100),
col4 VARCHAR(100),
col5 VARCHAR(100),
col6 VARCHAR(100),
col7 VARCHAR(100),
col8 VARCHAR(100)

);

DEFINE OPERATOR DDL_OPERATOR  
TYPE DDL
ATTRIBUTES
(
VARCHAR TdpId           = 'system', /*System Name*/
VARCHAR UserName        = 'user',    /*USERNAME*/
VARCHAR UserPassword    = 'password', /*Password*/ 
VARCHAR Errorlist             ='3807' /*This is added to skip 'Table does not exist error' and treat it as warnig*/
);

DEFINE OPERATOR LOAD_CSV    /*Load information*/
DESCRIPTION 'Operator to Load CSV Data'
TYPE LOAD
SCHEMA *
ATTRIBUTES
(
VARCHAR PrivateLogName,
VARCHAR TraceLevel      = 'None',
INTEGER TenacityHours   = 1,
INTEGER TenacitySleep   = 1,
INTEGER MaxSessions     = 4,
INTEGER MinSessions     = 1,
INTEGER BUFFERSIZE      =16,
VARCHAR TargetTable     = 'FOOD_TPT_STG',   /*Define target table name where the file will be loaded*/
VARCHAR LogTable        = 'FOOD_TPT_LOG',     /*Define Log table name*/
VARCHAR ErrorTable1     = 'FOOD_TPT_STG_E1',/*There are 2 error tables. Define them. First table is _ET table*/
VARCHAR ErrorTable2     = 'FOOD_TPT_STG_E2', /*Define _UV table*/
VARCHAR TdpId           = 'system',  /*System Name*/
VARCHAR UserName        = 'user',     /*Username*/
VARCHAR UserPassword    = 'password'              /*Password*/
);
DEFINE OPERATOR READ_CSV
DESCRIPTION 'Operator to Read CSV File'
TYPE DATACONNECTOR PRODUCER
SCHEMA FOOD_TPT
ATTRIBUTES
(
VARCHAR  Filename            =   'file.csv'  /*give file name with path*/
,VARCHAR Format               =   'Delimited'
,VARCHAR TextDelimiter        =   ','
,VARCHAR AcceptExcessColumns  =   'N'
,VARCHAR PrivateLogName       =   'LOAD_FROM_CSV'
,Integer SkipRows=1  /*skips the header in csv file*/
);

Step Setup_Tables  /*Enter all executable SQLs in this step*/
(
APPLY
('Drop table FOOD_TPT_STG_E1;'), /*Drop error tables*/
('Drop table FOOD_TPT_STG_E2;'),
('Drop table FOOD_TPT_LOG;'),  /*Drop Log Table*/
('Drop table FOOD_TPT_STG;'), /*Drop Target staging tables*/
('CREATE  TABLE FOOD_TPT_STG ,NO FALLBACK ,
NO BEFORE JOURNAL,
NO AFTER JOURNAL,
CHECKSUM = DEFAULT,
datablocksize= 1022 kbytes,
DEFAULT MERGEBLOCKRATIO
(
col1 VARCHAR(20),
col2 VARCHAR(100),
col3 VARCHAR(100),
col4 VARCHAR(100),
col5 VARCHAR(100),
col6 VARCHAR(100),
col7 VARCHAR(100),
col8 VARCHAR(100)
)

NO PRIMARY INDEX;')                                                                                                                                            /*Create Target table*/
TO OPERATOR (DDL_OPERATOR);
);
Step Load_Table
(
APPLY ('INSERT INTO FOOD_RISK_TPT_STG
(
:col1
,:col2 
,:col3 
,:col4 
,:col5 
,:col6 
,:col7 
,:col8 
);') /*Inserts records from CSV file into Target Table*/
TO OPERATOR (LOAD_CSV)
SELECT * FROM  operator(READ_CSV);

);
);

提前感谢

正如Fred所写,您指定的BUFFERSIZE为16KB->每个块22行(FastLoad根据定义的最大大小计算(,这导致发送160万条消息。删除该属性,默认情况下会得到1MB,每个块1400行。此外,你可以简化你的脚本如下:

DEFINE JOB LOAD_TD_FROM_CSV
DESCRIPTION 'Load Teradata table from CSV File'
(
Step Setup_Tables  /*Enter all executable SQLs in this step*/
(
APPLY
('Drop table FOOD_TPT_STG_E1;'), /*Drop error tables*/
('Drop table FOOD_TPT_STG_E2;'),
('Drop table FOOD_TPT_LOG;'),  /*Drop Log Table*/
('Drop table FOOD_TPT_STG;'), /*Drop Target staging tables*/
('CREATE  TABLE FOOD_TPT_STG ,NO FALLBACK ,
NO BEFORE JOURNAL,
NO AFTER JOURNAL,
CHECKSUM = DEFAULT,
datablocksize= 1022 kbytes,
DEFAULT MERGEBLOCKRATIO
(
col1 VARCHAR(20),
col2 VARCHAR(100),
col3 VARCHAR(100),
col4 VARCHAR(100),
col5 VARCHAR(100),
col6 VARCHAR(100),
col7 VARCHAR(100),
col8 VARCHAR(100)
)

NO PRIMARY INDEX;') /*Create Target table*/
TO OPERATOR ($DDL
(
TdpId           = 'system', /*System Name*/
UserName        = 'user',    /*USERNAME*/
UserPassword    = 'password', /*Password*/ 
Errorlist             ='3807' /*This is added to skip 'Table does not exist error' and treat it as warnig*/
)
);
Step Load_Table
(
APPLY ($INSERT 'FOOD_RISK_TPT_STG') /*Inserts records from CSV file into Target Table*/
TO OPERATOR ($LOAD(
/*     BUFFERSIZE      = 16384,  Default is 1 MB, increasing it further to the max 16MB might improve a bit */
TargetTable     = 'FOOD_TPT_STG',   /*Define target table name where the file will be loaded*/
LogTable        = 'FOOD_TPT_LOG',     /*Define Log table name*/
ErrorTable1     = 'FOOD_TPT_STG_E1',/*There are 2 error tables. Define them. First table is _ET table*/
ErrorTable2     = 'FOOD_TPT_STG_E2', /*Define _UV table*/
TdpId           = 'system',  /*System Name*/
UserName        = 'user',     /*Username*/
UserPassword    = 'password'              /*Password*/
)
)
SELECT * FROM  operator($FILE_READER
(
Filename             =   'file.csv'  /*give file name with path*/
,Format               =   'Delimited'
,TextDelimiter        =   ','
,AcceptExcessColumns  =   'N'
,PrivateLogName       =   'LOAD_FROM_CSV'
,SkipRows=1  /*skips the header in csv file*/
));
);

还有一些作业变量文件可以使脚本更好地可重用

最新更新