为什么我的Perl脚本在Windows上使用大文件产生损坏的输出?



我是Perl的新手,我有一个非常奇怪的打印问题。

Perl程序在Windows XP上运行。它首先执行一个SQL,然后循环结果,并通过5个子例程输出到5个文件。这5个文件将加载到数据库中,因此它使用|作为分隔符。每个子例程都有如下内容:

print outfile $array[field1] . '|' . $array[field2] . '|' . $array[field3] . "n";

奇怪的是,有时程序输出OK。有时,输出会损坏,例如,在某个点之后缺少换行,或者来自数组的值不正确。

我想知道这是否与记忆有关。输出文件大小从500MB到9GB不等。该程序从SQL中每次读取一条记录的输出,也每次写入一条记录。

下面是完整的Perl脚本。

#!/usr/bin/perl
use DBI;
use DBD::Oracle;
# Constants:
use constant field0  =>  0;
use constant field1  =>  1;
use constant field2  =>  2;
use constant field3  =>  3;
use constant field4  =>  4;
use constant field5  =>  5;
use constant field6  =>  6;
use constant field7  =>  7;
use constant field8  =>  8;
use constant field9  =>  9;
use constant field10  => 10;
use constant field11  => 11;
use constant field12  => 12;
use constant field13  => 13;
use constant field14  => 14;
use constant field15  => 15;
use constant field16  => 16;
use constant field17  => 17;
use constant field18  => 18;
use constant field19  => 19;
use constant field20  => 20;
use constant field21  => 21;
use constant field22  => 22;
use constant field23  => 23;
use constant field24  => 24;
use constant field25  => 25;
use constant field26  => 26;
use constant field27  => 27;
use constant field28  => 28;
use constant field29  => 29;
use constant field30  => 30;
use constant field31  => 31;
use constant field32  => 32;
use constant field33  => 33;
use constant field34  => 34;
use constant field35  => 35;
use constant field36  => 36;
use constant field37  => 37;
use constant field38  => 38;
use constant field39  => 39;
use constant field40  => 40;
use constant field41  => 41;
# Capture Directory Path from Environment Variable:
my $DIRECTORY = $ENV{DATADIR};
# Process Counters:
my %fileCntr = (
    ccr1  => 0,
    ccr2  => 0,
    ccr3  => 0,
    ccr4  => 0,
    ccr5  => 0
   );
# Process Control Hashes:
my %xref = ();
# Process Control Variables:
my $diag = 0;
my $proc = 0;
my $ndcc = 0;
my $previous = "";
# Claims Extract array:
my @arr = ();
my $hdr = "";
# Accept/Parse DSS Connection String:
$ENV{PSWD} =~ /(.+)/(.+)@(.+)/;
my $USER = $1;
my $PASS = $2;
my $CONN = 'DBI:Oracle:' . $3;
# ALTER Date format:
my $ATL = qq(ALTER SESSION SET NLS_DATE_FORMAT = 'YYYY-MM-DD');
# Database Connection:
my $dbh = DBI->connect( $CONN, $USER, $PASS, { RaiseError => 1, AutoCommit => 0 } );
  $dbh->do($ATL);   # Execute ALTER session.
my $SQL = qq(
 SELECT ... here is a big sql query
);
# Open OUTPUT file for CCR processing:
open OUT1, ">$DIRECTORY/ccr1.dat" or die "Unable to open OUT1 file: $!n";
open OUT2, ">$DIRECTORY/ccr2.dat" or die "Unable to open OUT2 file: $!n";
open OUT3, ">$DIRECTORY/ccr3.dat" or die "Unable to open OUT3 file: $!n";
open OUT4, ">$DIRECTORY/ccr4.dat" or die "Unable to open OUT4 file: $!n";
open OUT5, ">$DIRECTORY/ccr5.dat" or die "Unable to open OUT5 file: $!n";
# Redirect STDOUT to log file:
open STDOUT, ">$DIRECTORY/ccr.log"   or die "Unable to open LOG file: $!n";
# Prepare $SQL for execution:
my $sth = $dbh->prepare($SQL);
$sth->execute();
# Produce out files:
{
  local $, = "|";
  local $ = "n";
  while (@arr = $sth->fetchrow_array)
  {
    # Direct Write of CCR1&2 records:
    &BuildCCR12();
    # Write and Wipe CCR3 HASH Table:
    &WriteCCR3() unless ($arr[field0] == $previous);
    &BuildCCR3();
    # Loop processing for CCR4:
    &BuildCCR4();
    # Loop processing for CCR5:
    &BuildCCR5();
  }
}
# Print Record Counts for OUTPUT files:
foreach my $key (keys %fileCntr) { print "$key: " . $fileCntr{$key} . "n"; }
# Terminate DB connection:
$sth->finish();
$dbh->disconnect();
# Close all output files:
close(OUT1); close(OUT2); close(OUT3);
close(OUT4); close(OUT5);
{
 # Reassign Output End-of-record across subroutine block:
 local $ = "n";
 sub BuildCCR12
 {
  # Write CCR1 Table:
  print OUT1 $arr[field6]  . '|' . $arr[field7]   . '|' . $arr[field5]   . '|' .
     $arr[field0]          . '|' . $arr[field8]   . '|' . $arr[field9]   . '|' .
     $arr[field10]         . '|' . $arr[field11]  . '|' . $arr[field12]  . '|' .
     $arr[field13]         . '|' . $arr[field2]   . '|' . $arr[field3]   . '|' .
     $arr[field40]         . '|' . $arr[field16];
  $fileCntr{ccr1}++;
  # Write CCR2 Table:
  unless ($arr[field17] eq '###########') {
            print OUT2 ++$ndcc . "|" .  $arr[field0]     . "|" . 
            $arr[field6]       . '|' . $arr[field7]      . '|' .
            $arr[field17]      . '|' . $arr[field19]     . '|' . $arr[field18] . '|' .
            $arr[field2]       . '|' . $arr[field3]      . '|' . $arr[field39];
            $fileCntr{ccr2}++;
            }
 }
 sub WriteCCR3
 {
  unless ($previous == "")
  {
   # Produce ccr3 from DISTINCT combo listing:
   foreach $key (keys %xref) { print OUT3 $xref{$key}; $fileCntr{ccr3}++; }
   %xref = ();
  }
 }
 sub BuildCCR3
 {
  # Spin off relationship:
  for (my $i = field8; $i <= field13; $i++)
  {
   unless ($arr[$i] == -1)
   {
    $xref{$arr[field0] . "|" . $arr[$i]} = $arr[field0] . "|" . $arr[$i];
   }
  }
   $previous = $arr[field0];
 }
 sub BuildCCR4
 {
  # Spin off relationship:
  for (my $i = field26; $i <= field37; $i++)
  {
   my $sak = $arr[field0] . $arr[field6] . $arr[field7] . $arr[$i];
   unless (($arr[$i] eq '#######') or ($arr[$i] eq '######')) {
                        print OUT4 ++$diag . '|' . $arr[field0] . '|' . 
                              $arr[field6] . '|' .
                              $arr[field7] . '|' . $arr[$i];
                    $fileCntr{ccr4}++;
                  }
  }
 }
 sub BuildCCR5
 {
  # Spin off field0/Procedure relationship:
  for (my $i = field20; $i <= field23; $i++)
  {
   my $sak = $arr[field0] . $arr[field6] . $arr[field7] . $arr[$i];
   unless ($arr[$i] eq '######' or $arr[$i] eq '####') {
                 print OUT5 ++$proc . '|' .  $arr[field0] . '|' . $arr[field6] . '|' .
                         $arr[field7]   . '|' . $arr[$i];
                 $fileCntr{ccr5}++;
               }
  }
 }
}

问题是CCR3输出。在某一点之后,换行由于某种原因消失,数据被损坏,就好像换行吃掉了一些输出一样。从这一点开始,它变成了一条连续的直线。

3260183|147845
3260183|78246
3260183|13898
3260183|184783
3260183|116315
3260183|184483262216|105843262217|1461703262217|175593262217|1360303262217

另一件事是这个程序将运行接近26小时,而通过sql循环,是否有任何机会,数据可以弄乱?但是它仍然不能解释为什么突然换行不再工作了

我试图减少杂乱。首先,您定义的常量会造成很多混乱,而不是有助于提高可读性。如果你有像

use constant LICENSE_NO => 42;

我可以理解,但如果常量只是对应整数数组的下标,那我就不明白了。

我还把所有的打印放在一个单独的子程序中,并在printclose语句中添加了错误检查。

我不认为这是解决你的问题的方法,但这是我开始实际调试的地方。这里可能有一些错别字,所以要小心。

#!/usr/bin/perl
use warnings; use strict;
use DBI;
use File::Spec::Functions qw( catfile );
my @proc = qw(ccr1 ccr2 ccr3 ccr4 ccr5);
# Capture Directory Path from Environment Variable:
my $DIRECTORY = $ENV{DATADIR};
# Process Counters:
my %fileCntr = map { $_ => 0 } @proc;
# Process Control Hashes:
my %xref = ();
# Process Control Variables:
my $diag = 0;
my $proc = 0;
my $ndcc = 0;
my $previous = "";
# Claims Extract array:
my @arr = ();
my $hdr = "";
# Accept/Parse DSS Connection String:
my ($USER, $PASS, $CONN) = ($ENV{PSWD} =~ m{^(.+)/(.+)@(.+)});
# ALTER Date format:
my $ATL = qq(ALTER SESSION SET NLS_DATE_FORMAT = 'YYYY-MM-DD');
# Database Connection:
my $dbh = DBI->connect(
    "DBI::Oracle:$CONN", $USER, $PASS,
    { RaiseError => 1, AutoCommit => 0 },
);
$dbh->do($ATL);   # Execute ALTER session.
my $SQL = qq(
    SELECT ... here is a big sql query
);
my %outh;
for my $proc ( @proc ) {
    my $fn = catfile $DIRECTORY, "$proc.dat";
    open $outh{ $proc }, '>', $fn
        or die "Cannot open '$fn' for writing: $!";
}
# Redirect STDOUT to log file:
open STDOUT, '>', catfile($DIRECTORY, 'ccr.log')
    or die "Unable to open LOG file: $!";
# Prepare $SQL for execution:
my $sth = $dbh->prepare($SQL);
$sth->execute();
# Produce out files:
while (my @arr = $sth->fetchrow_array) {
    # Direct Write of CCR1&2 records:
    BuildCCR12(@arr);
    # Write and Wipe CCR3 HASH Table:
    WriteCCR3(@arr) unless ($arr[0] == $previous);
    BuildCCR3(@arr);
    # Loop processing for CCR4:
    BuildCCR4(@arr);
    # Loop processing for CCR5:
    BuildCCR5(@arr);
}
# Print Record Counts for OUTPUT files:
foreach my $key (keys %fileCntr) {
    printf "%s: %sn", $key, $fileCntr{$key};
}
# Terminate DB connection:
$sth->finish();
$dbh->disconnect();
for my $proc (keys %outh) {
    close $outh{ $proc } or die "Cannot close filehandle for '$proc': $!";
}
sub print_to {
    my ($dest, $data) = @_;
    my $fh = $outh{$dest};
    print $fh join('|', @$data), "n"
        or die "Error writing to '$dest' file: $!";
    $fileCntr{$dest}++;
    return;
}
sub BuildCCR12 {
    my ($arr) = @_;
    print_to(ccr1 =>
        [@{$arr}[6, 7, 5, 0, 8, 9, 10, 13, 2, 3, 40, 16]]);
    if ($arr->[17] ne '###########') {
        print_to(ccr2 =>
            [++$ndcc, @{ $arr }[0, 6, 7, 17, 19, 18, 2, 3, 39]]);
    }
    return;
}
sub WriteCCR3 {
    my ($arr) = @_;
    unless ($previous) {
        # Produce ccr3 from DISTINCT combo listing:
        print_to(ccr3 => [ keys %xref ]);
        %xref = ();
    }
    return;
}
sub BuildCCR3 {
    my ($arr) = @_;
    # Spin off relationship:
    for my $i (8 .. 13) {
        unless ($arr->[$i] == -1) {
            my $k = join '|', @{ $arr }[0, $i];
            $xref{ $k } = $k;
        }
    }
    $previous = $arr->[0];
    return;
}
sub BuildCCR4 {
    my ($arr) = @_;
    # Spin off relationship:
    for my $i (26 .. 37) {
        my $sak = join '|', @{ $arr }[0, 6, 7, $i];
        my $v = $arr->[$i];
        unless ( $v =~ /^#{6,7}z/ ) {
            print_to(ccr4 => [++$diag, @{ $arr }[0, 6, 7, $v]]);
        }
    }
    return;
}
sub BuildCCR5 {
    my ($arr) = @_;
    # Spin off field0/Procedure relationship:
    for my $i (20 .. 23) {
        my $v = $arr[$i];
        my $sak = join('', @{ $arr }[0, 6, 7], $v);
        unless ($v eq '######' or $v eq '####') {
            print_to(ccr5 => [++$proc, @{ $arr }[0, 6, 7], $v]);
        }
    }
    return;
}

最新更新