Perl:循环中某次任务耗时过长时,如何跳过它继续下一次



这段 Perl 代码处理某个文件夹及其子目录中的 txt 文件,并为每个文件生成头文件(.teih)、正文文件(.txml)和 xml 文件。

    #!perl -w
use strict;
use utf8;
use File::Copy;
use File::Basename;
# Package-level state shared by the subs below.
# Directories collected by get_files(); reset by main() for every start folder.
our @folders=();
# Not referenced anywhere in the visible code.
our %errors=();
# Reset per input folder in main(); the code that appended to it is commented out.
our $page_errors='';
# Output folder; derived from $folder_in in main() and read by go_go_gadget().
our $folder_out='';
# Input folder currently being processed (loop variable in main()).
our $folder_in='';
# Per-file output sub-folder; derived from the file name in go_go_gadget().
our $sub_folder="";
# Developer-mode flag: when it is not 0, output files are not placed in $sub_folder.
our $dev=0;
# Not referenced anywhere in the visible code.
our $anker='#a_';
# 1 when the previous line handled by check_linebreak() contained a coded word break.
our $coded_lb=0;
# Running line counter used for the <lb n="..."/> placeholders.
our $line_cnt=0;
# Look up the complete file name that belongs to a short "name + number" key.
#
# $_[0]   short name: a run of non-digits followed by a run of digits
# Returns the first line of ..\..\complete_filenames.txt that contains the
# name part, one or more characters of [a-zA-Z_-], and then the number part.
# The argument is returned unchanged when it has no name/number structure,
# when the list file cannot be opened, or when no line matches.
sub get_complete_filename
{
    my $return = $_[0];

    # Capture in list context: a failed match yields an empty list, so stale
    # $1/$2 from an earlier match can never leak in, and a number of "0" is
    # not mistaken for "no match".
    my ($name, $number) = $return =~ m/([^\d]+)(\d+)/
        or return $return;

    # Best effort: without the list file the short name is kept as it is.
    open(my $in, "<:encoding(utf-8)", "..\\..\\complete_filenames.txt")
        or do { warn "complete_filenames.txt not readable: $!\n"; return $return; };

    while (my $line = <$in>)
    {
        chomp($line);
        # Skip blank lines and the heading line(s) containing "Datei".
        next if ($line eq '' || $line =~ m/Datei/);

        # \Q...\E keeps regex metacharacters in the name literal and stops
        # "$name[" from being parsed as an array subscript.
        if ($line =~ m/\Q$name\E[a-zA-Z_-]+\Q$number\E/)
        {
            $return = $line;
            last;
        }
    }
    close $in;
    return $return;
}
# Recursively collect every sub-directory below a directory.
#
# $_[0]   directory to scan
# Appends the path of each sub-directory (joined with a backslash) to the
# global @folders, parents before their children. Dies when a directory
# cannot be opened.
sub get_files 
{
    my $dir = $_[0];

    # A lexical handle is required here: the sub recurses, and a shared
    # bareword DIR handle would be clobbered by the nested call.
    opendir(my $dh, $dir) || die "Unable to open $dir: $!";
    # Drop only the "." and ".." entries; the dots must be escaped, otherwise
    # every one- or two-character directory name is discarded as well.
    my @entries = grep { !/^\.\.?$/ } readdir($dh);
    closedir($dh);

    foreach my $entry (@entries)
    {
        my $path = "$dir\\$entry";
        if (-d $path)
        {
            push(@folders, $path);
            get_files($path);
        }
    }
    return;
}
# Write the TEI header file (<name>.teih) for one document.
#
# $_[0]   base file name without extension
# $_[1]   input folder that may contain an existing <name>.teih
# $_[2]   output folder
# When <input folder>\<name>.teih exists, the lines from the one containing
# <teiHeader> up to and including the one containing </teiHeader> are copied.
# Otherwise an empty header template, preceded by a comment asking for it to
# be filled in, is written. The target is <output folder>\<name>.teih, with
# the global $sub_folder inserted as a directory when the global $dev is 0.
# Returns the XML declaration string. Dies when a file cannot be opened.
sub header
{
    my $fn=$_[0];
    my $folder_in=$_[1];
    my $folder_out=$_[2];

    my $source = $folder_in."\\".$fn.".teih";
    my $target = $folder_out.($dev eq 0 ? "\\".$sub_folder : "")."\\".$fn.".teih";
    my $content = "";

    if (-e $source)
    {
        open(my $in, "<:encoding(utf-8)", $source)
            or die "cannot read header file \"$source\": $!\n";
        my $input = do { local $/; <$in> };
        close $in;

        my $read=0;
        foreach my $line (split(/[\n\r]/,$input))
        {
            chomp($line);
            if ($line=~m{</teiHeader>})
            {
                # The closing line is copied too, then everything after it is ignored.
                $content.=$line."\n";
                last;
            }
            elsif ($read eq 1 || $line=~m/<teiHeader>/)
            {
                $read=1;
                $content.=$line."\n";
            }
        }
    }
    else
    {
        $content = "\n\t<!--\n\t copy of the main_header or empty header!!!\n\t please update its content\n\t-->\n".
'<teiHeader>
    <fileDesc>
        <titleStmt>
            <title/>
            <respStmt>
                <resp/>
                <name/>
            </respStmt>
        </titleStmt>
        <publicationStmt>
            <distributor/>
        </publicationStmt>
        <sourceDesc>
            <bibl/>
        </sourceDesc>
    </fileDesc>
</teiHeader>';
    }

    open(my $out, ">:encoding(utf-8)", $target)
        or die "cannot write header file \"$target\": $!\n";
    print {$out} $content;
    close $out;

    return '<?xml version="1.0" encoding="utf-8"?>'; 
}
# Prefix one text line with its line-break placeholder and encode a word
# that is split across the line end.
#
# $_[0]   text of the line
# $_[1]   pending page-break placeholder(s) to emit in front of the line
# Increments the global $line_cnt. A word break is written in the input as
# "first/second" (Georgian letters or hyphens on both sides); the slash is
# replaced by a TRENNENDERZB placeholder carrying the number of the NEXT
# line, and a blank is appended to the line. Unless the previous line ended
# in such a coded break (global $coded_lb), the line is prefixed with a
# NORMALERZB placeholder carrying the current line number. $coded_lb is then
# updated for the following line.
# Returns the page placeholder, optional line placeholder and line text.
sub check_linebreak
{
    my ($line, $pg) = @_;
    my $letters = qr/[აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ-]+/;
    my $has_break = 0;

    $line_cnt++;
    if ($line =~ m{($letters)/($letters[[:punct:]]*)})
    {
        my ($first, $second) = ($1, $2);
        my $z="TRENNENDERZBTRENNENDERZB".($line_cnt+1)."TRENNENDERZBTRENNENDERZB";
        # \Q...\E: the captured parts may contain punctuation that is special in a regex.
        $line =~ s{\Q$first\E/\Q$second\E}{$first$z$second};
        $line.=" ";
        $has_break = 1;
    }

    # The previous line's coded break already carries this line's number,
    # so a second placeholder would duplicate it.
    my $ret = ($coded_lb == 0)
        ? $pg."NORMALERZBNORMALERZB".$line_cnt."NORMALERZBNORMALERZB ".$line
        : $pg.$line;

    $coded_lb = $has_break;
    return $ret;
}
# Pre-processing pass over the complete input text before it is split into lines.
#
# $_[0]   whole file content as one string
# Returns the string after all substitutions below have been applied in order.
#
# NOTE(review): the patterns in this sub look as if their backslash escapes
# were lost (e.g. "d+" where "\d+" would be expected, unescaped "/" inside
# s/.../.../) - confirm against the original script before relying on them.
sub anfangs_verarbeitung
{
    my $tmp=$_[0];
    $tmp =~ s/^x{FEFF}//;  # removes BOM

    # Normalisation of the '#' / '|' structure markers and of quotation marks.
    $tmp =~ s/#(?:(?:rn)|n|r)+(-{2,})/#$1n/mg;
    $tmp =~ s/^ps*$//g;
    $tmp =~ s/^s*([pP]d+)s*([cC]s*[0-9IVX]+)/$1n$2/g;
    $tmp =~ s/(?<=#)|(?=[pppctPCT])//g;
    $tmp =~ s/|(?=[pppctPCT])/#/g;
    $tmp =~ s/(?<![|#])([pppcPC]s*[dIVXMC]+)+/#$1/g;
    $tmp =~ s/|(d+)/#p$1/g;
    $tmp =~ s/«(?=[აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ])/»/g;
    $tmp =~ s/»(?![აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ])/«/g;
    $tmp =~ s/<<(?=[აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ])/»/g;
    $tmp =~ s/>>(?![აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ])/«/g;
    $tmp =~ s/(?:„|,,|")([აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ]+)«/»$1«/g;
    $tmp =~ s/»([აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ]+)[“"']/»$1«/g;

    # Repair of malformed single-letter tags; the trailing comments show the
    # input form each rule is aimed at.
    $tmp =~ s/^(d{4}s*წ.)s*$/#d $1/g;
    $tmp =~ s/<pol>/<pol>/g;    
    $tmp =~ s/<ა>/<a>/g;    
    $tmp =~ s/<?([athzee])>/</$1>/g;                  #<?a>           
    $tmp =~ s/[<>]/([athzee])(?![<>])/</$1>/g;            #</a >/a            
    $tmp =~ s/<([athzee])/>/</$1>/g;                  #<a/>
    $tmp =~ s/[<>]/([athzee])[<>]/</$1>/g;                #>/a> etc.          
    $tmp =~ s/[<>]([athzee])[<>]/<$1>/g;                    #>a< etc.
    $tmp =~ s/<([athzee])(?![<>])/<$1>/g;                   #<a >a          
    $tmp =~ s/(?<=[^></#])([athzee])[<>]/<$1>/g;           #a< a>          
    $tmp =~ s/(?<=[^><])/([athzee])[<>]/</$1>/g;      #/a< /a>    
    #$tmp =~ s/</([athze])>([^<]+)</[^1]>/<$1>$2</$1>/gm;
    ##$tmp =~s/<([athz])>([^<]+)</[^(?:$1)]>/<$1>$2</$1>/g;   
    ##$tmp =~s/<([athz])>([^<]+)<[^(?:1)]>/<$1>$2</$1>/g;
    $tmp =~ s/<([pol])>([^<]+)</1>-<1>([^<]+)</1>/<$1>$2-$3</$1>/g;
    #$tmp =~ s/<([athze])>([^<]+)</1>[--]<([athze])>([^<]+)</3>/<$1>$2-$4</$1>/g;
    ##$tmp =~ s/([^s]+)-<([athz])>([^<]+)</2>/<$2>$1-$3</$2>/g;
    ##$tmp =~ s/<name([^>]+)>([^<]+)</name>//g;
    $tmp =~ s/</</</g;
    ####$tmp =~ s/</(?![athzee])//g;
    $tmp =~ s/#{2,}/#/g;
    # The replacement texts below introduce <unclear>, <corr>, <ref> and <note> elements.
    $tmp =~ s/(//?([^)]*))/<unclear>$1</unclear>/gm;
    $tmp =~ s/<unclear></unclear>/<unclear/>/g;
    $tmp =~ s/(//([^)]+))/<corr>$1</corr>/gm;
    #$tmp =~ s/<s(d+)>([^<]+)</s1>/$2<ref target="#a$1" type="noteAnchor">$1</ref>/gm;
    $tmp =~ s/<[sS](d+)>/<ref target="a$1" type="noteAnchor">/g;
    $tmp =~ s/</[sS]d+>/</ref>/g;
    $tmp =~ s/([sS](d+)=?s*([^)]+))/<note xml:id="a$1" type="footnote">$2</note>n/gm;
    #$tmp =~ s/#f(d+)s*(.*)([^#|]+)/<note xml:id="a$1" type="footnote">$2</note>nn/gm;
    #$tmp =~ s/(ss*(d+)s*([^)]+))/<note xml:id="a$1" type="footnote">$2</note>nn/gm;
    $tmp =~ s/n{1,}</note>/</note>/gm;
    #$tmp =~ s/s*#-{2,}//gm;  
    # Georgian letter(s) directly before the '$' anchor are replaced by another letter.
    # No /m modifier is given, so '$' only matches at the end of the whole string.
    $tmp=~s/ვი$/ჳ/g;
    $tmp=~s/ხ$/ჴ/g;
    $tmp=~s/ე$/ჱ/g;
    $tmp=~s/ი$/ჲ/g;
    $tmp=~s/ფ$/ჶ/g;
    $tmp=~s/ვ$/უ/g;
    $tmp=~s/ო$/ჵ/g;
    $tmp=~s/#.{2,}/#--------------/g;
    return $tmp;
}
# Post-processing pass over the assembled XML body.
#
# $_[0]   XML text built by go_go_gadget()
# $_[1]   name that is tested against the date patterns below and written to the log
# $_[2]   folder in which 000_sperrschrift.txt is appended to
# Returns the text after clean-up, after the short tags have been turned into
# <term>/<name type="..."> elements and after the NORMALERZB / TRENNENDERZB /
# PAGE placeholders have been replaced by <lb n="..."/> and <pb n="..."/>.
#
# NOTE(review): the patterns in this sub look as if their backslash escapes
# were lost (e.g. "s+" where "\s+" would be expected, unescaped "/" inside
# s/.../.../) - confirm against the original script before relying on them.
sub end_verarbeitung
{
    my $tmp=$_[0];
    $tmp =~ s/[nr]{2,}/n/g;
    $tmp =~ s/<p>s+/<p>/g;
    $tmp =~ s/</p>s+/</p>/g;
    $tmp =~ s/<p></p>//g;
    $tmp =~ s/<div><p><div type="dateline">/<div type="dateline">/g;
    $tmp =~ s/<p><div type="dateline">/<div type="dateline">/g;
    # Short working tags become TEI <term>/<name> elements.
    $tmp =~ s/<pol>([^<]+)</pol>/<term type="political">$1</term>/g;
    $tmp =~ s/<term type="political"> ([^<]+)</name>/ <term type="political">$1</term>/g;
    $tmp =~ s/<a><name/<name/g;
    $tmp =~ s/<t>([^<]+)</t>/<name type="toponym">$1</name>/g;
    $tmp =~ s/<z>([^<]+)</z>/<name type="zoonym">$1</name>/g;         
    $tmp =~ s/<h>([^<]+)</h>/<name type="hydronym">$1</name>/g;           
    $tmp =~ s/<e>([^<]+)</e>/<name type="ethnonym">$1</name>/g;           
    #$tmp =~ s/<a>([^<]+)/<name type="anthroponym">$1</name>/g;
    #$tmp =~ s/([^>]+)</a>/<name type="anthroponym">$1</name>/g;
    $tmp =~ s/<u>([^<]+)<?/u>/<name type="unknown">$1</name>/g;
    # Punctuation, ellipsis, dash and whitespace clean-up.
    $tmp =~ s/s+([.:,!?)])/$1/g;
    $tmp =~ s/(()s+/$1/g;
    $tmp=~s/<p>#</p>//g;
    $tmp=~s/<div></div>//g;
    $tmp=~s/.s+./../g;
    $tmp=~s/..(?!<.)/.../g;
    $tmp=~s/.../…/g;
    $tmp=~s/…s*./…/g; 
    $tmp=~s/ +([,.…;:!?])/$1/g;
    #$tmp=~s/([,.…;:!?])(?!< )/$1 /g;
    $tmp=~s/-/–/g;
    $tmp=~s/,–/, –/g;
    $tmp=~s/([.:,!?)])–/$1 -/g;
    $tmp=~s/. </.</g;
    $tmp=~s/xml: id/xml:id/g;
    $tmp=~s/#-{2,}//g;
    $tmp=~s/<p></p>//g;
    $tmp=~s/s*</p><p>/</p>ntttt<p>/g;
    $tmp=~s/ +/ /g;
    #$tmp =~ s/„([^„“]+)„/„$1“/g;
    #$tmp=~s/<pb n="(d+)"/>(?:rn)*n*s*</div>/<pb n="$1"/>/gm;
    #$tmp=~s/<div type="Section">(?:rn)*n*s*<head>([^<]+)</head>/<div type="Section">ntttt<head>$1</head>ntttt</div>/gm;
    #$tmp=~s/s*<pb n="(d+)"/>(?:rn)*n*s*<div type="Section">/<div type="Section">ntttt<pb n="$1"/>/gm;
    $tmp=~s/</p><lg>/</p>ntttt<lg>/g;
    $tmp=~s/</p></div>/</p>nttt</div>/g;
    $tmp=~s/(<name[^>]*>) +/ $1/g;
    $tmp=~s/([^აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ]*) +</name>/</name>$1 /g;
    $tmp=~s/…s*</name>/</name>…/g;
    $tmp=~s/,s*././g;
    $tmp=~s/ +/ /g;
    # Placeholders written by check_linebreak() and go_go_gadget() become <lb/> and <pb/> elements.
    $tmp=~s/NORMALERZBNORMALERZB(d+)NORMALERZBNORMALERZB/nttttt<lb n="$1"/> /g;
    $tmp=~s/TRENNENDERZBTRENNENDERZB(d+)TRENNENDERZBTRENNENDERZB/<lb n="$1"/>/g;  
    $tmp=~s/PAGE PAGE PAGE PAGE PAGE(d+)PAGE PAGE PAGE PAGE PAGE/<pb n="$1"/>/g;  #//<pb n="".$current_page.""/>";  
    $tmp=~s/</p>(<pb n="d+"/>)/</p>ntttt$1/g;
    $tmp=~s/ (<pb n="d+"/>)/$1/g;
    $tmp=~s/</p>[rn]+s+<p>(<pb n="d+"/>)</p>/$1</p>/g;
    $tmp=~s/</l>(<pb n="d+"/>)/$1</l>/g;
    $tmp=~s/ +/ /g;
    $tmp=~s/<a><name/<name/g;
    $tmp=~s/<head></head>//;
    # Collected letter-spaced words, one per line, for the log file.
    my $sperr="";   
    # Letter-spacing detection is skipped when $_[1] matches one of these month/year patterns.
    if ($_[1]!~m/(?:04|07|11).1857/ && $_[1]!~m/(?:04|08).1858/)
    {
        # Each iteration rewrites the first remaining run of three or more
        # "Georgian letter + one other character" pairs; the loop ends once the
        # pattern no longer matches.
        # NOTE(review): this loop re-scans $tmp from the start on every pass, so
        # it looks like a candidate for the long run times mentioned in the
        # question - confirm by profiling.
        while ($tmp=~m/(?<![აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ])((?:[აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ][^აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ~–-]){3,})/)
        {
            my $sperr_org=$1;
            my $sperr_edit=$1;
            my $rest="";
            $sperr_edit=~s/ //g;
            $sperr.=$sperr_edit."n";
            if ($sperr_edit=~m/([^აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ]+)$/)
            {
                $rest=($1 ne "<"?" ":"").$1;
                $sperr_edit=~s/[^აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ]+$//;
            }       
            else { $rest=" "; }
            $tmp=~s/(?<![აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ])Q$sperr_orgE/<hi rend="letter-spacing">$sperr_edit</hi>$rest/;
        }   
        if ($sperr ne "")
        {
            # Append mode: the log accumulates entries from all processed files.
            open (OUT,">>:encoding(utf-8)", $_[2]."\000_sperrschrift.txt");
            print OUT $_[1]."nt".$sperr;
            close OUT;
        }
    }
    $tmp=~s/>([^<])</name>./>$1.</name>/g;
    return $tmp;
}
# Convert one input .txt file into a TEI wrapper (.xml), a text body (.txml)
# and, through header(), a header file (.teih).
#
# $_[0]   path of the input .txt file
# Returns nothing useful; returns early for files whose name contains "instruqcia".
# Reads the globals $folder_in, $folder_out and $dev, and sets $sub_folder,
# $line_cnt and $coded_lb.
#
# The body is built line by line. The flags $p (paragraph open), $v (verse
# group open), $div / $chapter (division open), $ut (comment note open),
# $written and $started record which elements are currently open, and $pg
# holds page-break placeholders that have not been emitted yet.
#
# NOTE(review): the patterns and string escapes in this sub look as if their
# backslashes were lost (e.g. "n" where "\n" would be expected, "\". for a
# backslash path separator) - confirm against the original script.
sub go_go_gadget
{
    my $file_xml=$_[0];
    my $file_html='';   my $output='';  my $output_filename=''; 
    my $chapter=0;  my $div=0; my $p=0; my $last_p=0;  my $v=0;
    my $input_xml=''; my $chapter_type=''; my $written=0;
    my $page=0; my $started=0;
    (my $fn,my $pn)=fileparse $file_xml;
    return if ($fn=~m/instruqcia/);
    print "tkonvertiere $fnn";
    # Derive the output base name and the output sub-folder from the file name.
    $fn=~s/.txt//g;
    $fn=~s/(d+)_/$1+/;
    $fn=~s/_/-/g;   $fn=~s/^([a-z]+)-/$1_/g;
    $sub_folder="";
    if ($fn=~m/^([^_]+_[a-zA-Z]+)/)
    {   
        $sub_folder=$1;     
    }
    $file_xml=~s/(?:/|\+)/\/g;
    open (IN,"<:encoding(utf-8)", $file_xml) || die "konnte die datei nicht oeffnen: $!n"; 
    $input_xml = do { local $/; <IN> } ;                        # slurp the whole input file into one string
    close IN;
    # -----------------------------------------
    $input_xml=anfangs_verarbeitung($input_xml);
    # ----------------------------------------- 
    $div=0;
    my $last_line='';   
    my @lines=split(/n/,$input_xml);   
    $line_cnt=0;    
    my $group_cnt=0;
    my $verse_cnt=0;
    my $pg='';
    my $first_page=0;
    my $last_page=0;
    my $has_chapters=0;
    my $ut=0;
    my $quote_open=0;
    my $section_cnt=0;
    my $chapter_cnt=0;
    $coded_lb=0;
    $chapter_type="Section";
    $has_chapters=1 if ($input_xml=~m/#s*[cC]s*[dIVXMC]+[–-]?[dIVXMC]*/);
    if ($has_chapters eq 0)
    {
        $output='<div type="Section">' ;
        #$chapter=1; 
    }
    # NOTE(review): this assignment overwrites the value set in the branch just above.
    $output='<div type="Content" n="1">'."n";
    foreach my $line (@lines)
    {   
        $line=~s/^Ls*//;
        chomp($line); $line=~s/n//g; $line=~s/r//g;   $line=~s/(s){2,}/$1/g; $line =~ s/^s+//g; $line =~ s/s+$//g;
        #$line=~s/([აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ])([,;.])([აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ])/$1$2 $3/g;
        if ($has_chapters eq 1 && $line =~ m/^s*#?s*|?[cC]s*(d+)s*(.*)/)      # chapter
        {
            # Close whatever is still open, then start a new chapter division with its heading.
            $output.="</note>" if ($ut == 1);
            if ($p eq 1) { $output.="</p>";   } 
            elsif ($v eq 1) { $output.="ntttt</lg>"; }
            if ($started eq 1)
            {
                if ($div eq 1) { $output.="nttt</div>"; }
                elsif ($chapter eq 1) { $output.="nttt</div>"; }
            }
            my $title=$2;
            if ($title) { $title=~s/<ref target="#a(d+)" type="noteAnchor">/<ref target="#a_$page_$1" type="noteAnchor">/g; }
            $chapter_cnt=$1;
            $output.="nttt".'<div type="Chapter" n="'.$chapter_cnt.'">'."ntttt<head>".($title?check_linebreak($title,$pg):$pg)."</head>";
            $chapter=1; $chapter_type="Chapter"; #$div=0;
            $p=0;  $written=0;  $v=0; $ut=0;
            $last_line=""; $started=0;
            $pg='';
        }
        elsif ($line =~ m/^s*#s*[pP]s*(d+)/)                # page break
        {
            if ($v ne 1 && $p eq 0) 
            { 
                if ($div == 0)
                {
                    if ($chapter_type eq "Section" || $has_chapters == 0) { $section_cnt++; $output.='<div type="Section" n="'.$section_cnt.'">'; }
                    else { $output.='<div type="Chapter" n="'.$chapter_cnt.'">'; }
                    $div=1;
                }
                $output.="<p>"; $p=1;  
            }
            #$output.="<pb n="".$1.""/>"; 
            # --- detecting page errors
            my $current_page=$1; #0;            
            #if ($first_page > 0)
            #{
            #   $current_page=$1;
            #   if ($current_page-$last_page<1)
            #   {
            #       $current_page=$last_page+1;
            #       $page_errors.=$fn."t".$last_page."n";
            #   }               
            #   elsif ($current_page-$last_page>1)
            #   {
            #       $page_errors.=$fn."t".$last_page."n";
            #   }               
            #}
            #else
            #{
            #   $first_page=$1;
            #   $current_page=$1;
            #}  
            #$last_page=$current_page;
            # ----
            # The page break is only remembered here; it is emitted in front of the next text that is written.
            $pg.="PAGE PAGE PAGE PAGE PAGE".$current_page."PAGE PAGE PAGE PAGE PAGE";   
            #$p=0;
            $page=$1;
            #$written=0; 
            $last_line="";
            # Line numbering restarts on every page.
            $line_cnt=0;
        }
        elsif ($line =~ m/s*#[tT]s*(.+)/)             # title
        {
            $output.="</note>" if ($ut == 1);
            if ($p eq 1) { $output.="</p>"; }
            elsif ($v eq 1) { $output.="ntttt</lg>"; }
            # NOTE(review): $chapter_type is only ever set to "Section" or "Chapter"
            # in this sub, so the comparison with 'chapter' below is always true.
            if (($chapter eq 1 || $div eq 1) && $chapter_type ne 'chapter')
            {
            #   if($chapter_type eq 'chapter') 
            #   {
            #       if ($started eq 1)
            #       {   $output.="nttt</div>nttt".'<div type="Chapter" n="'.$1.'">';}
            #       else { $output.='<div type="Chapter" n="'.$1.'">';}
            #   }
            #   else
            #   { 
                    if ($started eq 1) { $section_cnt++; $output.="nttt</div>nttt".'<div type="Section" n="'.$section_cnt.'">'; }       
                    else { $section_cnt++; $output.="nttt".'<div type="Section" n="'.$section_cnt.'">'; }
            #   }               
            }
            else
            {
                #$section_cnt++;
                #$output.='<div type="Section" n="'.$section_cnt.'">';
                #$div=1;
            }
            #$line_cnt++;
            $output.="ntttt<head>".$pg.$1."</head>"; 
            $pg='';
            $p=0; $written=0; $v=0; $ut=0;
            $last_line=''; $started=1;
        }
        elsif ($line =~ m/#vs*(.+)/)                           # verse
        {
            $output.="</note>" if ($ut == 1);
            if ($p eq 1) { $output.="</p>" ;}
            # The first verse line after non-verse content opens a new <lg> group.
            if ($v eq 0) { $group_cnt++; $verse_cnt=0; $output.="ntttt".'<lg n="'.$group_cnt.'">';  }
            $verse_cnt++;
            $last_line=$1;
            $line_cnt++;            
            $output.="nttttt".'<l n="'.$verse_cnt.'">'.$pg."NORMALERZBNORMALERZB".$line_cnt."NORMALERZBNORMALERZB".$1."</l>";         
            $p=0; 
            $written=1; $v=1;   
            $started=1; $pg=''; $ut=0;
        }
        elsif ($line=~m/#s*-{2,}/) #elsif ($line eq '' && $last_line ne '') # && $last_line!~m/[.!?]s*$/)          # paragraph
        {           
            if ($written eq 1)
            {
                $output.="</note>" if ($ut == 1); 
                if ($p eq 1) { $output.="</p>"; $p=0;  }
                # NOTE(review): $written is 1 inside this block, so the "$written eq 0" test can never be true.
                elsif ($v eq 1 && $written eq 0) { $output.="nttt</lg>ntttt"; $v=0; }
            }               
            #if ($p eq 0 && $v eq 0) { $output.="ntttt<p>"; $p=1; }
            $written=0; $last_line=''; $ut=0;
        }
        elsif ($line =~ m/^(?:#d)?s*(d{4}s*წ.)$/ || $line=~m/^s*(d{4}(?: – d+s*წ*.)?s*)$/ || $line=~m/^s*([0-9]+s*[აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ]+s*[0-9]+s*[აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ]*)$/)          # dateline
        {
            if ($div eq 1 || $chapter eq 1)
            { 
                $output.="</note>" if ($ut == 1);
                if ($p eq 1) { $output.="</p>"; }
                elsif ($v eq 1) { $output.="ntttt</lg>"; }
                $output.="nttt</div>"; 
                $chapter=0; $div=0; $ut=0;
            }
            $line_cnt++;
            $output.="ntttt<div type="dateline"><p>".$pg."NORMALERZBNORMALERZB".$line_cnt."NORMALERZBNORMALERZB".$1."</p></div>"; 
            $p=0;  $written=0; $v=0; $pg=''; 
            $last_line="";
        }
        # NOTE(review): the three alternatives below cover every combination of
        # $div and $chapter being 0 or 1, so this branch is effectively a plain "else".
        elsif ($div eq 1 || $chapter eq 1 || ($div eq 0 && $chapter eq 0))
        {
            if ($line!~m/^s*$/)
            {
                $output.="ntttt</lg>" if ($v eq 1);
                if ($div eq 0 && $chapter eq 0) { $div=1; $section_cnt++; $output.="nttt".'<div type="Section" n="'.$section_cnt.'">';}
                if ($p eq 0) { $output.="ntttt<p>";  }
                $line=~s/s*#s*//g;
                # --- quotes
                $line=~s/([.,;?!:])„/$1“/g;       #„ “
                if ($line=~m/^s*„/ && $line!~m/“/ && $line=~m/[.?!:]+s*$/)
                {   
                    $line.="“"; 
                }               
                else 
                {
                    $line=~s/„//g;
                }
                $line=~s/“//g if ($line=~m/“/ && $line!~m/„/);
                # ---
                # --- ref
                $line=~s/<ref target="a(d+)" type="noteAnchor">/<ref target="#a_$page_$1" type="noteAnchor">/g;
                $line=~s/<note xml:id="a(d+)" type="footnote">/<note xml:id="a_$page_$1" type="footnote">/g;          
                # ---
                if ($line=~m/|ut/)
                {
                    $line=~s/|ut/<note type="comment">/;
                    $ut=1;
                }
                $output.=check_linebreak($line,$pg);
                #$line_cnt++;
                #if ($line=~m/([აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ-]+)/([აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶ-]+[[:punct:]]*)/)
                #{                  
                #   my $tmp1=$1; my $tmp2=$2;
                #   my $z="TRENNENDERZBTRENNENDERZB".($line_cnt+1)."TRENNENDERZBTRENNENDERZB";
                #   $line=~s/Q$tmp1E/Q$tmp2E/$tmp1$z$tmp2/; $line.=" ";
                #   if ($coded_lb eq 0)
                #   {
                #       $output.=$pg."NORMALERZBNORMALERZB".$line_cnt."NORMALERZBNORMALERZB ".$line;
                #   }
                #   else
                #   {
                #       $output.=$pg.$line; 
                #   }
                #   $coded_lb=1;
                #}
                #else
                #{
                #   if ($coded_lb eq 0)
                #   {
                #       $output.=$pg."NORMALERZBNORMALERZB".$line_cnt."NORMALERZBNORMALERZB ".$line;
                #   }
                #   else
                #   {
                #       $output.=$pg.$line;                     
                #   }
                #   $coded_lb=0;
                #}              
                #$output.=$pg." "."NORMALERZBNORMALERZB".$line_cnt."NORMALERZBNORMALERZB".$line;
                $last_line=$line;
                $p=1; $written=1; $v=0; $started=1;$pg='';
            }
        }
    }
    # Close whatever is still open after the last input line.
    if ($p eq 1) { $output.="</p>";}
    elsif ($v eq 1) { $output.="ntttt</lg>"; }
    if ($div eq 1) { $output.="nttt</div>"; }
    elsif ($chapter eq 1) { $output.="nttt</div>"; }
    # -----------------------------------------
    $output=end_verarbeitung($output,$fn,$folder_out)."</div>";
    # -----------------------------------------
    #$fn=get_complete_filename($fn);    
    # The per-file sub-folder is only created when $dev is 0; this happens before header() writes into it.
    mkdir($folder_out."\".$sub_folder,0777) if ($dev eq 0 && !(-d $folder_out."\".$sub_folder));  
    my $txt='<text rend="Section" xml:lang="kat">';
    $txt='<text rend="'.($section_cnt?"Section ":"").'Chapter" xml:lang="kat">' if ($has_chapters eq 1);
    $output=header($fn,$folder_in,$folder_out)."nt".$txt.'    
        <body>
            '.$output.' 
        </body>
    </text>';
    $output=~s/(<body>(?:rn)*s*<pb n="d+"/>)(?:rn)*s*</div>/$1/g;
    $output_filename=$folder_out.($dev eq 0 ? "\".$sub_folder : "")."\".$fn.".xml";           
    # The .xml file is only a wrapper that pulls in the .teih and .txml files as external entities.
    open (OUT, ">:encoding(utf-8)", $output_filename);
    print OUT '<?xml version="1.0" encoding="utf-8"?>'."n".'<!DOCTYPE TEI [
    <!ENTITY header SYSTEM "'.$fn.'.teih">
    <!ENTITY text SYSTEM "'.$fn.'.txml">
]>
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:gnc="http://iness.uib.no/ns/1.0">
    &header;
    &text;
</TEI>';
    close (OUT);
    # The converted body goes into a file of the same name with the .txml extension.
    $output_filename=~s/.xml/.txml/gi;
    open (OUT, ">:encoding(utf-8)",$output_filename) || die "konnte die ausgabedatei "$output_filename" nicht oeffnen: $!n";
    print OUT $output;
    close OUT;          
}
# Entry point: walks the start folder(s), derives the output folder for every
# input folder and converts each *.txt file in it with go_go_gadget().
# Takes no arguments. Sets the globals @folders, $folder_in, $folder_out,
# $page_errors and $dev.
#
# NOTE(review): the path strings and patterns look as if their backslash
# escapes were lost (e.g. "D:\bla", $root."\") - confirm against the
# original script.
sub main
{
    print "nBeginne...n";
    my $root="D:\bla";
    my @startfolders=($root."\");
    $folder_out="D:\bla";
    foreach my $startfolder(@startfolders)
    {
        @folders=();
        get_files($startfolder);
        # A start folder without sub-folders is processed itself.
        if (scalar(@folders)<1) { push(@folders,$startfolder); }
        $root=~s/0_Eingabe/1_Ausgabe\1/;
        # The global $folder_in serves as the loop variable, so go_go_gadget() sees the current folder.
        foreach $folder_in(@folders)
        {
            $page_errors='';
            # The output folder mirrors the input folder, with 0_Eingabe replaced by 1_Ausgabe.
            $folder_out=$folder_in;
            $folder_out=~s/0_Eingabe/1_Ausgabe/;
            # creating subfolders too
            #my $tmp=$folder_out;
            #$tmp=~s/Q$rootE//;           
            #my @arr_tmp=split("\\",$tmp);
            #$tmp="";
            #foreach my $dings (@arr_tmp)
            #{              
            #   next if ($dings eq '');
            #   $tmp.="\".$dings;                              
            #   mkdir($root.$tmp,0777) if (!(-d $root.$tmp));
            #}#
            # ----- 
            $folder_out=~s/\+/\/g;
            $dev = 1;   # developer mode is on when this is 1
            $folder_out=~s/1_Ausgabe.*/1_Ausgabe/ if ($dev eq 1);
            print "Ordner ".$folder_in."n";
            # One call per input file; this is the step that can take very long for some files.
            foreach my $file_xml(<${folder_in}/*.txt>)
            {
                go_go_gadget($file_xml);
            }
            # NOTE(review): this unconditional "next" makes the page-error report below unreachable.
            next;
            if ($page_errors ne '')
            {
                $folder_in=~m/0_Eingabe\(.+)/;
                my $tmp=$1;
                $tmp=~s/\+/__/g;           
                open (OUT, ">:encoding(utf-8)", $root."\".$tmp.".txt") || die "ntPage errors to file ".$tmp.": ".$!."n";
                print OUT $page_errors;
                close OUT;
            }
        }
    }
    print "Fertig!nn";
}
# Run the conversion when the script is executed.
main();

然而,有些文件的处理时间太长。如果某个文件的处理超过 6 秒,我希望直接跳过它,让程序继续处理下一个文件。请问如何用超时机制来实现这一点?

我没有仔细阅读您的代码,因此无法准确指出超时代码应该放在哪里,但使用 Time::Out 应该可以很容易地实现您想要的效果。只需这样写:

use Time::Out 'timeout';
timeout 6 => sub {
  # code that you want to time out after 6 seconds goes here
};
if ($@) {
  # the code above was interrupted because it ran for more than 6 seconds
}

相关内容

  • 没有找到相关文章

最新更新