如何加快VBA将数据从Word提取到Excel的速度



我是stackoverflow的新手,也是VBA编码的新手。在我的工作中,我们以Mss Word的形式向我们提供了货运数据,这不是很有用。我找到了一种使用 VBA 传输数据的方法,并拥有功能齐全的代码。但是,数据集包含数十万条记录。我尝试用 200k 条记录运行一个月的数据,花了 5 天时间。只是想知道我的代码中是否有任何可以改进以加快该过程的内容。我尝试关闭屏幕更新,事件,计算,但它没有多大作用。提前感谢您的帮助。

Sub Word_to_Excel()
Dim FName As String, FD As FileDialog
Dim wdApp As Object
Dim wdDoc As Object
Dim WDR, WDCheck, ShipmentID As Object
Dim ExR As Range
Dim file
Dim Path As String
Dim ImportDate As Object
Dim ImportValue As String
Dim ShipmentIDcheck As String
Dim objResult

Set objShell = CreateObject("WScript.Shell")
Set ExR = Selection ' current location in Excel Sheet
' Select Folder containing word documents
Set FD = Application.FileDialog(msoFileDialogFolderPicker)
FD.Show
FName = FD.SelectedItems(1)
file = Dir(FName & "*.docx")
Set wdApp = CreateObject("Word.Application")
' Open word document in the folder, run macro, close it and open the next word document until there are none left
Do While file <> ""
wdApp.Documents.Open Filename:=FName & "" & file
wdApp.ActiveWindow.ActivePane.View.Type = 1
wdApp.Visible = True
' Once the word doc is open, go to beginning of document and search for CTY/SITE/SORT:
wdApp.Selection.HomeKey Unit:=6
wdApp.Selection.Find.ClearFormatting
wdApp.Selection.Find.Execute "CTY/SITE/SORT:"
Set WDCheck = wdApp.Selection
' If "CTY/SITE/SORT:" is found, then look for Shipment ID
Do While WDCheck = "CTY/SITE/SORT:"
' Find first shipment
wdApp.Selection.HomeKey Unit:=5
wdApp.Selection.MoveDown Unit:=5, Count:=11
wdApp.Selection.MoveRight Unit:=1, Count:=1
wdApp.Selection.MoveRight Unit:=1, Count:=11, Extend:=1
Set ShipmentID = wdApp.Selection
ShipmentIDcheck = Replace(ShipmentID, " ", "")
' Transfer information from Word to Excel for a Shipment ID and go to the next one.
' Shipment ID should be a string that is 11 characters long
' If Shipment ID no longer exist, go to next page by searching for the next CTY/SITE/SORT:
Do While Len(Trim(ShipmentIDcheck)) = 11
i = i + 1
ExR(i, 1) = file
ExR(i, 2) = ShipmentIDcheck
' Consignee Name
wdApp.Selection.MoveUp Unit:=5, Count:=1
wdApp.Selection.MoveRight Unit:=1, Count:=12
wdApp.Selection.MoveRight Unit:=1, Count:=23, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 3) = Trim(WDR)
' Importer Name
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=23, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 8) = Trim(WDR)
' Shipper Name
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=23, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 13) = Trim(WDR)
' Quantity
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=10, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 19) = Trim(WDR)
' Weight
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=12, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 20) = Trim(WDR)
' Value
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=12, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 21) = Trim(WDR)
' Broker
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=11, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 23) = Trim(WDR)
' Consignee Street
wdApp.Selection.HomeKey Unit:=5
wdApp.Selection.MoveDown Unit:=5, Count:=1
wdApp.Selection.MoveRight Unit:=1, Count:=13
wdApp.Selection.MoveRight Unit:=1, Count:=23, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 4) = Trim(WDR)
' Importer Street
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=23, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 9) = Trim(WDR)
' Shipper Street
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=23, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 14) = Trim(WDR)
' Description
wdApp.Selection.MoveRight Unit:=1, Count:=8
wdApp.Selection.MoveRight Unit:=1, Count:=40, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 18) = Trim(WDR)
' Consignee City
wdApp.Selection.HomeKey Unit:=5
wdApp.Selection.MoveDown Unit:=5, Count:=1
wdApp.Selection.MoveRight Unit:=1, Count:=13
wdApp.Selection.MoveRight Unit:=1, Count:=13, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 5) = Trim(WDR)
' Consignee Province
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=2, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 6) = Trim(WDR)
' Consignee Postal
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=6, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 7) = Trim(WDR)
' Importer City
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=13, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 10) = Trim(WDR)
' Importer Province
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=2, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 11) = Trim(WDR)
' Importer Postal
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=6, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 12) = Trim(WDR)
' Shipper City
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=13, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 15) = Trim(WDR)
' Shipper Province
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=2, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 16) = Trim(WDR)
' Shipper Postal
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=6, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 17) = Trim(WDR)
' Country of Origin
wdApp.Selection.MoveRight Unit:=1, Count:=29
wdApp.Selection.MoveRight Unit:=1, Count:=21, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 22) = Trim(WDR)
wdApp.Selection.HomeKey Unit:=5
wdApp.Selection.MoveDown Unit:=5, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=1
wdApp.Selection.MoveRight Unit:=1, Count:=11, Extend:=1
Set ShipmentID = wdApp.Selection
' Remove spaces from selection. Selection is then used to check if it is a shipment ID.
' If it is, then data for that shipment ID is transferred. If not, macro will go to the next page in the Word Doc.
ShipmentIDcheck = Replace(ShipmentID, " ", "")
ActiveCell.Offset(1).Select
Loop
'Simulate keyboard press "NUMLOCK" to prevent screen from locking
objResult = objShell.SendKeys("{NUMLOCK}")
wdApp.Selection.HomeKey Unit:=5
wdApp.Selection.Find.ClearFormatting
wdApp.Selection.Find.Execute "CTY/SITE/SORT:"
Set WDCheck = wdApp.Selection
Loop
wdApp.ActiveDocument.Close SaveChanges:=wdDoNotSaveChanges
ActiveWorkbook.Save
file = Dir()
Loop
wdApp.Quit
MsgBox "Data extraction completed at:" & vbNewLine & Format(Now, "mmmm d, yyyy hh:mm AM/PM")
End Sub

这就是数据集的格式设置方式。每天有多个 Word 文档包含此数据集的页面和页面。每页的货件数量各不相同。但格式始终相同。word文档中没有表格,只有用空格分隔的文本。CTY/SITE/SORT:每个页面都是唯一的,我将其用作锚点。如果宏找到它,那么它会向下 11 行并获取第一个货件 ID 和其他信息。然后,它会检查下一个货件编号。如果不存在,则转到下一页并重复该过程。

REPORT NUM   : ABC1234                                   OPERATIONS SYSTEM                                       PAGE NUM:   2      
CTY/SITE/SORT: CA 00123                                    SUMMARY CARGO                                         RUN TIME: 07:33:43 
SORT DATE    :                                            INBOUND - SCAN                                         RUN DATE: 01AUG19  
                                            OPER ID : ABC123  
MVMT: 12345678   MVMT DT: 01AUG19    MAWB:                  PROD TYP:      DTY TYP:      IMP CTY:      EXP CTY:      BL TYP:        
COURIER REMISSION  MANIFEST               EXPORT SITE: US 12345                                
GCCN ID:               EXPECTED SHPTS:           EXPECTED PKGS:             EXPECTED WEIGHT:                                        
CUSTOMS NUM     CONSIGNEE NAME           IMPORTER NAME           SHIPPER NAME        CSA    QTY     WGT(LBS)   VALUE  BROKER        
SHIPMENT ID                                                                               DESCRIPTION           (CAD) CTRY OF ORIGIN
JOHN SMITH              ABC COMPANY             XYZ COMPANY                      1          1.1      1000.00 UNCONSIGNED
ABC12345678 123 MAIN STREET         345 RANDOM ROAD         UNIVERSITY OF WASHINGTO       BICYCLE PARTS                             
VANCOUVER     BC V1A1A1 VANCOUVER     BC V2B1B2 SEATTLE       WA 981234                            US                   
JOHN SMITH              ABC COMPANY             XYZ COMPANY                      1          1.1      1000.00 UNCONSIGNED
ABC12345678 123 MAIN STREET         345 RANDOM ROAD         UNIVERSITY OF WASHINGTO       BICYCLE PARTS                             
VANCOUVER     BC V1A1A1 VANCOUVER     BC V2B1B2 SEATTLE       WA 981234                            US                   
JOHN SMITH              ABC COMPANY             XYZ COMPANY                      1          1.1      1000.00 UNCONSIGNED
ABC12345678 123 MAIN STREET         345 RANDOM ROAD         UNIVERSITY OF WASHINGTO       BICYCLE PARTS                             
VANCOUVER     BC V1A1A1 VANCOUVER     BC V2B1B2 SEATTLE       WA 981234                            US                   
JOHN SMITH              ABC COMPANY             XYZ COMPANY                      1          1.1      1000.00 UNCONSIGNED
ABC12345678 123 MAIN STREET         345 RANDOM ROAD         UNIVERSITY OF WASHINGTO       BICYCLE PARTS                             
VANCOUVER     BC V1A1A1 VANCOUVER     BC V2B1B2 SEATTLE       WA 981234                            US                   
TOTAL FOR DUTY TYPE COURIER REMISSION                                                                                         
TOTAL SHIPMENTS:                      4                                                                                       
TOTAL PACKAGES :                      4                                                                                       
TOTAL WEIGHT   :                     70.9 LBS                                                                                 
TOTAL VALUES   :                   4000.00                                                                                         
* * *                                      

我使用以下代码来清理数据集并将它们排列为每行一条记录,每行由一个段落分隔(谢谢你,macropod(。由于数据被排列成由空格分隔的列,我可以将其另存为.txt文件并将其导入 excel。现在的挑战是将代码应用于文件夹中的所有文档,并为每个文档生成一个.txt文件。或者,如果代码可以将清理后的.docx文件中的所有数据合并到一个.txt文件中,那就更好了。

Sub CleanWordDoc()
Application.ScreenUpdating = False
Dim p As Long, StrOut As String
With ActiveDocument.Range
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Wrap = wdFindContinue
.Forward = True
.Format = False
.MatchWildcards = True
.Text = "REPORT NUM   : * CTRY OF ORIGIN^13" 'Clean header on each page
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "[ ]{2,}ACTUAL SHP TOTAL*[ ]{20,}^13^m" 'Clean footer on some pages
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "[ ]{2,}TOTAL FOR DUTY*[ ]{20,}^13^m" 'Clean more footers
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "REPORT NUM   :*SUMMARY*[*] [*][ ]{20,}^13" 'Clean last page
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "^m^13" ' Clean all page breaks
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "^13^13" ' Clean empty paragraphs
.Replacement.Text = "^13"
.Execute Replace:=wdReplaceAll
.Text = "<[ ]{1,}^13" ' Clean spaces and paragraphs at the beginning of doc
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "(*)^13(*)^13(*^13)" ' Combine 3 paragraphs into one
.Replacement.Text = "1 2 3"
.Execute Replace:=wdReplaceAll
End With
End With
Application.ScreenUpdating = True
End Sub

这就是清理后的.docx的样子(有数百条记录(:

12345678900 ABC COMPANY             DEF COMPANY             XYZ COMPANY                      1          1.1       123.45 AAABROKER    A0B12345LFD ABC ADDRESS             DEF ADDRESS             XYZ ADDRESS                   BICYCLE PARTS                                           VANCOUVER     BC V1A1A1 MARKHAM       ON L1L1L1 SHENZHEN         512323                            CN                   
98765432100 ABC COMPANY             DEF COMPANY             XYZ COMPANY                      1          1.1       123.45 AAABROKER    A0B12345LFD ABC ADDRESS             DEF ADDRESS             XYZ ADDRESS                   BICYCLE PARTS                                           VANCOUVER     BC V1A1A1 MARKHAM       ON L1L1L1 SHENZHEN         512323                            CN                   

您的代码很慢,因为您正在驱动 Word 来分析您的数据。 将其处理为纯文本会快得多

我要采取的方法是说服您的数据提供商将其作为文本文件提供。 如果这是不可能的,那么编写一个VBA程序将每个Word文件转换为文本。

完成后,使用简单的文本文件处理从文件中读取每一行,分析所需数据并将其提取到变体数组中,然后将结果写入 Excel。

注意:我没有包含完整的代码来分析和提取您的数据,我将留给您。 其中包含一个小片段来帮助您入门。

像这样的东西

Option Explicit
Sub Demo()
Dim t1 As Single, t2 As Single
Dim DataFile As String
Dim DataPath As String
Dim SavePath As String
Dim rw As Long
Dim ws As Worksheet
Dim WordApp As Word.Application
On Error GoTo EH
'identify sheet to take results
Set ws = ActiveSheet
t1 = Timer() '<~~ only used to report run time
' Create an instance of Word
Set WordApp = New Word.Application
WordApp.Visible = False
' Set up path to data files
DataPath = "C:DataTempSO" '<~~ update to suit
SavePath = DataPath & "Txt" '<~~ optional: save text files to a seperate subfolder
' Get first word file in directory
DataFile = Dir(DataPath & "*.docx")
Do While DataFile <> vbNullString
Debug.Print "Convert ", DataFile
' Open in word, save as text
ConvertToText WordApp, DataPath, DataFile, SavePath
DoEvents
' Get next file
DataFile = Dir
Loop
' Tidy up
WordApp.Quit
Set WordApp = Nothing
t2 = Timer
Debug.Print "Convert Time", t2 - t1

t1 = Timer()
' Get first text file in directory
DataFile = Dir(SavePath & "*.txt")
rw = 1
Do While DataFile <> vbNullString
Debug.Print "Read ", DataFile
' process the file
ReadFile ws, SavePath, DataFile, rw
DoEvents
' Get next file
DataFile = Dir
Loop

t2 = Timer
Debug.Print "Read Time", t2 - t1
Exit Sub
EH:
On Error Resume Next
' Tidy up
If Not WordApp Is Nothing Then WordApp.Quit
Set WordApp = Nothing
End Sub
Sub ConvertToText(WordApp As Word.Application, ByVal FilePath As String, ByVal FileName As String, ByVal SavePath As String)
Dim WordDoc As Word.Document
Dim i As Long
' ensure file is closed if Sub errors
On Error GoTo EH
' Open the file
Set WordDoc = WordApp.Documents.Open(FilePath & FileName)
' generate Text file name
i = InStrRev(FileName, ".")
FileName = Left$(FileName, i) & "txt"
' Save as text
WordDoc.SaveAs2 _
FileName:=SavePath & FileName, _
FileFormat:=wdFormatText, _
AddToRecentFiles:=False, _
EmbedTrueTypeFonts:=False, _
SaveNativePictureFormat:=False, _
SaveFormsData:=False, _
SaveAsAOCELetter:=False, _
Encoding:=1252, _
InsertLineBreaks:=False, _
AllowSubstitutions:=False, _
LineEnding:=0, _
CompatibilityMode:=0
EH:
On Error Resume Next
' Close file
WordDoc.Close False
End Sub
Sub ReadFile(ws As Worksheet, FilePath As String, FileName As String, ByRef rw)
'parse text file
Dim Ln As String
Dim FileNum As Integer
Dim ExtractedData() As Variant
Dim idx As Long
' ensure file is closed if Sub errors
On Error GoTo EH
' Text file handling
FileNum = FreeFile
Open FilePath & FileName For Input As FileNum
' Restults array.
ReDim ExtractedData(1 To 1000000, 1 To 1) ' Excel sheet can hold at most 1048576 rows
idx = 0
Do While Not EOF(FileNum)
' Read a line from file
Line Input #FileNum, Ln
' Add your code to extract required data here
If Ln Like " [A-Z][A-Z][A-z]########*" Then
idx = idx + 1
ExtractedData(idx, 1) = Ln
End If
'============================================
Loop
' Place extracted data onto sheet
ws.Cells(rw, 1).Resize(idx, 1) = ExtractedData
' Update row num for next file
rw = rw + idx
EH:
On Error Resume Next
' Clean Up
Close #FileNum
End Sub

问题解决了。感谢@chris尼尔森和@macropod的帮助。

这是我使用的完成代码,它能够在几分钟而不是几天内处理数据:

Option Explicit
Sub ConvertWordtoExcel()
Dim t1 As Single, t2 As Single
Dim DataFile As String
Dim DataPath As String
Dim SavePath As String
Dim SavePathFolder As String
Dim rw As Long
Dim ws As Worksheet
Dim WordApp As Word.Application
Dim FD As FileDialog
On Error GoTo EH
'identify sheet to take results
Set ws = ActiveSheet
t1 = Timer() '<~~ only used to report run time
' Create an instance of Word
Set WordApp = New Word.Application
WordApp.Visible = False
' Set up path to data files
Set FD = Application.FileDialog(msoFileDialogFolderPicker) 'Open Folder Picker
FD.Show
DataPath = FD.SelectedItems(1) & ""
Debug.Print "Folder", DataPath
SavePath = DataPath & "Txt" '<~~ save text files to a separate subfolder called Txt
SavePathFolder = Dir(SavePath, vbDirectory) ' If the Txt subfolder does not exist, create it
If SavePathFolder = vbNullString Then
VBA.FileSystem.MkDir (SavePath)
End If
' Get first word file in directory
DataFile = Dir(DataPath & "*.docx")
Do While DataFile <> vbNullString
Debug.Print "Convert ", DataFile
' Open in word, save as text
ConvertToText WordApp, DataPath, DataFile, SavePath
DoEvents
' Get next file
DataFile = Dir
Loop
' Tidy up
WordApp.Quit
Set WordApp = Nothing
t2 = Timer
Debug.Print "Convert Time", t2 - t1

t1 = Timer()
' Get first text file in directory
DataFile = Dir(SavePath & "*.txt")
rw = 1
Do While DataFile <> vbNullString
Debug.Print "Read ", DataFile
' process the file
ReadFile ws, SavePath, DataFile, rw
DoEvents
' Get next file
DataFile = Dir
Loop

t2 = Timer
Debug.Print "Read Time", t2 - t1
Exit Sub
EH:
On Error Resume Next
' Tidy up
If Not WordApp Is Nothing Then WordApp.Quit
Set WordApp = Nothing
End Sub
Sub ConvertToText(WordApp As Word.Application, ByVal FilePath As String, ByVal FileName As String, ByVal SavePath As String)
Dim WordDoc As Word.Document
Dim i As Long
' ensure file is closed if Sub errors
On Error GoTo EH
' Open the file
Set WordDoc = WordApp.Documents.Open(FilePath & FileName)
With WordDoc.Range
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Wrap = wdFindContinue
.Forward = True
.Format = False
.MatchWildcards = True
.Text = "[ ]{2,}[^13]{1,}(REPORT NUM   :)" 'Clean header on each page
.Replacement.Text = "1"
.Execute Replace:=wdReplaceAll
.Text = "REPORT NUM   : * CTRY OF ORIGIN^13" 'Clean header on each page
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "[ ]{2,}ACTUAL SHP TOTAL*[ ]{20,}^13^m" 'Clean footer on some pages
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "[ ]{2,}TOTAL FOR DUTY*[ ]{20,}^13^m" 'Clean more footers
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "REPORT NUM   :*SUMMARY*[*] [*][ ]{20,}[^13]{1,}" 'Clean last page
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "^13^m" ' Clean all page breaks
.Replacement.Text = "^13"
.Execute Replace:=wdReplaceAll
.Text = "[^13]{2,}" ' Clean empty paragraphs
.Replacement.Text = "^13"
.Execute Replace:=wdReplaceAll
.Text = "(*)^13(*)^13(*)^13" ' Combine 3 paragraphs into one and add file name at the end
.Replacement.Text = "1 2 3 " + FileName + "^13"
.Execute Replace:=wdReplaceAll
End With
End With

' generate Text file name
i = InStrRev(FileName, ".")
FileName = Left$(FileName, i) & "txt"
' Save as text
WordDoc.SaveAs2 _
FileName:=SavePath & FileName, _
FileFormat:=wdFormatText, _
AddToRecentFiles:=False, _
EmbedTrueTypeFonts:=False, _
SaveNativePictureFormat:=False, _
SaveFormsData:=False, _
SaveAsAOCELetter:=False, _
Encoding:=1252, _
InsertLineBreaks:=False, _
AllowSubstitutions:=False, _
LineEnding:=0, _
CompatibilityMode:=0
EH:
On Error Resume Next
' Close file
WordDoc.Close False
End Sub
Sub ReadFile(ws As Worksheet, FilePath As String, FileName As String, ByRef rw)
'parse text file
Dim Ln As String
Dim FileNum As Integer
Dim ExtractedData() As Variant
Dim idx As Long
' ensure file is closed if Sub errors
On Error GoTo EH
' Text file handling
FileNum = FreeFile
Open FilePath & FileName For Input As FileNum
' Restults array.
ReDim ExtractedData(1 To 1000000, 1 To 1) ' Excel sheet can hold at most 1048576 rows
idx = 0
Do While Not EOF(FileNum)
' Read a line from file
Line Input #FileNum, Ln
' Add your code to extract required data here
'If Ln Like " [A-Z][A-Z][A-z]########*" Then
If Ln Like " *" Then
idx = idx + 1
ExtractedData(idx, 1) = Ln
End If
'End If
'============================================
Loop
' Place extracted data onto sheet
ws.Cells(rw, 1).Resize(idx, 1) = ExtractedData
' Update row num for next file
rw = rw + idx
EH:
On Error Resume Next
' Clean Up
Close #FileNum
End Sub

对我来说,"CTY/SITE/SORT:"在哪里或如何与您正在做的事情相关并不明显,因为它不会出现在您发布的数据片段中。以下 Word 宏演示如何分析仅包含已发布数据片段中的数据的文档。按照编码,它只是在文档末尾输出第一个这样的记录 - 为整个文档生成输出所需的代码已被注释掉。代码中的注释显示输出的结构。只需将除最后一个实例之外的所有 vbCr 实例替换为 vbTab 即可将每条记录的输出转换为 Excel 的单行。

有关用于处理整个 Word 文档文件夹的 Excel 驱动的代码,请参阅例如:https://www.excelguru.ca/forums/showthread.php?8900-Help-with-VBA-to-extract-data-from-Word-to-Excel&p=36586&viewfull=1#post36586。如您所见,无需使用 Select - 这会对性能造成重大影响。

Sub Demo()
Application.ScreenUpdating = False
Dim p As Long, StrOut As String
With ActiveDocument.Range
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Wrap = wdFindContinue
.Forward = True
.Format = False
.MatchWildcards = True
.Text = "^13[!^13]@^13 <[A-Z]{3}[0-9]{8}"
.Replacement.Text = "^p^&"
.Execute Replace:=wdReplaceAll
.Text = "REPORT NUM * CTRY OF ORIGIN^13"
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "^13[ ]@TOTAL FOR DUTY * TOTAL VALUES[!^13]@^13*^13"
.Replacement.Text = "^p"
.Execute Replace:=wdReplaceAll
.Text = "[ ]{2,}"
.Replacement.Text = "^t"
.Execute Replace:=wdReplaceAll
.Text = "(^t[A-Z]{2}) ([A-Z0-9]{5,})[ ^t]"
.Replacement.Text = "1^t2^t"
.Execute Replace:=wdReplaceAll
.Text = "([0-9]{1,}.[0-9]{2}) "
.Replacement.Text = "1^t"
.Execute Replace:=wdReplaceAll
.Text = "^13 (<[A-Z]{3}[0-9]{8}) "
.Replacement.Text = "^p1^t"
.Execute Replace:=wdReplaceAll
.Text = "^13"
.Replacement.Text = "^l"
.Execute Replace:=wdReplaceAll
.Text = "[^l]{2,}"
.Replacement.Text = "^p"
.Execute Replace:=wdReplaceAll
.Text = "^l"
.Replacement.Text = "^t"
.Execute Replace:=wdReplaceAll
End With
For p = 2 To .Paragraphs.Count - 1
With .Paragraphs(p).Range
'StrOut =
'Shipment ID, Description, Quantity, Weight, Value, Broker, Country of Origin
'Consignee Name, Consignee Street, Consignee City, Consignee State, Consignee Zip,
'Importer Name, Importer Street, Importer City, Importer State, Importer Zip,
'Shipper Name, Shipper Street, Shipper City, Shipper State, Shipper Zip,
StrOut = StrOut & Split(.Text, vbTab)(8) & vbTab & Split(.Text, vbTab)(12) & vbTab & Split(.Text, vbTab)(4) & vbTab & Split(.Text, vbTab)(5) & vbTab & Split(.Text, vbTab)(6) & vbTab & Split(.Text, vbTab)(7) & vbTab & Split(.Text, vbTab)(24) & vbCr & _
Split(.Text, vbTab)(1) & vbTab & Split(.Text, vbTab)(9) & vbTab & Split(.Text, vbTab)(15) & vbTab & Split(.Text, vbTab)(16) & vbTab & Split(.Text, vbTab)(17) & vbCr & _
Split(.Text, vbTab)(2) & vbTab & Split(.Text, vbTab)(10) & vbTab & Split(.Text, vbTab)(18) & vbTab & Split(.Text, vbTab)(19) & vbTab & Split(.Text, vbTab)(20) & vbCr & _
Split(.Text, vbTab)(3) & vbTab & Split(.Text, vbTab)(11) & vbTab & Split(.Text, vbTab)(21) & vbTab & Split(.Text, vbTab)(22) & vbTab & Split(.Text, vbTab)(23) & vbCr
End With
Next
'Instead of .InsertAfter, write StrOut to Excel
.InsertAfter vbCr & StrOut
End With
Application.ScreenUpdating = True
End Sub

要填充工作表,您可以使用如下内容:

Dim StrRow As String, lRow As Long, r As Long, c As Long
With ActiveSheet
lRow = .UsedRange.Cells.SpecialCells(xlCellTypeLastCell).Row + 1
For r = 0 To UBound(Split(StrOut, vbCr))
StrRow = Split(StrOut, vbCr)(r)
For c = 0 To UBound(Split(StrRow, vbTab))
.Cells(r + lRow, c + 1).Value = Split(StrRow, vbTab)(c)
Next
Next
End With

最新更新