如何将数据从 pdf 复制并粘贴到 excel?



>场景:我的电子邮件中收到了大量发票,并将它们保存在folder("C:UsersVbattul2DesktopInvoices")中,然后手动逐个打开它们,复制所需的数据并将它们粘贴到我的 excel 跟踪器中。请注意,所有发票的格式相同。

自动化的机会:自动复制和粘贴所需数据的过程,并将其粘贴到Excel虚拟工作表(数据用冒号和空白分隔)并将单元格引用粘贴到Excel发票跟踪器中。

设计:我需要复制pdf并将其粘贴到虚拟工作表中,然后将其粘贴到我的excel发票跟踪器中。

我尝试从 3 年前发布的线程运行此代码:

VBA 从 pdf 复制数据

我似乎无法理解这行,所以它让我返回一个

error: 1004 Method 'Range' of object'_Global failed
Line Error: For Each fName In Range("path")

我还尝试将("路径")替换为保存所有发票的文件夹路径,但它不起作用。

Sub StartAdobe1()
Dim fName       As Variant
Dim wbTransfer  As Excel.Workbook
Dim wsNew       As Excel.Worksheet
Dim dOpenCol    As Double
Dim oPDFApp     As AcroApp
Dim oAVDoc      As AcroAVDoc
Dim oPDDoc      As AcroPDDoc
'Define your spreadsheet
Set wbTransfer = Workbooks("transfer.xlsm")
Set wsNew = wbTransfer.Sheets("new")
'Find first open column
dOpenCol = wsNew.Cells(1, Columns.Count).End(xlToLeft).Column + 1
'Instantiate Acrobat Objects
Set oPDFApp = CreateObject("AcroExch.App")
Set oAVDoc = CreateObject("AcroExch.AVDoc")
Set oPDDoc = CreateObject("AcroExch.PDDoc")
For Each fName In Range("path")
'Open the PDF file. The AcroAVDoc.Open function returns a true/false
'to tell you if it worked
If oAVDoc.Open(fName.Text, "") = True Then
Set oPDDoc = oAVDoc.GetPDDoc
Else
Debug.Assert False
End If
'Copy all using Acrobat menu
oPDFApp.MenuItemExecute ("SelectAll")
oPDFApp.MenuItemExecute ("Copy")
'Paste into open column
wbTransfer.Activate
wsNew.Cells(1, dOpenCol).Select
ActiveSheet.Paste
'Select next open column
dOpenCol = dOpenCol + 1
oAVDoc.Close (1)    '(1)=Do not save changes
oPDDoc.Close
Next
'Clean up
Set wbTransfer = Nothing
Set wsNew = Nothing
Set oPDFApp = Nothing
Set oAVDoc = Nothing
Set oPDDoc = Nothing

End Sub

我已经上传了一个附件,其中虚拟工作表中的实际结果(粘贴为分隔值 - 冒号和空白)应该看起来像 Excel 发票跟踪器。

以下是链接。

https://www.dropbox.com/preview/Marketing/desired%20output%20from%20pdf%20to%20excel.xlsm?role=work

https://www.dropbox.com/preview/Marketing/MRC%201380%20INVENTORY%202019.xlsb?role=work

那么,您现在拥有什么,所有Excel文件还是所有文本文件?

如果所有 Excel 文件,请尝试此操作。

Note: Copy all code below in a normal module of your workbook
#If VBA7 Then
Declare PtrSafe Function SetCurrentDirectoryA Lib _
"kernel32" (ByVal lpPathName As String) As Long
#Else
Declare Function SetCurrentDirectoryA Lib _
"kernel32" (ByVal lpPathName As String) As Long
#End If

Sub ChDirNet(szPath As String)
SetCurrentDirectoryA szPath
End Sub
Sub Basic_Example_2()
Dim MyPath As String
Dim SourceRcount As Long, Fnum As Long
Dim mybook As Workbook, BaseWks As Worksheet
Dim sourceRange As Range, destrange As Range
Dim rnum As Long, CalcMode As Long
Dim SaveDriveDir As String
Dim FName As Variant

'Change ScreenUpdating, Calculation and EnableEvents
With Application
CalcMode = .Calculation
.Calculation = xlCalculationManual
.ScreenUpdating = False
.EnableEvents = False
End With
SaveDriveDir = CurDir
ChDirNet "C:UsersRontest"
FName = Application.GetOpenFilename(filefilter:="Excel Files (*.xl*), *.xl*", _
MultiSelect:=True)
If IsArray(FName) Then
'Add a new workbook with one sheet
Set BaseWks = Workbooks.Add(xlWBATWorksheet).Worksheets(1)
rnum = 1

'Loop through all files in the array(myFiles)
For Fnum = LBound(FName) To UBound(FName)
Set mybook = Nothing
On Error Resume Next
Set mybook = Workbooks.Open(FName(Fnum))
On Error GoTo 0
If Not mybook Is Nothing Then
On Error Resume Next
With mybook.Worksheets(1)
Set sourceRange = .Range("A1:C1")
End With
If Err.Number > 0 Then
Err.Clear
Set sourceRange = Nothing
Else
'if SourceRange use all columns then skip this file
If sourceRange.Columns.Count >= BaseWks.Columns.Count Then
Set sourceRange = Nothing
End If
End If
On Error GoTo 0
If Not sourceRange Is Nothing Then
SourceRcount = sourceRange.Rows.Count
If rnum + SourceRcount >= BaseWks.Rows.Count Then
MsgBox "Sorry there are not enough rows in the sheet"
BaseWks.Columns.AutoFit
mybook.Close savechanges:=False
GoTo ExitTheSub
Else
'Copy the file name in column A
With sourceRange
BaseWks.Cells(rnum, "A"). _
Resize(.Rows.Count).Value = FName(Fnum)
End With
'Set the destrange
Set destrange = BaseWks.Range("B" & rnum)
'we copy the values from the sourceRange to the destrange
With sourceRange
Set destrange = destrange. _
Resize(.Rows.Count, .Columns.Count)
End With
destrange.Value = sourceRange.Value
rnum = rnum + SourceRcount
End If
End If
mybook.Close savechanges:=False
End If
Next Fnum
BaseWks.Columns.AutoFit
End If
ExitTheSub:
'Restore ScreenUpdating, Calculation and EnableEvents
With Application
.ScreenUpdating = True
.EnableEvents = True
.Calculation = CalcMode
End With
ChDirNet SaveDriveDir
End Sub

如果所有文本文件,请尝试此操作。

Option Explicit
#If VBA7 Then
Private Declare PtrSafe Function OpenProcess Lib "kernel32" _
(ByVal dwDesiredAccess As Long, _
ByVal bInheritHandle As Long, _
ByVal dwProcessId As Long) As Long
Private Declare PtrSafe Function GetExitCodeProcess Lib "kernel32" _
(ByVal hProcess As Long, _
lpExitCode As Long) As Long
#Else
Private Declare Function OpenProcess Lib "kernel32" _
(ByVal dwDesiredAccess As Long, _
ByVal bInheritHandle As Long, _
ByVal dwProcessId As Long) As Long
Private Declare Function GetExitCodeProcess Lib "kernel32" _
(ByVal hProcess As Long, _
lpExitCode As Long) As Long
#End If

Public Const PROCESS_QUERY_INFORMATION = &H400
Public Const STILL_ACTIVE = &H103

Public Sub ShellAndWait(ByVal PathName As String, Optional WindowState)
Dim hProg As Long
Dim hProcess As Long, ExitCode As Long
'fill in the missing parameter and execute the program
If IsMissing(WindowState) Then WindowState = 1
hProg = Shell(PathName, WindowState)
'hProg is a "process ID under Win32. To get the process handle:
hProcess = OpenProcess(PROCESS_QUERY_INFORMATION, False, hProg)
Do
'populate Exitcode variable
GetExitCodeProcess hProcess, ExitCode
DoEvents
Loop While ExitCode = STILL_ACTIVE
End Sub

Sub Merge_CSV_Files()
Dim BatFileName As String
Dim TXTFileName As String
Dim XLSFileName As String
Dim FileExtStr As String
Dim FileFormatNum As Long
Dim DefPath As String
Dim Wb As Workbook
Dim oApp As Object
Dim oFolder
Dim foldername
'Create two temporary file names
BatFileName = Environ("Temp") & _
"CollectCSVData" & Format(Now, "dd-mm-yy-h-mm-ss") & ".bat"
TXTFileName = Environ("Temp") & _
"AllCSV" & Format(Now, "dd-mm-yy-h-mm-ss") & ".txt"
'Folder where you want to save the Excel file
DefPath = Application.DefaultFilePath
If Right(DefPath, 1) <> "" Then
DefPath = DefPath & ""
End If
'Set the extension and file format
If Val(Application.Version) < 12 Then
'You use Excel 97-2003
FileExtStr = ".xls": FileFormatNum = -4143
Else
'You use Excel 2007 or higher
FileExtStr = ".xlsx": FileFormatNum = 51
'If you want to save as xls(97-2003 format) in 2007 use
'FileExtStr = ".xls": FileFormatNum = 56
End If
'Name of the Excel file with a date/time stamp
XLSFileName = DefPath & "MasterCSV " & _
Format(Now, "dd-mmm-yyyy h-mm-ss") & FileExtStr
'Browse to the folder with CSV files
Set oApp = CreateObject("Shell.Application")
Set oFolder = oApp.BrowseForFolder(0, "Select folder with CSV files", 512)
If Not oFolder Is Nothing Then
foldername = oFolder.Self.Path
If Right(foldername, 1) <> "" Then
foldername = foldername & ""
End If
'Create the bat file
Open BatFileName For Output As #1
Print #1, "Copy " & Chr(34) & foldername & "*.csv" _
& Chr(34) & " " & TXTFileName
Close #1
'Run the Bat file to collect all data from the CSV files into a TXT file
ShellAndWait BatFileName, 0
If Dir(TXTFileName) = "" Then
MsgBox "There are no csv files in this folder"
Kill BatFileName
Exit Sub
End If
'Open the TXT file in Excel
Application.ScreenUpdating = False
Workbooks.OpenText Filename:=TXTFileName, Origin:=xlWindows, StartRow _
:=1, DataType:=xlDelimited, TextQualifier:=xlDoubleQuote, _
ConsecutiveDelimiter:=False, Tab:=False, Semicolon:=False, Comma:=True, _
Space:=False, Other:=False
'Save text file as a Excel file
Set Wb = ActiveWorkbook
Application.DisplayAlerts = False
Wb.SaveAs Filename:=XLSFileName, FileFormat:=FileFormatNum
Application.DisplayAlerts = True
Wb.Close savechanges:=False
MsgBox "You find the Excel file here: " & vbNewLine & XLSFileName
'Delete the bat and text file you temporary used
Kill BatFileName
Kill TXTFileName
Application.ScreenUpdating = True
End If
End Sub

有关所有详细信息,请参阅下面的链接。

https://www.rondebruin.nl/win/section3.htm

另外,请考虑使用这个非常有用的 Excel 加载项。

https://www.rondebruin.nl/win/addins/rdbmerge.htm

如果您首先将所有PDF文件转换为文本文件,然后将所有文本文件中的所有数据导入Excel中的多个工作表,您可能会得到很好的服务。

Sub convertpdf2()
Dim AcroXApp As Acrobat.AcroApp
Dim AcroXAVDoc As Acrobat.AcroAVDoc
Dim AcroXPDDoc As Acrobat.AcroPDDoc
Dim Filename As String
Dim jsObj As Object
Dim NewFileName As String
Filename = "C:your_path_heretest.pdf"
NewFileName = "C:your_path_hereDesktoptest.txt"
Set AcroXApp = CreateObject("AcroExch.App")
'AcroXApp.Show
Set AcroXAVDoc = CreateObject("AcroExch.AVDoc")
AcroXAVDoc.Open Filename, "Acrobat"
Set AcroXPDDoc = AcroXAVDoc.GetPDDoc
Set jsObj = AcroXPDDoc.GetJSObject
jsObj.SaveAs NewFileName, "com.adobe.acrobat.plain-text"
AcroXAVDoc.Close False
AcroXApp.Hide
AcroXApp.Exit
End Sub

或者,将所有文本文件中的所有内容放入Excel中的单个工作表中,当然,假设所有文件都具有相同的架构。

将多个测试文件导入到 Excel 中的单独工作表中。

Sub CombineTextFiles()
'updateby Extendoffice 20151015
Dim xFilesToOpen As Variant
Dim I As Integer
Dim xWb As Workbook
Dim xTempWb As Workbook
Dim xDelimiter As String
Dim xScreen As Boolean
On Error GoTo ErrHandler
xScreen = Application.ScreenUpdating
Application.ScreenUpdating = False
xDelimiter = "|"
xFilesToOpen = Application.GetOpenFilename("Text Files (*.txt), *.txt", , "Kutools for Excel", , True)
If TypeName(xFilesToOpen) = "Boolean" Then
MsgBox "No files were selected", , "Kutools for Excel"
GoTo ExitHandler
End If
I = 1
Set xTempWb = Workbooks.Open(xFilesToOpen(I))
xTempWb.Sheets(1).Copy
Set xWb = Application.ActiveWorkbook
xTempWb.Close False
xWb.Worksheets(I).Columns("A:A").TextToColumns _
Destination:=Range("A1"), DataType:=xlDelimited, _
TextQualifier:=xlDoubleQuote, _
ConsecutiveDelimiter:=False, _
Tab:=False, Semicolon:=False, _
Comma:=False, Space:=False, _
Other:=True, OtherChar:="|"
Do While I < UBound(xFilesToOpen)
I = I + 1
Set xTempWb = Workbooks.Open(xFilesToOpen(I))
With xWb
xTempWb.Sheets(1).Move after:=.Sheets(.Sheets.Count)
.Worksheets(I).Columns("A:A").TextToColumns _
Destination:=Range("A1"), DataType:=xlDelimited, _
TextQualifier:=xlDoubleQuote, _
ConsecutiveDelimiter:=False, _
Tab:=False, Semicolon:=False, _
Comma:=False, Space:=False, _
Other:=True, OtherChar:=xDelimiter
End With
Loop
ExitHandler:
Application.ScreenUpdating = xScreen
Set xWb = Nothing
Set xTempWb = Nothing
Exit Sub
ErrHandler:
MsgBox Err.Description, , "Kutools for Excel"
Resume ExitHandler
End Sub

如果您安装了 Acrobat,则可以尝试以下脚本。

Sub Convert_to_TXT()
Dim AcroXApp As Acrobat.AcroApp
Dim AcroXAVDoc As Acrobat.AcroAVDoc
Dim AcroXPDDoc As Acrobat.AcroPDDoc
Dim Filename As String
Dim jsObj As Object
Dim NewFileName As String
Const sPath = "C:UsersExcelDesktoptest"
Const sExt = ".pdf"
Const dPath = "C:UsersExcelDesktoptest"
Const dExt = ".txt"
Dim sName As String, dName As String, fCount As Long
'loop through all files in source
sName = Dir(sPath & "*" & sExt)
Do While sName <> ""
fCount = fCount + 1
'we have sName. Now figure out dName
dName = Left(sName, InStrRev(sName, ".") - 1) & dExt
Set AcroXApp = CreateObject("AcroExch.App")
'AcroXApp.Show
Set AcroXAVDoc = CreateObject("AcroExch.AVDoc")
AcroXAVDoc.Open sName, "Acrobat"
Set AcroXPDDoc = AcroXAVDoc.GetPDDoc
Set jsObj = AcroXPDDoc.GetJSObject
jsObj.SaveAs dName, "com.adobe.acrobat.plain-text"
'find the next file
sName = Dir
Loop
AcroXAVDoc.Close False
AcroXApp.Hide
AcroXApp.Exit
MsgBox "Found " & fCount & " files."
End Sub

如果您没有安装 Acrobat,可以尝试以下脚本

Sub ConvertToTXT()
Dim file As Variant, wdDoc As Document
file = Dir("C:your_path" & "*.pdf") 'txt path
Do While (file <> "")
Set wdDoc = Documents.Open(Filename:="C:your_path" & file, ReadOnly:=True, _
AddToRecentFiles:=False, Format:=wdOpenFormatAuto, Visible:=False)
wdDoc.SaveAs2 Filename:="C:your_path" & Replace(file, ".pdf", ".txt"), _
FileFormat:=wdFormatPDF, AddToRecentFiles:=False
wdDoc.Close False
file = Dir
Loop
End Sub

最新更新