以编程方式计算保存在不同位置的多个pdf文件中的总页数



我目前使用 VB.NET 进行开发。我的公司正在推行无纸化办公,我想做一份关于节约纸张的成本分析。目前我们把所有 PDF 文件都保存在服务器上,文件路径形如:"Server>Folder1>Folder2>Folder3>Folder4>PDF文件"。Folder1 和 Folder2 只用于导航,名称固定不变;Folder3 按部门划分,Folder4 按每项工作(job)划分,这两级文件夹的名称是动态的。每个 Folder4 中都有多个 PDF 文件,而且这些 PDF 文件的名称各不相同,这让问题更加复杂。我有下面这段代码,可以在不打开 PDF 的情况下得到它的页数,但它需要传入文件路径。考虑到 PDF 文件有数百个,甚至可能超过 1000 个,我想用程序遍历所有这些文件夹,判断每个文件是否为 PDF,然后把所有 PDF 的页数累加起来。这样我就可以用这个总页数来计算无纸化所节省的成本。

 // Construct a PdfReader for the PDF at the given path and return its page count.
 PdfReader pr = new PdfReader("/path/to/yourFile.pdf");
 return pr.getNumberOfPages();

另一个想法是把所有文件合并成一个PDF文件,这样只需打开这一个文件,就能看到总共有多少页。

这是一个VBA解决方案,请在Excel中运行以下代码:

' Lists every PDF file located directly inside sFolder on the active worksheet:
' column A receives the file name and column B the page count returned by
' pageCount. Output starts at row 1. Subfolders are not searched.
Sub PDFandNumPages()
   Dim Folder As Object
   Dim file As Object
   Dim fso As Object
   Dim iRow As Long              ' Long: a worksheet has more rows than an Integer can index
   Dim sFolder As String, sExt As String
   sExt = "pdf"
   iRow = 1
   ' Folder to scan. A trailing '\' is optional because the full path of each
   ' file is taken from file.Path rather than built by string concatenation.
   sFolder = "C:\your_path_here\"
   Set fso = CreateObject("Scripting.FileSystemObject")
   If sFolder <> "" Then
      Set Folder = fso.GetFolder(sFolder)
      For Each file In Folder.Files
         ' Compare the real extension case-insensitively so "A.PDF" is counted
         ' and a file merely named "...pdf" without a dot is not.
         If LCase$(fso.GetExtensionName(file.Name)) = sExt Then
            Cells(iRow, 1).Value = file.Name
            Cells(iRow, 2).Value = pageCount(CStr(file.Path))
            iRow = iRow + 1
         End If
      Next file
   End If
End Sub
' Estimates the number of pages of a PDF file without a PDF library.
'
' The file is read record by record with Input #. Each record is upper-cased
' and searched for "/N " first and "/COUNT " second; the digits following the
' first marker found are taken as the page count.
'
' Parameters:
'   sFilePathName - full path of the file to inspect.
' Returns:
'   The number found, or 0 when no marker is found within the search limit,
'   when the marker is not followed by digits, or when the number does not
'   fit in an Integer.
' Errors raised while opening or reading the file are passed on to the caller
' after the file has been closed.
Function pageCount(sFilePathName As String) As Integer
    Dim nFileNum As Integer
    Dim bOpen As Boolean
    Dim sInput As String
    Dim sNumPages As String
    Dim lPos As Long              ' InStr returns a Long; an Integer overflows on long records
    Dim iEndsearch As Integer
    Dim lErrNum As Long, sErrSrc As String, sErrDesc As String

    On Error GoTo CleanFail

    ' Get an available file number from the system
    nFileNum = FreeFile
    ' Open the PDF file in Binary mode
    Open sFilePathName For Binary Lock Read Write As #nFileNum
    bOpen = True

    Do Until EOF(nFileNum)
        ' Read from the file that was just opened (not a hard-coded #1)
        Input #nFileNum, sInput
        sInput = UCase$(sInput)

        lPos = InStr(1, sInput, "/N ")
        If lPos > 0 Then
            sNumPages = pdfLeadingDigits(sInput, lPos + 3)
            Exit Do
        End If

        lPos = InStr(1, sInput, "/COUNT ")
        If lPos > 0 Then
            sNumPages = pdfLeadingDigits(sInput, lPos + 7)
            Exit Do
        End If

        ' Give up after a bounded number of records; sNumPages stays empty (-> 0)
        If iEndsearch > 1001 Then Exit Do
        iEndsearch = iEndsearch + 1
    Loop

    Close #nFileNum
    bOpen = False

    ' Val("") is 0, so an empty or missing number yields 0 instead of an error
    If Val(sNumPages) <= 32767 Then
        pageCount = CInt(Val(sNumPages))
    Else
        pageCount = 0
    End If
    Exit Function

CleanFail:
    ' Remember the error, release the file handle, then re-raise to the caller
    lErrNum = Err.Number: sErrSrc = Err.Source: sErrDesc = Err.Description
    If bOpen Then Close #nFileNum
    Err.Raise lErrNum, sErrSrc, sErrDesc
End Function

' Returns the run of decimal digits in sText starting at lStart, after skipping
' any spaces. Returns "" when no digit is found there.
Private Function pdfLeadingDigits(sText As String, ByVal lStart As Long) As String
    Dim lPos As Long
    Dim lFirst As Long

    lPos = lStart
    ' Mid$ past the end of the string returns "", which ends both loops
    Do While Mid$(sText, lPos, 1) = " "
        lPos = lPos + 1
    Loop
    lFirst = lPos
    Do While Mid$(sText, lPos, 1) Like "[0-9]"
        lPos = lPos + 1
    Loop
    pdfLeadingDigits = Mid$(sText, lFirst, lPos - lFirst)
End Function

这是做本质上相同事情的另一种方法。

' Writes a report of all PDF files directly inside MyPath to the active sheet:
' columns A:B are cleared, a bold header row is written, then one row per file
' with its name and the page count from GetPageNum. A message box reports how
' many files were found.
Sub Test()
    Dim MyPath As String, MyFile As String
    Dim i As Long
    ' Folder to scan, without a trailing separator (one is appended below)
    MyPath = "C:\your_path_here"
    ' Default attributes (normal files only): with vbDirectory, a folder whose
    ' name matches *.pdf would also be returned and then fail to open.
    MyFile = Dir(MyPath & Application.PathSeparator & "*.pdf")
    Range("A:B").ClearContents
    Range("A1") = "File Name": Range("B1") = "Pages"
    Range("A1:B1").Font.Bold = True
    i = 1
    Do While MyFile <> ""
        i = i + 1
        Cells(i, 1) = MyFile
        Cells(i, 2) = GetPageNum(MyPath & Application.PathSeparator & MyFile)
        ' Dir with no arguments returns the next match, or "" when done
        MyFile = Dir
    Loop
    Columns("A:B").AutoFit
    ' i started at the header row, so i - 1 is the number of files listed
    MsgBox "Total of " & i - 1 & " PDF files have been found" & vbCrLf _
           & " File names and corresponding count of pages have been written on " _
           & ActiveSheet.Name, vbInformation, "Report..."
End Sub
'
' Counts the pages of a PDF file by loading the whole file into a string and
' counting the page-object markers ("/Type /Page") it contains.
'
' Parameters:
'   PDF_File - full path of the file to inspect.
' Returns:
'   The number of regex matches found in the file contents.
Function GetPageNum(PDF_File As String)
    'Haluk 19/10/2008
    Dim FileNum As Long
    Dim strRetVal As String
    Dim RegExp
    Set RegExp = CreateObject("VBscript.RegExp")
    RegExp.Global = True
    ' "\s*" allows optional whitespace between the two names ("/Type /Page" and
    ' "/Type/Page"); "[^s]" rejects "/Pages", which is the page-tree node.
    RegExp.Pattern = "/Type\s*/Page[^s]"
    FileNum = FreeFile
    Open PDF_File For Binary As #FileNum
        ' Pre-size the buffer so Get reads the entire file in one call
        strRetVal = Space(LOF(FileNum))
        Get #FileNum, , strRetVal
    Close #FileNum
    GetPageNum = RegExp.Execute(strRetVal).Count
End Function

最新更新