如何将Word文件中的标题提取到Excel



我有数百个Word文件(docx(,每个文件都有不同的标题,定义为标题1、标题2、标题3等。这些文件中的每个都有一个与标题相对应的目录
我想将这些文件中的每个标题提取到Excel工作簿中,以构建数据库。

我的第一次尝试是将单个Word文档中的标题提取到Excel工作簿中。我在网上找到了将Word中的标题提取到Outlook中的代码,还找到了将Word中的标题分离到新Word文件中的代码
这两个我都没能适应。

如何将单个Word文件的标题提取到Excel?然后我将尝试制定进一步的步骤。

Word到Outlook

Sub CopyHeadingsIntoOutlookMail()
Dim objOutlookApp, objMail As Object
Dim objMailDocument As Word.Document
Dim objMailRange As Word.Range
Dim varHeadings As Variant
Dim i As Long
Dim strText As String
Dim nLongDiff As Integer
'Create a new Outlook email
Set objOutlookApp = CreateObject("Outlook.Application")
Set objMail = objOutlookApp.CreateItem(olMailItem)
objMail.Display
Set objMailDocument = objMail.GetInspector.WordEditor
Set objMailRange = objMailDocument.Range(0, 0)

'Get the headings of the current Word document
varHeadings = ActiveDocument.GetCrossReferenceItems(wdRefTypeHeading)
For i = LBound(varHeadings) To UBound(varHeadings)
strText = Trim(varHeadings(i))

'Get the heading level
nLongDiff = Len(RTrim$(CStr(varHeadings(i)))) - Len(Trim(CStr(varHeadings(i))))
nHeadingLevel = (nLongDiff / 2) + 1

'Insert the heading into the Outlook mail
With objMailRange
.InsertAfter strText & vbNewLine
.Style = "Heading " & nHeadingLevel
.Collapse wdCollapseEnd
End With
Next i
End Sub

逐字

Public Sub CreateOutline()
Dim docOutline As Word.Document
Dim docSource As Word.Document
Dim rng As Word.Range

Dim astrHeadings As Variant
Dim strText As String
Dim intLevel As Integer
Dim intItem As Integer

Set docSource = ActiveDocument
Set docOutline = Documents.Add

' Content returns only the
' main body of the document, not
' the headers and footer.
Set rng = docOutline.Content
astrHeadings = _
docSource.GetCrossReferenceItems(wdRefTypeHeading)

For intItem = LBound(astrHeadings) To UBound(astrHeadings)
' Get the text and the level.
strText = Trim$(astrHeadings(intItem))
intLevel = GetLevel(CStr(astrHeadings(intItem)))

' Add the text to the document.
rng.InsertAfter strText & vbNewLine

' Set the style of the selected range and
' then collapse the range for the next entry.
rng.Style = "Heading " & intLevel
rng.Collapse wdCollapseEnd
Next intItem
End Sub
Private Function GetLevel(strItem As String) As Integer
' Return the heading level of a header from the
' array returned by Word.

' The number of leading spaces indicates the
' outline level (2 spaces per level: H1 has
' 0 spaces, H2 has 2 spaces, H3 has 4 spaces.

Dim strTemp As String
Dim strOriginal As String
Dim intDiff As Integer

' Get rid of all trailing spaces.
strOriginal = RTrim$(strItem)

' Trim leading spaces, and then compare with
' the original.
strTemp = LTrim$(strOriginal)

' Subtract to find the number of
' leading spaces in the original string.
intDiff = Len(strOriginal) - Len(strTemp)
GetLevel = (intDiff / 2) + 1
End Function

尝试以下Excel宏。运行它时,只需选择要处理的文件夹。

Sub GetTOCHeadings()
'Note: this code requires a reference to the Word object model.
'See under the VBE's Tools|References.
Application.ScreenUpdating = False
Dim wdDoc As Word.Document, wdRng As Word.Range, wdPara As Word.Paragraph
Dim strFolder As String, strFile As String
Dim WkSht As Worksheet, i As Long, j As Long
strFolder = GetFolder
If strFolder = "" Then Exit Sub
Dim wdApp As New Word.Application
Set WkSht = ActiveSheet
i = WkSht.Cells(WkSht.Rows.Count, 1).End(xlUp).Row
wdApp.WordBasic.DisableAutoMacros
wdApp.DisplayAlerts = wdAlertsNone
strFile = Dir(strFolder & "*.doc", vbNormal)
While strFile <> ""
i = i + 1
Set wdDoc = wdApp.Documents.Open(Filename:=strFolder & "" & strFile, AddToRecentFiles:=False, Visible:=False)
With wdDoc
j = 1: WkSht.Cells(i, j) = strFile
If .TablesOfContents.Count > 0 Then
With .TablesOfContents(1)
.IncludePageNumbers = False
.Update
Set wdRng = .Range
End With
With wdRng
.Fields(1).Unlink
For Each wdPara In .Paragraphs
j = j + 1
WkSht.Cells(i, j).Value = Replace(wdPara.Range.Text, vbTab, " ")
Next
End With
End If
.Close SaveChanges:=False
End With
strFile = Dir()
Wend
wdApp.DisplayAlerts = wdAlertsAll
wdApp.Quit
Set wdDoc = Nothing: Set wdApp = Nothing: Set WkSht = Nothing
Application.ScreenUpdating = True
End Sub

Function GetFolder() As String
Dim oFolder As Object
GetFolder = ""
Set oFolder = CreateObject("Shell.Application").BrowseForFolder(0, "Choose a folder", 0)
If (Not oFolder Is Nothing) Then GetFolder = oFolder.Items.Item.Path
Set oFolder = Nothing
End Function

最新更新