C# 从中提取文本.XPS 文档



我一直在使用另一个StackOverflow答案作为解决这个问题的参考,但是我遇到了一个问题。我在FixedDocumentSequence时收到一个错误,说找不到它。我已经添加了对PresentationCorePresentationFrameworkWindowsBaseReachFramework的引用,我不太确定是否需要为FixedDocumentSequence添加另一个引用。

这是我的代码:

public string convertXPS(string fileName)
    {
        XpsDocument _xpsDocument = new XpsDocument(fileName, System.IO.FileAccess.Read);
        IXpsFixedDocumentSequenceReader fixedDocSeqReader = _xpsDocument.FixedDocumentSequenceReader;
        IXpsFixedDocumentReader _document = fixedDocSeqReader.FixedDocuments[0];
        FixedDocumentSequence sequence = _xpsDocument.GetFixedDocumentSequence();
        string _fullPageText = "";
        for (int pageCount = 0; pageCount < sequence.DocumentPaginator.PageCount; ++pageCount)
        {
            IXpsFixedPageReader _page = _document.FixedPages[pageCount];
            StringBuilder _currentText = new StringBuilder();
            System.Xml.XmlReader _pageContentReader = _page.XmlReader;
            if (_pageContentReader != null)
            {
                while (_pageContentReader.Read())
                {
                    if (_pageContentReader.Name == "Glyphs")
                    {
                        if (_pageContentReader.HasAttributes)
                        {
                            if (_pageContentReader.GetAttribute("UnicodeString") != null)
                            {
                                _currentText.
                                  Append(_pageContentReader.
                                  GetAttribute("UnicodeString"));
                            }
                        }
                    }
                }
            }
            _fullPageText += _currentText.ToString();
        }
        return _fullPageText;


    }
[STAThread]
static void Main(string[] args)
{
    try
    {
        XpsDocument _xpsDocument = new XpsDocument(@"C:Usersadmin-Desktoptesting.xps", System.IO.FileAccess.Read);
        IXpsFixedDocumentSequenceReader fixedDocSeqReader = _xpsDocument.FixedDocumentSequenceReader;
        IXpsFixedDocumentReader _document = fixedDocSeqReader.FixedDocuments[0];
        FixedDocumentSequence sequence = _xpsDocument.GetFixedDocumentSequence();
        string _fullPageText = "";
        for (int pageCount = 0; pageCount < sequence.DocumentPaginator.PageCount; ++pageCount)
        {
            IXpsFixedPageReader _page = _document.FixedPages[pageCount];
            StringBuilder _currentText = new StringBuilder();
            System.Xml.XmlReader _pageContentReader = _page.XmlReader;
            if (_pageContentReader != null)
            {
                while (_pageContentReader.Read())
                {
                    if (_pageContentReader.Name == "Glyphs")
                    {
                        if (_pageContentReader.HasAttributes)
                        {
                            if (_pageContentReader.GetAttribute("UnicodeString") != null)
                            {
                                _currentText.
                                  Append(_pageContentReader.
                                  GetAttribute("UnicodeString"));
                            }
                        }
                    }
                }
            }
            _fullPageText += _currentText.ToString();
        }
    }
    catch(Exception e)
    {
    }
}  

我认为代码没有太大变化,尝试添加帮助我读取 xps 的 [STAThread],我也只使用上面提到的引用来读取文件,我也得到了与您相同的错误,但不知何故解决了它,您离获得结果
更近 90%另请参阅添加System.Windows.Documents;所需的参考

最新更新