更改现有 PDF 中的元素颜色



使用 PdfSharp.NET,我想加载现有的PDF文件并将具有某种颜色的所有元素更改为不同的颜色。

直觉告诉我,这需要遍历PDF文档中的每个元素,然后更改颜色属性,但我无法找到遍历所有元素的位置,更不用说它们的颜色属性了。

这用 PDFSharp 真的可以实现吗?如果可以,我该如何做到?

如果您考虑使用替代库,请查看 Docotic.Pdf 库。免责声明:我是作者。

您可以像这样检查和更改颜色:

  1. 参照"复制页面对象"(CopyPageObjects)示例来复制页面对象
  2. 修改 setBrush 和 setPen 方法,如下所示:
    if (color != null)
    dst.Color = getReplacement(color);
    ...
    // Implement getReplacement according to your requirements.
    // This version maps pure red RGB colors to pure green and returns
    // every other color unchanged.
    private static PdfColor getReplacement(PdfColor color)
    {
        // Anything that is not an RGB color is passed through untouched.
        if (!(color is PdfRgbColor rgb))
            return color;

        // Pure red (255, 0, 0) becomes pure green (0, 255, 0).
        bool isPureRed = rgb.R == 255 && rgb.G == 0 && rgb.B == 0;
        if (isPureRed)
            return new PdfRgbColor(0, 255, 0);

        return color;
    }
    
  3. 如果您还需要更改图像对象中的颜色,则需要在 target.DrawImage(image.Image, 0, 0, 0); 这一行之前保存、修改并替换图像,例如:
    string fileName = image.Image.Save(..);
    // change colors in the "fileName" image.
    // For example: https://stackoverflow.com/questions/17208254/how-to-change-pixel-color-of-an-image-in-c-net
    string replacementImage = changeImageColors(fileName); 
    image.Image.ReplaceWith(replacementImage);
    

以下是在矢量路径和文本对象中将 (255, 0, 0) 颜色替换为 (0, 255, 0) 的完整示例代码:

using System.Diagnostics;
namespace BitMiracle.Docotic.Pdf.Samples
{
    /// <summary>
    /// Sample that re-draws the objects of a PDF page (paths, images and text)
    /// onto a freshly added page, passing every brush and pen color through
    /// <c>getReplacement</c> so that selected colors are swapped on the way.
    /// </summary>
    public static class CopyPageObjects
    {
        /// <summary>
        /// Loads "your_document.pdf", re-draws the objects of its first page onto a
        /// new page with replaced colors, saves the result as "CopyPageObjects.pdf"
        /// and then opens the saved file.
        /// </summary>
        public static void Main()
        {
            // NOTE: 
            // When used in trial mode, the library imposes some restrictions.
            // Please visit http://bitmiracle.com/pdf-library/trial-restrictions.aspx
            // for more information.

            const string PathToFile = "CopyPageObjects.pdf";

            using (var pdf = new PdfDocument(@"your_document.pdf"))
            using (PdfDocument copy = pdf.CopyPages(0, 1))
            {
                PdfPage sourcePage = copy.Pages[0];
                PdfPage copyPage = copy.AddPage();

                // Give the new page the same geometry as the source page.
                copyPage.Rotation = sourcePage.Rotation;
                copyPage.MediaBox = sourcePage.MediaBox;
                if (sourcePage.CropBox != sourcePage.MediaBox)
                    copyPage.CropBox = sourcePage.CropBox;

                PdfCanvas target = copyPage.Canvas;
                foreach (PdfPageObject obj in sourcePage.GetObjects())
                    copyPageObject(target, obj);

                // Only the re-drawn page is kept; the original page is dropped.
                copy.RemovePage(0);
                copy.Save(PathToFile);
            }

            // A document (not an executable) can only be launched through the OS
            // shell. UseShellExecute defaults to false on .NET Core / .NET 5+, where
            // Process.Start(path) would throw for a PDF, so request it explicitly.
            // On .NET Framework this matches the existing default.
            Process.Start(new ProcessStartInfo(PathToFile) { UseShellExecute = true });
        }

        /// <summary>
        /// Re-draws a single page object on <paramref name="target"/>. The canvas
        /// state is saved before and restored after, so the clip region and
        /// transformations set up for one object do not affect the next one.
        /// Objects that are not a path, an image or text are not drawn.
        /// </summary>
        private static void copyPageObject(PdfCanvas target, PdfPageObject obj)
        {
            target.SaveState();
            setClipRegion(target, obj.ClipRegion);

            if (obj.Type == PdfPageObjectType.Path)
            {
                copyPath(target, (PdfPath)obj);
            }
            else if (obj.Type == PdfPageObjectType.Image)
            {
                copyImage(target, (PdfPaintedImage)obj);
            }
            else if (obj.Type == PdfPageObjectType.Text)
            {
                drawText(target, (PdfTextData)obj);
            }

            target.RestoreState();
        }

        /// <summary>
        /// Re-draws a path: applies its transformation, configures the brush and/or
        /// pen depending on the path's paint mode, rebuilds the geometry and paints it.
        /// </summary>
        private static void copyPath(PdfCanvas target, PdfPath path)
        {
            target.Transform(path.TransformationMatrix);

            var mode = path.PaintMode;
            bool fills = mode == PdfDrawMode.Fill || mode == PdfDrawMode.FillAndStroke;
            bool strokes = mode == PdfDrawMode.Stroke || mode == PdfDrawMode.FillAndStroke;

            if (fills)
                setBrush(target.Brush, path.Brush);

            if (strokes)
                setPen(target.Pen, path.Pen);

            appendPath(target, path);
            drawPath(target, path);
        }

        /// <summary>
        /// Re-draws a painted image: moves to the image position, applies the image
        /// transformation, copies the brush settings and draws the image.
        /// </summary>
        private static void copyImage(PdfCanvas target, PdfPaintedImage image)
        {
            target.TranslateTransform(image.Position.X, image.Position.Y);
            target.Transform(image.TransformationMatrix);

            setBrush(target.Brush, image.Brush);
            target.DrawImage(image.Image, 0, 0, 0);
        }

        /// <summary>
        /// Applies every path of <paramref name="clipRegion"/> as a clip on
        /// <paramref name="canvas"/>. Does nothing when the region has no paths.
        /// The canvas transformation that was active on entry is re-applied
        /// afterwards, even if applying a clip path throws.
        /// </summary>
        private static void setClipRegion(PdfCanvas canvas, PdfClipRegion clipRegion)
        {
            if (clipRegion.IntersectedPaths.Count == 0)
                return;

            PdfMatrix transformationBefore = canvas.TransformationMatrix;
            try
            {
                foreach (PdfPath clipPath in clipRegion.IntersectedPaths)
                {
                    // Each clip path is appended under its own transformation only.
                    canvas.ResetTransform();
                    canvas.Transform(clipPath.TransformationMatrix);
                    appendPath(canvas, clipPath);
                    canvas.SetClip(clipPath.ClipMode.Value);
                }
            }
            finally
            {
                canvas.ResetTransform();
                canvas.Transform(transformationBefore);
            }
        }

        /// <summary>
        /// Copies brush settings from <paramref name="src"/> to <paramref name="dst"/>.
        /// A non-null color is passed through <c>getReplacement</c> first; a null
        /// color or pattern leaves the corresponding destination value as it is.
        /// </summary>
        private static void setBrush(PdfBrush dst, PdfBrushInfo src)
        {
            PdfColor color = src.Color;
            if (color != null)
                dst.Color = getReplacement(color);

            dst.Opacity = src.Opacity;

            var pattern = src.Pattern;
            if (pattern != null)
                dst.Pattern = pattern;
        }

        /// <summary>
        /// Copies pen settings from <paramref name="src"/> to <paramref name="dst"/>.
        /// A non-null color is passed through <c>getReplacement</c> first; a null
        /// color or pattern leaves the corresponding destination value as it is.
        /// </summary>
        private static void setPen(PdfPen dst, PdfPenInfo src)
        {
            PdfColor color = src.Color;
            if (color != null)
                dst.Color = getReplacement(color);

            var pattern = src.Pattern;
            if (pattern != null)
                dst.Pattern = pattern;

            dst.DashPattern = src.DashPattern;
            dst.EndCap = src.EndCap;
            dst.LineJoin = src.LineJoin;
            dst.MiterLimit = src.MiterLimit;
            dst.Opacity = src.Opacity;
            dst.Width = src.Width;
        }

        /// <summary>
        /// Maps a source color to the color that should be drawn instead.
        /// Pure red RGB (255, 0, 0) becomes pure green (0, 255, 0); every other
        /// color, including any non-RGB color, is returned unchanged.
        /// </summary>
        private static PdfColor getReplacement(PdfColor color)
        {
            // replace pure red RGB colors with green
            if (color is PdfRgbColor rgb)
            {
                if (rgb.R == 255 && rgb.G == 0 && rgb.B == 0)
                    return new PdfRgbColor(0, 255, 0);
            }

            return color;
        }

        /// <summary>
        /// Rebuilds the geometry of <paramref name="path"/> on
        /// <paramref name="target"/>, segment by segment, without painting it.
        /// Segment types other than the ones handled below are ignored.
        /// </summary>
        private static void appendPath(PdfCanvas target, PdfPath path)
        {
            foreach (PdfSubpath subpath in path.Subpaths)
            {
                foreach (PdfPathSegment segment in subpath.Segments)
                {
                    switch (segment.Type)
                    {
                        case PdfPathSegmentType.Point:
                            target.CurrentPosition = ((PdfPointSegment)segment).Value;
                            break;

                        case PdfPathSegmentType.Line:
                            PdfLineSegment line = (PdfLineSegment)segment;
                            target.CurrentPosition = line.Start;
                            target.AppendLineTo(line.End);
                            break;

                        case PdfPathSegmentType.Bezier:
                            PdfBezierSegment bezier = (PdfBezierSegment)segment;
                            target.CurrentPosition = bezier.Start;
                            target.AppendCurveTo(bezier.FirstControl, bezier.SecondControl, bezier.End);
                            break;

                        case PdfPathSegmentType.Rectangle:
                            target.AppendRectangle(((PdfRectangleSegment)segment).Bounds);
                            break;

                        case PdfPathSegmentType.CloseSubpath:
                            target.ClosePath();
                            break;
                    }
                }
            }
        }

        /// <summary>
        /// Paints the path previously appended to <paramref name="target"/> using
        /// the paint mode of <paramref name="path"/>. For any mode other than fill,
        /// fill-and-stroke or stroke, the appended path is reset without painting.
        /// </summary>
        private static void drawPath(PdfCanvas target, PdfPath path)
        {
            switch (path.PaintMode)
            {
                case PdfDrawMode.Fill:
                    target.FillPath(path.FillMode.Value);
                    break;

                case PdfDrawMode.FillAndStroke:
                    target.FillAndStrokePath(path.FillMode.Value);
                    break;

                case PdfDrawMode.Stroke:
                    target.StrokePath();
                    break;

                default:
                    target.ResetPath();
                    break;
            }
        }

        /// <summary>
        /// Re-draws a text object: copies rendering mode, brush, pen and font
        /// settings, positions the canvas at the text location and draws the
        /// text's character codes.
        /// </summary>
        private static void drawText(PdfCanvas target, PdfTextData td)
        {
            target.TextRenderingMode = td.RenderingMode;
            setBrush(target.Brush, td.Brush);
            setPen(target.Pen, td.Pen);

            // The text position is zeroed; placement is done through the
            // translation and transformation applied below.
            target.TextPosition = PdfPoint.Empty;
            target.FontSize = td.FontSize;
            target.Font = td.Font;

            target.TranslateTransform(td.Position.X, td.Position.Y);
            target.Transform(td.TransformationMatrix);

            target.DrawString(td.GetCharacterCodes());
        }
    }
}

> 如果你试图用 PDFSharp 做到这一点,你会吃尽苦头。看看这个帖子就知道为什么:

更改 PDF - 文本重新定位

PDFSharp 允许您访问 PDF 的构建块(Adobe 在其库中称之为 COS 层),但它不会构建页面上对象的图形表示。

因此,您需要访问包含页面所有PDF图形元素的文本流,将此文本解释为实际的对象定义,确定要更改的对象以及这些对象的着色说明的位置,并在必要时进行更改。这绝非微不足道。

为了让您了解您将要使用的内容,您必须解释如下内容:

q
0 g
0 G    
0 0 200 100 re
1 0 0 0 k
(Hi!) T*
Q   

实际上,事情会比简单地为每个页面读取这类文本字符串更复杂一些,因为页面可以(并且经常)包含"表单",这时您必须在 PDF 中找到这些表单并对它们执行相同的步骤。

我并不想打击您,但这确实是一项相当复杂的任务,因为该库不支持图形元素解析。

最新更新