我正在尝试修改PDF文件中的文本。文本可以在类型为的对象中
Tj
BDC
但是如果我把整个页面传递给PdfFileWriter,更改就丢失了。我可能正在更新一个副本,而不是真正的对象。我查过了
id()
这是不同的。有人知道怎么解决这个问题吗?
from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2.pdf import ContentStream
from PyPDF2.generic import TextStringObject, NameObject
from PyPDF2.utils import b_
source = PdfFileReader(open('some.pdf', "rb"))
output = PdfFileWriter()
for page_idx in range(0, 1):
# Get the current page and it's contents
page = source.getPage(page_idx)
content_object = page["/Contents"].getObject()
content = ContentStream(content_object, source)
for operands, operator in content.operations:
if operator == b_("BDC"):
operands[1][NameObject('/Contents')] = TextStringObject('xyz')
if operator == b_("Tj"):
operands[0] = TextStringObject('xyz')
output.addPage(page)
# Write the stream
outputStream = open("output.pdf", "wb")
output.write(outputStream)
outputStream.close()