import PyPDF2 as pdf
import sys
import os
def pdf_del_first_page(inputfile):
    """Write a copy of *inputfile* with its first page removed.

    The result is saved next to the input as ``<name>_out<ext>``. Top-level
    outline entries (bookmarks) are copied over and re-targeted to the
    shifted page numbers. Entries that pointed at the removed first page,
    or whose target page cannot be resolved, are dropped.

    Args:
        inputfile: Path of the PDF file to read.
    """
    print(os.path.splitext(inputfile))
    root, ext = os.path.splitext(inputfile)
    outputfile = root + '_out' + ext

    reader = pdf.PdfFileReader(inputfile)
    page_count = reader.getNumPages()
    print("pages = %d" % page_count)

    pages = []
    # Maps a page object's id to its index in the OUTPUT document
    # (input index - 1), so the removed first page maps to -1.
    new_index_by_id = {}
    for i in range(page_count):
        page = reader.getPage(i)
        pages.append(page)
        new_index_by_id[page.indirectRef.idnum] = i - 1

    bookmarks = []
    for outline in reader.getOutlines():
        # Only Destination entries are handled; anything else (presumably
        # nested lists of child bookmarks -- confirm against PyPDF2) is skipped.
        if not isinstance(outline, pdf.generic.Destination):
            continue
        target_id = getattr(outline.page, 'idnum', None)
        new_index = new_index_by_id.get(target_id)
        if new_index is None:
            # Destination does not reference a page of this document.
            continue
        bookmarks.append((outline['/Title'], new_index))
    print(bookmarks)

    writer = pdf.PdfFileWriter()
    for page in pages[1:]:
        writer.addPage(page)

    # Attach bookmarks only after every page has been added, and test each
    # one independently. The previous interleaved cursor stalled on the first
    # bookmark whose page never matched (e.g. -1 for the removed page, or an
    # out-of-order entry) and silently lost every bookmark after it.
    for title, page_num in bookmarks:
        if page_num >= 0:
            writer.addBookmark(title, page_num)

    with open(outputfile, 'wb+') as fh:
        writer.write(fh)
if __name__ == '__main__':
    # Script entry point: expects exactly one argument, the PDF path.
    if len(sys.argv) != 2:
        print('参数不对')
        # os has no exit() function; sys.exit is the call that terminates
        # the interpreter with the given status.
        sys.exit(-1)
    pdf_del_first_page(sys.argv[1])
    print('删除pdf第一页成功')
import PyPDF2 as pdf
import os
import sys
def split_pdf(inputfile, pages_per_file=5):
    """Split *inputfile* into consecutive PDFs of ``pages_per_file`` pages.

    Output files are written to a ``split`` directory under the current
    working directory (created if missing) and are named ``split_00.pdf``,
    ``split_01.pdf``, ... The last file holds the remaining pages and may
    be shorter than the others.

    Args:
        inputfile: Path of the PDF file to read.
        pages_per_file: Maximum number of pages per output file (default 5).

    Raises:
        ValueError: If ``pages_per_file`` is smaller than 1.
    """
    if pages_per_file < 1:
        raise ValueError("pages_per_file must be at least 1")

    # Build the path with os.path.join so the directory separator is correct
    # on every platform (a literal ".\\split\\" only works on Windows).
    outputdir = os.path.join(".", "split")

    reader = pdf.PdfFileReader(inputfile)
    page_count = reader.getNumPages()
    print("pages = %d" % page_count)

    os.makedirs(outputdir, exist_ok=True)

    # One iteration per output file; each file is written exactly once,
    # after all of its pages have been collected.
    for start in range(0, page_count, pages_per_file):
        writer = pdf.PdfFileWriter()
        stop = min(start + pages_per_file, page_count)
        for page_index in range(start, stop):
            writer.addPage(reader.getPage(page_index))

        chunk_name = 'split_' + '{0:02d}'.format(start // pages_per_file) + '.pdf'
        with open(os.path.join(outputdir, chunk_name), 'wb+') as fh:
            writer.write(fh)
if __name__ == '__main__':
    # Script entry point: expects exactly one argument, the PDF path.
    if len(sys.argv) != 2:
        print('命令行参数错误')
        # Stop here: without this, a missing argument falls through to
        # sys.argv[1] and raises IndexError.
        sys.exit(1)
    inputfile = sys.argv[1]
    split_pdf(inputfile)