环境搭建
使用语言 python3
安装imagemagick(pdf转jpg时内部需要调用到此工具)
apt-get install imagemagick
安装libreoffice(此工具用于将word文档转化成pdf文件)
apt-get install libreoffice
安装python wand,Pillow库(PIL已停止维护,需安装其兼容分支Pillow,代码中仍然使用 from PIL import ... 导入)
pip install wand
pip install Pillow
PDF转JPG
先转png,再转jpg是为了避免出现黑色,透明等背景,造成转换出来的图片与pdf文件显示不一样
import os

from PIL import Image as Image2
from wand.color import Color
from wand.image import Image

# Directory (relative to the current working directory) that receives the
# intermediate PNG files and the final JPEG files.
_IMAGE_DIR = 'static/local_images'


def _flatten_png_to_jpg(png_path):
    """Composite one PNG onto a white canvas, save it as JPEG, delete the PNG.

    Args:
        png_path: Path of the PNG file to convert.

    Returns:
        The path of the JPEG file that was written (same path as the PNG,
        with the extension replaced by ``.jpg``).
    """
    # splitext only strips the final extension, so titles that contain dots
    # (e.g. "report.v2-0.png") keep their full name.
    jpg_path = os.path.splitext(png_path)[0] + '.jpg'

    with Image2.open(png_path) as page:
        # Always convert to RGBA first so that only the alpha band is used as
        # the paste mask. Pasting a greyscale image with itself as the mask
        # would treat its luminance as transparency and wash the page out.
        rgba = page.convert('RGBA')

    canvas = Image2.new('RGB', rgba.size, (255, 255, 255))
    canvas.paste(rgba, (0, 0), rgba)
    canvas.save(jpg_path)

    os.remove(png_path)
    return jpg_path


def convert_pdf_to_jpg(filename):
    """Render every page of a PDF file to a JPEG image.

    The pages are first written as PNG and then flattened onto a white
    background before being saved as JPEG, so that transparent areas do not
    turn black in the result.

    Args:
        filename: Path of the PDF file to convert.

    Returns:
        A list with one entry per page, each of the form
        ``"<static_host>/static/local_images/<title>[-<page>].jpg"``.

    NOTE(review): ``static_host`` is not defined in this snippet; it must be
    provided by the surrounding module before this function is called.
    """
    title = os.path.splitext(os.path.basename(filename))[0]
    os.makedirs(_IMAGE_DIR, exist_ok=True)

    # resolution is the rendering resolution, background the page colour.
    with Image(filename=filename, resolution=150,
               background=Color('White')) as pdf:
        page_count = len(pdf.sequence)
        with pdf.convert('png') as converted:
            converted.save(filename='%s/%s.png' % (_IMAGE_DIR, title))

    # When the PDF has more than one page, the saved file names carry the
    # page index as a "-<n>" suffix; a single page keeps the plain title.
    if page_count == 1:
        png_paths = ['%s/%s.png' % (_IMAGE_DIR, title)]
    else:
        png_paths = ['%s/%s-%d.png' % (_IMAGE_DIR, title, index)
                     for index in range(page_count)]

    jpg_list = []
    for png_path in png_paths:
        jpg_path = _flatten_png_to_jpg(png_path)
        jpg_list.append("%s/%s" % (static_host, jpg_path))
    return jpg_list
word文档转PDF
python没有直接把word转换成pdf文档的库,只能先安装libreoffice工具,然后利用os库系统调用libreoffice工具
import logging
import os
import subprocess


def convert_doc_to_pdf(filename):
    """Convert a Word document to PDF by invoking the LibreOffice CLI.

    The ``libreoffice`` executable is called with ``--convert-to pdf``; no
    output directory is passed, so where the PDF ends up is decided by
    LibreOffice itself (presumably the current working directory - confirm).

    Args:
        filename: Path of the Word document to convert.

    Returns:
        The base name of the expected PDF file, i.e. the input file name
        without its directory and with the extension replaced by ``.pdf``.
        The name is returned even when the conversion fails; failures are
        logged instead of raised.
    """
    pdf_name = os.path.splitext(os.path.basename(filename))[0] + '.pdf'

    logger = logging.getLogger(__name__)
    try:
        # Pass the arguments as a list (no shell) so that file names with
        # spaces or shell metacharacters are handled safely.
        completed = subprocess.run(
            ['libreoffice', '--convert-to', 'pdf', filename])
    except OSError:
        # Raised e.g. when the libreoffice executable cannot be started.
        logger.exception('could not run libreoffice for %s', filename)
    else:
        if completed.returncode != 0:
            logger.warning('libreoffice exited with status %d for %s',
                           completed.returncode, filename)

    return pdf_name