当前位置：搜档网 › word转换成html及图片

word转换成html及图片

package com.certusnet.portlet.freeterms.utils;

import java.awt.Image;

import java.awt.Rectangle;

import java.awt.image.BufferedImage;

import java.io.ByteArrayOutputStream;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.InputStream;

import java.io.OutputStreamWriter;

import java.io.RandomAccessFile;

import java.net.ConnectException;

import java.nio.ByteBuffer;

import java.nio.channels.FileChannel;

import java.util.Date;

import java.util.List;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

import javax.imageio.ImageIO;

import javax.xml.parsers.DocumentBuilderFactory;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.transform.OutputKeys;

import javax.xml.transform.Transformer;

import javax.xml.transform.TransformerException;

import javax.xml.transform.TransformerFactory;

import javax.xml.transform.dom.DOMSource;

import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;

import org.apache.poi.hwpf.converter.PicturesManager;

import org.apache.poi.hwpf.converter.WordToHtmlConverter;

import org.apache.poi.hwpf.usermodel.Picture;

import org.apache.poi.hwpf.usermodel.PictureType;

import org.w3c.dom.Document;

import com.artofsolving.jodconverter.DocumentConverter;

import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;

import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;

import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter;

import com.certusnet.portal.kernel.configuration.Configuration; import com.certusnet.portal.kernel.exception.PortalException; import com.certusnet.portal.kernel.util.PropsKeys;

import com.certusnet.portal.kernel.util.StringPool;

import com.certusnet.portlet.configuration.PortletConfigurationServiceUtil;

import com.sun.image.codec.jpeg.JPEGCodec;

import com.sun.image.codec.jpeg.JPEGImageEncoder;

import com.sun.pdfview.PDFFile;

import com.sun.pdfview.PDFPage;

/**
 * Utility class that converts Word documents into HTML strings.
 *
 * @author MZULE
 */

public class ConverterUtil {

private static final String _BMS_FREE_TERMS_MANAGE_PORTLET_NAME = "BMS-FreeTermsManage-portlet";

private static final String _EXCEPTION_CONVERSION_FAILURE = "conversion_failure";

private static final String _IMG_SRC_TAG = "

private static String _SOFTWARE_UPLOAD_PATH = null;

private static Process pro = null;

/**
 * Builds the upload directory path for a given version.
 *
 * <p>The upload root is read from the {@code tms.apk.upload.path} configuration
 * entry the first time it is needed (or whenever the cached value is blank) and
 * is kept in {@code _SOFTWARE_UPLOAD_PATH} afterwards.
 *
 * @param versionid version identifier appended to the upload root
 * @return the upload root, followed by {@code versionid} and a forward slash
 */
public static String getFileUploadPath(Long versionid) {
    boolean rootMissing = _SOFTWARE_UPLOAD_PATH == null
            || StringPool.BLANK.equals(_SOFTWARE_UPLOAD_PATH);
    if (rootMissing) {
        ClassLoader loader = PortletConfigurationServiceUtil.class.getClassLoader();
        Configuration settings = PortletConfigurationServiceUtil.getConfiguration(
                loader, _BMS_FREE_TERMS_MANAGE_PORTLET_NAME);
        _SOFTWARE_UPLOAD_PATH = settings.get("tms.apk.upload.path");
    }
    String versionDir = _SOFTWARE_UPLOAD_PATH + versionid;
    return versionDir + StringPool.FORWARD_SLASH;
}

/**

* 上传文件

* @param path 上传路径

* @param fileName 上传文件名

* @param file 文件

* @throws PortalException

public static String uploadFile(String path, String fileName, File file)

throws PortalException {

InputStream from = null;

FileOutputStream to = null;

File pathFile = new File(path);

if (!pathFile.exists()) {

pathFile.mkdirs();

}

pathFile = new File(path +StringPool.FORWARD_SLASH+ fileName);

try {

if (!pathFile.exists()) {

pathFile.createNewFile();

}

from = new FileInputStream(file);

to = new FileOutputStream(pathFile);

byte[] buffer = new byte[4096];

int bytes_read;

while ((bytes_read = from.read(buffer)) != -1)

to.write(buffer, 0, bytes_read);

} catch (Exception e) {

throw new PortalException();

} finally {

try {

if (null != from) {

from.close();

}

if (null != to) {

to.close();

}

} catch (IOException e) {

throw new PortalException();

}

return pathFile.getAbsolutePath();

}

public static File docToPdf(File docFile, String filepath) throws PortalException{

try {

startService();

File pdfFile = new File(filepath + StringPool.FORWARD_SLASH + new Date().getTime()+ ".pdf");

OpenOfficeConnection con = new SocketOpenOfficeConnection(8100);

con.connect();

DocumentConverter converter = new OpenOfficeDocumentConverter(con);

converter.convert(docFile, pdfFile);

con.disconnect();

stopService();

return pdfFile;

}

catch (Exception e) {

throw new PortalException(_EXCEPTION_CONVERSION_FAILURE);

}

/**
 * Renders every page of a PDF to a JPEG file named {@code <page number>.jpg}
 * (numbered from 1) inside {@code pictureDir}.
 *
 * <p>Per the original note, the PDF, the picture directory and the document are
 * expected to live in the same directory, which is created per version number
 * (presumably by the caller - nothing here creates it).
 *
 * @param pdfFile    PDF file to render
 * @param pictureDir directory that receives the page images
 * @return number of pages in the PDF
 * @throws PortalException with message {@code conversion_failure} if the PDF cannot
 *                         be read or a page image cannot be written
 */
public static int pdfToPicture(File pdfFile, String pictureDir) throws PortalException {
    RandomAccessFile raf = null;
    try {
        raf = new RandomAccessFile(pdfFile, "r");
        FileChannel channel = raf.getChannel();
        ByteBuffer buf = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
        PDFFile pdffile = new PDFFile(buf);
        int pageCount = pdffile.getNumPages();

        for (int i = 1; i <= pageCount; i++) {
            PDFPage page = pdffile.getPage(i);
            Rectangle rect = new Rectangle(0, 0,
                    (int) page.getBBox().getWidth(), (int) page.getBBox().getHeight());
            Image img = page.getImage(rect.width, rect.height, rect, null, true, true);
            BufferedImage tag = new BufferedImage(rect.width, rect.height, BufferedImage.TYPE_INT_RGB);
            tag.getGraphics().drawImage(img, 0, 0, rect.width, rect.height, null);

            FileOutputStream out = new FileOutputStream(pictureDir + StringPool.FORWARD_SLASH + i + ".jpg");
            try {
                JPEGImageEncoder encoder = JPEGCodec.createJPEGEncoder(out);
                encoder.encode(tag);
            } finally {
                // Close the page file even when encoding fails.
                out.close();
            }
        }
        return pageCount;
    } catch (Exception e) {
        throw new PortalException(_EXCEPTION_CONVERSION_FAILURE);
    } finally {
        if (raf != null) {
            try {
                // Closing the file also closes the channel obtained from it.
                raf.close();
            } catch (IOException ignored) {
                // The file was only read; a failed close cannot lose data.
            }
        }
    }
}

//压缩并存储图片

public static File compressImage(File file, String directoryFileName,

int width, int height, boolean proportion) throws PortalException {

FileOutputStream fileOutputStream = null;

File compressImage=new File(directoryFileName);

try {

if (file == null || directoryFileName == null) { return null;

}

fileOutputStream = new FileOutputStream(compressImage);

Image image = ImageIO.read(file);

if (image.getWidth(null) == -1) {

return null;

}

int newWidth = 0;

int newHeight = 0;

if (image.getWidth(null) > width || image.getHeight(null) > height) {

if (proportion) {

int rate1 = image.getWidth(null) / width;

int rate2 = image.getHeight(null) / height;

int rate = rate1 > rate2 ? rate1 : rate2;

newWidth = image.getWidth(null) / rate;

newHeight = image.getHeight(null) / rate;

} else {

newWidth = width;

newHeight = height;

}

} else {

newWidth = image.getWidth(null);

newHeight = image.getHeight(null);

}

BufferedImage bufferedImage = new BufferedImage(newWidth,

newHeight, BufferedImage.TYPE_INT_RGB);

bufferedImage.getGraphics().drawImage(

image.getScaledInstance(newWidth, newHeight,

image.SCALE_SMOOTH), 0, 0, null);

JPEGImageEncoder encoder = JPEGCodec

.createJPEGEncoder(fileOutputStream);

encoder.encode(bufferedImage);

fileOutputStream.close();

} catch (Exception e) {

throw new PortalException();

} finally {

if (fileOutputStream != null) {

try {

fileOutputStream.close();

} catch (Exception e) {

throw new PortalException(_EXCEPTION_CONVERSION_FAILURE);

}

return compressImage;

}

/**

* 将word文档转换成html文档

* @param docFile

* 需要转换的word文档

* @param filepath

* 转换之后html的存放路径

* @return 转换之后的html文件

* @throws IOException

* @throws PortalException

public static String docToHtml(File docFile, String filepath) throws TransformerException, IOException, ParserConfigurationException, PortalException {

filepath = filepath + StringPool.FORWARD_SLASH;

String htmlFileName = new Date().getTime()+ ".html";

String htmlFilePath=filepath + htmlFileName;

InputStream input = new FileInputStream(docFile);

HWPFDocument wordDocument = new HWPFDocument(input);

WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocum entBuilder().newDocument());

wordToHtmlConverter.setPicturesManager(new PicturesManager() { public String savePicture(byte[] content, PictureType pictureType, String suggestedName,

float widthInches,float heightInches) {

return suggestedName;

}

});

wordToHtmlConverter.processDocument(wordDocument);

List pics = wordDocument.getPicturesTable().getAllPictures(); if (pics != null) {

for (int i = 0; i < pics.size(); i++) {

Picture pic = (Picture) pics.get(i);

try {

pic.writeImageContent(new

FileOutputStream(filepath + pic.suggestFullFileName()));

} catch (FileNotFoundException e) {

throw new PortalException(_EXCEPTION_CONVERSION_FAILURE);

}

Document htmlDocument = wordToHtmlConverter.getDocument();

ByteArrayOutputStream outStream = new ByteArrayOutputStream(); DOMSource domSource = new DOMSource(htmlDocument);

StreamResult streamResult = new StreamResult(outStream);

TransformerFactory tf = TransformerFactory.newInstance();

Transformer serializer = tf.newTransformer();

serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");

serializer.setOutputProperty(OutputKeys.INDENT, "yes");

serializer.setOutputProperty(OutputKeys.METHOD, "html");

serializer.transform(domSource, streamResult);

outStream.close();

String content = new String(outStream.toByteArray());

OutputStreamWriter osw = new OutputStreamWriter(new FileOutputStream(htmlFilePath), "UTF-8");

osw.write(content);

osw.flush();

osw.close();

//这是源文件

File htmlFile = new File(htmlFilePath);

String htmlMoveFilePath = "";

try {

htmlMoveFilePath = uploadHtmlFile(htmlFile.getName(), htmlFile);

} catch (PortalException e) {

throw new PortalException(_EXCEPTION_CONVERSION_FAILURE);

}

//FileUtils.write(new File(filepath, htmlFileName), content, "utf-8");

return htmlMoveFilePath;

}

/**

* 上传文件

* @param fileName 上传文件名

* @param file 源文件

* @throws PortalException

public static String uploadHtmlFile(String fileName, File file) throws PortalException {

Configuration config = PortletConfigurationServiceUtil

.getConfiguration(

PortletConfigurationServiceUtil.class.getClassLoader(),

_BMS_FREE_TERMS_MANAGE_PORTLET_NAME);

String htmpservicepath = config.get("free.terms.html.path");

///upload-apk/htmlfile

String portalWebDir = System.getProperty("env.DOMAIN_HOME") + "/portal/ROOT"; //获取服务器路径

//需要移动的文件路径

htmpservicepath = portalWebDir + htmpservicepath;

//需要移动到的文件

File htmlServiceFile = new File(htmpservicepath);

if (!htmlServiceFile.exists()) {

htmlServiceFile.mkdirs();

}

htmlServiceFile = new File(htmpservicepath +StringPool.FORWARD_SLASH+ fileName);

InputStream from = null;

FileOutputStream to = null;

try {

if (!htmlServiceFile.exists()) {

htmlServiceFile.createNewFile();

}

from = new FileInputStream(file);

to = new FileOutputStream(htmlServiceFile.getPath());

byte[] buffer = new byte[4096];

int bytes_read;

while ((bytes_read = from.read(buffer)) != -1)

to.write(buffer, 0, bytes_read);

} catch (Exception e) {

throw new PortalException() ;

} finally {

try {

if (null != from) {

from.close();

}

if (null != to) {

to.close();

}

} catch (IOException e) {

throw new PortalException() ;

}

return htmlServiceFile.getAbsolutePath();

}

public static void startService() throws PortalException{ // 启动OpenOffice的服务 -nofirststartwizard &

String command = "/opt/openoffice4/program/soffice -headless

-accept=\"socket,host=127.0.0.1,port=8100;urp;StarOffice.Service\ " -nofirststartwizard &";

// connect to an https://www.sodocs.net/doc/793205385.html, instance running on port 8100

try{

pro = Runtime.getRuntime().exec(command);

}catch(Exception ex){

throw new PortalException("conversion_service_unstarted") ;

}

public static void stopService(){

if(pro!=null){

pro.destroy();

}

/**

* 清除一些不需要的html标记

* @param htmlStr

* 带有复杂html标记的html语句

* @return 去除了不需要html标记的语句

protected static String clearFormat(String htmlStr, String docImgPath) {

// 获取body内容的正则

String bodyReg = "";

Pattern bodyPattern = https://www.sodocs.net/doc/793205385.html,pile(bodyReg);

Matcher bodyMatcher = bodyPattern.matcher(htmlStr);

if (bodyMatcher.find()) {

// 获取BODY内容，并转化BODY标签为DIV

htmlStr = bodyMatcher.group().replaceFirst("", "

");

}

// 调整图片地址

htmlStr = htmlStr.replaceAll(_IMG_SRC_TAG, _IMG_SRC_TAG + docImgPath

+ StringPool.FORWARD_SLASH);

// 把

转换成

word转换成html及图片

为什么我用word文档转换成html格式后表格边框不见了

C# Word转PDF、TXT、图片、HTML

将Word文档转换成图片PDF的办法

教你如何将Word文档转化为图片

word转图片在线转换在线将Word文档转换至JPG图片

POI读取word转换html

Word文档转换为HTML帮助文档操作手册

word域代码转换html丢失解决办法

word转换成html及图片

为什么我用word文档转换成html格式后表格边框不见了

C# Word转PDF、TXT、图片、HTML

将Word文档转换成图片PDF的办法

教你如何将Word文档转化为图片

word转图片在线转换 在线将Word文档转换至JPG图片

POI读取word转换html

Word文档转换为HTML帮助文档操作手册

word域代码转换html丢失解决办法

word转图片在线转换在线将Word文档转换至JPG图片