java使用POI实现html和word相互转换_Java教程

项目后端使用了springboot，maven，前端使用了ckeditor富文本编辑器。目前从html转换的word为doc格式，而图片处理支持的是docx格式，所以需要手动把doc另存为docx，然后才可以进行图片替换。

一.添加maven依赖

主要使用了以下和poi相关的依赖，为了便于获取html的图片元素，还使用了jsoup：

				?

									<!-- Apache POI core, HWPF (.doc, in poi-scratchpad) and XWPF (.docx, in poi-ooxml). -->
									<dependency>
									  <groupId>org.apache.poi</groupId>
									  <artifactId>poi</artifactId>
									  <version>3.14</version>
									</dependency>
									<dependency>
									  <groupId>org.apache.poi</groupId>
									  <artifactId>poi-scratchpad</artifactId>
									  <version>3.14</version>
									</dependency>
									<dependency>
									  <groupId>org.apache.poi</groupId>
									  <artifactId>poi-ooxml</artifactId>
									  <version>3.14</version>
									</dependency>
									<!-- Used below for the docx -> XHTML conversion (XHTMLConverter / XHTMLOptions). -->
									<dependency>
									  <groupId>fr.opensagres.xdocreport</groupId>
									  <artifactId>xdocreport</artifactId>
									  <version>1.0.6</version>
									</dependency>
									<!-- OOXML schema classes (CTInline and friends) used by the picture-insertion code. -->
									<dependency>
									  <groupId>org.apache.poi</groupId>
									  <artifactId>poi-ooxml-schemas</artifactId>
									  <version>3.14</version>
									</dependency>
									<dependency>
									  <groupId>org.apache.poi</groupId>
									  <artifactId>ooxml-schemas</artifactId>
									  <version>1.3</version>
									</dependency>
									<!-- jsoup is used to locate the <img> elements in the HTML. -->
									<dependency>
									  <groupId>org.jsoup</groupId>
									  <artifactId>jsoup</artifactId>
									  <version>1.11.3</version>
									</dependency>

二.word转换为html

在Spring Boot项目的resources目录下新建static文件夹，将需要转换的word文件（下文代码中使用的是test.doc和test.docx）粘贴进去。由于static是Spring Boot默认的静态资源目录，所以不需要在配置文件里另行配置；如果改成其他名字，则需要在application.yml中进行相应配置。

doc格式转换为html：

				?

									public static string doctohtml() throws exception {

									  file path = new file(resourceutils.geturl("classpath:").getpath());

									  string imagepathstr = path.getabsolutepath() + "\\static\\image\\";

									  string sourcefilename = path.getabsolutepath() + "\\static\\test.doc";

									  string targetfilename = path.getabsolutepath() + "\\static\\test2.html";

									  file file = new file(imagepathstr);

									  if(!file.exists()) {

									    file.mkdirs();

									  }

									  hwpfdocument worddocument = new hwpfdocument(new fileinputstream(sourcefilename));

									  org.w3c.dom.document document = documentbuilderfactory.newinstance().newdocumentbuilder().newdocument();

									  wordtohtmlconverter wordtohtmlconverter = new wordtohtmlconverter(document);

									  //保存图片，并返回图片的相对路径

									  wordtohtmlconverter.setpicturesmanager((content, picturetype, name, width, height) -> {

									    try (fileoutputstream out = new fileoutputstream(imagepathstr + name)) {

									      out.write(content);

									    } catch (exception e) {

									      e.printstacktrace();

									    }

									    return "image/" + name;

									  });

									  wordtohtmlconverter.processdocument(worddocument);

									  org.w3c.dom.document htmldocument = wordtohtmlconverter.getdocument();

									  domsource domsource = new domsource(htmldocument);

									  streamresult streamresult = new streamresult(new file(targetfilename));

									  transformerfactory tf = transformerfactory.newinstance();

									  transformer serializer = tf.newtransformer();

									  serializer.setoutputproperty(outputkeys.encoding, "utf-8");

									  serializer.setoutputproperty(outputkeys.indent, "yes");

									  serializer.setoutputproperty(outputkeys.method, "html");

									  serializer.transform(domsource, streamresult);

									  return targetfilename;

									}

docx格式转换为html

				?

									public static string docxtohtml() throws exception {

									  file path = new file(resourceutils.geturl("classpath:").getpath());

									  string imagepath = path.getabsolutepath() + "\\static\\image";

									  string sourcefilename = path.getabsolutepath() + "\\static\\test.docx";

									  string targetfilename = path.getabsolutepath() + "\\static\\test.html";

									  outputstreamwriter outputstreamwriter = null;

									  try {

									    xwpfdocument document = new xwpfdocument(new fileinputstream(sourcefilename));

									    xhtmloptions options = xhtmloptions.create();

									    // 存放图片的文件夹

									    options.setextractor(new fileimageextractor(new file(imagepath)));

									    // html中图片的路径

									    options.uriresolver(new basicuriresolver("image"));

									    outputstreamwriter = new outputstreamwriter(new fileoutputstream(targetfilename), "utf-8");

									    xhtmlconverter xhtmlconverter = (xhtmlconverter) xhtmlconverter.getinstance();

									    xhtmlconverter.convert(document, outputstreamwriter, options);

									  } finally {

									    if (outputstreamwriter != null) {

									      outputstreamwriter.close();

									    }

									  }

									  return targetfilename;

									}

转换成功后会生成对应的html文件，如果想在前端展示，直接读取文件转换为string返回给前端即可。

				?

									public static string readfile(string filepath) {

									  file file = new file(filepath);

									  inputstream input = null;

									  try {

									    input = new fileinputstream(file);

									  } catch (filenotfoundexception e) {

									    e.printstacktrace();

									  }

									  stringbuffer buffer = new stringbuffer();

									  byte[] bytes = new byte[1024];

									  try {

									    for (int n; (n = input.read(bytes)) != -1;) {

									      buffer.append(new string(bytes, 0, n, "utf8"));

									    }

									  } catch (ioexception e) {

									    e.printstacktrace();

									  }

									  return buffer.tostring();

									}

在富文本编辑器ckeditor中的显示效果：

java使用POI实现html和word相互转换

三.html转换为word

实现思路就是先把html中的所有图片元素提取出来，依次替换为带序号的占位变量“${imgreplace1}”、“${imgreplace2}”……（每张图片对应一个序号），之后生成对应的doc文件（之前试过直接生成docx文件发现打不开，这个问题尚未找到好的解决方法），我们将其另存为docx文件，之后就可以把这些变量替换为图片了：

				?

									public static string writewordfile(string content) {

									    string path = "d:/wordfile";

									    map<string, object> param = new hashmap<string, object>();

									    if (!"".equals(path)) {

									      file filedir = new file(path);

									      if (!filedir.exists()) {

									        filedir.mkdirs();

									      }

									      content = htmlutils.htmlunescape(content);

									      list<hashmap<string, string>> imgs = getimgstr(content);

									      int count = 0;

									      for (hashmap<string, string> img : imgs) {

									        count++;

									        //处理替换以“/>”结尾的img标签

									        content = content.replace(img.get("img"), "${imgreplace" + count + "}");

									        //处理替换以“>”结尾的img标签

									        content = content.replace(img.get("img1"), "${imgreplace" + count + "}");

									        map<string, object> header = new hashmap<string, object>();

									        try {

									          file filepath = new file(resourceutils.geturl("classpath:").getpath());

									          string imagepath = filepath.getabsolutepath() + "\\static\\";

									          imagepath += img.get("src").replaceall("/", "\\\\");

									          //如果没有宽高属性，默认设置为400*300

									          if(img.get("width") == null || img.get("height") == null) {

									            header.put("width", 400);

									            header.put("height", 300);

									          }else {

									            header.put("width", (int) (double.parsedouble(img.get("width"))));

									            header.put("height", (int) (double.parsedouble(img.get("height"))));

									          }

									          header.put("type", "jpg");

									          header.put("content", officeutil.inputstream2bytearray(new fileinputstream(imagepath), true));

									        } catch (filenotfoundexception e) {

									          e.printstacktrace();

									        }

									        param.put("${imgreplace" + count + "}", header);

									      }

									      try {

									        // 生成doc格式的word文档，需要手动改为docx

									        byte by[] = content.getbytes("utf-8");

									        bytearrayinputstream bais = new bytearrayinputstream(by);

									        poifsfilesystem poifs = new poifsfilesystem();

									        directoryentry directory = poifs.getroot();

									        documententry documententry = directory.createdocument("worddocument", bais);

									        fileoutputstream ostream = new fileoutputstream("d:\\wordfile\\temp.doc");

									        poifs.writefilesystem(ostream);

									        bais.close();

									        ostream.close();

									        // 临时文件（手动改好的docx文件）

									        customxwpfdocument doc = officeutil.generateword(param, "d:\\wordfile\\temp.docx");

									        //最终生成的带图片的word文件

									        fileoutputstream fopts = new fileoutputstream("d:\\wordfile\\final.docx");

									        doc.write(fopts);

									        fopts.close();

									      } catch (exception e) {

									        e.printstacktrace();

									      }

									    }

									    return "d:/wordfile/final.docx";

									  }

									  //获取html中的图片元素信息

									  public static list<hashmap<string, string>> getimgstr(string htmlstr) {

									    list<hashmap<string, string>> pics = new arraylist<hashmap<string, string>>();

									    document doc = jsoup.parse(htmlstr);

									    elements imgs = doc.select("img");

									    for (element img : imgs) {

									      hashmap<string, string> map = new hashmap<string, string>();

									      if(!"".equals(img.attr("width"))) {

									        map.put("width", img.attr("width").substring(0, img.attr("width").length() - 2));

									      }

									      if(!"".equals(img.attr("height"))) {

									        map.put("height", img.attr("height").substring(0, img.attr("height").length() - 2));

									      }

									      map.put("img", img.tostring().substring(0, img.tostring().length() - 1) + "/>");

									      map.put("img1", img.tostring());

									      map.put("src", img.attr("src"));

									      pics.add(map);

									    }

									    return pics;

									  }

officeutil工具类。之前发现网上的写法只支持一张图片的替换，多张图片就会报错：因为插入图片时会向段落中新增run，processparagraphs方法中正在遍历的runs列表大小随之改变，于是抛出ConcurrentModificationException，这和我们在循环list的过程中删除元素会报异常是同一个道理。解决方法就是复制一个新的ArrayList进行循环即可：

				?

									package com.example.demo.util; 

									import java.io.bytearrayinputstream;

									import java.io.fileinputstream;

									import java.io.ioexception;

									import java.io.inputstream;

									import java.util.arraylist;

									import java.util.iterator;

									import java.util.list;

									import java.util.map;

									import java.util.map.entry;

									import org.apache.poi.poixmldocument;

									import org.apache.poi.hwpf.extractor.wordextractor;

									import org.apache.poi.openxml4j.opc.opcpackage;

									import org.apache.poi.xwpf.usermodel.xwpfparagraph;

									import org.apache.poi.xwpf.usermodel.xwpfrun;

									import org.apache.poi.xwpf.usermodel.xwpftable;

									import org.apache.poi.xwpf.usermodel.xwpftablecell;

									import org.apache.poi.xwpf.usermodel.xwpftablerow; 

									/** 

									 * 适用于word 2007

									 */

									public class officeutil { 

									  /** 

									   * 根据指定的参数值、模板，生成 word 文档 

									   * @param param 需要替换的变量 

									   * @param template 模板 

									   */

									  public static customxwpfdocument generateword(map<string, object> param, string template) { 

									    customxwpfdocument doc = null;

									    try { 

									      opcpackage pack = poixmldocument.openpackage(template); 

									      doc = new customxwpfdocument(pack); 

									      if (param != null && param.size() > 0) { 

									        //处理段落 

									        list<xwpfparagraph> paragraphlist = doc.getparagraphs(); 

									        processparagraphs(paragraphlist, param, doc); 

									        //处理表格 

									        iterator<xwpftable> it = doc.gettablesiterator(); 

									        while (it.hasnext()) {

									          xwpftable table = it.next(); 

									          list<xwpftablerow> rows = table.getrows(); 

									          for (xwpftablerow row : rows) { 

									            list<xwpftablecell> cells = row.gettablecells(); 

									            for (xwpftablecell cell : cells) { 

									              list<xwpfparagraph> paragraphlisttable = cell.getparagraphs(); 

									              processparagraphs(paragraphlisttable, param, doc); 

									            } 

									          } 

									        } 

									      } 

									    } catch (exception e) { 

									      e.printstacktrace(); 

									    } 

									    return doc; 

									  } 

									  /** 

									   * 处理段落 

									   * @param paragraphlist 

									   */

									  public static void processparagraphs(list<xwpfparagraph> paragraphlist,map<string, object> param,customxwpfdocument doc){ 

									    if(paragraphlist != null && paragraphlist.size() > 0){ 

									      for(xwpfparagraph paragraph:paragraphlist){

									        //poi转换过来的行间距过大，需要手动调整

									        if(paragraph.getspacingbefore() >= 1000 || paragraph.getspacingafter() > 1000) {

									          paragraph.setspacingbefore(0);

									          paragraph.setspacingafter(0);

									        }

									        //设置word中左右间距

									        paragraph.setindentationleft(0);

									        paragraph.setindentationright(0);

									        list<xwpfrun> runs = paragraph.getruns();

									        //加了图片，修改了paragraph的runs的size，所以循环不能使用runs

									        list<xwpfrun> allruns = new arraylist<xwpfrun>(runs);

									        for (xwpfrun run : allruns) {

									          string text = run.gettext(0); 

									          if(text != null){

									            boolean issettext = false; 

									            for (entry<string, object> entry : param.entryset()) { 

									              string key = entry.getkey(); 

									              if(text.indexof(key) != -1){ 

									                issettext = true; 

									                object value = entry.getvalue(); 

									                if (value instanceof string) {//文本替换 

									                  text = text.replace(key, value.tostring()); 

									                } else if (value instanceof map) {//图片替换 

									                  text = text.replace(key, ""); 

									                  map pic = (map)value; 

									                  int width = integer.parseint(pic.get("width").tostring()); 

									                  int height = integer.parseint(pic.get("height").tostring()); 

									                  int pictype = getpicturetype(pic.get("type").tostring()); 

									                  byte[] bytearray = (byte[]) pic.get("content"); 

									                  bytearrayinputstream byteinputstream = new bytearrayinputstream(bytearray); 

									                  try { 

									                    string blipid = doc.addpicturedata(byteinputstream,pictype); 

									                    doc.createpicture(blipid,doc.getnextpicnamenumber(pictype), width, height,paragraph);

									                  } catch (exception e) { 

									                    e.printstacktrace(); 

									                  } 

									                } 

									              } 

									            } 

									            if(issettext){ 

									              run.settext(text,0); 

									            } 

									          } 

									        } 

									      } 

									    } 

									  } 

									  /** 

									   * 根据图片类型，取得对应的图片类型代码 

									   * @param pictype 

									   * @return int 

									   */

									  private static int getpicturetype(string pictype){ 

									    int res = customxwpfdocument.picture_type_pict; 

									    if(pictype != null){ 

									      if(pictype.equalsignorecase("png")){ 

									        res = customxwpfdocument.picture_type_png; 

									      }else if(pictype.equalsignorecase("dib")){ 

									        res = customxwpfdocument.picture_type_dib; 

									      }else if(pictype.equalsignorecase("emf")){ 

									        res = customxwpfdocument.picture_type_emf; 

									      }else if(pictype.equalsignorecase("jpg") || pictype.equalsignorecase("jpeg")){ 

									        res = customxwpfdocument.picture_type_jpeg; 

									      }else if(pictype.equalsignorecase("wmf")){ 

									        res = customxwpfdocument.picture_type_wmf; 

									      } 

									    } 

									    return res; 

									  } 

									  /** 

									   * 将输入流中的数据写入字节数组 

									   * @param in 

									   * @return 

									   */

									  public static byte[] inputstream2bytearray(inputstream in,boolean isclose){ 

									    byte[] bytearray = null; 

									    try { 

									      int total = in.available(); 

									      bytearray = new byte[total]; 

									      in.read(bytearray); 

									    } catch (ioexception e) { 

									      e.printstacktrace(); 

									    }finally{ 

									      if(isclose){ 

									        try { 

									          in.close(); 

									        } catch (exception e2) { 

									          system.out.println("关闭流失败"); 

									        } 

									      } 

									    } 

									    return bytearray; 

									  } 

									}

我认为之所以word2003不支持图片替换，主要是处理2003版本的HWPFDocument类被声明为了final，我们无法继承它来扩展方法。而处理2007版本的类为XWPFDocument，是可以继承的，通过继承XWPFDocument并在子类中添加createpicture方法即可实现图片替换，以下为对应的customxwpfdocument类：

				?

									package com.example.demo.util;  

									import java.io.ioexception; 

									import java.io.inputstream; 

									import org.apache.poi.openxml4j.opc.opcpackage; 

									import org.apache.poi.xwpf.usermodel.xwpfdocument; 

									import org.apache.poi.xwpf.usermodel.xwpfparagraph; 

									import org.apache.xmlbeans.xmlexception; 

									import org.apache.xmlbeans.xmltoken; 

									import org.openxmlformats.schemas.drawingml.x2006.main.ctnonvisualdrawingprops; 

									import org.openxmlformats.schemas.drawingml.x2006.main.ctpositivesize2d; 

									import org.openxmlformats.schemas.drawingml.x2006.wordprocessingdrawing.ctinline; 

									/** 

									 * 自定义 xwpfdocument，并重写 createpicture()方法 

									 */

									public class customxwpfdocument extends xwpfdocument {  

									  public customxwpfdocument(inputstream in) throws ioexception {  

									    super(in);  

									  }  

									  public customxwpfdocument() {  

									    super();  

									  }  

									  public customxwpfdocument(opcpackage pkg) throws ioexception {  

									    super(pkg);  

									  }  

									  /** 

									   * @param ind 

									   * @param width 宽 

									   * @param height 高 

									   * @param paragraph 段落 

									   */

									  public void createpicture(string blipid, int ind, int width, int height,xwpfparagraph paragraph) {  

									    final int emu = 9525;  

									    width *= emu;  

									    height *= emu;  

									    ctinline inline = paragraph.createrun().getctr().addnewdrawing().addnewinline();  

									    string picxml = ""

									        + "<a:graphic xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\">"

									        + "  <a:graphicdata uri=\"http://schemas.openxmlformats.org/drawingml/2006/picture\">"

									        + "   <pic:pic xmlns:pic=\"http://schemas.openxmlformats.org/drawingml/2006/picture\">"

									        + "     <pic:nvpicpr>" + "      <pic:cnvpr id=\""

									        + ind  

									        + "\" name=\"generated\"/>"

									        + "      <pic:cnvpicpr/>"

									        + "     </pic:nvpicpr>"

									        + "     <pic:blipfill>"

									        + "      <a:blip r:embed=\""

									        + blipid  

									        + "\" xmlns:r=\"http://schemas.openxmlformats.org/officedocument/2006/relationships\"/>"

									        + "      <a:stretch>"

									        + "        <a:fillrect/>"

									        + "      </a:stretch>"

									        + "     </pic:blipfill>"

									        + "     <pic:sppr>"

									        + "      <a:xfrm>"

									        + "        <a:off x=\"0\" y=\"0\"/>"

									        + "        <a:ext cx=\""

									        + width  

									        + "\" cy=\""

									        + height  

									        + "\"/>"

									        + "      </a:xfrm>"

									        + "      <a:prstgeom prst=\"rect\">"

									        + "        <a:avlst/>"

									        + "      </a:prstgeom>"

									        + "     </pic:sppr>"

									        + "   </pic:pic>"

									        + "  </a:graphicdata>" + "</a:graphic>";  

									    inline.addnewgraphic().addnewgraphicdata();  

									    xmltoken xmltoken = null;  

									    try {  

									      xmltoken = xmltoken.factory.parse(picxml);  

									    } catch (xmlexception xe) {  

									      xe.printstacktrace();  

									    }  

									    inline.set(xmltoken);  

									    inline.setdistt(0);   

									    inline.setdistb(0);   

									    inline.setdistl(0);   

									    inline.setdistr(0);   

									    ctpositivesize2d extent = inline.addnewextent();  

									    extent.setcx(width);  

									    extent.setcy(height);  

									    ctnonvisualdrawingprops docpr = inline.addnewdocpr();   

									    docpr.setid(ind);   

									    docpr.setname("图片" + ind);   

									    docpr.setdescr("测试");  

									  }  

									}