Java如何将html转换成word

码拜

10年 ago

1、有一个比较复杂的页面，能不能直接转成 Word？因为页面涉及到一些样式、图片等内容。 2、如果不能直接转的话，能不能提取 HTML 中 table 里面的数据，然后转成 Word 里面的表格？这样我可以分段来生成这个 Word 文档。
10分	本人最近一个项目也是这个需求，谈下我们目前的解决方法：首先html要有该有的格式，要注意编码什么的。 html = html.replaceAll("<html>", "<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />"); String fileName = yaxx.getYamc()+".doc"; final String userAgent = request.getHeader("USER-AGENT"); try { InputStream input = new ByteArrayInputStream(html.getBytes()); BufferedInputStream br = new BufferedInputStream(input); byte[] buf = new byte[1024]; int len = 0; response.reset(); // 非常重要 // 纯下载方式 response.setContentType("application/x-msdownload"); if (-1 != userAgent.indexOf("Firefox")) {//Firefox fileName = "=?UTF-8?B?" + (new String(org.apache.commons.codec.binary.Base64.encodeBase64(fileName.getBytes("UTF-8"))))+ "?="; }else if (-1 != userAgent.indexOf("Chrome")) {//Chrome fileName = new String(fileName.getBytes(), "ISO8859-1"); } else {//IE7+ fileName = java.net.URLEncoder.encode(fileName, "UTF-8"); fileName = StringUtils.replace(fileName, "+", "%20");//替换空格 } response.setHeader("Content-Disposition", "attachment; filename="+fileName); OutputStream out = response.getOutputStream(); while ((len = br.read(buf)) > 0) out.write(buf, 0, len); br.close(); out.close(); 后面直接下载即可，至于图片的话，虽然能正常显示，但是并不是图片，而是路径。
10分	页面上的图片一般都是通过 img 标签引用的，可以解析其链接（src 属性），通过 URL 下载图片到本地，再通过 Word 的 API 将图片写入 Word。
	引用 1 楼 Acana_Dendi 的回复: 本人最近一个项目也是这个需求，谈下我们目前的解决方法：首先html要有该有的格式，要注意编码什么的。 html = html.replaceAll("<html>", "<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />"); String fileName = yaxx.getYamc()+".doc"; final String userAgent = request.getHeader("USER-AGENT"); try { InputStream input = new ByteArrayInputStream(html.getBytes()); BufferedInputStream br = new BufferedInputStream(input); byte[] buf = new byte[1024]; int len = 0; response.reset(); // 非常重要 // 纯下载方式 response.setContentType("application/x-msdownload"); if (-1 != userAgent.indexOf("Firefox")) {//Firefox fileName = "=?UTF-8?B?" + (new String(org.apache.commons.codec.binary.Base64.encodeBase64(fileName.getBytes("UTF-8"))))+ "?="; }else if (-1 != userAgent.indexOf("Chrome")) {//Chrome fileName = new String(fileName.getBytes(), "ISO8859-1"); } else {//IE7+ fileName = java.net.URLEncoder.encode(fileName, "UTF-8"); fileName = StringUtils.replace(fileName, "+", "%20");//替换空格 } response.setHeader("Content-Disposition", "attachment; filename="+fileName); OutputStream out = response.getOutputStream(); while ((len = br.read(buf)) > 0) out.write(buf, 0, len); br.close(); out.close(); 后面直接下载即可，至于图片的话，虽然能正常显示，但是并不是图片，而是路径。 3Q 有时间试试。
10分	楼主请参考http://llhdf.iteye.com/blog/376084
	分成两部分处理： 1：apache htmlpraser解析html，取里面的文本框，table，图片等信息： public class T extends TestCase { private static final Logger logger = Logger.getLogger(T.class); /* * 测试ObjectFindVisitor的用法 / public void testImageVisitor() { try { ImageTag imgLink; ObjectFindingVisitor visitor = new ObjectFindingVisitor(ImageTag.class); Parser parser = new Parser(); parser.setURL("http://www.google.com"); parser.setEncoding(parser.getEncoding()); parser.visitAllNodesWith(visitor); Node[] nodes = visitor.getTags(); for (int i = 0; i < nodes.length; i++) { imgLink = (ImageTag) nodes[i]; logger.fatal("testImageVisitor() ImageURL = " + imgLink.getImageURL()); logger.fatal("testImageVisitor() ImageLocation = " + imgLink.extractImageLocn()); logger.fatal("testImageVisitor() SRC = " + imgLink.getAttribute("SRC")); } } catch (Exception e) { e.printStackTrace(); } } / * 测试TagNameFilter用法 / public void testNodeFilter() { try { NodeFilter filter = new TagNameFilter("IMG"); Parser parser = new Parser(); parser.setURL("http://www.google.com"); parser.setEncoding(parser.getEncoding()); NodeList list = parser.extractAllNodesThatMatch(filter); for (int i = 0; i < list.size(); i++) { logger.fatal("testNodeFilter() " + list.elementAt(i).toHtml()); } } catch (Exception e) { e.printStackTrace(); } } } 2：iText把解析出来的数据填入word public class CreateWordDemo { public void createDocContext(String file) throws DocumentException, IOException { // 设置纸张大小 Document document = new Document(PageSize.A4); // 建立一个书写器(Writer)与document对象关联，通过书写器(Writer)可以将文档写入到磁盘中 RtfWriter2.getInstance(document, new FileOutputStream(file)); document.open(); // 设置中文字体 BaseFont bfChinese = BaseFont.createFont("STSongStd-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED); // 标题字体风格 Font titleFont = new Font(bfChinese, 12, Font.BOLD); // 正文字体风格 Font contextFont = new Font(bfChinese, 10, Font.NORMAL); Paragraph title = new Paragraph("标题"); // 设置标题格式对齐方式 title.setAlignment(Element.ALIGN_CENTER); title.setFont(titleFont); document.add(title); String 
contextString = "iText是一个能够快速产生PDF文件的java类库。" + " \n"// 换行 + "iText的java类对于那些要产生包含文本，" + "表格，图形的只读文档是很有用的。它的类库尤其与java Servlet有很好的给合。" + "使用iText与PDF能够使你正确的控制Servlet的输出。"; Paragraph context = new Paragraph(contextString); // 正文格式左对齐 context.setAlignment(Element.ALIGN_LEFT); context.setFont(contextFont); // 离上一段落（标题）空的行数 context.setSpacingBefore(5); // 设置第一行空的列数 context.setFirstLineIndent(20); document.add(context); //利用类FontFactory结合Font和Color可以设置各种各样字体样式 /* * Font.UNDERLINE 下划线，Font.BOLD 粗体 / Paragraph underline = new Paragraph("下划线的实现", FontFactory.getFont( FontFactory.HELVETICA_BOLDOBLIQUE, 18, Font.UNDERLINE, new Color(0, 0, 255))); document.add(underline); // 设置 Table 表格 Table aTable = new Table(3); int width[] = {25,25,50}; aTable.setWidths(width);//设置每列所占比例 aTable.setWidth(90); // 占页面宽度 90% aTable.setAlignment(Element.ALIGN_CENTER);//居中显示 aTable.setAlignment(Element.ALIGN_MIDDLE);//纵向居中显示 aTable.setAutoFillEmptyCells(true); //自动填满 aTable.setBorderWidth(1); //边框宽度 aTable.setBorderColor(new Color(0, 125, 255)); //边框颜色 aTable.setPadding(2);//衬距，看效果就知道什么意思了 aTable.setSpacing(3);//即单元格之间的间距 aTable.setBorder(2);//边框 //设置表头 /* * cell.setHeader(true);是将该单元格作为表头信息显示； * cell.setColspan(3);指定了该单元格占3列； * 为表格添加表头信息时，要注意的是一旦表头信息添加完了之后， * 必须调用 endHeaders()方法，否则当表格跨页后，表头信息不会再显示 / Cell haderCell = new Cell("表格表头"); haderCell.setHeader(true); haderCell.setColspan(3); aTable.addCell(haderCell); aTable.endHeaders(); Font fontChinese = new Font(bfChinese, 12, Font.NORMAL, Color.GREEN); Cell cell = new Cell(new Phrase("这是一个测试的 33 Table 数据", fontChinese )); cell.setVerticalAlignment(Element.ALIGN_TOP); cell.setBorderColor(new Color(255, 0, 0)); cell.setRowspan(2); aTable.addCell(cell); aTable.addCell(new Cell("#1")); aTable.addCell(new Cell("#2")); aTable.addCell(new Cell("#3")); aTable.addCell(new Cell("#4")); Cell cell3 = new Cell(new Phrase("一行三列数据", fontChinese )); cell3.setColspan(3); cell3.setVerticalAlignment(Element.ALIGN_CENTER); aTable.addCell(cell3); 
document.add(aTable); document.add(new Paragraph("\n")); //添加图片 Image img=Image.getInstance("d:\img01800.jpg"); img.setAbsolutePosition(0, 0); img.setAlignment(Image.RIGHT);//设置图片显示位置 img.scaleAbsolute(12,35);//直接设定显示尺寸 img.scalePercent(50);//表示显示的大小为原尺寸的50% img.scalePercent(25, 12);//图像高宽的显示比例 img.setRotation(30);//图像旋转一定角度 document.add(img); document.close(); } /** * @param args */ public static void main(String[] args) { CreateWordDemo word = new CreateWordDemo(); String file = "c:/demo1.doc"; try { word.createDocContext(file); } catch (DocumentException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } } cell.setRowspan(lineNum)–这个API处理跨行的情况