php实现将HTML页面转换成word并且保存的方法

发布时间:2022-04-30 发布网站:脚本宝典
脚本宝典收集整理的这篇文章主要介绍了php实现将HTML页面转换成word并且保存的方法,脚本宝典觉得挺不错的,现在分享给大家,也给大家做个参考。

本文实例讲述了PHP实现将HTML页面转换成word并且保存的方法,分享给大家供大家参考,具体如下:

这里需要使用到一个PHP的工具,叫:PHPWord。

生成Word的原理是,将一堆规定好了的XML压缩成一个zip包,并且把后缀名改成doc或者docx即可。

所以使用PHPWord,需要你的PHP环境安装zip.dll压缩扩展,我写了一个demo.

功能说明:

- 20150507 — HTML中的 `<p>` 标签和 `<ul>` 列表标签的获取
- 20150508 — 新增获取文章中的图片功能
- 20150509 — 新增行间距,并且过滤一下错误图片
- 20150514 — 新增表格处理,并且将代码改成面向对象
- 20150519 — 新增GD库处理网络图片

    _Time();
     $startMemory = $this->_memory();
     $this->url = $url;
     $UrlArr = parse_url($this->url);
     $this->host = $UrlArr["scheme"]."://".$UrlArr['host'];
     $this->currentDir = getcwd();
     $this->LinetextArr["table"] = array();
     $html = new simple_html_dom($this->url);
     $this->HttPRequestArr[] = $this->url;
     $this->HttpRequestTime++;
     foreach($html->find($this->Allowtag) as $key=>$value)
     {
     if($value->tag == "table")
     {
     $this->ParseTable($value,$this->LinetextArr["table"]);
     }
     else
     {
     $this->AnalysisHtmlDom($value);
     }
     $this->error[] = error_get_last();
     }
     $endTime = $this->_Time();
     $endMemory = $this->_memory();
     $this->expendTime = round(($endTime-$startTime),2); //微秒
     $this->exPEndmemory = round(($endMemory-$startMemory)/1000,2); //bytes
     $this->CreateWordDom();
     }
     /**
      * Return the current Unix timestamp with sub-second precision.
      *
      * @return float Seconds since the Unix epoch, including the fractional part.
      */
     private function _Time()
     {
         // microtime(true) returns the float directly, so the "usec sec" string
         // no longer has to be split and summed by hand.
         return microtime(true);
     }
     /**
      * Report how much memory is currently allocated to this PHP script.
      *
      * @return int Byte count as returned by memory_get_usage().
      */
     private function _memory()
     {
         $bytesInUse = memory_get_usage();
         return $bytesInUse;
     }
     /**
      * Collect the cell text of an HTML table into rows.
      *
      * A node whose first child is a table, tbody, thead, tfoot or tr is treated as a
      * container and each of its children is parsed recursively. Any other node is
      * treated as a row: every child becomes one cell, except children whose own first
      * child is a nested <table>, which are skipped.
      *
      * @param object $value Parsed HTML node; must expose firstChild(), children, tag and plaintext.
      * @param int    $i     Row index under which this node's cells are stored; sibling
      *                      nodes receive consecutive indexes starting from this value.
      * @param array  $Arr   Output rows, filled in place as
      *                      $Arr[row][] = array("tag" => ..., "text" => ...).
      *                      Taken by reference so the collected cells reach the caller;
      *                      callers must therefore pass a variable, not an expression.
      * **/
     private function ParseTable($value,$i,&$Arr)
     {
         $first = $value->firstChild();
         if($first && in_array($first->tag,array("table","tbody","thead","tfoot","tr"),true))
         {
             foreach($value->children as $child)
             {
                 // The child gets the current index, then the index advances for its next sibling.
                 $this->ParseTable($child,$i,$Arr);
                 $i++;
             }
             return;
         }
         foreach($value->children as $cell)
         {
             $cellFirst = $cell->firstChild();
             // Cells that wrap a nested table are not flattened into text.
             if($cellFirst && $cellFirst->tag == "table")
             {
                 continue;
             }
             $Arr[$i][] = array("tag"=>$cell->tag,"text"=>trim($cell->plaintext));
         }
     }
     /**
      * Walk an HTML node and append one entry per leaf element to $this->LinetextArr.
      *
      * Nodes with children are only descended into. Leaf nodes produce:
      *  - "a":   array("tag", "href", "text")
      *  - "img": array("tag", "src", "text", "width", "height"), added only when the
      *           image could be fetched by getImageFromNet()
      *  - other: array("tag", "text") with markup stripped from the inner text
      *
      * @param object $value Parsed HTML node; must expose has_child(), children, tag and
      *                      the attributes read below (href, src, alt, innertext).
      * **/
     private function AnalysisHtmlDom($value)
     {
         if($value->has_child())
         {
             foreach($value->children as $child)
             {
                 $this->AnalysisHtmlDom($child);
             }
             return;
         }
         $tmp = array();
         if($value->tag == "a")
         {
             $tmp = array("tag"=>$value->tag,"href"=>$value->href,"text"=>$value->innertext);
         }
         else if($value->tag == "img")
         {
             $src = $this->unescape($value->src);
             $UrlArr = parse_url($src);
             if(!isset($UrlArr['host']))
             {
                 // Host-less URL: prefix the page's scheme://host. Use the decoded $src (not the
                 // raw attribute) and make sure exactly one "/" separates host and path.
                 $src = $this->host."/".ltrim($src,"/");
                 $UrlArr = parse_url($src);
             }
             $src = $this->getImageFromNet($src,$UrlArr); // remote image: download it first
             if($src)
             {
                 $imgsArr = $this->GD($src);
                 $tmp = array("tag"=>$value->tag,"src"=>$src,"text"=>$value->alt,"width"=>$imgsArr['width'],"height"=>$imgsArr['height']);
             }
         }
         else
         {
             $tmp = array("tag"=>$value->tag,"text"=>strip_tags($value->innertext));
         }
         // A failed image download leaves $tmp empty; do not queue a placeholder entry.
         if(!empty($tmp))
         {
             $this->LinetextArr[] = $tmp;
         }
     }
     /**
      * Read an image's pixel dimensions and halve both of them when either side
      * is larger than 800.
      *
      * @param string $src Path of the image to inspect.
      * @return array array("width" => ..., "height" => ...)
      * **/
     private function GD($src)
     {
         list($w,$h) = getimagesize($src);
         $tooLarge = ($w > 800 || $h > 800);
         return array(
             "width"=>$tooLarge ? $w/2 : $w,
             "height"=>$tooLarge ? $h/2 : $h
         );
     }
     /**
      * Decode escaped characters in a string back to UTF-8 text.
      *
      * Handles, in order: percent-encoding (via rawurldecode), JavaScript-style
      * "%uXXXX" escapes, hexadecimal entities "&#xXXXX;" and decimal entities "&#NNNN;".
      * Anything else is passed through unchanged.
      *
      * @param string $str Text that may contain escapes.
      * @return string Decoded text.
      * **/
     public function unescape($str) {
         $str = rawurldecode($str);
         // Escapes must be real hex digits so ordinary text such as "50%uptime" is left alone;
         // the "s" flag lets the catch-all "." match newlines so they are not dropped.
         preg_match_all("/(?:%u[0-9a-fA-F]{4})|&#x[0-9a-fA-F]{4};|&#\d+;|.+/Us",$str,$r);
         $ar = $r[0];
         foreach($ar as $k=>$v) {
             if(substr($v,0,2) === "%u"){
                 // "%uXXXX": the last four characters are the UCS-2 code unit in hex.
                 $ar[$k] = iconv("UCS-2BE","UTF-8",pack("H4",substr($v,-4)));
             }
             elseif(substr($v,0,3) === "&#x"){
                 // "&#xXXXX;": hex digits sit between the "&#x" prefix and the ";".
                 $ar[$k] = iconv("UCS-2BE","UTF-8",pack("H4",substr($v,3,-1)));
             }
             elseif(substr($v,0,2) === "&#"){
                 // "&#NNNN;": decimal code point, packed as a big-endian 16-bit value.
                 $ar[$k] = iconv("UCS-2BE","UTF-8",pack("n",(int)substr($v,2,-1)));
             }
         }
         return join("",$ar);
     }
     /**
      * Download a remote image into a per-host directory under the working directory.
      *
      * The file is stored as <currentDir>/<host>/<md5 of URL path>.<ext>. Only the
      * extensions listed in $_supportedImageTypes are fetched.
      *
      * @param string      $Src    Absolute URL of the image.
      * @param array|false $UrlArr parse_url() result for $Src; "host" and "path" are required.
      * @return string|false Path of the saved file relative to the working directory
      *                      ("<host>/<name>"), or false when the URL is unusable, the
      *                      type is unsupported, or the download/write failed.
      * **/
     private function getImageFromNet($Src,$UrlArr)
     {
         if(!is_array($UrlArr) || !isset($UrlArr['host']) || !isset($UrlArr['path']))
         {
             return false;
         }
         // Property names are case-sensitive: the working directory is stored in $this->currentDir.
         $this->ImgDir = $this->currentDir."/".$UrlArr['host'];
         // Use the real (last) extension, case-insensitively, so "a.b.JPG" is recognised.
         $ext = strtolower(pathinfo($UrlArr['path'],PATHINFO_EXTENSION));
         $_supportedImageTypes = array('jpg','jpeg','gif','png','bmp','tif','tiff');
         if(!in_array($ext,$_supportedImageTypes,true))
         {
             return false;
         }
         $this->HttpRequestArr[] = $Src;
         $this->HttpRequestTime++;
         $file = file_get_contents($Src);
         if($file === false)
         {
             // Do not write an empty file or count it as a downloaded image.
             $this->error[] = error_get_last();
             return false;
         }
         $this->_mkdir(); // create the target directory, or record the error
         $imgName = md5($UrlArr['path']).".".$ext;
         if(file_put_contents($this->ImgDir."/".$imgName,$file) === false)
         {
             $this->error[] = error_get_last();
             return false;
         }
         $this->DownImg++;
         return $UrlArr['host']."/".$imgName;
     }
     /**
      * Create the image directory ($this->ImgDir) if it does not exist yet.
      *
      * On failure the last PHP error is appended to $this->error.
      * **/
     private function _mkdir()
     {
         if(is_dir($this->ImgDir))
         {
             return;
         }
         // The mode must be an octal integer; the string "7777" was cast to decimal 7777.
         // Recursive creation covers a missing parent, and the second is_dir() check
         // tolerates another process creating the directory in the meantime.
         if(!mkdir($this->ImgDir,0777,true) && !is_dir($this->ImgDir))
         {
             $this->error[] = error_get_last();
         }
     }
     /**
      * Build the Word document from the collected entries and hand it to downFile().
      *
      * The document starts with the statistics line from Details(). Then every entry of
      * $this->LinetextArr is rendered: "li" as a list item, "img" as a centred image,
      * "p" as a paragraph, and the special "table" key as a table with one row per
      * collected row. Entries with any other tag are not rendered.
      * **/
     private function CreateWordDom()
     {
         $PHPWord = new PHPWord();
         $PHPWord->setDefaultFontName('宋体');
         $PHPWord->setDefaultFontSize("11");
         $styleTable = array('borderSize'=>6,'borderColor'=>'006699','cellMargin'=>120);
         $paragraphStyle = array('spacing'=>120);
         // New portrait section
         $section = $PHPWord->createSection();
         $section->addText($this->Details(),array(),$paragraphStyle);
         // Render the collected entries
         foreach($this->LinetextArr as $key=>$lineArr)
         {
             if(isset($lineArr['tag']))
             {
                 if($lineArr['tag'] == "li")
                 {
                     $section->addListItem($lineArr['text'],0,array('spacing'=>120));
                 }
                 else if($lineArr['tag'] == "img")
                 {
                     $section->addImage($lineArr['src'],array('width'=>$lineArr['width'],'height'=>$lineArr['height'],'align'=>'center'));
                 }
                 else if($lineArr['tag'] == "p")
                 {
                     // Spacing is a paragraph style, so it goes in the third argument,
                     // matching the Details() line above.
                     $section->addText($lineArr['text'],array(),$paragraphStyle);
                 }
             }
             // Strict comparison: on PHP < 8 the integer key 0 loosely equals "table".
             else if($key === "table")
             {
                 $PHPWord->addTableStyle('myOwnTableStyle',$styleTable);
                 $table = $section->addTable("myOwnTableStyle");
                 foreach($lineArr as $tr)
                 {
                     $table->addRow();
                     foreach($tr as $td)
                     {
                         $table->addCell(2000)->addText($td['text']);
                     }
                 }
             }
         }
         $this->downFile($PHPWord);
     }
     /**
      * Build the one-line statistics summary placed at the top of the document.
      *
      * Interpolates the request count, downloaded image count, elapsed time and
      * memory figure held on this object.
      *
      * @return string The summary sentence.
      */
     public function Details()
     {
         return "一共请求:{$this->HttpRequestTime}次,共下载的图片有{$this->DownImg}张,并且下载完成大约使用时间:{$this->expendTime}秒,整个程序执行大约消耗内存是:{$this->expendmemory}KB,";
     }
     /**
      * Save the document to disk and stream it to the client as a download.
      *
      * When $this->filename is empty it defaults to "<host of $this->url>.docx".
      *
      * @param object $PHPWord The PHPWord document to write.
      */
     public function downFile($PHPWord)
     {
         if(empty($this->filename))
         {
             $UrlArr = parse_url($this->url);
             $this->filename = $UrlArr['host'].".docx";
         }
         // Save File
         $objWriter = PHPWord_IOFactory::createWriter($PHPWord,'Word2007');
         $objWriter->save($this->filename);
         // Only the base name goes to the client; quotes and line breaks are removed
         // so the value cannot break out of the quoted header parameter.
         $downloadName = str_replace(array('"',"\r","\n"),"",basename($this->filename));
         header("Pragma: public");
         header("Expires: 0");
         header("Cache-Control: must-revalidate,post-check=0,pre-check=0");
         // replace=false: header() would otherwise discard the Cache-Control line above.
         header("Cache-Control: public",false);
         header("Content-Description: File transfer");
         // The Word2007 writer emits OOXML (.docx), not the legacy binary .doc format.
         header('Content-Type: application/vnd.openxmlformats-officedocument.wordprocessingml.document');
         // Force the download
         header('Content-Disposition: attachment; filename="'.$downloadName.'"');
         if(is_file($this->filename))
         {
             header("Content-Length: ".filesize($this->filename));
         }
         if(readfile($this->filename) === false)
         {
             // Record the failure instead of silencing it with "@".
             $this->error[] = error_get_last();
         }
     }
    }
    

脚本宝典总结

以上是脚本宝典为你收集整理的php实现将HTML页面转换成word并且保存的方法全部内容,希望文章能够帮你解决php实现将HTML页面转换成word并且保存的方法所遇到的问题。

如果觉得脚本宝典网站内容还不错,欢迎将脚本宝典推荐好友。

本图文内容来源于网友网络收集整理提供,作为学习参考使用,版权属于原作者。
如您有任何意见或建议可联系处理。小编QQ:384754419,请注明来意。