PHP制作百度词典查词采集器_php教程

上一篇: 新浪SAE搭建PHP项目教程下一篇:php+mysqli事务控制实现银行转账...

PHP制作百度词典查词采集器

发布时间：2022-04-30 发布网站：脚本宝典

脚本宝典收集整理的这篇文章主要介绍了PHP制作百度词典查词采集器，脚本宝典觉得挺不错的，现在分享给大家，也给大家做个参考。

PHP制作百度词典查词采集器

百度dict 采集样本

写的采集百度dict词典翻译后的所有结果数据，当然附带了13.5w单词库和采集简单的案例，这里我把写出的主要类dict.class.php放出来，项目地址http://github.com/widuu/baidu_dict，有需要的直接fork就可以了~么么哒，这东西用的人很少，所以有用的兄弟拿走了哈~

PHP;">
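    /*
     * Result array built by content():
     *          array(
     *              "symbol" => phonetic symbols
     *              "pro"    => pronunciation
     *              "example"=> example sentences
     *              "explain"=> concise definitions
     *              "synonym"=> synonyms and antonyms
     *              "phrase" => phrase array
     *          )
     *
     */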
    /**
     * Look up a word and gather every section scraped from its dictionary page.
     *
     * Stores the word on the instance, then runs each section parser in the
     * order the keys appear in the returned array.
     *
     * @param string $word Word to look up.
     * @return array Associative array with the keys "symbol", "pro",
     *               "example", "explain", "synonym" and "phrase"; each entry
     *               holds whatever the matching private parser returned.
     */
    public function content($word){
        $this->word = $word;

        // Array entries are evaluated top to bottom, so the parsers run in
        // the listed order. Pronounced() is spelled exactly as it is declared.
        return array(
            "symbol"  => $this->Pronounced(), // phonetic symbols
            "pro"     => $this->getSay(),     // pronunciation
            "example" => $this->getExample(), // example sentences
            "explain" => $this->getExplain(), // concise definitions
            "synonym" => $this->getSynonym(), // synonyms and antonyms
            "phrase"  => $this->getPhrase()   // phrases
        );
    }
/**
 * Fetch the Baidu dictionary result page for the current word.
 *
 * Sends a cURL GET request to dict.baidu.com with a desktop Firefox user
 * agent, follows redirects and gives up after 30 seconds.
 *
 * @return string|false Response body, or false when the transfer failed
 *                      (the cURL error text is echoed in that case).
 */
private function getContent(){
    $useragent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0";
    // The word is placed in a query string, so it has to be percent-encoded
    // (phrases contain spaces, and "&" or "#" would otherwise cut the URL).
    $url = "http://dict.baidu.com/s?wd=".urlencode($this->word);

    $ch = curl_init();
    curl_setopt($ch,CURLOPT_URL,$url);
    curl_setopt($ch,CURLOPT_USERAGENT,$useragent);
    // Constant names are case-sensitive: CURLOPT_RETURNTRANSFER makes
    // curl_exec() return the body instead of printing it.
    curl_setopt($ch,CURLOPT_RETURNTRANSFER,true);
    curl_setopt($ch,CURLOPT_FOLLOWLOCATION,1);
    curl_setopt($ch,CURLOPT_HTTPGET,1);
    // curl_setopt() accepts one option per call, so the two options that were
    // fused into a single call are set separately.
    // NOTE(review): AUTOREFERER is assumed to be meant as enabled - confirm.
    curl_setopt($ch,CURLOPT_AUTOREFERER,1);
    curl_setopt($ch,CURLOPT_HEADER,0);
    curl_setopt($ch,CURLOPT_TIMEOUT,30);

    $result = curl_exec($ch);
    // Query the handle that actually performed the transfer.
    if (curl_errno($ch)) {
        echo 'Errno'.curl_error($ch);
    }
    curl_close($ch);
    return $result;
}


/**
 * Extract the phonetic symbols from the dictionary page.
 *
 * Captures the text between each "EN-US"> marker and the following </b>.
 *
 * @return array array('en' => first capture, 'us' => second capture); per the
 *               original note these are the British and American forms. An
 *               entry is null when the page did not contain that capture.
 */
private function Pronounced(){
    $data = $this -> getContent();
    preg_match_all("/\"EN\-US\"\>(.*)\<\/b\>/Ui",$data,$pronounced);

    // Guard each index: a page without (or with only one) transcription must
    // not raise undefined-offset warnings.
    return array(
        'en' => isset($pronounced[1][0]) ? $pronounced[1][0] : null,
        'us' => isset($pronounced[1][1]) ? $pronounced[1][1] : null
    );
}

/**
 * Extract the pronunciation resources from the dictionary page.
 *
 * Captures the value of every url="..." attribute in the page.
 * NOTE(review): presumably these are the audio clip addresses - confirm
 * against a live page.
 *
 * @return array array('en' => first capture, 'us' => second capture); per the
 *               original note these are the British and American forms. An
 *               entry is null when the page did not contain that capture.
 */
private function getSay(){
    $data = $this -> getContent();
    // The original call was truncated (no subject, no result variable);
    // it is completed on the model of Pronounced().
    preg_match_all("/url=\"(.*)\"/Ui",$data,$pronounced);

    return array(
        'en' => isset($pronounced[1][0]) ? $pronounced[1][0] : null,
        'us' => isset($pronounced[1][1]) ? $pronounced[1][1] : null
    );
}

/**
 * Extract the example sentences embedded in the page's JavaScript.
 *
 * Reads the nested array literal assigned to `example_data`, joins the quoted
 * tokens of every inner group with spaces, and groups the resulting strings
 * in pairs.
 * NOTE(review): each pair is presumably (sentence, translation) - confirm
 * against a live page.
 *
 * @return array List of arrays with up to two strings each; empty when the
 *               page has no example data.
 */
private function getExample(){
    $data = $this -> getContent();

    // The page content is the subject and $example receives the captures.
    // The JavaScript keyword is lower-case and the pattern is case-sensitive.
    preg_match_all("/var example_data = (.*)\]\;/Us",$data,$example);
    if(empty($example[1][0])){
        return array();
    }

    // Normalise the leading brackets so every top-level entry starts with "[[[".
    $data1 = "[[[".ltrim($example[1][0],"[");
    $data2 = explode("[[[",$data1);

    $str = "";
    foreach($data2 as $value){
        $data3 = explode("[[","[[".$value);
        foreach ($data3 as $v) {
            // First quoted string of every bracketed element in this group.
            preg_match_all("/\[\"(.*)\",/Us","[".$v,$match);
            if(!empty($match[1])){
                // Separator first: the reversed argument order is rejected by PHP 8.
                $str .= implode(" ",$match[1])."@";
            }
        }
    }

    $data4 = trim($str,"@");
    if($data4 === ""){
        return array();
    }
    $data5 = explode("@",$data4);
    return array_chunk($data5,2);
}

/**
 * Extract the concise definitions section of the page.
 *
 * Works on the markup between id="en-simple-means" and the following
 * <div class="source"> element.
 *
 * @return array array(
 *                 "x" => array(heading => text) taken from each
 *                        <p><h3>..</h3><span>..</span></p> entry (per the
 *                        original note, the heading is the part of speech),
 *                 "b" => array(tag => word) taken from each
 *                        <span>tag：<a ..>word</a></span> entry (per the
 *                        original note, the related forms)
 *               ). Both lists are empty when the section is missing.
 */
private function getExplain(){
    $data = $this -> getContent();

    // The page content is the subject; $explain receives the captures.
    preg_match_all("/id\=\"en\-simple\-means\"\>(.*)\<div(\s+)class\=\"source\"\>/Us",$data,$explain);
    if(empty($explain[1][0])){
        return array("x" => array(),"b" => array());
    }
    $r_data = $explain[1][0];

    preg_match_all("/\<p\>\<h3\>(?P<adj>.*)\<\/h3\>\<span\>(?P<name>.*)\<\/span\>\<\/p\>/Us",$r_data,$a_data);
    // NOTE(review): the subject was missing here; the extracted section is
    // assumed to be the intended input, as for the pattern above - confirm.
    preg_match_all("/\<span\>(?P<tag>[^\>]+)\：\<a(\s+)href\=\"(.*)\"\>(?P<word>.*)\<\/a\>\<\/span\>/Us",$r_data,$b_data);

    $result = array();
    foreach ($a_data["adj"] as $key => $value) {
        $result[$value] = $a_data["name"][$key];
    }

    $word_b = array();
    foreach ($b_data["tag"] as $key => $value) {
        $word_b[$value] = strip_tags($b_data["word"][$key]);
    }

    return array("x" => $result,"b" => $word_b);
}


/**
 * Extract the synonym / antonym section of the page.
 *
 * Works on the markup between id="en-syn-ant" and the following
 * <div class="source"> element, split on "</dl>".
 *
 * @return array Nested array indexed by the position of the "</dl>"-delimited
 *               part (per the original note, 0 = synonyms and 1 = antonyms),
 *               then by the heading captured from <h3>, then by the <p> title
 *               of each list item; the value is the item's remaining text
 *               with tags removed. Empty when the section is missing.
 */
private function getSynonym(){
    $data = $this -> getContent();

    // The page content is the subject, and the closing ">" of the div was
    // stored as a literal HTML entity that could never match real markup.
    preg_match_all("/id=\"en\-syn\-ant\"\>(.*)<div(\s+)class\=\"source\"\>/Us",$data,$synonym);
    if(empty($synonym[1][0])){
        return array();
    }

    $content = $synonym[1][0];
    $data1 = explode("</dl>",$content);
    $result = array();
    $data2 = array();
    foreach ($data1 as $key => $value) {
        preg_match_all("/\<h3\>(?P<adj>.*)\&nbsp\;\<\/h3\>\<\/div\>\<div(\s+)class\=\"syn\-ant\-list\"\>\<ul\>(?<content>.*)\<\/ul\>/Us",$value,$r_data);
        $data2[$key]["adj"] = $r_data["adj"];
        $data2[$key]["content"] = $r_data["content"];
    }

    foreach ($data2 as $key => $value) {
        foreach ($value["content"] as $k => $v) {
            if(empty($v)){
                continue;
            }
            preg_match_all("/\<li\>\<p\>(?P<title>.*)\<\/p\>(?P<value>.*)\<\/li>/Us",$v,$v_data);
            foreach ($v_data['title'] as $m => $d) {
                // The pattern here was the empty regex "<>", which puts a space
                // between every character.
                // NOTE(review): it presumably lost a "</a>" body, i.e. closing
                // anchors become spaces so adjacent linked words stay separated
                // once the tags are stripped - confirm against a live page.
                $text = strip_tags(preg_replace("/<\/a>/"," ",$v_data["value"][$m]));
                $result[$key][$value["adj"][$k]][$d] = $text;
            }
        }
    }

    return $result;
}
/**
 * Extract the phrases section of the page.
 *
 * Works on the markup between id="en-phrase" and the following
 * <div class="source"> element, split into entries on "</dd>" and into
 * fields on "</p>". The first field of an entry is the phrase and the second
 * its meaning; any further fields are grouped in pairs and nested under the
 * same phrase.
 *
 * @return array array(phrase => meaning) for simple entries, or
 *               array(phrase => array(previous value, array(extra fields)))
 *               for entries with additional fields. Empty when the section
 *               is missing.
 */
private function getPhrase(){
    $num = self::$num;
    $data = $this -> getContent();

    // The page content is the subject; $phrase receives the captures.
    preg_match_all("/id=\"en\-phrase\"\>(.*)\<div class\=\"source\"\>/Us",$data,$phrase);
    if(empty($phrase[1][0])){
        return array();
    }

    $entries = explode("</dd>",$phrase[1][0]);
    // NOTE(review): this skips the first $num entries. If $num is meant as a
    // display limit the call should be array_slice($entries,0,$num) - confirm
    // against the declaration of self::$num.
    $data1 = array_slice($entries,$num);

    $result = array();
    foreach ($data1 as $entry) {
        $data2 = explode("</p>",$entry);
        $n = count($data2);
        if($n < 2){
            // Trailing fragment without a phrase/meaning pair.
            continue;
        }
        if($n<=3){
            // NOTE(review): the search string was empty (a no-op); it is assumed
            // to have been the "&nbsp;" entity lost in transit - confirm.
            $result[str_replace("&nbsp;","",strip_tags($data2[0]))] = strip_tags($data2[1]);
        }else{
            // Offsets restored: without the leading 0 the first slice was a
            // subset of the second, array_diff() was always empty and the
            // loop below could never run. Fields 0 and 1 are phrase and
            // meaning, matching the simple branch above.
            $data3 = array_slice($data2,0,$n-1);
            $data4 = array_slice($data2,0,2);
            $res = array_diff($data3,$data4);
            $data5 = array_chunk($res,2);

            // str_replace() needs search, replacement and subject.
            $key_value = trim(str_replace("&nbsp;","",strip_tags($data4[0])));
            $result[$key_value] = strip_tags($data4[1]);

            // Dedicated loop variables so the outer iteration is not shadowed.
            foreach ($data5 as $pair) {
                foreach ($pair as $k => $v) {
                    $pair[$k] = strip_tags($v);
                }
                // The key was assigned just above, so it always exists here.
                $result[$key_value] = array($result[$key_value],$pair);
            }
        }
    }
    return $result;
}

/**
 * Turn a value into an escaped, var_export()-formatted string.
 *
 * @param  array  $data       Value to convert.
 * @param  bool   $isformdata When truthy (default 1) the value is first passed
 *                            through new_stripslashes(); pass 0 to skip that.
 * @return string addslashes() applied to the var_export() output, or '' when
 *                $data loosely equals the empty string.
 */
private function array2string($data,$isformdata = 1) {
  if ($data == '') {
    return '';
  }

  $source = $isformdata ? $this->new_stripslashes($data) : $data;
  $exported = var_export($source,TRUE);

  return addslashes($exported);
}

/**
 * Apply stripslashes() to a string, or recursively to every element of an array.
 *
 * @param mixed $string String or (possibly nested) array to process.
 * @return mixed The unescaped string, or an array with the same keys whose
 *               values have been processed the same way.
 */
private function new_stripslashes($string) {
  if (is_array($string)) {
    $clean = array();
    foreach ($string as $index => $item) {
      // Recurse so nested arrays are unescaped at every depth.
      $clean[$index] = $this->new_stripslashes($item);
    }
    return $clean;
  }

  return stripslashes($string);
}
}
// $word = new dict("exPress");

// $word ->content();

@H_419_19@

以上就是本文的全部内容了，非常实用的功能，希望小伙伴们能够喜欢。

脚本宝典总结

以上是脚本宝典为你收集整理的PHP制作百度词典查词采集器全部内容，希望文章能够帮你解决PHP制作百度词典查词采集器所遇到的问题。

如果觉得脚本宝典网站内容还不错，欢迎将脚本宝典推荐好友。

本图文内容来源于网友网络收集整理提供，作为学习参考使用，版权属于原作者。
如您有任何意见或建议可联系处理。小编QQ：384754419，请注明来意。

标签：php 采集采集

上一篇: 新浪SAE搭建PHP项目教程下一篇:php+mysqli事务控制实现银行转账...

猜你在找的php教程相关文章

浅谈Windows下 PHP4.0与oracle 8的连接设置 2022-04-30
PHP｜入阶PHP-FPM 2019-08-07
PHP 规范开发（二）：测试 2019-08-07
使用php 爬取拉勾网的php 招聘信息~ 2019-08-07
php 实现分布式文件服务器 2019-08-07
【PHP】php安装bcmath扩展脚本 2019-08-07
php资源收集 2019-08-07
【面向对象的PHP】之模式：目录 2019-08-07
php 安装zip模块 2019-08-07
PHP 8 中新特性以及重大调整 2022-05-30

全站导航更多

最新php教程教程

热门php教程教程