我想在大多数网站上使用站点URL获取网站标题,但它正在使用Japennese和Chinnese网站获得一些不可读的文本。
这是我的功能
function file_get_contents_curl($url) {
$ch = curl_init();
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
$data = curl_exec($ch);
curl_close($ch);
return $data;
}
使用
use--------
$html = $this->file_get_contents_curl($url);
解析
$doc = new DOMDocument();
@$doc->loadHTML($html);
$nodes = $doc->getElementsByTagName('title');
$title = $nodes->item(0)->nodeValue;
我得到了这个ouput"•â€â€â€€â€â€«â€®ç¢ºèªï½ameba(•�•âtameba)"
网站URL:https://user.ameba.jp/regist/registerintro.do?campaignid=0053&frmid=3051
请帮助我建议以任何语言获得确切的网站标题的建议。
//示例
/* MEthod----------4 */
function file_get_contents_curl($url){
$ch = curl_init();
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
$data = curl_exec($ch);
curl_close($ch);
return $data;
}
$uurl="http://www.piaohua.com/html/xuannian/index.html";
$html = file_get_contents_curl($uurl);
//parsing begins here:
$doc = new DOMDocument();
@$doc->loadHTML($html);
$nodes = $doc->getElementsByTagName('title');
//get and display what you need:
if(!empty($nodes->item(0)->nodeValue)){
$title = utf8_decode($nodes->item(0)->nodeValue);
}else{
$title =$uurl;
}
echo $title;
通过将以下行添加到文件的开始
来确保您的脚本正在使用utf-8
编码 mb_internal_encoding('UTF-8');
这样做后,从代码中删除utf8_decode
功能。没有它,一切都应该正常工作
[DOMDocument::loadHtml]
1功能从HTML页面元标记进行编码。因此,如果页面不典型地指定其编码,则可能会遇到问题。
只需在PHP代码的顶部添加此行。
header('Content-Type: text/html;charset=utf-8');
代码..
<?php
header('Content-Type: text/html;charset=utf-8');
function file_get_contents_curl($url) {
$ch = curl_init();
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
$data = curl_exec($ch);
curl_close($ch);
return $data;
}
$html = file_get_contents_curl('http://www.piaohua.com/html/lianxuju/2013/1108/27730.html');
$doc = new DOMDocument();
@$doc->loadHTML($html);
$nodes = $doc->getElementsByTagName('title');
echo $title = $nodes->item(0)->nodeValue;