php采集页面内容并自动转码
/**
 * Fetch a page over HTTP and return its body converted to UTF-8.
 *
 * Usage: $html = get_contents('http://example.com/', 20);
 *
 * The request is made with cURL when that extension is loaded; otherwise it
 * falls back to file_get_contents() with a stream-context timeout. The body is
 * then passed through my_encoding() with 'utf-8' as the target encoding.
 *
 * @param string $url     Address of the page to fetch.
 * @param int    $timeout Timeout in seconds (default 20). With cURL it is used
 *                        for both the connect phase and the whole transfer.
 *
 * @return string|false The converted body, or false when the request failed
 *                      or the response body was empty.
 */
function get_contents($url, $timeout = 20)
{
    if (function_exists('curl_init')) {
        $ch = curl_init();
        if ($ch === false) {
            // cURL could not create a handle; nothing can be fetched.
            return false;
        }
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, false);
        curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
        // Return the body from curl_exec() instead of printing it.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
        $content = curl_exec($ch);
        curl_close($ch);
    } else {
        // Without cURL, the timeout is set through a stream context.
        $pots = array(
            'http' => array(
                'timeout' => $timeout
            )
        );
        $context = stream_context_create($pots);
        // Warnings are suppressed on purpose: failure is reported through the
        // false return value, matching the cURL branch.
        $content = @file_get_contents($url, false, $context);
    }

    // Test for failure explicitly. A plain truthiness check would also reject
    // a perfectly valid body consisting of "0".
    if (!is_string($content) || $content === '') {
        return false;
    }

    return my_encoding($content, 'utf-8');
}
/**
 * Detect the character encoding of fetched page content and convert it.
 *
 * Detection uses mb_detect_encoding() against a fixed candidate list of
 * UTF-8, ASCII and several Chinese and Japanese encodings.
 *
 * @param string $data Page content as returned by curl_exec() or
 *                     file_get_contents().
 * @param string $to   Name of the target encoding, e.g. 'utf-8'.
 *
 * @return string The content converted to $to. The input is returned
 *                unchanged when it is empty or when none of the candidate
 *                encodings could be detected.
 */
function my_encoding($data, $to)
{
    // Nothing to detect or convert in an empty string.
    if ($data === '') {
        return $data;
    }

    $encode_arr = array('UTF-8', 'ASCII', 'GBK', 'GB2312', 'BIG5', 'JIS', 'eucjp-win', 'sjis-win', 'EUC-JP');
    $encoded = mb_detect_encoding($data, $encode_arr);

    // mb_detect_encoding() returns false when no candidate matches. Passing
    // that on as the source encoding makes mb_convert_encoding() fail, so
    // hand the data back untouched instead.
    if ($encoded === false) {
        return $data;
    }

    return mb_convert_encoding($data, $to, $encoded);
}