curl模拟浏览器操作采集

// Declare the response as UTF-8 HTML; this must run before any output is sent.
header('content-Type: text/html; charset=UTF-8');
/**
 * Fetch a URL with cURL while imitating an XHR request sent by a desktop Chrome browser.
 *
 * Every call sends a fixed set of browser-like headers (including a so.com Referer)
 * and a randomly generated 120.x.x.x address in the CLIENT-IP / X-FORWARDED-FOR headers.
 *
 * @param string            $url    URL to request.
 * @param array|string|null $data   Payload handed to CURLOPT_POSTFIELDS; only used for POST requests.
 * @param string            $method 'post' (case-insensitive) issues a POST; any other value
 *                                  leaves cURL on its default GET.
 * @param bool              $https  When true, TLS peer and host verification are switched off.
 *
 * @return string Response body as UTF-8, or an empty string when the transfer fails.
 */
function curl_request($url, $data = null, $method = 'get', $https = true)
{
    // Build a random 120.x.x.x address to send in the client-IP style headers.
    $ip = '120.' . rand(1, 233) . '.' . rand(1, 233) . '.' . rand(1, 233);

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, false);         // body only, no response headers
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);  // return the body instead of printing it

    if ($https) {
        // NOTE(review): this disables certificate and host-name verification, which
        // allows man-in-the-middle interception. Kept because callers may rely on it.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
    }

    // Let cURL both advertise gzip and decode it. No manual Accept-Encoding header is
    // sent: advertising encodings cURL was not asked to decode (deflate, br) can yield
    // a compressed body that is returned undecoded.
    curl_setopt($ch, CURLOPT_ENCODING, 'gzip');

    // Browser-like request headers. No Host header is set here, so cURL derives it
    // from $url; a fixed Host would be wrong for every other site.
    curl_setopt($ch, CURLOPT_HTTPHEADER, [
        'Accept: */*',
        'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8',
        'Connection: keep-alive',
        'is_referer: https://www.so.com/',
        'is_xhr: 1',
        'Referer: https://www.so.com/',
        'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
        'X-Requested-With: XMLHttpRequest',
        'CLIENT-IP: ' . $ip,
        'X-FORWARDED-FOR: ' . $ip,
    ]);

    if (is_string($method) && strcasecmp($method, 'post') === 0) {
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
    }

    $body = curl_exec($ch);
    curl_close($ch);

    // curl_exec() returns false on failure; report that as an empty string rather
    // than passing a boolean on to the encoding conversion.
    if (!is_string($body)) {
        return '';
    }

    // Valid UTF-8 (which includes plain ASCII) is returned untouched. Auto-detecting
    // with GBK as the first candidate can misread UTF-8 bytes as GBK and corrupt them.
    if (mb_check_encoding($body, 'UTF-8')) {
        return $body;
    }

    // Otherwise treat the body as GBK, a superset of GB2312, and convert it.
    return mb_convert_encoding($body, 'UTF-8', 'GBK');
}