PHP获取关键词在百度、360、搜狗排名 关键词排名查询
工作过程中需要定时查询某些关键词在百度、360及搜狗中的排名,于是写了一个简单的查询类,如有更好的方法或发现错误,欢迎回复交流。
基本思路:根据关键词搜索结果匹配所在页面中的排序值
ThinkPHP5 /extend/org/util/KeyRank.php
<?php
namespace org\util;
// Report every error level except E_NOTICE. This is a process-wide setting that
// takes effect as soon as this file is loaded, not only inside this class.
error_reporting(E_ALL & ~E_NOTICE);
/**
 * Looks up the position of a domain in the search results of Baidu, 360 (so.com)
 * and Sogou for a given keyword.
 *
 * Author: IceCry <http://www.zhinizhiwo.com>
 *
 * The search parameters are supplied once through the constructor and stored in
 * static properties; the static *Rank() methods then read them. Each *Rank()
 * method returns an array ['rank' => int, 'page' => int]; both values are 0 when
 * the domain was not found on the scanned pages or the pages could not be fetched.
 *
 * According to the original author the rank is meant to be the organic rank
 * (paid placements excluded); this depends on the result markup of each engine.
 */
class KeyRank
{
    /** Number of results assumed per result page when a page was not scanned. */
    const RESULTS_PER_PAGE = 10;

    /** Seconds to wait before re-requesting a page that could not be parsed. */
    const RETRY_DELAY = 3;

    private static $url;
    private static $key;
    private static $start;
    private static $deep;
    private static $trytime;

    /**
     * Stores the search parameters used by the static rank methods.
     *
     * Terminates the script with die() when $key or $url is an empty string.
     *
     * @param string  $key     keyword to search for
     * @param string  $url     domain (or URL fragment) to look for in the displayed result URLs
     * @param integer $deep    last result page to scan (inclusive)
     * @param integer $start   first result page to scan (1-based)
     * @param integer $trytime number of retries when a result page cannot be parsed
     */
    public function __construct($key='', $url='', $deep=3, $start=1, $trytime=3)
    {
        if ($key === '' || $url === '') {
            die('key & url required !');
        }
        self::$url = $url;
        self::$key = $key;
        self::$deep = $deep;
        self::$start = $start;
        self::$trytime = $trytime;
    }

    /**
     * Rank of the configured domain on Baidu.
     *
     * Baidu is paged by result offset (pn = 10 * (page - 1)), so the rank is
     * computed from the page number and the position within the page.
     *
     * @return array ['rank' => int, 'page' => int], both 0 when not found
     */
    public static function baiduRank()
    {
        for ($d = self::$start; $d <= self::$deep; $d++) {
            $pn = self::RESULTS_PER_PAGE * ($d - 1);
            $url = "http://www.baidu.com/s?ie=utf-8&wd=".urlencode(self::$key).'&pn='.$pn;
            $blocks = self::fetchResultBlocks(
                $url,
                "/<div id=\"content_left\">.*?<div id=\"rs\">/ism",
                '<div class="result',
                '百度'
            );
            if ($blocks === null) {
                break; // page could not be fetched; give up on the remaining pages
            }
            $position = self::findMatchingBlock($blocks, "/class=\"c-showurl\".*?>.*?<\/[^b|.]*?>/ism");
            if ($position > 0) {
                return ['rank' => $position + ($d - 1) * self::RESULTS_PER_PAGE, 'page' => $d];
            }
        }
        return ['rank' => 0, 'page' => 0];
    }

    /**
     * Rank of the configured domain on 360 search (so.com).
     *
     * @return array ['rank' => int, 'page' => int], both 0 when not found
     */
    public static function soRank()
    {
        for ($d = self::$start; $d <= self::$deep; $d++) {
            $url = "https://www.so.com/s?ie=utf-8&fr=so.com&src=home_so.com&q=".urlencode(self::$key)."&pn=".$d;
            $blocks = self::fetchResultBlocks(
                $url,
                "/<ul class=\"result\">.*?<div id=\"side\">/ism",
                '<li class="res-list',
                '360'
            );
            if ($blocks === null) {
                break;
            }
            $position = self::findMatchingBlock($blocks, "/<cite>.*?<\/cite>/ism");
            if ($position > 0) {
                return ['rank' => $position + ($d - 1) * self::RESULTS_PER_PAGE, 'page' => $d];
            }
        }
        return ['rank' => 0, 'page' => 0];
    }

    /**
     * Rank of the configured domain on Sogou.
     *
     * Sogou does not return a fixed number of results per page, so the rank is
     * the position on the matching page plus the number of results actually seen
     * on the pages scanned before it. Pages before $start are never fetched, so
     * each of them is assumed to hold RESULTS_PER_PAGE results.
     *
     * @return array ['rank' => int, 'page' => int], both 0 when not found
     */
    public static function sogouRank()
    {
        $seenBefore = max(0, (int) self::$start - 1) * self::RESULTS_PER_PAGE;
        for ($d = self::$start; $d <= self::$deep; $d++) {
            $url = "https://www.sogou.com/web?query=".urlencode(self::$key)."&page=".$d;
            $blocks = self::fetchResultBlocks(
                $url,
                "/<div class=\"results\".*?<div class=\"right\"/ism",
                '<!-- a -->',
                '搜狗'
            );
            if ($blocks === null) {
                break;
            }
            $position = self::findMatchingBlock($blocks, "/<cite.*?>.*?<\/cite>/ism");
            if ($position > 0) {
                return ['rank' => $seenBefore + $position, 'page' => $d];
            }
            // Element 0 is the markup before the first result, not a result itself.
            $seenBefore += count($blocks) - 1;
        }
        return ['rank' => 0, 'page' => 0];
    }

    /**
     * Downloads a result page and splits its result container into per-result chunks.
     *
     * Makes one attempt plus up to $trytime retries, pausing RETRY_DELAY seconds
     * before each retry. When every attempt fails, a line is appended to ./err.log
     * (relative to the current working directory) and null is returned.
     *
     * @param string $url              result page URL
     * @param string $containerPattern regex that captures the result list container
     * @param string $delimiter        markup that starts each individual result
     * @param string $engine           engine label used in the error log line
     *
     * @return array|null chunks (index 0 is the text before the first result), or null on failure
     */
    private static function fetchResultBlocks($url, $containerPattern, $delimiter, $engine)
    {
        $attempts = 1 + max(0, (int) self::$trytime);
        for ($i = 0; $i < $attempts; $i++) {
            if ($i > 0) {
                sleep(self::RETRY_DELAY);
            }
            // static:: lets a subclass replace the HTTP layer (e.g. in tests).
            $html = static::https_request($url);
            // curl_exec() yields false on failure; only parse real response bodies.
            if (is_string($html) && preg_match($containerPattern, $html, $content) === 1) {
                return explode($delimiter, $content[0]);
            }
        }
        $err = date('Y/m/d H:i:s')."【错误】#".$engine."#关键词@".self::$key."@页面抓取失败\r\n";
        file_put_contents('./err.log', $err, FILE_APPEND);
        return null;
    }

    /**
     * Finds the first result chunk whose displayed URL contains the configured domain.
     *
     * @param array  $blocks      chunks as returned by fetchResultBlocks()
     * @param string $citePattern regex that captures the displayed-URL element of a result
     *
     * @return integer 1-based position of the matching result on the page, 0 when none matches
     */
    private static function findMatchingBlock(array $blocks, $citePattern)
    {
        $needle = (string) self::$url;
        foreach ($blocks as $k => $block) {
            if ($k === 0) {
                continue; // markup preceding the first result
            }
            if (preg_match($citePattern, $block, $cite) === 1
                && strpos(strip_tags($cite[0]), $needle) !== false) {
                return $k;
            }
        }
        return 0;
    }

    /**
     * Performs an HTTP(S) request with cURL and returns the response body.
     *
     * Sends a GET request, or a POST request when $data is non-empty. The request
     * times out after 3 seconds.
     *
     * NOTE(review): TLS peer and host verification are switched off here, so the
     * response is not authenticated. Confirm this is acceptable before reusing
     * this method for anything other than scraping public result pages.
     *
     * @param string            $url  request URL
     * @param array|string|null $data POST payload; a GET request is sent when empty
     *
     * @return string|false response body, or false when the request failed
     */
    public static function https_request($url, $data = null)
    {
        $headers = [
            "Content-type:application/html;charset=utf-8",
            "User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
            'Cache-Control:no-cache'
        ];
        $curl = curl_init();
        if ($curl === false) {
            return false;
        }
        curl_setopt($curl, CURLOPT_URL, $url);
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, FALSE);
        curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, FALSE);
        curl_setopt($curl, CURLOPT_TIMEOUT, 3);
        if (!empty($data)) {
            curl_setopt($curl, CURLOPT_POST, 1);
            curl_setopt($curl, CURLOPT_POSTFIELDS, $data);
        }
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);
        $output = curl_exec($curl);
        curl_close($curl);
        return $output;
    }
}
/*$KeyRank = new KeyRank('婧氏纸尿裤', 'magibaby.net', 2);
$baidu = $KeyRank::baiduRank();
$so = $KeyRank::soRank();
$sogou = $KeyRank::sogouRank();
var_dump($baidu);
var_dump($so);
var_dump($sogou);*/
Thanks, A good amount of material.
修复百度搜索,网址中包含搜索关键词引起的bug
preg_match("/class=\"c-showurl\".*?>.*?/ism", $v, $xxx);