PHP获取关键词在百度、360、搜狗排名 关键词排名查询

工作过程中需要定时查询某些关键词在百度、360及搜狗中的排序,于是写了一个简单的查询类。如有更好的方法或发现错误,欢迎回复交流。
基本思路:逐页抓取关键词的搜索结果,在结果列表中匹配目标域名,并根据其所在页码和页内位置计算排序值
ThinkPHP5 /extend/org/util/KeyRank.php

<?php
namespace org\util;

// Report every PHP error level except E_NOTICE.
error_reporting(E_ALL & ~E_NOTICE);

/**
 * Looks up the organic (non-advertising) rank of a domain for a keyword on
 * Baidu, 360 (so.com) and Sogou by scraping their result pages.
 *
 * Author: IceCry <http://www.zhinizhiwo.com>
 *
 * The search parameters are stored in static properties by the constructor
 * and are shared by every later static call, so constructing a second
 * instance replaces the parameters for all callers.
 *
 * Every *Rank() method returns array('rank' => int, 'page' => int); both
 * values are 0 when the domain was not found within the scanned pages, when
 * a page could not be fetched, or when no keyword/domain has been configured.
 */
class KeyRank{
    private static $url;
    private static $key;
    private static $start;
    private static $deep;
    private static $trytime;

    /**
     * Stores the search parameters used by the static rank methods.
     *
     * Terminates the script via die() when $key or $url is an empty string.
     *
     * @param string  $key     keyword to search for
     * @param string  $url     domain to look for in the result list
     * @param integer $deep    last result page to scan (inclusive)
     * @param integer $start   first result page to scan (1-based)
     * @param integer $trytime number of retries after a failed page fetch
     */
    public function __construct($key='', $url='', $deep=3, $start=1, $trytime=3) {
        if($key==='' || $url===''){
            die('key & url required !');
        }
        self::$url = $url;
        self::$key = $key;
        self::$deep = $deep;
        self::$start = $start;
        self::$trytime = $trytime;
    }

    /**
     * Rank of the configured domain in Baidu results.
     *
     * The rank is computed as position-in-page + (page - 1) * 10.
     *
     * @return array array('rank' => int, 'page' => int), zeros when not found
     */
    public static function baiduRank(){
        $res = array('rank'=>0, 'page'=>0);
        if(!self::isConfigured()){
            return $res;
        }

        $last = (int) self::$deep;
        for ($d=self::firstPage(); $d <= $last; $d++) {
            // Baidu paginates by result offset, not by page number.
            $pn = 10*($d-1);
            $url = "http://www.baidu.com/s?ie=utf-8&wd=".urlencode(self::$key).'&pn='.$pn;
            $chunks = self::fetchResultChunks(
                $url,
                '/<div id="content_left">.*?<div id="rs">/ism',
                '<div class="result'
            );
            if($chunks === null){
                self::logFetchFailure('百度');
                break;
            }

            $pos = self::findDomain($chunks, '/class="c-showurl".*?>.*?<\/[^b|.]*?>/ism');
            if($pos > 0){
                return array('rank'=>$pos + ($d-1)*10, 'page'=>$d);
            }
        }
        return $res;
    }

    /**
     * Rank of the configured domain in 360 (so.com) results.
     *
     * The rank is computed as position-in-page + (page - 1) * 10.
     *
     * @return array array('rank' => int, 'page' => int), zeros when not found
     */
    public static function soRank(){
        $res = array('rank'=>0, 'page'=>0);
        if(!self::isConfigured()){
            return $res;
        }

        $last = (int) self::$deep;
        for ($d=self::firstPage(); $d <= $last; $d++) {
            $url = "https://www.so.com/s?ie=utf-8&fr=so.com&src=home_so.com&q=".urlencode(self::$key)."&pn=".$d;
            $chunks = self::fetchResultChunks(
                $url,
                '/<ul class="result">.*?<div id="side">/ism',
                '<li class="res-list'
            );
            if($chunks === null){
                self::logFetchFailure('360');
                break;
            }

            $pos = self::findDomain($chunks, '/<cite>.*?<\/cite>/ism');
            if($pos > 0){
                return array('rank'=>$pos + ($d-1)*10, 'page'=>$d);
            }
        }
        return $res;
    }

    /**
     * Rank of the configured domain in Sogou results.
     *
     * Sogou pages do not always carry exactly 10 results, so the rank is the
     * position in the matching page plus the number of results actually seen
     * on the pages scanned before it. Pages skipped because the start page is
     * greater than 1 are never fetched and are counted as 10 results each.
     *
     * @return array array('rank' => int, 'page' => int), zeros when not found
     */
    public static function sogouRank(){
        $res = array('rank'=>0, 'page'=>0);
        if(!self::isConfigured()){
            return $res;
        }

        $first = self::firstPage();
        $last = (int) self::$deep;
        // Results that precede the page currently being scanned.
        $offset = ($first-1)*10;
        for ($d=$first; $d <= $last; $d++) {
            $url = "https://www.sogou.com/web?query=".urlencode(self::$key)."&page=".$d;
            $chunks = self::fetchResultChunks(
                $url,
                '/<div class="results".*?<div class="right"/ism',
                '<!-- a -->'
            );
            if($chunks === null){
                self::logFetchFailure('搜狗');
                break;
            }

            $pos = self::findDomain($chunks, '/<cite.*?>.*?<\/cite>/ism');
            if($pos > 0){
                return array('rank'=>$offset + $pos, 'page'=>$d);
            }
            // Chunk 0 is the markup before the first result, not a result.
            $offset += count($chunks) - 1;
        }
        return $res;
    }

    /**
     * Performs an HTTP(S) request with cURL and returns the response body.
     *
     * Sends a POST when $data is non-empty, otherwise a GET. The transfer is
     * limited to 3 seconds.
     *
     * NOTE(review): TLS peer and host verification are disabled here, so the
     * response is not authenticated; confirm this is acceptable for the
     * deployment before relying on the result.
     *
     * @param string            $url  address to request
     * @param array|string|null $data POST payload, or null/empty for GET
     *
     * @return string|false response body, or false when the request failed
     */
    public static function https_request($url, $data = null){
        $headers = array(
            "Content-type:application/html;charset=utf-8",
            "User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
            'Cache-Control:no-cache'
        );
        $curl = curl_init();
        if($curl === false){
            return false;
        }
        curl_setopt($curl, CURLOPT_URL, $url);
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, FALSE);
        curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, FALSE);
        curl_setopt($curl, CURLOPT_TIMEOUT,3);
        if (!empty($data)){
            curl_setopt($curl, CURLOPT_POST, 1);
            curl_setopt($curl, CURLOPT_POSTFIELDS, $data);
        }
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);
        $output = curl_exec($curl);
        curl_close($curl);
        return $output;
    }

    /**
     * Tells whether a non-empty keyword and domain have been stored.
     *
     * @return bool
     */
    private static function isConfigured(){
        return (string) self::$key !== '' && (string) self::$url !== '';
    }

    /**
     * First page to scan, never lower than 1.
     *
     * @return int
     */
    private static function firstPage(){
        return max(1, (int) self::$start);
    }

    /**
     * Downloads a result page and splits its result container into chunks.
     *
     * The page is requested once and, on failure, up to $trytime more times
     * with a 3 second pause before each retry.
     *
     * @param string $url       page address
     * @param string $container regex matching the result list container
     * @param string $separator literal marker that starts every result entry
     *
     * @return array|null chunks (index 0 is the markup before the first
     *                    result), or null when no attempt yielded the container
     */
    private static function fetchResultChunks($url, $container, $separator){
        $attempts = 1 + max(0, (int) self::$trytime);
        for ($i=0; $i < $attempts; $i++) {
            if($i > 0){
                sleep(3);
            }
            $html = self::https_request($url);
            if(is_string($html) && preg_match($container, $html, $m) === 1){
                return explode($separator, $m[0]);
            }
        }
        return null;
    }

    /**
     * Finds the first result chunk whose displayed URL contains the domain.
     *
     * @param array  $chunks      output of fetchResultChunks()
     * @param string $citePattern regex extracting the displayed URL element
     *
     * @return int 1-based position within the page, or 0 when absent
     */
    private static function findDomain($chunks, $citePattern){
        $domain = (string) self::$url;
        foreach ($chunks as $k => $chunk) {
            if($k === 0){
                continue;
            }
            if(preg_match($citePattern, $chunk, $m) !== 1){
                continue;
            }
            if(strpos(strip_tags($m[0]), $domain) !== false){
                return $k;
            }
        }
        return 0;
    }

    /**
     * Appends a fetch-failure line for the given engine to ./err.log.
     *
     * @param string $engine engine label written into the log line
     */
    private static function logFetchFailure($engine){
        $err = date('Y/m/d H:i:s').'【错误】#'.$engine.'#关键词@'.self::$key."@页面抓取失败\r\n";
        file_put_contents('./err.log', $err, FILE_APPEND);
    }
}

/*$KeyRank = new KeyRank('婧氏纸尿裤', 'magibaby.net', 2);
$baidu = $KeyRank::baiduRank();
$so = $KeyRank::soRank();
$sogou = $KeyRank::sogouRank();
var_dump($baidu);
var_dump($so);
var_dump($sogou);*/

Tags: PHP

已有 2 条评论

  1. Thanks, A good amount of material.

  2. 修复百度搜索,网址中包含搜索关键词引起的bug
    preg_match("/class=\"c-showurl\".*?>.*?/ism", $v, $xxx);

添加新评论