[转载]微信文章和公众号排名查询工具 – 勇哥实验室

[转载]微信文章和公众号排名查询工具 – 勇哥实验室.

 

概述

闲来无事,研究了下搜狗的微信搜索,顺手写了两个工具:查询文章的关键词排名和公众号的关键词排名。用到的知识很简单:分析搜狗微信搜索参数和正则匹配。点击下面链接可直接访问工具:
微信文章排名查询
微信公众号排名查询

实现思路

1.分析搜狗微信搜索的主要参数,说明如下:
query:查询的关键词
type:查询类型:1-查询公众号;2-查询文章
p:当前页码
num:查询每页的结果数,默认为10
_ast:当前请求的时间
2.为保障查询参数的正确,我首先模拟登陆首页,获得其它意义不明确的参数,和自定义的查询参数还有相关cookies数据一起,作为一个完整请求发送。
3.用正则表达式匹配排名数据即可。
为保证能检索到排名,需要确认一个文章的识别码(公众号识别码就是微信号),具体如下:
识别码

/**
 * Look up where an article or an official account ranks in Sogou's WeChat
 * search results for a keyword.
 *
 * The home page is requested first so that the client picks up cookies and
 * any hidden form fields; those are sent along with the search parameters.
 * Only the first result page (up to 100 results) is inspected.
 *
 * @param string $keyword search keyword
 * @param string $id      identifier to look for: for articles, a substring of
 *                        the result link; for official accounts, the WeChat id
 * @param int    $type    search type, 1: official accounts; 2: articles
 * @return int 0 when no rank was found; otherwise the 1-based position
 */
function getRank($keyword, $id, $type=2)
{
    // An empty needle can never identify a result (and strpos() would treat
    // it as matching everything on PHP 8), so report "no rank".
    $id = (string) $id;
    if ($id === '')
    {
        return 0;
    }

    // Simulate entering through the home page.
    $client = new HttpClient('weixin.sogou.com');
    if (!$client->get('/'))
    {
        die('查询发生错误');
    }

    $param = array(
        'query' => $keyword,
        'type'  => $type,
        'p'     => 1,
        'num'   => 100,
        '_ast'  => time(),
    );

    // Pick up the extra hidden form parameters from the home page.
    // NOTE(review): this pattern was reconstructed from a damaged listing; it
    // assumes the attribute order name / type="hidden" / value - confirm
    // against the live markup.
    if (preg_match_all('/<input name="([^"]+)" type="hidden" value="([^"]*)"\s*\/?>/', $client->content, $out))
    {
        foreach ($out[1] as $k => $name)
        {
            // Explicit search parameters win over whatever the page carries.
            if (!isset($param[$name]))
            {
                $param[$name] = $out[2][$k];
            }
        }
    }

    if ($client->get('/weixin', $param))
    {
        if ($type == 2) // article rank
        {
            // Sample of the markup being matched:
            //   <h4><a id="sogou_vr_11002601_title_2"
            //          href="http://mp.weixin.qq.com/s?__biz=...&amp;mid=200270633&amp;idx=4&amp;sn=..."
            //          target="_blank">title</a></h4>
            // NOTE(review): pattern reconstructed from the sample above; the
            // original listing was garbled.
            $patten = '/<h4>.*<a[^>]*href="([^"]+)"[^>]*>(.+)<\/a>.*<\/h4>/Us';
            preg_match_all($patten, $client->content, $result);
            if (!empty($result[1]))
            {
                foreach ($result[1] as $k => $val)
                {
                    if (strpos($val, $id) !== false)
                    {
                        return $k + 1;
                    }
                }
            }
        }
        else // official account rank
        {
            // Sample of the markup being matched:
            //   <div class="txt-box">
            //   <h3>移动<em><!--red_beg-->互联网<!--red_end--></em>中心</h3>
            //   <h4>微信号:yidongwang2003</h4>
            // The id is captured up to the next closing tag, whichever tag
            // that is (the sample shows </h4>; the original pattern expected
            // </span>).
            $patten = '/微信号:([^<]+)<\//U';
            preg_match_all($patten, $client->content, $result);
            if (!empty($result[1]))
            {
                $k = array_search($id, array_map('trim', $result[1]), true);
                if ($k !== false)
                {
                    return $k + 1;
                }
            }
        }
    }

    return 0;
}

其中 $client 是 HttpClient 类的实例,主要用于发送 HTTP 请求。代码如下:

/**
 * Minimal HTTP/1.0 client built directly on fsockopen().
 *
 * Supports GET and POST, optional gzip responses, cookie and referer
 * persistence between requests on the same instance, basic authentication
 * and same-host redirects. Failures are reported by a false return value;
 * the reason is available through getError().
 */
class HttpClient
{
    // Request vars
    public $host;
    public $port;
    public $path;
    public $method;
    public $postdata = '';
    public $cookies = array();
    public $referer;
    public $accept = 'text/xml,application/xml,application/xhtml+xml,text/html,text/plain,image/png,image/jpeg,image/gif,*/*';
    public $accept_encoding = 'gzip';
    public $accept_language = 'en-us';
    public $user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.114 Safari/537.36';
    // Options
    public $timeout = 20;
    public $use_gzip = true;
    public $persist_cookies = true; // If true, received cookies are placed in the $this->cookies array ready for the next request
    // Note: This currently ignores the cookie path (and time) completely. Time is not important,
    // but path could possibly lead to security problems.
    public $persist_referers = true; // For each request, sends path of last request as referer
    public $debug = false;
    public $handle_redirects = true; // Automatically redirect if Location or URI header is found
    public $max_redirects = 5;
    public $headers_only = false; // If true, stops receiving once headers have been read.
    // Basic authorization variables
    public $username;
    public $password;
    // Response vars
    public $status;
    public $headers = array();
    public $content = '';
    public $errormsg;
    // Tracker variables
    public $redirect_count = 0;
    public $cookie_host = '';

    /**
     * @param string $host host name to connect to (no scheme)
     * @param int    $port TCP port
     */
    public function __construct($host, $port = 80)
    {
        $this->host = $host;
        $this->port = $port;
    }

    /**
     * Send a GET request.
     *
     * @param string             $path request path
     * @param array|string|false $data optional query data, appended after "?"
     * @return bool true on success, false on failure
     */
    public function get($path, $data = false)
    {
        $this->path = $path;
        $this->method = 'GET';
        // A body left over from an earlier post() must not be sent with a GET.
        $this->postdata = '';
        if ($data)
        {
            $this->path .= '?' . $this->buildQueryString($data);
        }
        return $this->doRequest();
    }

    /**
     * Send a POST request with form-encoded data.
     *
     * @param string       $path request path
     * @param array|string $data form fields, or an already encoded body
     * @return bool true on success, false on failure
     */
    public function post($path, $data)
    {
        $this->path = $path;
        $this->method = 'POST';
        $this->postdata = $this->buildQueryString($data);
        return $this->doRequest();
    }

    /**
     * Encode data as a URL query string.
     *
     * A nested array value repeats its key once per element (k=1&k=2).
     * Non-array input is returned unchanged.
     *
     * @param array|string $data
     * @return string
     */
    public function buildQueryString($data)
    {
        if (!is_array($data))
        {
            return $data;
        }
        $pairs = array();
        foreach ($data as $key => $val)
        {
            if (is_array($val))
            {
                foreach ($val as $val2)
                {
                    $pairs[] = urlencode($key) . '=' . urlencode($val2);
                }
            } else
            {
                $pairs[] = urlencode($key) . '=' . urlencode($val);
            }
        }
        return implode('&', $pairs);
    }

    /**
     * Perform the actual HTTP request using the current method/path/postdata.
     *
     * Fills $status, $headers and $content, then applies cookie and referer
     * persistence and follows redirects when enabled.
     *
     * @return bool true on success, false on failure (see getError())
     */
    public function doRequest()
    {
        $fp = @fsockopen($this->host, $this->port, $errno, $errstr, $this->timeout);
        if (!$fp)
        {
            // Set error message
            switch ($errno)
            {
                case -3:
                    $this->errormsg = 'Socket creation failed (-3)';
                    break;
                case -4:
                    $this->errormsg = 'DNS lookup failure (-4)';
                    break;
                case -5:
                    $this->errormsg = 'Connection refused or timed out (-5)';
                    break;
                default:
                    $this->errormsg = 'Connection failed (' . $errno . ')';
            }
            $this->errormsg .= ' ' . $errstr;
            $this->debug($this->errormsg);
            return false;
        }
        stream_set_timeout($fp, $this->timeout);
        $request = $this->buildRequest();
        $this->debug('Request', $request);
        fwrite($fp, $request);
        // Reset all the variables that should not persist between requests
        $this->headers = array();
        $this->content = '';
        $this->errormsg = '';
        // Set a couple of flags
        $inHeaders = true;
        $atStart = true;
        // Now start reading back the response
        while (!feof($fp))
        {
            $line = fgets($fp, 4096);
            if ($line === false)
            {
                // Read error or timeout: stop instead of processing a non-string.
                break;
            }
            if ($atStart)
            {
                // Deal with first line of returned data
                $atStart = false;
                if (!preg_match('/HTTP\/(\d\.\d)\s*(\d+)\s*(.*)/', $line, $m))
                {
                    $this->errormsg = "Status code line invalid: " . htmlentities($line);
                    $this->debug($this->errormsg);
                    fclose($fp); // do not leak the socket on the error path
                    return false;
                }
                $this->status = $m[2];
                $this->debug(trim($line));
                continue;
            }
            if ($inHeaders)
            {
                if (trim($line) == '')
                {
                    $inHeaders = false;
                    $this->debug('Received Headers', $this->headers);
                    if ($this->headers_only)
                    {
                        break; // Skip the rest of the input
                    }
                    continue;
                }
                if (!preg_match('/([^:]+):\s*(.*)/', $line, $m))
                {
                    // Skip to the next header
                    continue;
                }
                $key = strtolower(trim($m[1]));
                $val = trim($m[2]);
                // Deal with the possibility of multiple headers of same name
                if (isset($this->headers[$key]))
                {
                    if (is_array($this->headers[$key]))
                    {
                        $this->headers[$key][] = $val;
                    } else
                    {
                        $this->headers[$key] = array($this->headers[$key], $val);
                    }
                } else
                {
                    $this->headers[$key] = $val;
                }
                continue;
            }
            // We're not in the headers, so append the line to the contents
            $this->content .= $line;
        }
        fclose($fp);
        // If data is compressed, uncompress it
        if (isset($this->headers['content-encoding']) && $this->headers['content-encoding'] == 'gzip')
        {
            $this->debug('Content is gzip encoded, unzipping it');
            $this->content = substr($this->content, 10); // See http://www.php.net/manual/en/function.gzencode.php
            $this->content = gzinflate($this->content);
        }
        // If $persist_cookies, deal with any cookies
        if ($this->persist_cookies && isset($this->headers['set-cookie']))
        {
            $cookies = $this->headers['set-cookie'];
            if (!is_array($cookies))
            {
                $cookies = array($cookies);
            }
            foreach ($cookies as $cookie)
            {
                // Take "name=value" up to the first ";". The ";" is optional so
                // that cookies sent without attributes are stored as well.
                if (preg_match('/^\s*([^=;]+)=([^;]*)/', $cookie, $m))
                {
                    $this->cookies[trim($m[1])] = $m[2];
                }
            }
        }
        // If $persist_referers, set the referer ready for the next request
        if ($this->persist_referers)
        {
            $this->debug('Persisting referer: ' . $this->getRequestURL());
            $this->referer = $this->getRequestURL();
        }
        // Finally, if handle_redirects and a redirect is sent, do that
        if ($this->handle_redirects)
        {
            $location = isset($this->headers['location']) ? $this->headers['location'] : '';
            $uri = isset($this->headers['uri']) ? $this->headers['uri'] : '';
            if ($location || $uri)
            {
                // Count only real redirects, so a long-lived client is not
                // cut off after a handful of ordinary requests.
                if (++$this->redirect_count > $this->max_redirects)
                {
                    $this->errormsg = 'Number of redirects exceeded maximum (' . $this->max_redirects . ')';
                    $this->debug($this->errormsg);
                    $this->redirect_count = 0;
                    return false;
                }
                $url = parse_url($location . $uri);
                if (!is_array($url))
                {
                    $this->errormsg = 'Invalid redirect target';
                    $this->debug($this->errormsg);
                    $this->redirect_count = 0;
                    return false;
                }
                // This will FAIL if redirect is to a different site
                $target = isset($url['path']) ? $url['path'] : '/';
                if (isset($url['query']))
                {
                    $target .= '?' . $url['query'];
                }
                return $this->get($target);
            }
        }
        $this->redirect_count = 0;
        return true;
    }

    /**
     * Assemble the raw request (request line, headers, blank line, body).
     *
     * @return string
     */
    public function buildRequest()
    {
        $headers = array();
        $headers[] = "{$this->method} {$this->path} HTTP/1.0"; // Using 1.1 leads to all manner of problems, such as "chunked" encoding
        $headers[] = "Host: {$this->host}";
        $headers[] = "User-Agent: {$this->user_agent}";
        $headers[] = "Accept: {$this->accept}";
        if ($this->use_gzip)
        {
            $headers[] = "Accept-encoding: {$this->accept_encoding}";
        }
        $headers[] = "Accept-language: {$this->accept_language}";
        if ($this->referer)
        {
            $headers[] = "Referer: {$this->referer}";
        }
        // Cookies
        if ($this->cookies)
        {
            $cookie = 'Cookie: ';
            foreach ($this->cookies as $key => $value)
            {
                $cookie .= "$key=$value; ";
            }
            $headers[] = $cookie;
        }
        // Basic authentication
        if ($this->username && $this->password)
        {
            $headers[] = 'Authorization: BASIC ' . base64_encode($this->username . ':' . $this->password);
        }
        // If this is a POST, set the content type and length
        if ($this->postdata)
        {
            $headers[] = 'Content-Type: application/x-www-form-urlencoded';
            $headers[] = 'Content-Length: ' . strlen($this->postdata);
        }
        // HTTP header lines are CRLF-terminated; an empty line ends the header block.
        return implode("\r\n", $headers) . "\r\n\r\n" . $this->postdata;
    }

    /** @return string|null status code of the last response */
    public function getStatus()
    {
        return $this->status;
    }

    /** @return string body of the last response */
    public function getContent()
    {
        return $this->content;
    }

    /** @return array response headers keyed by lower-cased name */
    public function getHeaders()
    {
        return $this->headers;
    }

    /**
     * @param string $header header name (case-insensitive)
     * @return string|array|false the value(s), or false when the header is absent
     */
    public function getHeader($header)
    {
        $header = strtolower($header);
        if (isset($this->headers[$header]))
        {
            return $this->headers[$header];
        }
        return false;
    }

    /** @return string|null message describing the last failure */
    public function getError()
    {
        return $this->errormsg;
    }

    /** @return array cookies currently held, name => value */
    public function getCookies()
    {
        return $this->cookies;
    }

    /** @return string absolute http:// URL of the current request path */
    public function getRequestURL()
    {
        $url = 'http://' . $this->host;
        if ($this->port != 80)
        {
            $url .= ':' . $this->port;
        }
        $url .= $this->path;
        return $url;
    }

    /** Set the Referer header value for the next request. */
    public function setReferee($string)
    {
        $this->referer = $string;
    }

    // Setter methods
    public function setUserAgent($string)
    {
        $this->user_agent = $string;
    }

    public function setAuthorization($username, $password)
    {
        $this->username = $username;
        $this->password = $password;
    }

    public function setCookies($array)
    {
        $this->cookies = $array;
    }

    // Option setting methods
    public function useGzip($boolean)
    {
        $this->use_gzip = $boolean;
    }

    public function setPersistCookies($boolean)
    {
        $this->persist_cookies = $boolean;
    }

    public function setPersistReferers($boolean)
    {
        $this->persist_referers = $boolean;
    }

    public function setHandleRedirects($boolean)
    {
        $this->handle_redirects = $boolean;
    }

    public function setMaxRedirects($num)
    {
        $this->max_redirects = $num;
    }

    public function setHeadersOnly($boolean)
    {
        $this->headers_only = $boolean;
    }

    public function setDebug($boolean)
    {
        $this->debug = $boolean;
    }

    // "Quick" static methods

    /**
     * Fetch a URL with GET.
     *
     * @param string $url absolute URL
     * @return string|false response body, or false on failure
     */
    public static function quickGet($url)
    {
        $bits = parse_url($url);
        if (!is_array($bits) || !isset($bits['host']))
        {
            return false;
        }
        $port = isset($bits['port']) ? $bits['port'] : 80;
        $path = isset($bits['path']) ? $bits['path'] : '/';
        if (isset($bits['query']))
        {
            $path .= '?' . $bits['query'];
        }
        $client = new HttpClient($bits['host'], $port);
        if (!$client->get($path))
        {
            return false;
        }
        return $client->getContent();
    }

    /**
     * Send form data to a URL with POST.
     *
     * @param string       $url  absolute URL
     * @param array|string $data form fields, or an already encoded body
     * @return string|false response body, or false on failure
     */
    public static function quickPost($url, $data)
    {
        $bits = parse_url($url);
        if (!is_array($bits) || !isset($bits['host']))
        {
            return false;
        }
        $port = isset($bits['port']) ? $bits['port'] : 80;
        $path = isset($bits['path']) ? $bits['path'] : '/';
        if (isset($bits['query']))
        {
            // Keep the URL's own query string, as quickGet() does.
            $path .= '?' . $bits['query'];
        }
        $client = new HttpClient($bits['host'], $port);
        if (!$client->post($path, $data))
        {
            return false;
        }
        return $client->getContent();
    }

    /**
     * Print a debug box when $debug is enabled.
     *
     * @param string $msg    message to show
     * @param mixed  $object optional value dumped with print_r(), HTML-escaped
     */
    public function debug($msg, $object = false)
    {
        if ($this->debug)
        {
            print "\n" . '<div style="border: 1px solid red; padding: 0.5em; margin: 0.5em;"><strong>HttpClient Debug:</strong> ' . $msg;
            if ($object)
            {
                ob_start();
                print_r($object);
                $content = htmlentities(ob_get_contents());
                ob_end_clean();
                print $content;
            }
            print '</div>' . "\n";
        }
    }
}

备注:微信排名变动比较大,所以排名的准确性比百度要低好多。本示例仅供参考!

转载请注明:勇哥实验室 » 微信文章和公众号排名查询工具

赞(0) 打赏
分享到: 更多 (0)

觉得文章有用就打赏一下文章作者

支付宝扫一扫打赏

微信扫一扫打赏