海洋CMS整合自动百度推送API的实现方法_源码动态

做网站最重要的环节之一就是收录：页面没有被收录，其他都是空谈，更不会有搜索流量。由于每个行业的网站众多，要让搜索引擎第一时间发现并收录你的网站页面，百度的主动推送必不可少。在百度站长平台提交网站后，百度也会提示你主动推送URL，让蜘蛛第一时间抓取收录；只有收录上去了，SEO才有希望。海洋CMS是专门做影视站的程序，其自带的百度自动推送URL功能并不完善，所以这里做一个教程来实现。

2、在map目录里新建一个index.php，文件代码内容如下：

<?php

// Load the site's common include (one directory above this script).
require_once(dirname(__FILE__)."/../include/common.php");

// Pre-redirect (start): mobile visitors are sent to $cfg_mhost depending on
// the value of cfg_mskin.
$cs=$_SERVER["REQUEST_URI"];

// cfg_mskin == 3: redirect to the same request URI on $cfg_mhost.
// header() does not stop the script, so exit explicitly; otherwise the
// sitemap would still be rendered and sent after the redirect header.
if($GLOBALS['cfg_mskin']==3 AND $GLOBALS['isMobile']==1){header("location:$cfg_mhost$cs");exit;}

// cfg_mskin == 4: redirect to $cfg_mhost itself.
if($GLOBALS['cfg_mskin']==4 AND $GLOBALS['isMobile']==1){header("location:$cfg_mhost");exit;}
// Pre-redirect (end)

require_once(sea_INC."/main.class.php");

// The response body is XML (a sitemap), not HTML.
header('Content-Type:text/xml;charset=UTF-8');

// Resolve the category id ($tid) and the page number ($page) from the request.
if($GLOBALS['cfg_runmode']==2||$GLOBALS['cfg_paramset']==0){
    // The query string itself carries the parameters, as "<tid>" or
    // "<tid>-<page>", once the configured file suffix has been stripped.
    $paras=str_replace(getfileSuffix(),'',$_SERVER['QUERY_STRING']);
    if(strpos($paras,"-")>0){
        $parasArray=explode("-",$paras);
        $tid=$parasArray[0];
        $page=$parasArray[1];
    }else{
        $tid=intval($paras);
        $page=1;
    }
}else{
    // The parameter names are configurable: cfg_paramid / cfg_parampage hold
    // the NAMES of the variables to read (presumably populated from the
    // request by common.php — confirm).
    // Braces are required: since PHP 7, $$GLOBALS['x'] is parsed as
    // ($$GLOBALS)['x'] rather than ${$GLOBALS['x']}.
    $tid = isset(${$GLOBALS['cfg_paramid']}) ? ${$GLOBALS['cfg_paramid']} : 0;
    $page = isset(${$GLOBALS['cfg_parampage']}) ? ${$GLOBALS['cfg_parampage']} : 1;
}

// Normalise both values to integers; anything non-numeric falls back to the
// defaults (category 0, page 1).
$tid = isset($tid) && is_numeric($tid) ? intval($tid) : 0;
$page = isset($page) && is_numeric($page) ? intval($page) : 1;

// The "missing parameter" guard for $tid == 0 is disabled in this script, so
// a category id of 0 is passed through to echoChannel().

// Quote the key: a bare constant as array key is an error on PHP 8.
$GLOBALS['tid']=$tid;

echoChannel($tid);

/**
 * Render the /map/channel.html template for one category and echo the result.
 *
 * Counts the rows of sea_data that belong to the category (by tid, or by
 * v_extratype), derives the page count, builds or loads the parsed template,
 * fills in the page-level placeholders and prints the final content.
 *
 * The hidden-category check and the member-permission check are not applied
 * in this script.
 *
 * @param int $typeId Category id (the caller passes an intval()'d value).
 * @return void The rendered content is echoed.
 */
function echoChannel($typeId)
{
    global $dsql,$cfg_iscache,$mainClassObj,$page,$t1,$cfg_basehost;

    $channelTmpName=getTypeTemplate($typeId);
    $channelTmpName=empty($channelTmpName) ? "channel.html" : $channelTmpName;

    // The same template is used for every skin/device combination.
    $channelTemplatePath = "/map/channel.html";

    $pSize = getPageSizeOnCache($channelTemplatePath,"channel",$channelTmpName);
    if (empty($pSize)) $pSize=12;

    $typeIds = getTypeId($typeId);
    $typename=getTypeName($typeId);

    // Also count rows that list this category in v_extratype.
    $extrasql = ($typeId!="") ? " or FIND_IN_SET('".$typeId."',v_extratype)<>0 " : "";
    $sql="select count(*) as dd from sea_data where (tid in (".$typeIds.") ".$extrasql.")";
    $row = $dsql->GetOne($sql);
    $TotalResult = is_array($row) ? $row['dd'] : 0;
    $pCount = ceil($TotalResult/$pSize);

    $currentTypeId = $typeId;

    // Use a cache key of its own for this XML output. The previous key
    // ("parse_channel_...") is generic. NOTE(review): the regular channel page
    // presumably caches under that same prefix, in which case the HTML page
    // and this XML page would overwrite each other's cache entry — confirm.
    $cacheName = "parse_map_channel_".$currentTypeId.$GLOBALS['cfg_mskin'].$GLOBALS['isMobile'];

    if($cfg_iscache && chkFileCache($cacheName)){
        $content = getFileCache($cacheName);
    }else{
        $content = parseChannelPart($channelTemplatePath,$currentTypeId);
        $content = str_replace("{channelpage:typename}",$typename,$content);
        $content = str_replace("{channelpage:typeid}",$currentTypeId,$content);
        if($cfg_iscache){
            setFileCache($cacheName,$content);
        }
    }

    // Page-dependent parts are filled in after the cache, on every request.
    $content = str_replace("{channelpage:page}",$page,$content);
    $content=$mainClassObj->ParsePageList($content,$typeIds,$page,$pCount,$TotalResult,"channel",$currentTypeId);
    $content=$mainClassObj->parseIf($content);
    $content=str_replace("{seacms:member}",front_member(),$content);

    // {channelpage:order-<key>-link} placeholders: search links for this
    // category, one per sort order.
    $orders = array('hit','hitasc','id','idasc','time','timeasc','commend','commendasc','score','scoreasc');
    foreach($orders as $order){
        $content = str_replace("{channelpage:order-".$order."-link}",$cfg_basehost."/search.php?page=1&searchtype=5&order=".$order."&tid=".$typeId,$content);
    }

    echo str_replace("{seacms:runinfo}",getRunTime($t1),$content) ;
}

/**
 * Load a template file and expand its tags for the given category.
 *
 * @param string $templatePath  Template path relative to sea_ROOT.
 * @param int    $currentTypeId Category id substituted into the template.
 * @return string The parsed template content.
 */
function parseChannelPart($templatePath,$currentTypeId)
{
    global $mainClassObj;

    $html = loadFile(sea_ROOT.$templatePath);
    $html = $mainClassObj->parseTopAndFoot($html);
    $html = str_replace("{seacms:currenttypeid}",$currentTypeId,$html);

    // Parsers that take only the content, applied in this fixed order.
    foreach (array('parseSelf','parseHistory','parseGlobal') as $step) {
        $html = $mainClassObj->$step($html);
    }

    $html = $mainClassObj->parseMenuList($html,"",$currentTypeId);
    $html = $mainClassObj->parseAreaList($html);
    $html = $mainClassObj->parseVideoList($html,$currentTypeId);
    $html = $mainClassObj->parseNewsList($html,$currentTypeId);
    $html = $mainClassObj->parseTopicList($html);

    // Category-level placeholders; str_replace() applies the pairs one after
    // another in array order.
    $placeholders = array(
        "{channelpage:typetext}",
        "{channelpage:keywords}",
        "{channelpage:description}",
        "{channelpage:title}",
    );
    $values = array(
        getTypeText($currentTypeId),
        getTypeKeywords($currentTypeId),
        getTypeDescription($currentTypeId),
        getTypeTitle($currentTypeId),
    );

    return str_replace($placeholders,$values,$html);
}

?>

3、在map目录下新建一个channel.html文件，代码内容如下：

<?xml version="1.0" encoding="utf-8"?>
<!-- Sitemap template: the channellist loop below emits one url entry per item. -->
<!-- The xmlns attribute declares the sitemap protocol namespace on the root element. -->
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
{seacms:channellist size=2000 order=time}
<url>
<loc>{seacms:siteurl}[channellist:link]</loc>
<lastmod>[channellist:time style=yyyy-mm-dd]</lastmod>
<changefreq>daily</changefreq>
<priority>0.8</priority>
</url>
{/seacms:channellist}
</urlset>

4、从xml文件取数据并做百度主动推送，代码如下：

#coding:utf-8

import requests,time,re,os

import sys

# Python 2 only: sys.setdefaultencoding is removed from the sys module at
# interpreter start-up; reload(sys) brings it back so the default encoding for
# implicit str/unicode conversions can be switched to UTF-8.
reload(sys)

sys.setdefaultencoding('utf-8')

def main():
    """Fetch the sitemap, record the URLs not seen before and push them to Baidu.

    Steps:
      1. Download the sitemap XML and extract every <loc> value.
      2. Drop URLs already listed in all_url.txt (and duplicates within the
         sitemap itself); append the remaining ones to all_url.txt.
      3. Write the new URLs in batches to yesterday_<n>.txt, plus a variant
         with 'www' replaced by 'm' to yesterday_m_<n>.txt.
      4. POST every non-empty batch file to the Baidu push endpoint, once for
         the 'www' site and once for the 'm' site, and print both responses.
    """
    batch_size = 2000  # URLs per batch file / per push request

    # 1. Fetch the sitemap and pull out the URLs.
    sitemap_url = 'http://yp.jd.com/00/00_0.xml'
    r = requests.get(sitemap_url)
    zhishi_url = re.findall(r'<loc>(.*?)</loc>', r.content)

    # 2. URLs recorded by earlier runs. A set gives O(1) membership tests; a
    #    missing file (first run) just means nothing has been recorded yet.
    has_push = set()
    if os.path.exists('all_url.txt'):
        with open('all_url.txt') as f_seen:
            has_push = set(line.strip() for line in f_seen)

    new_links = []
    for link in zhishi_url:
        if link in has_push:
            continue
        has_push.add(link)  # also filters duplicates inside this sitemap
        new_links.append(link)

    with open('all_url.txt', 'a') as f_all:
        f_all.writelines(link + '\n' for link in new_links)

    # 3. Split into batch files. Batch 0 is always (re)written, even when it
    #    is empty, so that it never keeps content from a previous run.
    batches = [new_links[start:start + batch_size]
               for start in range(0, len(new_links), batch_size)] or [[]]
    for index, batch in enumerate(batches):
        with open('yesterday_%s.txt' % index, 'w') as f_ytd:
            f_ytd.writelines(link + '\n' for link in batch)
        with open('yesterday_m_%s.txt' % index, 'w') as f_ytd_m:
            # NOTE: str.replace substitutes every 'www' occurrence in the URL,
            # not only the one in the host name.
            f_ytd_m.writelines(link.replace('www', 'm') + '\n' for link in batch)

    print('yesterday has %s' % len(new_links))
    print('crawl done')
    time.sleep(5)

    # 4. Push each batch.
    print('push begin')
    push_url = 'http://data.zz.baidu.com/urls'
    headers = {'Content-Type': 'text/plain'}
    params = {'site': 'www.jd.com', 'token': '00'}  # ,'type':'original'
    params_m = {'site': 'm.jd.com', 'token': '00'}  # ,'type':'original'
    for index, batch in enumerate(batches):
        if not batch:
            # Nothing new in this batch: do not post an empty body.
            continue
        try:
            with open('yesterday_%s.txt' % index, 'rb') as f_ytd:
                data = f_ytd.read()
            with open('yesterday_m_%s.txt' % index, 'rb') as f_ytd_m:
                data_m = f_ytd_m.read()
            r = requests.post(push_url, params=params, headers=headers, data=data)
            r_m = requests.post(push_url, params=params_m, headers=headers, data=data_m)
            print('PC:' + r.content + ',' + 'M:' + r_m.content)
        except Exception as e:
            # Best effort: report the failure and carry on with the next batch.
            print(e)
    print('Finish!!!')

if __name__ == '__main__':
    # Scheduler: run main() once a day, during the 18:00 minute (local time).
    last_run = None
    while True:
        current_time = time.localtime(time.time())
        today = (current_time.tm_year, current_time.tm_yday)
        if current_time.tm_hour == 18 and current_time.tm_min == 0 and last_run != today:
            # Remember the day so main() is not started again within the
            # same minute.
            last_run = today
            main()
        # Poll once a second instead of spinning in a tight loop. Because the
        # loop now sleeps, the check covers the whole 18:00 minute rather than
        # requiring tm_sec to be exactly 0.
        time.sleep(1)