python 投票脚本_使用Python脚本拉取2014 CSDN博客之星投票情况
前言
最近在自学Python,正好2014 CSDN博客之星投票搞得如火如荼,拿来练练手。
环境:Win7 64位 Python 2.7;
用到了正则表达式、函数、写文件、urllib2;
没有用到线程;
程序也不怎么规范,但终归是能够达到目的了,哈哈。
源码
# -*- coding: utf-8 -*-
import urllib2;
import re;
import os;
import thread;
# Scrape the CSDN "Blogstar2014" vote listing and write one text line per
# candidate into 'csdn_blog_star_vote.txt'.
#
# url: address handed to getPageCount() to discover the number of pages; the
#      per-page addresses are rebuilt from baseUrl + page index + '#content'.
# Returns nothing; the output is the file plus progress lines printed to stdout.
#
# NOTE(review): this listing was damaged in extraction -- block indentation is
# gone and the HTML inside every re.findall() pattern (and inside the sample
# markup comments above them) has been stripped, so the patterns below are not
# the ones the script originally ran with. Restore both from the original
# source before running.
# NOTE(review): Python 2 only as written (print statements, urllib2).
# NOTE(review): no try/finally or 'with' guards f, so the file stays open if
# any request or index lookup raises.
def loadBlogSort(url):
pageCount = getPageCount(url);
print 'pageCount == ',pageCount;
baseUrl = 'http://vote.blog.csdn.net/Blogstar2014/Selection?PageIndex=';
urlSuffix = '#content';
filepath = 'csdn_blog_star_vote.txt';
# Start from a fresh output file on every run.
if os.path.exists(filepath):
os.remove(filepath);
f = open(filepath,'w+');
# Pages are numbered from 1 to pageCount inclusive; pageCount is converted
# with int() here because getPageCount() returns a regex match.
for pageIndex in range(1,int(pageCount)+1):
contentUrl = baseUrl + str(pageIndex) + urlSuffix;
print 'pageIndex == ',pageIndex, ' contentUrl == ',contentUrl;
# Send a browser-like User-Agent header with the request.
user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
headers = { 'User-Agent' : user_agent }
request = urllib2.Request(contentUrl, headers = headers)
response = urllib2.urlopen(request);
result = response.read();
# unicodeResult = result.decode("utf-8");
# Nicknames
#
(.+?)names = re.findall('
(.+?)',result,re.S);nameList = [];
# name[2] is read as the third capture group of each match; the pattern as
# shown has only one group -- presumably the others were lost with the HTML.
for name in names:
# print '昵稱:',name[2];
nameList.append(name[ 2 ]);
# Blog URL
#
# Each detail link is resolved through getBlogUrl(), which issues one more
# HTTP request per candidate.
blogUrlList = [];
detailUrls = re.findall('
',result,re.S);for detailUrl in detailUrls:
blogUrlList.append(getBlogUrl(detailUrl[0]));
# Votes
#
得票:(.+?)
votes = re.findall('
(.+?)(.+?)
',result,re.S);voteList = [];
# As with the nicknames, the third capture group of each match is kept.
for vote in votes:
# print ' 得票:',str(vote[2]);
voteList.append(vote[ 2 ]);
# Blog views, post count, comment count
#
(.+?)(.+?)(.+?)infos = re.findall('
(.+?)(.+?)(.+?)',result,re.S);infoIndex = 0;
# Width that each field is space-padded to in the output line; a field longer
# than this gets no padding because a non-positive repeat count yields ''.
blankSize = 20;
for info in infos:
# nameList, voteList and blogUrlList are indexed in step with infos, so all
# four findall() results are assumed to have the same length and order --
# TODO confirm. The doubled '+ +' before the last padding term parses as a
# unary plus on the integer width, so it is redundant but harmless.
# NOTE(review): the label literals contain what look like extraction
# artefacts (e.g. "數(shù)"); verify them against the original source.
user = '昵稱:'+nameList[infoIndex] + ( blankSize - len(nameList[infoIndex]) )*' '+'得票:'+voteList[infoIndex] + ( blankSize - len(voteList[infoIndex]) )*' '+'博文瀏覽量: '+str(info[0]) + ( blankSize - len(str(info[0])) )*' '+'博文數(shù):'+str(info[1]) + ( blankSize - len(str(info[1])) )*' '+'評(píng)論數(shù):'+str(info[2])+ + ( blankSize - len(str(info[2])) )*' '+'博客地址:' + blogUrlList[infoIndex]+ '\n'
# print user;
f.write(user);
infoIndex += 1;
# NOTE(review): with indentation lost it cannot be seen where the loops end;
# presumably the close and the final message run once after the page loop,
# since f is opened once before it.
f.close();
print '寫文件完畢!';
# Get the blog URL of one candidate.
#
# detailUrl: path appended to 'http://vote.blog.csdn.net/' to form the address
#            of the candidate's detail page.
# Returns str(blogUrls[0][1]): the second capture group of the first match
# found in the detail page.
# Raises IndexError when the pattern matches nothing (blogUrls[0]); errors from
# urllib2.urlopen() are not caught here.
#
# NOTE(review): the HTML inside the re.findall() pattern was stripped during
# extraction and the literal is broken across lines, so the pattern shown is
# not the one the script originally used. Indentation of the body is lost too.
def getBlogUrl(detailUrl):
url = 'http://vote.blog.csdn.net/' + detailUrl;
# Send a browser-like User-Agent header with the request.
user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
headers = { 'User-Agent' : user_agent }
request = urllib2.Request(url, headers = headers)
response = urllib2.urlopen(request);
result = response.read();
blogUrls = re.findall('
(.+?)(.+?)
',result,re.S);print 'blogUrl == ',url + '\n' + str(blogUrls[0][1]);
return str(blogUrls[0][1]);
# Get the total number of pages in the vote listing.
#
# url: address of a listing page to download and search.
# Returns pageCount[0], the first re.findall() match, unconverted.
# Raises IndexError when the pattern matches nothing; errors from
# urllib2.urlopen() are not caught here.
#
# NOTE(review): the HTML inside the re.findall() pattern was stripped during
# extraction and the literal is broken across lines, so the pattern shown is
# not the one the script originally used. Indentation of the body is lost too.
def getPageCount(url):
# Send a browser-like User-Agent header with the request.
user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
headers = { 'User-Agent' : user_agent }
request = urllib2.Request(url, headers = headers)
response = urllib2.urlopen(request);
result = response.read();
pageCount = re.findall('
(.+?)',result,re.S);return pageCount[0];
# Script entry: hand page 1 of the listing to loadBlogSort(), which runs the
# whole scrape from that address.
url = 'http://vote.blog.csdn.net/Blogstar2014/Selection?PageIndex=1#content'
loadBlogSort(url)
效果
2014_csdn_blog_star_vote
总结
以上是生活随笔为你收集整理的python 投票脚本_使用Python脚本拉取2014 CSDN博客之星投票情况的全部内容,希望文章能够帮你解决所遇到的问题。
- 上一篇: python复制俩文件夹相同文件_Pyt
- 下一篇: 电压越低采集的ad值反而变大_80多条关