Grades Crawler: Project Introduction
Grades on the academic affairs system are not easy to print yourself, so I wrote this crawler, Grades Crawler. Given valid student account credentials, it scrapes your own grades from the academic affairs site and saves them locally, either as an Excel file or directly into an SQLite database.

Additional libraries that must be installed: bs4, xlrd, xlwt (xlrd/xlwt are for saving the grades in Excel format; the code also relies on requests).
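For orientation, the intended end-to-end usage looks like this; a minimal sketch mirroring the __main__ block at the end of the listing, with placeholder credentials:

sg = ScrapeGrade()
sg.login(id='1302051***', password='****')  # a valid student account is required
sg.store_into_xls()           # writes demo.xls
sg.store_into_db_by_term()    # writes grades_term.db, one table per term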
Python code (a detailed walkthrough will follow when I have time):
__author__ = 'ysc'

import requests
from bs4 import BeautifulSoup
import xlrd  # listed as a dependency above, but not actually used below
import xlwt


class ScrapeGrade:
    def __init__(self, auth_url=None, log_url=None):
        # Defaults: Xidian's CAS login entry and the CAS ticket URL of the
        # academic affairs system (jwxt).
        if not auth_url:
            self.auth_url = "http://ids.xidian.edu.cn/authserver/login?service=http%3A%2F%2Fjwxt.xidian.edu.cn%2Fcaslogin.jsp"
            self.log_url = "http://jwxt.xidian.edu.cn/caslogin.jsp"
        else:
            self.auth_url = auth_url
            self.log_url = log_url
        self.session = requests.Session()
    def login(self, id='1302051****', password='****'):
        # Fetch the CAS login page and pull out the hidden form tokens
        # (lt and execution) that must be posted back with the credentials.
        r = self.session.get(self.auth_url)
        bsObj = BeautifulSoup(r.text, "html.parser")
        lt_value = bsObj.find(attrs={"name": "lt"})['value']
        exe_value = bsObj.find(attrs={"name": "execution"})['value']
        params = {
            'username': id,
            'password': password,
            "submit": "",
            "lt": lt_value,
            "execution": exe_value,
            "_eventId": "submit",
            "rmShown": '1',
        }
        headers = {
            'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0",
            'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3",
            "Accept-Encoding": "gzip, deflate",
            "Referer": "http://ids.xidian.edu.cn/authserver/login?service=http%3A%2F%2Fjwxt.xidian.edu.cn%2Fcaslogin.jsp",
            "Content-Type": "application/x-www-form-urlencoded",
        }
        # Post the credentials, then hit the CAS ticket URL so the session
        # picks up the jwxt cookies.
        self.session.post(self.auth_url, data=params, headers=headers)
        self.session.get(self.log_url)

    def store_into_db_by_term(self):
        import sqlite3
        conn = sqlite3.connect('grades_term.db')
        c = conn.cursor()
        try:
            row = 0
            # One request fetches every term's grade table (the lnxndm
            # parameter is the URL-encoded term name).
            grade_page = self.session.get(
                "http://jwxt.xidian.edu.cn/gradeLnAllAction.do?type=ln&oper=qbinfo&lnxndm=2015-2016%D1%A7%C4%EA%B5%DA%D2%BB%D1%A7%C6%DA(%C1%BD%D1%A7%C6%DA)")
            bsObj2 = BeautifulSoup(grade_page.text, "html.parser")
            datas = bsObj2.find_all("table", attrs={"class": "titleTop2"})
            for i, seme in enumerate(datas):
                # The column headers become the table schema: 學分 (credits)
                # and 成績 (score) are stored as REAL, everything else as TEXT.
                ths = seme.find_all('th')
                titles = []
                for col, th in enumerate(ths):
                    print(th.string.strip(), end=' ')
                    th = th.string.strip()
                    if th != '學分' and th != "成績":
                        titles.append(th + ' text')
                    else:
                        titles.append(th + ' real')
                sent = '''CREATE TABLE {0} ( '''.format('table' + str(i + 1))
                for ith, title in enumerate(titles):
                    sent += title
                    if ith < len(titles) - 1:
                        sent += ", "
                sent += ")"
                try:
                    c.execute(sent)
                    conn.commit()
                except sqlite3.OperationalError:
                    # the table already exists from a previous run
                    pass
                print('\n')
                row += 1
                # Every len(ths) consecutive centred cells make up one course
                # row, which is then inserted as a whole.
                subs = seme.find_all('td', attrs={"align": "center"})
                col_iter = 0
                len_ths = len(ths)
                grade_subs = []
                for sub in subs:
                    if sub.string:
                        if sub.string.strip() != '':
                            print(sub.string.strip(), end=' ')
                            grade_subs.append("'" + sub.string.strip() + "'")
                        else:
                            print("' '", end=' ')
                            grade_subs.append("' '")
                    else:
                        # some cells wrap their text in a <p> tag
                        print(sub.find('p').string.strip(), end=' ')
                        grade_subs.append("'" + sub.find('p').string.strip() + "'")
                    col_iter += 1
                    if col_iter == len_ths:
                        print('\n')
                        sent = '''INSERT INTO {0} VALUES( '''.format('table' + str(i + 1))
                        for ith, grade_sub in enumerate(grade_subs):
                            sent += grade_sub
                            if ith < len(grade_subs) - 1:
                                sent += ", "
                        sent += ")"
                        try:
                            c.execute(sent)
                            conn.commit()
                        except sqlite3.OperationalError as e:
                            print(e)
                            print(sent)
                            exit(-2)
                        row += 1
                        col_iter = 0
                        grade_subs = []
                print("\n")
        finally:
            conn.close()
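
    # Resulting layout of grades_term.db: one table per term (table1, table2,
    # ...), with 學分 and 成績 as REAL columns and all others as TEXT,
    # mirroring the CREATE TABLE statement built above.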

    def store_into_db_by_prop(self):
        import sqlite3
        conn = sqlite3.connect('grades_prop.db')
        c = conn.cursor()
        try:
            row = 0
            # Same grades, but grouped by course property (lnsxdm=001).
            grade_page = self.session.get(
                "http://jwxt.xidian.edu.cn/gradeLnAllAction.do?type=ln&oper=sxinfo&lnsxdm=001")
            bsObj2 = BeautifulSoup(grade_page.text, "html.parser")
            datas = bsObj2.find_all("table", attrs={"class": "titleTop2"})
            for i, seme in enumerate(datas):
                ths = seme.find_all('th')
                titles = []
                for col, th in enumerate(ths):
                    print(th.string.strip(), end=' ')
                    th = th.string.strip()
                    if th != '學分' and th != "成績":
                        titles.append(th + ' text')
                    else:
                        titles.append(th + ' real')
                sent = '''CREATE TABLE {0} ( '''.format('table' + str(i + 1))
                for ith, title in enumerate(titles):
                    sent += title
                    if ith < len(titles) - 1:
                        sent += ", "
                sent += ")"
                try:
                    c.execute(sent)
                    conn.commit()
                except sqlite3.OperationalError:
                    pass
                print('\n')
                row += 1
                # Here each course is a whole <tr class="odd"> row rather
                # than a flat run of centred <td> cells.
                subs = seme.find_all('tr', attrs={'class': "odd"})
                col_iter = 0
                len_ths = len(ths)
                grade_subs = []
                for sub in subs:
                    infors = sub.find_all('td')
                    for infor in infors:
                        if infor.string:
                            if infor.string.strip() != '':
                                print(infor.string.strip(), end=' ')
                                grade_subs.append("'" + infor.string.strip() + "'")
                            else:
                                print("' '", end=' ')
                                grade_subs.append("' '")
                        else:
                            infor = infor.find('p').string.strip()
                            if infor != '':
                                print(infor, end=' ')
                                grade_subs.append("'" + infor + "'")
                            else:
                                print("' '", end=' ')
                                grade_subs.append("' '")
                    print('\n')
                    sent = '''INSERT INTO {0} VALUES( '''.format('table' + str(i + 1))
                    for ith, grade_sub in enumerate(grade_subs):
                        sent += grade_sub
                        if ith < len(grade_subs) - 1:
                            sent += ", "
                    sent += ")"
                    try:
                        c.execute(sent)
                        conn.commit()
                    except sqlite3.OperationalError as e:
                        print(e)
                        print(sent)
                        exit(-2)
                    row += 1
                    col_iter = 0
                    grade_subs = []
                print("\n")
        finally:
            conn.close()
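
    # store_into_db_by_prop() differs from store_into_db_by_term() mainly in
    # the query (grades grouped by course property rather than by term) and
    # in inserting one row per <tr> instead of chunking cells by column count.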

    def set_style(self, name, height, bold=False):
        style = xlwt.XFStyle()
        font = xlwt.Font()
        font.name = name
        font.bold = bold
        font.color_index = 4
        font.height = height
        '''
        borders = xlwt.Borders()
        borders.left = 6
        borders.right = 6
        borders.top = 6
        borders.bottom = 6
        '''
        style.font = font
        return style
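
    # xlwt detail: font.height is measured in 1/20 pt, so 220 gives an 11 pt
    # font; color_index 4 should map to blue in xlwt's default colour table.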

    def store_into_xls(self):
        file = xlwt.Workbook()
        table = file.add_sheet('grades', cell_overwrite_ok=True)
        row = 0
        grade_page = self.session.get(
            "http://jwxt.xidian.edu.cn/gradeLnAllAction.do?type=ln&oper=qbinfo&lnxndm=2015-2016%D1%A7%C4%EA%B5%DA%D2%BB%D1%A7%C6%DA(%C1%BD%D1%A7%C6%DA)")
        bsObj2 = BeautifulSoup(grade_page.text, "html.parser")
        datas = bsObj2.find_all("table", attrs={"class": "titleTop2"})
        for seme in datas:
            # write the header row in bold styled text
            ths = seme.find_all('th')
            for col, th in enumerate(ths):
                print(th.string.strip(), end=' ')
                table.write(row, col, th.string.strip(),
                            self.set_style('Times New Roman', 220, True))
            print('\n')
            row += 1
            subs = seme.find_all('td', attrs={"align": "center"})
            col_iter = 0
            len_ths = len(ths)
            for sub in subs:
                if sub.string:
                    print(sub.string.strip(), end=' ')
                    table.write(row, col_iter, sub.string.strip())
                else:
                    print(sub.find('p').string.strip(), end=' ')
                    table.write(row, col_iter, sub.find('p').string.strip())
                col_iter += 1
                if col_iter == len_ths:
                    # one full course row written; move to the next sheet row
                    print('\n')
                    row += 1
                    col_iter = 0
            print("\n")
        file.save('demo.xls')


if __name__ == '__main__':
    sg = ScrapeGrade()
    sg.login(id='1302051***', password='1234567')
    sg.store_into_xls()
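Once store_into_db_by_term() has run, the database can be inspected with nothing but the standard library; a minimal sketch, assuming at least one term table was created:

import sqlite3

conn = sqlite3.connect('grades_term.db')
# each term lives in its own table: table1, table2, ...
for row in conn.execute('SELECT * FROM table1'):
    print(row)
conn.close()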