生活随笔
收集整理的這篇文章主要介紹了
python3-爬取cnnvd漏洞库
小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
這幾天要爬cnnvd漏洞庫
網上找了半天發現各種代碼不是跑不了就不好用
于是本菜雞自己寫了一個
先上效果
我是按頁爬的
一頁目錄10條漏洞
一次爬1000頁一萬條
一共大概128900條
表里面的內容
不是多線程(不會),至于速度,每一萬條在半小時左右
我每次開6個程序爬,大概一個小時爬完的。
源代碼就貼在這里了(雖然想要積分)
然后爬好的xls在這里,不想花時間爬的可以直接下
python代碼和爬好的xls
# -*- coding: utf-8 -*-
"""Crawler for the CNNVD vulnerability database (www.cnnvd.org.cn).

Fetches vulnerability listing and detail pages, collects the fields into
module-level lists, and (under __main__) dumps everything into an .xls
workbook via xlwt.
"""
# NOTE(review): the original carried a Python-2 `reload(sys);
# sys.setdefaultencoding('utf-8')` hack here.  Under Python 3 the default
# encoding is always UTF-8 (so the guard condition was always False) and
# `reload` is not a builtin (so the hack would raise NameError if it ever
# ran).  It has been removed as dead/broken code.
import re
import sys
import traceback

import requests
from bs4 import BeautifulSoup
import xlwt


def _field_text(link, idx):
    """Return the stripped text of the <a> in detail cell *idx*, or "".

    The detail table puts severity/CVE/type/... at fixed odd offsets of
    link.contents[3].contents; any structural mismatch (missing cell,
    page without a detail table) yields "" so the columns stay aligned.
    """
    try:
        return str(link.contents[3].contents[idx].find('a').text.strip())
    except Exception:
        return ""


def _joined_p_text(tag):
    """Concatenate the stripped text of every <p> under *tag* (may raise)."""
    paragraphs = BeautifulSoup(tag.decode(), 'html.parser').find_all(name='p')
    return "".join(str(p.text.strip()) for p in paragraphs)


def _joined_a_text(tag):
    """Concatenate the stripped text of every <a class="a_title2"> under *tag* (may raise)."""
    anchors = BeautifulSoup(tag.decode(), 'html.parser').find_all(
        'a', attrs={'class': 'a_title2'})
    return "".join(str(a.text.strip()) for a in anchors)


def _append_or_empty(target, extract):
    """Append extract() to *target*, or "" if extraction fails.

    Narrowed from the original bare `except:` so Ctrl-C / SystemExit
    are no longer swallowed.
    """
    try:
        target.append(extract())
    except Exception:
        target.append("")


def getURLDATA(url):
    """Scrape one CNNVD vulnerability detail page.

    Appends exactly one value to each of the module-level lists
    list4..list15 (severity, CVE id, vulnerability type, publish date,
    threat type, update date, vendor, description, bulletin, reference
    links, affected entities, patches).  A missing field is recorded as
    "" so all lists stay the same length.

    May raise requests exceptions (connection error, timeout) — the
    caller is expected to handle those.
    """
    header = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36',
        'Connection': 'keep-alive',
    }
    r = requests.get(url, headers=header, timeout=30)
    html = BeautifulSoup(r.content.decode(), 'html.parser')

    link = html.find(class_='detail_xq w770')            # detail table
    link_introduce = html.find(class_='d_ldjj')          # description block
    link_others = html.find_all(class_='d_ldjj m_t_20')  # remaining sections

    # Fixed cell positions: 3=severity, 5=CVE id, 7=type, 9=publish date,
    # 11=threat type, 13=update date, 15=vendor.
    for target, idx in ((list4, 3), (list5, 5), (list6, 7), (list7, 9),
                        (list8, 11), (list9, 13), (list10, 15)):
        target.append(_field_text(link, idx))

    # Description (link_introduce may be None -> "" via the helper).
    _append_or_empty(list11, lambda: _joined_p_text(link_introduce))

    if len(link_others) != 0:
        # Sections appear in a fixed order: bulletin, reference links,
        # affected entities, patches.  Indexing happens inside the try
        # (via the lambda) so a short list still yields "" entries, as
        # in the original.
        _append_or_empty(list12, lambda: _joined_p_text(link_others[0]))
        _append_or_empty(list13, lambda: _joined_p_text(link_others[1]))
        _append_or_empty(list14, lambda: _joined_a_text(link_others[2]))
        _append_or_empty(list15, lambda: _joined_a_text(link_others[3]))
    else:
        for target in (list12, list13, list14, list15):
            target.append("")
=="__main__":global list4global list5global list6global list7global list8global list9global list10global list11global list12global list13global list14global list15list1
= []#網(wǎng)站的urllist2
= []#漏洞的名稱list3
= []#cnnvd編號(hào)list4
=[]#危害等級(jí)list5
=[]#
CVE編號(hào)list6
=[]#漏洞類型list7
=[]#發(fā)布時(shí)間list8
=[]#威脅類型list9
=[]#更新時(shí)間list10
=[]#廠商list11
=[]#漏洞簡(jiǎn)介list12
=[]#漏洞公告list13
=[]#參考網(wǎng)址list14
=[]#受影響實(shí)體list15
=[]#補(bǔ)丁 start
=11400last
=12000#url
= 'http://www.cnnvd.org.cn/web/xxk/ldxqById.tag?CNNVD=CNNVD-201901-1014'#
getURLDATA(url
)f
= xlwt
.Workbook() # 創(chuàng)建
EXCEL工作簿sheet1
= f
.add_sheet(u
'sheet1', cell_overwrite_ok
=True
) # 創(chuàng)建sheetsheet1
.write(0, 0, "漏洞名稱")sheet1
.write(0, 1, "網(wǎng)址")sheet1
.write(0, 2, "CNNVD編號(hào)")sheet1
.write(0, 3, "危害等級(jí)")sheet1
.write(0, 4, "CVE編號(hào)")sheet1
.write(0, 5, "漏洞類型")sheet1
.write(0, 6, "發(fā)布時(shí)間")sheet1
.write(0, 7, "威脅類型")sheet1
.write(0, 8, "更新時(shí)間")sheet1
.write(0, 9, "廠商")sheet1
.write(0, 10, "漏洞簡(jiǎn)介")sheet1
.write(0, 11, "漏洞公告")sheet1
.write(0, 12, "參考網(wǎng)址")sheet1
.write(0, 13, "受影響實(shí)體")sheet1
.write(0, 14, "補(bǔ)丁")for j
in range(start
,last
+1):# url
='http://www.cnnvd.org.cn/web/vulnerability/querylist.tag?pageno=1&repairLd='url
= 'http://www.cnnvd.org.cn/web/vulnerability/querylist.tag?pageno='+str(j
)+'&repairLd='print ("page"+str(j
))header
={'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36','Connection': 'keep-alive',}r
=requests
.get(url
,headers
=header
,timeout
=30)#r
.raise_for_status()拋出異常html
= BeautifulSoup(r
.content
.decode(),'html.parser')link
=html
.find_all(class_
='a_title2')for i
in link
:##
print (i
.text
.lstrip())try:list1
.append(i
.text
.lstrip())##
print ("http://www.cnnvd.org.cn"+i
.attrs
['href'])k
=str(i
.attrs
['href'])list2
.append("http://www.cnnvd.org.cn"+k
)list3
.append(k
[28:])#
print("http://www.cnnvd.org.cn"+k
)getURLDATA("http://www.cnnvd.org.cn"+k
)except
:print("http://www.cnnvd.org.cn"+k
)breakfor i
in range(len(list15
)):sheet1
.write(i
+ 1, 0, list1
[i
])sheet1
.write(i
+ 1, 1, list2
[i
])sheet1
.write(i
+ 1, 2, list3
[i
])sheet1
.write(i
+ 1, 3, list4
[i
])sheet1
.write(i
+ 1, 4, list5
[i
])sheet1
.write(i
+ 1, 5, list6
[i
])sheet1
.write(i
+ 1, 6, list7
[i
])sheet1
.write(i
+ 1, 7, list8
[i
])sheet1
.write(i
+ 1, 8, list9
[i
])sheet1
.write(i
+ 1, 9, list10
[i
])sheet1
.write(i
+ 1, 10, list11
[i
])sheet1
.write(i
+ 1, 11, list12
[i
])sheet1
.write(i
+ 1, 12, list13
[i
])sheet1
.write(i
+ 1, 13, list14
[i
])sheet1
.write(i
+ 1, 14, list15
[i
])f
.save(str(start
)+"-"+str(last
)+".xls") #保存文件
總結(jié)
以上是生活随笔為你收集整理的python3-爬取cnnvd漏洞库的全部內容,希望文章能夠幫你解決所遇到的問題。
如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。