python推荐淘宝物美价廉商品 2.0
生活随笔
生活随笔收集整理的這篇文章主要介紹了《python推荐淘宝物美价廉商品 2.0》,小編覺得挺不錯的,現在分享給大家,供大家參考。
改動:
新增功能 :可選擇只看天貓或淘寶
代碼模塊化封裝:參數的配置與輸入統一放在單獨的 setting.py 文件中管理,主程序 main.py 只負責讀取 setting 中的參數并傳入各功能函數。
main.py代碼:
1 # -*- coding: utf-8 -*- 2 import urllib 3 import urllib2 4 import requests 5 6 import re 7 import time 8 import random 9 import os 10 from math import log 11 from math import log10 12 from math import sqrt 13 import sys 14 15 import setting 16 17 18 #"pageSize":44,"totalPage":100,"currentPage":3,"totalCount":29561 19 20 '''在Python自己IDE上要注釋掉以下兩行''' 21 22 reload(sys) 23 sys.setdefaultencoding('utf8') # python2.x的的defaultencoding是ascii 24 25 class counter(object): 26 #計數器 27 def __init__(self): 28 self.count = 0 29 self.try_time = 0 30 31 self.try_find = 0 32 self.fail_time = 0 33 self.url_list = [] 34 self.new_flag = True 35 self.results = [] 36 self.priSu = 0 37 self.descSu = 0 38 self.tm_tb = '' 39 40 def print_counter(self): 41 print 'try_time:', self.try_time, " get_count:" , self.count, " fail_time:",self.fail_time ,"try_find_time:",self.try_find 42 43 44 45 46 def recommend_rate(price, description, delivery, service, comments): 47 #描述為絕對值 48 av_p = counter1.priSu / counter1.count 49 av_d = counter1.descSu / counter1.count 50 rate = (description/av_d)**20 \ 51 *(description + delivery + service) \ 52 *(av_p/(price))**0.1 \ 53 +log((comments+5),1000) 54 55 #print 'all count=',counter1.count 56 #print "avrage price=",av_p,';',av_p/(price),';price',price,';comments=',comments,';descrip=',description 57 #print 'rate=',rate,'(price)yinzi',(av_p/(price))**0.1,'descrip_yinzi',(description/av_d)**20,'comments_factor=',log((comments+50),100) 58 return rate 59 60 61 def product_rank(list): 62 for x in list: 63 #0開始為 x0商品名 、x1圖片鏈接、x2商品鏈接、x3價格、x4評論數、 x5店鋪名、 x6快遞情況、x7描述相符情況3個、x8服務情況 64 rate = recommend_rate(x[3],x[7],x[6],x[8],x[4]) 65 x.append(rate) 66 67 68 def get_user_rate(item_url): 69 #暫時未使用該功能 70 '''獲取賣家信用情況;未登錄情況不能訪問,或者需要在頭部文件中加入cookie。。。;''' 71 html = urllib2.urlopen(item_url) 72 #"//rate.taobao.com/user-rate-282f910f3b70f2128abd0ee9170e6428.htm" 73 regrex_rate = '"(//.*?user\-rate.*?)"' 74 codes = re.findall(regrex_rate,html.read()) 75 html.close() 76 77 
user_rate_url= 'http:'+codes[0] 78 print 'uu', user_rate_url 79 80 user_rate_html = urllib2.urlopen(user_rate_url) 81 print user_rate_html.read() 82 #title = "4.78589分" 83 desc_regex = u'title="(4.[0-9]{5}).*?' 84 de_pat = re.compile(desc_regex) 85 86 descs = re.findall(de_pat,user_rate_html.read()) 87 print len(descs) 88 item_url = 'https://item.taobao.com/item.htm?id=530635294653&ns=1&abbucket=0#detail' 89 #get_user_rate(item_url) 90 '''獲取賣家信用情況;未登錄情況不能訪問。。。暫時 無用''' 91 92 93 def makeNewdir(savePath): 94 while os.path.exists(savePath): 95 savePath = savePath + str(random.randrange(1,10)) 96 #print "the path exist,we'll make a new one" 97 try: 98 os.makedirs(savePath) 99 print 'ok,file_path we reserve results: %s'%savePath 100 print '保存的路徑為:'.decode('utf-8') 101 102 except : 103 print "failed to make file path\nplease restart program" 104 print '創建文件夾失敗,請重新啟動程序'.decode('utf-8') 105 return savePath 106 107 108 def get_praised_good(url, file_open, keyword, counts, descripHrequ, servHrequ, descripNrequ): 109 #從給定的淘寶鏈接中 獲取符合條件的商品list 110 html = req_s.get(url) 111 code = html.content 112 html.close() 113 114 regrex2 = ur'raw_title":"(.*?)","pic_url":"(.*?)","detail_url":"(.*?)","view_price":"(.*?)".*?"comment_count":"(.*?)".*?"nick":"(.*?)".*?"delivery":\[(.*?),(.*?),(.*?)\],"description":\[(.*?),(.*?),(.*?)\],"service":\[(.*?),(.*?),(.*?)\]' 115 #每一個匹配項 返回 15個 字符串 116 #x[0]開始為 x0商品名 、x1圖片鏈接、x2商品鏈接、x3價格、x4評論數、 x5店鋪名、 x6快遞情況3個、x9描述相符情況3個、x12服務情況3個 117 pat = re.compile(regrex2) 118 meet_code = re.findall(regrex2, code)# 119 if not len(meet_code): 120 counter1.new_flag = False 121 print 'no more new met products' 122 123 for x in meet_code: 124 # if counter1.count>=counts : 125 # print "have get enough pruducts" 126 # break 127 counter1.try_find += 1 128 129 description_higher = int(x[10])*float(x[11])/100 130 service_higher = int(x[13])*float(x[14])/100 131 try: 132 x4 = int(x[4]) #description_count 133 except: 134 x4 = 0 135 136 #如果 只要淘寶 非天貓 137 if counter1.tm_tb == 
'taobao': 138 if counter1.tm_tb not in x[2].split('.'): 139 break 140 141 if (description_higher>=descripHrequ) and (service_higher>=servHrequ) and x4>=descripNrequ: 142 if re.findall(keyword,x[0]) : # 中文keyword在結果中匹配問題暫時沒有解決,,直接加在搜索詞里吧 143 x0 = x[0].replace(' ','').replace('/','') 144 detail_url = 'http:' + x[2].decode('unicode-escape').encode('utf-8') 145 x1 = 'http:'+ x[1].decode('unicode-escape').encode('utf-8') 146 #print type(x) 147 if detail_url in counter1.url_list or x4 == 0: 148 counter1.new_flag = False 149 print 'no more new met products' 150 print counter1.url_list 151 print detail_url 152 break 153 counter1.url_list.append(detail_url) 154 counter1.try_time += 1 155 counter1.count += 1 156 157 x11 = float(x[11])/100 158 x9 = float(x[9])/100 159 x12 = float(x[12])/100 160 x6 = float(x[6])/100 161 x3 = float(x[3]) 162 counter1.priSu += x3 163 counter1.descSu += x9 164 x5 = unicode(x[5],'utf-8') 165 166 result_list = [] 167 result_list.append(x0) 168 result_list.append(x1) 169 result_list.append(detail_url) 170 result_list.append(x3) 171 result_list.append(x4) 172 result_list.append(x5) 173 result_list.append(x6) 174 result_list.append(x9) 175 result_list.append(x12) 176 #0開始為 x0商品名 、x1圖片鏈接、x2商品鏈接、x3價格、x4評論數、 x5店鋪名、 x6快遞情況、x7描述相符情況、x8服務情況 177 counter1.results.append(result_list) 178 179 180 def save_downpic(lis,file_open,savePath): 181 '''從商品list下載圖片到reserve_file_path,并寫入信息至fileopen''' 182 #0開始為 x0商品名 、x1圖片鏈接、x2商品鏈接、x3價格、x4評論數、 x5店鋪名、 x6快遞情況、x7描述相符情況、x8服務情況、x9:rate 183 len_list = len(lis) 184 print 'we find:',len_list,'products' 185 cc = 0 186 for x in lis: 187 try : 188 urllib.urlretrieve(x[1], savePath+'\\%s___'%cc +unicode(x[0],'utf-8')+'.jpg') 189 190 txt_name = savePath+'\\'+ '%s__'%cc+ 'custome_description_%s __'%x[7] +'__comments_%s_'%x[4]+ '___price_%srmb___'%x[3] +x[5] +'.txt' 191 192 file_o = open(txt_name, 'a') 193 file_o.write(x[2]) 194 file_o.close() 195 196 print '\nget_one_possible_fine_goods:\n','good_name:',x[0].decode('utf-8') 197 print 
'rate=',x[9] 198 print 'price:',x[3],x[5].decode('utf-8') 199 print 'custome_description:',x[7],'--','described_number:',x[4],' service:',x[8] 200 print x[2].decode('utf-8'),'\ngood_pic_url:',x[1].decode('utf-8') 201 202 print txt_name 203 print cc+1,"th" 204 205 file_open.write(u'%s__'%cc \ 206 + str(x[0]) \ 207 + '\nprice:' \ 208 + str(x[3]) \ 209 + '¥,\n' \ 210 + str(x[2]) + ' \n' + str(x[5]) + '\ncustomer_description:' + str(x[7]) + 'described_number:' + str(x[4])+'\n\n\n') 211 212 213 print 'get one -^-' 214 except : 215 print "failed to down picture or creat txt" 216 counter1.fail_time += 1 217 cc += 1 218 time.sleep(0.5) 219 220 221 222 def get_market_totalCount(url): 223 html = urllib2.urlopen(url) 224 code = html.read() 225 reg = '"pageSize":[0-9]*?,"totalPage":[0-9]*?,"currentPage":[0-9]*?,"totalCount":([0-9]*?)}' 226 totalCount = int(re.findall(reg,code)[0]) 227 228 return totalCount 229 230 #"pageSize":44,"totalPage":100,"currentPage":3,"totalCount":29561 231 232 233 def get_all_praised_goods(serchProd,counts,savePath ,keyword, price_min=0,price_max=0,descripHrequ =0,servHrequ=0 ,descripNrequ=0): 234 #邊里搜索結果每一頁 235 #initial url and page number 236 initial_url = 'https://s.taobao.com/search?q='+serchProd + '&_input_charset=utf-8' 237 238 if counter1.tm_tb == 'tmall': 239 initial_url = initial_url + '&filter_tianmao=tmall' 240 241 if price_min: 242 if price_min < price_max : 243 initial_url = initial_url+'&filter=reserve_price%5B'+'%s'%price_min+'%2C' +'%s'%price_max 244 initial_url = initial_url +'&cd=false&%5D&s=' 245 246 #tian_mall = 'https://list.tmall.com/search_product.htm?q=' 247 248 print "initial_url",initial_url+'0' 249 250 page_n = 0 251 reserve_file = savePath+r'\found_goods.txt' 252 file_open = open(reserve_file,'a') 253 254 file_open.write('****************************\n') 255 file_open.write(time.ctime()) 256 file_open.write('\n****************************\n') 257 258 total = get_market_totalCount(initial_url+'0') 259 print 
"totalcount",total 260 if total>counts*10: 261 total = sqrt(total) 262 263 while counter1.new_flag and counter1.try_find<total : 264 265 url_1 = initial_url + str(44*page_n) 266 #print initial_url 267 print 'url_1:', url_1 268 #print 'ss',initial_url+'%s'%(44*page_n) 269 page_n += 1 270 271 get_praised_good(url_1,file_open,keyword,counts,descripHrequ,servHrequ ,descripNrequ) 272 print "let web network rest for 1s lest make traffic jams " 273 time.sleep(1) 274 # except: 275 print page_n, "pages have been searched" 276 if total < counts : 277 print "check keyword,maybe too restrict" 278 break 279 280 print url_1 281 product_rank(counter1.results) 282 283 counter1.results.sort(key = lambda x : x[9], reverse=True) 284 counter1.results = counter1.results[:counts] 285 286 counter1.print_counter() 287 288 save_downpic(counter1.results,file_open,savePath) 289 290 # 291 for a in counter1.results: 292 for b in a : 293 file_open.write(unicode(str(b),'utf-8')) 294 file_open.write('\t') 295 file_open.write('\n\n') 296 297 file_open.close() 298 counter1.print_counter() 299 300 301 counter1 = counter() 302 303 market_totalcounts = 0 304 305 306 req_s = requests.Session() 307 req_s.adapters.DEFAULT_RETRIES = 3 308 req_s.keep_alive = True 309 310 311 def main(): 312 print "說明:".decode('utf-8') 313 print '本程序用于在淘寶上搜索商品時主動通過 價格范圍、商品描述、服務態度、評論數來篩選商品;\n篩選出來的商品圖片下載保存到磁盤(默認桌面新建find_worty_goods文件夾)并建立同序號開頭的txt文件,圖片顯示商品,其旁的txt文件名顯示價格等關鍵信息,txt里保存商品的淘寶鏈接'.decode('utf-8') 314 315 if setting.userDefine: #自己輸入 配置參數-篩選要求 316 setting.inputPara() 317 #否則 使用setting中的配置參數 318 319 serchProd = setting.serchProd #淘寶搜索詞 320 keyword = setting.keyword #raw_input().decode("gbk").encode("utf-8") #個人限定詞,商品名字必須包含,防止淘寶推薦了其他相關詞 (正則表達式). 
為任意表示不作限制 321 price_min = setting.price_min #價格區間 322 price_max = setting.price_max 323 descripHrequ = setting.descripHrequ # % 默認高于average, 輸出結果大于此值 324 servHrequ = setting.servHrequ # % 默認高于average, 輸出結果大于此值 325 descripNrequ = setting.descripNrequ 326 counts = setting.counts #要求選出多少個商品 327 counter1.tm_tb = setting.tm_tb #不區分天貓淘寶則,字符串為空,,只要天貓 則 ='tmall' ,只要淘寶 = 'taobao' 328 329 #savePath = r"C:\Users\Administrator\Desktop\Python scrapy\find_worthy_goods\results"#結果保存路徑 330 savePath = u"results%s"%serchProd #結果保存路徑 331 savePath = makeNewdir(savePath) 332 333 get_all_praised_goods(serchProd, counts, savePath, keyword, price_min, price_max ,descripHrequ ,servHrequ ,descripNrequ) 334 335 336 if __name__ == "__main__" : 337 main() 338 339 340 #保存圖片,以文件名為商品圖片名字,并以序號開頭 341 #同時,輸出 價格、商家名,商品描述、服務等 到 txt文本 342 #在商品圖片看中后,便可按序號查找 343 #按描述、服務評價高于平均,購物體驗應該可以的 View Code?
?
setting.py
# -*- coding: utf-8 -*- userDefine = False #篩選要求設置 serchProd='背包' #淘寶搜索詞 keyword='' #raw_input().decode("gbk").encode("utf-8") #個人限定詞,商品名字必須包含,防止淘寶推薦了其他相關詞 (正則表達式). 為任意表示不作限制 price_min=22 #價格區間 price_max=100 descripHrequ=0 # % 默認高于average, 輸出結果大于此值 servHrequ=0 # % 默認高于average, 輸出結果大于此值 descripNrequ=6 counts=25 #要求選出多少個商品 tm_tb ='tmall' #不區分天貓淘寶則,字符串為空,,只要天貓 則 ='tmall' ,只要淘寶 = 'taobao'def inputPara():''' 用戶選擇是否自定義要求,根據要求進行獲取商品,并按推薦排序輸出'''print "please input reserch _goods_name"global serchProd , keyword , price_min, price_max, descripHrequ , servHrequ, descripNrequ ,counts ,tm_tbserchProd=raw_input().replace(' ','') #淘寶搜索詞 ,并去除中間意外輸入的空格if serchProd:print "if customise price_range ,decriptiom require .etc.\ninput Y/N \n default by : no price limit avarage than descriptiom,get 50 products \n 默認要求為:無價格限制,商品描述、快遞、服務高于均值,獲取50個商品。自定義要求請輸入 ‘Y’ (區分大小寫)".decode('utf-8')if raw_input() == 'Y':print "\nplease input _minimal price and _maximal price; \ndefault by 0,10000\nnext by 'enter'key input nothing means by default,the same below "print '請輸入價格范圍 ;默認0-10000 ;兩項用半角逗號","分隔 按回車鍵確認;什么也不輸入代表使用默認值 '.decode('utf-8')try:price_min, price_max=input()except:print 'not input or wrong number,use default range'price_min, price_max = 0 ,10000# print '是否要求 只看天貓/正品保障 還是只看淘寶 \n 只看天貓輸入 tmall ,只看淘寶輸入taobao,都看則回車略過'try:tm_tb=raw_input().decode("gbk").encode("utf-8") #個人限定詞,商品名字必須包含,防止淘寶推薦了其他相關詞 (正則表達式). 為任意表示不作限制except:tm_tb=''# # #print "please input _keyword that goods name must include:\n(more than one keyword must use Regular Expression); default by no kewords"try:keyword=raw_input().decode("gbk").encode("utf-8") #個人限定詞,商品名字必須包含,防止淘寶推薦了其他相關詞 (正則表達式). 
為任意表示不作限制except:keyword=''# print "\nplease input _description_higher_percent_require and _service_higher__percent_require\n range:(-100,100) ; \ndefault by 0,0 I.e better than average"print '請輸入商品描述、服務高于平均值的百分比-100 ~100'.decode('utf-8')# % 默認高于average, 輸出結果大于此值try:descripHrequ,servHrequ=input() except:print 'not input or wrong number,use default range'descripHrequ = 0 # % 默認高于average, 輸出結果大于此值servHrequ = 0# print "\nplease input description count limit, default more than 5\n" ,'輸入最低商品評價數,默認大于5'.decode('utf-8')try:descripNrequ=input()except :print 'not input or wrong number,use default range'descripNrequ=5# # print "\nIF customise file reserve path, Y or N \ndefault/sample as: C:\\Users\\Administrator\\Desktop\\find_worthy_goods\\results "# print '是否自定義保存文件目錄 Y or N'.decode('utf-8')# if raw_input()=='Y':# print "please input path that you want to reserve; \n " # savePath = raw_input()# else:# #savePath=r"C:\Users\Administrator\Desktop\find_worthy_goods\results"#結果保存路徑 # print "\nplease input how many results you want, default by 50\n" ,'您要獲取的商品數目,默認50'.decode('utf-8')try:counts=input()except :counts=50else :counts =50keyword = ''tm_tb = ''price_min ,price_max ,descripHrequ ,servHrequ ,descripNrequ = 0,0,0,0,0else:print "no search goods,please restart"print '沒有輸入商品名稱,請重新啟動程序'.decode('utf-8') View Code?
轉載于:https://www.cnblogs.com/willowj/p/6266507.html
總結
以上是生活随笔為你收集整理的python推荐淘宝物美价廉商品 2.0的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: jquery 判断元素内容是否为空
- 下一篇: mysql补充(3)优化sql语句查询常