howdoi 简单分析
                                                            生活随笔
收集整理的這篇文章主要介紹了
                                howdoi 简单分析
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.                        
                                對howdoi的一個簡單分析。
曾經看到過下面的這樣一段js代碼:
try { doSth(); } catch (e) { ask_url = "https://stackoverflow.com/search?q="; window.location.href = ask_url + encodeURIComponent(e); }
howdoi基本就是把這個流程做成了Python腳本。其基本流程如下:
- step1:利用site語法組裝搜索語句(默認指定搜索stackoverflow網站)
 - step2:利用google搜索接口獲取搜索引擎第一頁排名第一的鏈接
 - step3:訪問該鏈接,根據排名從高到低,提取代碼塊文本
 - step4:提取到就顯示到終端,沒有提取到就提示未找到答案
 
當然,howdoi也作了一些其他的工作:
- 代理設置
 - 既往問題進行緩存,提高下次查詢的速度
 - 查詢的目標網站可配置
 - 做成Python script腳本命令,方便快捷
 - 代碼高亮格式化輸出
 
更多分析請看代碼注釋:
#!/usr/bin/env python

######################################################
#
# howdoi - instant coding answers via the command line
# written by Benjamin Gleitzman (gleitz@mit.edu)
# inspired by Rich Jones (rich@anomos.info)
#
######################################################

import argparse  # command-line argument parsing
import glob
import os
import random
import re
import requests  # HTTP(S) requests
import requests_cache
import sys
from . import __version__

# Coloured, syntax-highlighted terminal output
from pygments import highlight
from pygments.lexers import guess_lexer, get_lexer_by_name
from pygments.formatters.terminal import TerminalFormatter
from pygments.util import ClassNotFound

# HTML parsing
from pyquery import PyQuery as pq
from requests.exceptions import ConnectionError
from requests.exceptions import SSLError

# Python 2.x / 3.x compatibility shims.
# Compare the numeric major version: comparing the ``sys.version`` string
# lexicographically would misclassify any major version >= 10.
if sys.version_info[0] < 3:
    import codecs
    from urllib import quote as url_quote
    from urllib import getproxies

    # Handling Unicode: http://stackoverflow.com/a/6633040/305414
    def u(x):
        """Decode unicode escapes in *x* and return the decoded text."""
        return codecs.unicode_escape_decode(x)[0]
else:
    from urllib.request import getproxies
    from urllib.parse import quote as url_quote

    def u(x):
        """Return *x* unchanged."""
        return x


# Google search URL template: {0} is the target site, {1} the quoted query.
if os.getenv('HOWDOI_DISABLE_SSL'):
    # HOWDOI_DISABLE_SSL is set: use plain http and skip certificate checks
    SEARCH_URL = 'http://www.google.com/search?q=site:{0}%20{1}'
    VERIFY_SSL_CERTIFICATE = False
else:
    SEARCH_URL = 'https://www.google.com/search?q=site:{0}%20{1}'
    VERIFY_SSL_CERTIFICATE = True

# Target Q&A site (overridable through the HOWDOI_URL environment variable)
URL = os.getenv('HOWDOI_URL') or 'stackoverflow.com'

# Browser User-Agent strings; one is picked at random for every request so
# that the request looks like it comes from a browser rather than a script.
USER_AGENTS = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0',
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100101 Firefox/22.0',
    'Mozilla/5.0 (Windows NT 6.1; rv:11.0) Gecko/20100101 Firefox/11.0',
    ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/536.5 (KHTML, like Gecko) '
     'Chrome/19.0.1084.46 Safari/536.5'),
    # The trailing space before the continuation keeps "Chrome/..." and
    # "Safari/..." as separate tokens after literal concatenation.
    ('Mozilla/5.0 (Windows; Windows NT 6.1) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.46 '
     'Safari/536.5'),
)

# Output formatting for answers
ANSWER_HEADER = u('--- Answer {0} ---\n{1}')
NO_ANSWER_MSG = '< no answer given >'

# Cache file location
XDG_CACHE_DIR = os.environ.get('XDG_CACHE_HOME',
                               os.path.join(os.path.expanduser('~'), '.cache'))
CACHE_DIR = os.path.join(XDG_CACHE_DIR, 'howdoi')
CACHE_FILE = os.path.join(CACHE_DIR, 'cache{0}'.format(
    sys.version_info[0] if sys.version_info[0] == 3 else ''))


def get_proxies():
    """Return the system proxies whose scheme key starts with ``http``.

    Proxy values that do not already start with ``http`` are prefixed with
    ``http://``.
    """
    proxies = getproxies()
    filtered_proxies = {}
    for key, value in proxies.items():
        if key.startswith('http'):
            if not value.startswith('http'):
                filtered_proxies[key] = 'http://%s' % value
            else:
                filtered_proxies[key] = value
    return filtered_proxies


def _get_result(url):
    """Fetch *url* and return the response body as text.

    The request uses a random User-Agent, the filtered system proxies and
    the module-level certificate verification setting.

    Raises:
        requests.exceptions.SSLError: re-raised after printing a hint about
            the ``HOWDOI_DISABLE_SSL`` environment variable.
    """
    try:
        return requests.get(url, headers={'User-Agent': random.choice(USER_AGENTS)},
                            proxies=get_proxies(),
                            verify=VERIFY_SSL_CERTIFICATE).text
    except requests.exceptions.SSLError:
        print('[ERROR] Encountered an SSL Error. Try using HTTP instead of '
              'HTTPS by setting the environment variable "HOWDOI_DISABLE_SSL".\n')
        # Bare raise keeps the original traceback intact.
        raise


def _get_links(query):
    """Run the search for *query* and return the result hrefs.

    Elements matching ``.l`` are used first; if none are found, the ``a``
    elements inside ``.r`` are used instead.
    """
    result = _get_result(SEARCH_URL.format(URL, url_quote(query)))
    html = pq(result)  # parse the result page with pyquery
    return [a.attrib['href'] for a in html('.l')] or \
        [a.attrib['href'] for a in html('.r')('a')]


def get_link_at_pos(links, position):
    """Return the link at 1-based *position*.

    Returns ``False`` when *links* is empty, and the last link when
    *position* is past the end of the list.
    """
    if not links:
        return False

    if len(links) >= position:
        link = links[position - 1]
    else:
        link = links[-1]
    return link


def _format_output(code, args):
    """Return *code*, syntax-highlighted when ``args['color']`` is truthy.

    If no lexer can be found or guessed, *code* is returned unchanged.
    """
    if not args['color']:
        return code
    lexer = None

    # try to find a lexer using the StackOverflow tags
    # or the query arguments
    for keyword in args['query'].split() + args['tags']:
        try:
            lexer = get_lexer_by_name(keyword)
            break
        except ClassNotFound:
            pass

    # no lexer found above, use the guesser
    if not lexer:
        try:
            lexer = guess_lexer(code)
        except ClassNotFound:
            return code

    return highlight(code,
                     lexer,
                     TerminalFormatter(bg='dark'))


def _is_question(link):
    """Return a regex match if *link* contains ``questions/<digits>/``, else ``None``."""
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    return re.search(r'questions/\d+/', link)


def _get_questions(links):
    """Return only those *links* that look like question pages."""
    return [link for link in links if _is_question(link)]


def _get_answer(args, links):
    """Extract the answer text for the question at ``args['pos']``.

    Returns ``False`` when no question link is available, the link itself
    when ``args['link']`` is set, and otherwise the stripped answer text.
    As a side effect, ``args['tags']`` is set to the page's ``.post-tag``
    texts.
    """
    links = _get_questions(links)
    link = get_link_at_pos(links, args['pos'])
    if not link:
        return False
    if args.get('link'):
        return link
    page = _get_result(link + '?answertab=votes')
    html = pq(page)

    first_answer = html('.answer').eq(0)  # the first answer on the page
    # <pre> and <code> elements hold the code snippets we are after
    instructions = first_answer.find('pre') or first_answer.find('code')
    args['tags'] = [t.text for t in html('.post-tag')]

    if not instructions and not args['all']:
        text = first_answer.find('.post-text').eq(0).text()
    elif args['all']:
        texts = []
        for html_tag in first_answer.items('.post-text > *'):
            current_text = html_tag.text()
            if current_text:
                if html_tag[0].tag in ['pre', 'code']:
                    texts.append(_format_output(current_text, args))
                else:
                    texts.append(current_text)
        texts.append('\n---\nAnswer from {0}'.format(link))
        text = '\n'.join(texts)
    else:
        text = _format_output(instructions.eq(0).text(), args)
    if text is None:
        text = NO_ANSWER_MSG
    text = text.strip()
    return text


def _get_instructions(args):
    """Collect ``args['num_answers']`` answers starting at ``args['pos']``.

    Returns ``False`` when the search yields no links; otherwise the
    answers joined by newlines. ``args['pos']`` is updated while iterating.
    """
    links = _get_links(args['query'])

    if not links:
        return False
    answers = []
    append_header = args['num_answers'] > 1
    initial_position = args['pos']
    for answer_number in range(args['num_answers']):
        current_position = answer_number + initial_position
        args['pos'] = current_position
        answer = _get_answer(args, links)
        if not answer:
            continue
        if append_header:
            answer = ANSWER_HEADER.format(current_position, answer)
        answer += '\n'
        answers.append(answer)
    return '\n'.join(answers)


def _enable_cache():
    """Create the cache directory if needed and install the request cache."""
    if not os.path.exists(CACHE_DIR):
        os.makedirs(CACHE_DIR)
    requests_cache.install_cache(CACHE_FILE)


def _clear_cache():
    """Remove every file whose path starts with ``CACHE_FILE``."""
    for cache in glob.glob('{0}*'.format(CACHE_FILE)):
        os.remove(cache)


def howdoi(args):
    """Answer the query in *args* and return the text to display.

    ``args['query']`` (a list of words) is joined into a single string with
    question marks removed. Connection and SSL errors are reported as a
    message instead of being raised.
    """
    args['query'] = ' '.join(args['query']).replace('?', '')
    try:
        return _get_instructions(args) or 'Sorry, couldn\'t find any help with that topic\n'
    except (ConnectionError, SSLError):
        return 'Failed to establish network connection\n'


def get_parser():
    """Build and return the command-line argument parser."""
    parser = argparse.ArgumentParser(description='instant coding answers via the command line')
    parser.add_argument('query', metavar='QUERY', type=str, nargs='*',
                        help='the question to answer')
    parser.add_argument('-p', '--pos', help='select answer in specified position (default: 1)',
                        default=1, type=int)
    parser.add_argument('-a', '--all', help='display the full text of the answer',
                        action='store_true')
    parser.add_argument('-l', '--link', help='display only the answer link',
                        action='store_true')
    parser.add_argument('-c', '--color', help='enable colorized output',
                        action='store_true')
    parser.add_argument('-n', '--num-answers', help='number of answers to return',
                        default=1, type=int)
    parser.add_argument('-C', '--clear-cache', help='clear the cache',
                        action='store_true')
    parser.add_argument('-v', '--version', help='displays the current version of howdoi',
                        action='store_true')
    return parser


def command_line_runner():
    """Entry point: parse the command line and print the result."""
    parser = get_parser()
    args = vars(parser.parse_args())

    # Print the script version
    if args['version']:
        print(__version__)
        return

    # Clear the cache
    if args['clear_cache']:
        _clear_cache()
        print('Cache cleared successfully')
        return

    # Without a query there is nothing to answer: show the help text
    if not args['query']:
        parser.print_help()
        return

    # Enable the cache unless HOWDOI_DISABLE_CACHE is set
    if not os.getenv('HOWDOI_DISABLE_CACHE'):
        _enable_cache()

    # Colourised output can also be requested through the environment
    if os.getenv('HOWDOI_COLORIZE'):
        args['color'] = True

    # On Python 2 encode the output as UTF-8 before printing
    if sys.version_info[0] < 3:
        print(howdoi(args).encode('utf-8', 'ignore'))
    else:
        print(howdoi(args))


if __name__ == '__main__':
    command_line_runner()

# Reposted from: https://www.cnblogs.com/taceywong/p/8051095.html
總結
以上是生活随笔為你收集整理的howdoi 简单分析的全部內容,希望文章能夠幫你解決所遇到的問題。
                            
                        - 上一篇: 完全模仿lol的手游
 - 下一篇: dns_probe_possible连不