关于web自动化操作的分析和基类的实现
                                                            生活随笔
收集整理的這篇文章主要介紹了
                                关于web自动化操作的分析和基类的实现
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
                                http協議是一種無狀態協議。這是首先要明確的,客戶端(client)發送一個請求,服務端(server)收到之后,根據請求的URL和HTTP頭信息,給出相應的響應:出錯時是505,404,400等狀態碼,一般正確時是200。服務器除了IP和UserAgent等常用信息之外,無法知道具體的標示,也就是說服務器無法知道這個請求來自哪個客戶端,OK!
那么就引入了Cookie的概念,服務器一般用cookie去標示客戶端,可見cookie對于現在web系統的重要性,如果沒有cookie,現在的web啥也不是。
也就是說Cookie是web交互的核心之一
QQ郵箱登陸:
1,登陸QQ郵箱的主地址(http://mail.qq.com)
?
請求頭如上
?
響應內容,跳轉到登陸頁(因為沒有登陸后的cookie標示)
2,會經過幾個跳轉步驟之后跳轉到HTTPS登陸(https://mail.qq.com/cgi-bin/loginpage?&res=local)
?
3,輸入賬號登陸
輸入密碼后會跳轉到 使用get方式提交表單,如果登陸成功會寫Cookie
?
4,登陸成功之后我們再次通過mail.qq.com域名進入,也會跳轉到登陸頁,但是由于請求頭中的cookie已經包含登陸標示,所以會直接跳轉到郵箱url
?
public abstract class WebAction
{
    /*
     * Class:       base support class for web operations
     * Description: provides the basic plumbing for automated web requests
     * Created:     2011-10-25
     * Version:     0.4
     * Author:      rolends986
     *
     * Change log
     *   0.1  Initial code and debugging
     *   0.2  Reworked the main entry point to take multiple optional parameters (2011-11-01)
     *   0.3  Added SetUseUnsafeHeaderParsing, revised the lock logic, removed URL
     *        encoding (it caused header-parsing problems on some servers) (2011-12-02)
     *   0.4  Added proxy control, changed the useUnsafeHeaderParsing parameter,
     *        added resource release logic (2011-12-12)
     */

    static WebAction()
    {
        DefaultConnectionLimit = 1000;
        KeepAliveTime = 10 * 1000;
        KeepAliveInterval = 300;
    }

    /// <summary>Cookie store consulted before each request and updated from each response.</summary>
    protected CookieManager _cookieManager = new CookieManager();

    /// <summary>Log looked up in <c>LogManager.Logs</c> under <see cref="LogName"/>.</summary>
    protected XQSoft.Common.Log LogObject
    {
        get { return LogManager.Logs[LogName]; }
    }

    string _logName = "";

    /// <summary>Name of the log used by <see cref="LogObject"/>.</summary>
    virtual protected string LogName
    {
        get { return _logName; }
    }

    public WebAction()
    {
    }

    public WebAction(string logName)
    {
        _logName = logName;
    }

    // NOTE(review): _userAgent_FF starts with a space in the original; kept as-is because
    // public constants are inlined into callers.
    public const string _userAgent_FF = " Mozilla/5.0 (Windows NT 5.1; rv:11.0) Gecko/20100101 Firefox/11.0";
    public const string _userAgent_IE = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET4.0C; .NET4.0E)";
    public const string _defaultAccept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
    public const string _defaultAcceptLanguage = "zh-cn,zh;q=0.5";
    public const string _defaultAcceptCharset = "GB2312,utf-8;q=0.7,*;q=0.7";

    /// <summary>Value copied to <c>ServicePointManager.DefaultConnectionLimit</c> on every request.</summary>
    public static int DefaultConnectionLimit { get; set; }

    // KeepAliveTime / KeepAliveInterval are only assigned in this class; the call that
    // consumed them (SetTcpKeepAlive) was disabled by the original author.
    static public int KeepAliveTime { get; set; }
    static public int KeepAliveInterval { get; set; }

    /// <summary>When true, requests go through <see cref="ProxyInfo"/> unless the caller overrides it.</summary>
    public bool EnableProxy { get; set; }

    ProxyInfo _proxyInfo = null;

    public ProxyInfo ProxyInfo
    {
        get { return _proxyInfo; }
        protected set { _proxyInfo = value; }
    }

    // Generated once per instance. The previous implementation returned
    // new Guid().ToString(), which is always the all-zero GUID.
    readonly string _key = Guid.NewGuid().ToString();

    /// <summary>Identifier of this instance, passed to <c>ProxyManager.GetAvailableProxy</c>.</summary>
    public string Key
    {
        get { return _key; }
    }

    static readonly object sslLock = new object();

    /// <summary>
    /// Server certificate validation callback that accepts every certificate.
    /// </summary>
    /// <remarks>
    /// NOTE(security): returning true unconditionally disables TLS server validation for
    /// every request in the process while this callback is installed.
    /// </remarks>
    static public bool CheckValidationResult(object sender, X509Certificate certificate, X509Chain chain, SslPolicyErrors errors)
    {
        return true; // Always accept
    }

    /// <summary>Replaces the current proxy with one obtained from <c>ProxyManager</c> for this instance's key.</summary>
    virtual protected void ChangeProxy()
    {
        _proxyInfo = ProxyManager.GetAvailableProxy(this.Key);
    }

    /// <summary>
    /// Sends an HTTP(S) request and converts the response body to <typeparamref name="T"/>.
    /// </summary>
    /// <typeparam name="T">
    /// <c>string</c> (decoded body), <c>Image</c>, <c>ResponseLocation</c> (decoded body plus the
    /// Location header) or any other type, in which case the raw bytes are cast to it
    /// (so in practice <c>byte[]</c>).
    /// </typeparam>
    /// <param name="url">Absolute URL to request.</param>
    /// <param name="charset">Encoding name used to decode text; for <c>string</c> results a charset in the response Content-Type header takes precedence.</param>
    /// <param name="referer">Optional Referer header.</param>
    /// <param name="postInfo">When not null the request is sent as a POST with this body and content type.</param>
    /// <param name="useProxy">Explicit proxy switch; null falls back to <see cref="EnableProxy"/>.</param>
    /// <param name="headers">Extra request headers; a header that cannot be added raises error 502.</param>
    /// <param name="userAgent">Overrides the default Firefox user agent.</param>
    /// <param name="certificates">Client certificates for HTTPS; when supplied, the accept-all server validation callback is removed for the call.</param>
    /// <param name="protocol">Overrides the HTTP protocol version (HTTPS defaults to 1.0).</param>
    /// <param name="allowAutoRedirect">Whether redirects are followed; defaults to false.</param>
    /// <param name="keepAlive">Currently ignored: connections are always released after the call.</param>
    /// <param name="accept">Overrides the default Accept header.</param>
    /// <param name="acceptLanguage">Overrides the default Accept-Language header.</param>
    /// <param name="acceptCharset">Overrides the default Accept-Charset header.</param>
    /// <param name="urlEncoding">Currently ignored: URL re-encoding was disabled by the original author.</param>
    /// <param name="cachePolicy">Overrides the default no-cache/no-store policy.</param>
    /// <returns>The response converted to <typeparamref name="T"/>.</returns>
    /// <remarks>
    /// Failures are reported through <see cref="ThrowException"/> with these codes:
    /// 505 no client certificate collection, 502 invalid custom header, 503 request failed,
    /// 504 body is not a valid image, 509 body could not be decoded as text.
    /// </remarks>
    virtual protected T GetWebData<T>(string url, string charset = null, string referer = null, PostInfo postInfo = null, bool? useProxy = null, NameValueCollection headers = null, string userAgent = null, Certificates certificates = null, Version protocol = null, bool? allowAutoRedirect = false, bool? keepAlive = null, string accept = null, string acceptLanguage = null, string acceptCharset = null, string urlEncoding = null, RequestCachePolicy cachePolicy = null)
    {
        System.Net.ServicePointManager.DefaultConnectionLimit = DefaultConnectionLimit;

        var uri = new Uri(url);
        bool isHttps = url.StartsWith("https", StringComparison.OrdinalIgnoreCase);

        var request = (HttpWebRequest)WebRequest.Create(url);
        request.ServicePoint.Expect100Continue = false;
        request.Proxy = null;
        if (useProxy ?? EnableProxy)
        {
            SetProxy(request);
        }

        // Defaults, each replaced by the caller's value when one was supplied.
        request.KeepAlive = false;
        request.AllowAutoRedirect = allowAutoRedirect ?? false;
        request.UserAgent = string.IsNullOrEmpty(userAgent) ? _userAgent_FF : userAgent;
        request.Accept = string.IsNullOrEmpty(accept) ? _defaultAccept : accept;
        request.Headers[HttpRequestHeader.AcceptLanguage] = string.IsNullOrEmpty(acceptLanguage) ? _defaultAcceptLanguage : acceptLanguage;
        request.Headers[HttpRequestHeader.AcceptCharset] = string.IsNullOrEmpty(acceptCharset) ? _defaultAcceptCharset : acceptCharset;
        request.CachePolicy = cachePolicy ?? new System.Net.Cache.RequestCachePolicy(System.Net.Cache.RequestCacheLevel.NoCacheNoStore);
        request.Method = "get";
        if (!string.IsNullOrEmpty(referer))
        {
            request.Referer = referer;
        }

        if (isHttps)
        {
            if (certificates != null)
            {
                X509CertificateCollection crts = certificates.IsAuto
                    ? GetCertificates(uri)
                    : certificates.CertificateCollection;
                if (crts == null)
                {
                    DisposeRequest(request);
                    ThrowException(505, url);
                }
                request.ClientCertificates = crts;
            }
            request.ProtocolVersion = HttpVersion.Version10;
        }
        if (protocol != null)
        {
            request.ProtocolVersion = protocol;
        }

        if (headers != null)
        {
            try
            {
                foreach (string name in headers.AllKeys)
                {
                    request.Headers.Add(name, headers[name]);
                }
            }
            catch (Exception ex)
            {
                // request header error 502
                DisposeRequest(request);
                ThrowException(502, ex.Message);
            }
        }

        string requestCookie = _cookieManager.GetCookieHeader(uri.Host);
        if (!string.IsNullOrEmpty(requestCookie))
        {
            request.Headers.Add(HttpRequestHeader.Cookie, requestCookie);
        }

        if (postInfo != null)
        {
            request.Method = "post";
            try
            {
                byte[] body = postInfo.GetPostData();
                request.ContentType = postInfo.GetContentType();
                request.ContentLength = body.Length;
                using (Stream dataStream = request.GetRequestStream())
                {
                    dataStream.Write(body, 0, body.Length);
                }
            }
            catch
            {
                // Release the connection, then let the original exception propagate unchanged.
                DisposeRequest(request);
                throw;
            }
        }

        WebResponse response = null;
        try
        {
            if (isHttps)
            {
                // ServerCertificateValidationCallback is process-wide, so the callback is
                // switched and the request is sent under one lock.
                lock (sslLock)
                {
                    if (certificates != null)
                    {
                        if (ServicePointManager.ServerCertificateValidationCallback != null)
                        {
                            ServicePointManager.ServerCertificateValidationCallback -= CheckValidationResult;
                        }
                    }
                    else
                    {
                        if (ServicePointManager.ServerCertificateValidationCallback == null)
                        {
                            ServicePointManager.ServerCertificateValidationCallback += CheckValidationResult;
                        }
                    }
                    response = request.GetResponse();
                }
            }
            else
            {
                response = request.GetResponse();
            }
        }
        catch (Exception ex)
        {
            // get response error 503
            DisposeRequest(request);
            DisposeResponse(response);
            ThrowException(503, ex.Message);
        }

        try
        {
            string cookie = response.Headers.Get("Set-Cookie");
            if (!string.IsNullOrEmpty(cookie))
            {
                _cookieManager.SetCookie(cookie, uri.Host);
            }

            Stream sm = response.GetResponseStream();

            if (typeof(T) == typeof(string))
            {
                // A charset announced by the server wins over the caller's hint.
                string serverCharset = ExtractCharset(response.Headers["Content-Type"]);
                if (!string.IsNullOrEmpty(serverCharset))
                {
                    charset = serverCharset;
                }
                return (T)(object)GetHtml(sm, charset);
            }

            if (typeof(T) == typeof(Image))
            {
                try
                {
                    // Image.FromStream needs its stream open for the lifetime of the image,
                    // but the response is closed below, so decode from an in-memory copy.
                    Image original = Image.FromStream(new MemoryStream(GetData(sm)));
                    return (T)(object)original;
                }
                catch (Exception ex)
                {
                    // to image error 504
                    ThrowException(504, ex.Message);
                    return default(T);
                }
            }

            if (typeof(T) == typeof(ResponseLocation))
            {
                ResponseLocation rl = new ResponseLocation()
                {
                    Html = GetHtml(sm, charset),
                    Url = response.Headers["Location"]
                };
                return (T)(object)rl;
            }

            return (T)(object)GetData(sm);
        }
        finally
        {
            // Runs on success and on failure, so a decoding error no longer leaks the connection.
            DisposeRequest(request);
            DisposeResponse(response);
        }
    }

    /// <summary>
    /// Returns the value of the <c>charset</c> parameter of a Content-Type header,
    /// without surrounding quotes, or null when the header has none.
    /// </summary>
    private static string ExtractCharset(string contentType)
    {
        if (string.IsNullOrEmpty(contentType))
        {
            return null;
        }

        string[] parts = contentType.Split(';');
        for (int i = 1; i < parts.Length; i++) // parts[0] is the media type itself
        {
            string part = parts[i];
            int eq = part.IndexOf('=');
            if (eq < 0)
            {
                continue;
            }
            if (string.Equals(part.Substring(0, eq).Trim(), "charset", StringComparison.OrdinalIgnoreCase))
            {
                string value = part.Substring(eq + 1).Trim().Trim('"', '\'');
                return value.Length == 0 ? null : value;
            }
        }
        return null;
    }

    /// <summary>Best-effort release of a response; never throws.</summary>
    private void DisposeResponse(WebResponse response)
    {
        if (response == null)
        {
            return;
        }
        // Cleanup is deliberately best-effort: a failure here must not mask the real result.
        try { response.GetResponseStream().Close(); }
        catch { }
        try { response.Close(); }
        catch { }
    }

    /// <summary>Best-effort release of a request; never throws.</summary>
    private void DisposeRequest(HttpWebRequest request)
    {
        if (request == null)
        {
            return;
        }
        // Cleanup is deliberately best-effort: a failure here must not mask the real result.
        try { request.Abort(); }
        catch { }
    }

    /// <summary>Routes the request through <see cref="ProxyInfo"/>; raises error 533 when no proxy is set.</summary>
    private void SetProxy(HttpWebRequest request)
    {
        if (ProxyInfo == null)
        {
            ThrowException(533, "代理實例為空,請先實例化");
        }
        request.Proxy = new WebProxy(ProxyInfo.IPAddress.ToString(), ProxyInfo.Port);
    }

    /// <summary>
    /// Sets the internal <c>useUnsafeHeaderParsing</c> field of System.Net's settings section via reflection.
    /// </summary>
    /// <param name="boolVal">Value to store in the field.</param>
    /// <returns>true when the field was set; false when it could not be located or set.</returns>
    public static bool SetUseUnsafeHeaderParsing(bool boolVal)
    {
        try
        {
            Assembly assem = Assembly.GetAssembly(typeof(System.Net.Configuration.SettingsSection));
            if (assem == null)
            {
                return false;
            }

            Type assemType = assem.GetType("System.Net.Configuration.SettingsSectionInternal");
            if (assemType == null)
            {
                return false;
            }

            object obj = assemType.InvokeMember("Section", BindingFlags.Static | BindingFlags.GetProperty | BindingFlags.NonPublic, null, null, new object[] { });
            if (obj == null)
            {
                return false;
            }

            FieldInfo fieldInfo = assemType.GetField("useUnsafeHeaderParsing", BindingFlags.NonPublic | BindingFlags.Instance);
            if (fieldInfo == null)
            {
                return false;
            }

            fieldInfo.SetValue(obj, boolVal);
            return true;
        }
        catch (Exception)
        {
            // Previously reported as success; a reflection failure means the flag was not set.
            return false;
        }
    }

    /// <summary>
    /// Re-encodes the names and values of a URL's query string with the given encoding.
    /// Returns the URL unchanged when <paramref name="code"/> is empty or there is no query.
    /// </summary>
    /// <param name="url">URL whose query string is re-encoded.</param>
    /// <param name="code">Encoding name passed to <c>Encoding.GetEncoding</c>.</param>
    private string EncodeUrl(string url, string code)
    {
        if (string.IsNullOrEmpty(code))
        {
            return url;
        }

        Encoding urlCode = Encoding.GetEncoding(code);
        int pIndex = url.IndexOf('?');
        if (pIndex <= 1 || url.Length - 1 <= pIndex)
        {
            return url;
        }

        StringBuilder np = new StringBuilder();
        bool first = true;
        foreach (string nv in url.Substring(pIndex + 1).Split('&'))
        {
            if (!first)
            {
                np.Append('&');
            }
            first = false;

            int cIndex = nv.IndexOf('=');
            if (cIndex < 0)
            {
                // A bare key stays bare instead of gaining a trailing '='.
                np.Append(UrlUnit.UrlEncode(nv, urlCode));
                continue;
            }
            np.Append(UrlUnit.UrlEncode(nv.Substring(0, cIndex), urlCode));
            np.Append('=');
            np.Append(UrlUnit.UrlEncode(nv.Substring(cIndex + 1), urlCode));
        }
        return url.Substring(0, pIndex + 1) + np.ToString();
    }

    /// <summary>Inflates a GZip-compressed buffer.</summary>
    public byte[] GZipDecompress(byte[] gzip)
    {
        using (GZipStream stream = new GZipStream(new MemoryStream(gzip), CompressionMode.Decompress))
        {
            return GetData(stream);
        }
    }

    /// <summary>Inflates a Deflate-compressed buffer.</summary>
    public byte[] DeflateDecompress(byte[] deflate)
    {
        using (DeflateStream stream = new DeflateStream(new MemoryStream(deflate), CompressionMode.Decompress))
        {
            return GetData(stream);
        }
    }

    /// <summary>Reads a stream to its end and returns the bytes (empty array for an empty stream).</summary>
    private byte[] GetData(Stream sm)
    {
        using (MemoryStream collected = new MemoryStream())
        {
            sm.CopyTo(collected, 1024 * 8);
            return collected.ToArray();
        }
    }

    /// <summary>
    /// Reads the stream and decodes it as text, then writes the text to the log.
    /// Without an explicit <paramref name="charset"/> it decodes as utf-8 and then
    /// re-decodes if the page declares a different charset in a meta tag.
    /// Failures are raised as error 509.
    /// </summary>
    private string GetHtml(Stream sm, string charset)
    {
        byte[] data = GetData(sm);
        string newCharset = string.IsNullOrEmpty(charset) ? "utf-8" : charset;
        try
        {
            string r = Encoding.GetEncoding(newCharset).GetString(data);
            if (string.IsNullOrEmpty(charset))
            {
                r = CheckEncoding(data, newCharset, r);
            }
            LogObject.WriteLine("==============================================\r\n");
            LogObject.WriteLine(r);
            LogObject.WriteLine("==============================================");
            LogObject.WriteLine("******************************分割*************************");
            return r;
        }
        catch (Exception ex)
        {
            // decode response error 509
            ThrowException(509, ex.Message);
            return "";
        }
    }

    // Matches the charset inside <meta ... content="...; charset=xxx">.
    protected static Regex regCharset = new Regex("(?<=<meta.+?content\\=.+?charset\\=).+?(?=(\\\"|[\\s]))", RegexOptions.IgnoreCase);
    // Matches the charset inside <meta charset="xxx">; takes precedence when both match.
    protected static Regex regCharset2 = new Regex("(?<=<meta[\\s]+charset=[\\\"]{0,1})[a-z0-9]+(?=[\\\"]{0,1})", RegexOptions.IgnoreCase);

    /// <summary>
    /// Re-decodes <paramref name="data"/> with the charset declared in the page's meta tag
    /// when it differs from <paramref name="currentCharset"/>; otherwise returns
    /// <paramref name="html"/> unchanged. An unknown charset also leaves the text unchanged.
    /// </summary>
    private string CheckEncoding(byte[] data, string currentCharset, string html)
    {
        string pageCharset = "";

        Match contentMeta = regCharset.Match(html);
        if (contentMeta.Success)
        {
            pageCharset = contentMeta.Value.Trim().ToLowerInvariant();
        }
        Match charsetMeta = regCharset2.Match(html);
        if (charsetMeta.Success)
        {
            pageCharset = charsetMeta.Value.Trim().ToLowerInvariant();
        }

        // Nothing declared, or already decoded with the declared charset.
        if (pageCharset.Length == 0 ||
            string.Equals(pageCharset, currentCharset.Trim(), StringComparison.OrdinalIgnoreCase))
        {
            return html;
        }

        try
        {
            return Encoding.GetEncoding(pageCharset).GetString(data);
        }
        catch (ArgumentException)
        {
            return html; // charset name not recognised
        }
        catch (NotSupportedException)
        {
            return html; // charset not available on this platform
        }
    }

    /// <summary>
    /// Collects, from every certificate store of the current user and the local machine,
    /// the certificates whose subject contains the URI's host name (case-insensitive).
    /// </summary>
    virtual protected X509CertificateCollection GetCertificates(Uri uri)
    {
        X509CertificateCollection certs = new X509CertificateCollection();
        string host = uri.Host;

        foreach (StoreName storeName in Enum.GetValues(typeof(StoreName)))
        {
            foreach (StoreLocation location in Enum.GetValues(typeof(StoreLocation)))
            {
                X509Store store = new X509Store(storeName, location);
                store.Open(OpenFlags.MaxAllowed);
                try
                {
                    foreach (X509Certificate2 cert in store.Certificates)
                    {
                        if (cert.Subject.IndexOf(host, StringComparison.OrdinalIgnoreCase) >= 0)
                        {
                            certs.Add(cert);
                        }
                    }
                }
                finally
                {
                    store.Close(); // release the store handle, which was previously leaked
                }
            }
        }
        return certs;
    }

    /// <summary>Raises the exception produced by <c>XQException.GetException</c> for the given code and text.</summary>
    virtual protected void ThrowException(int errorCode, string sourceText)
    {
        throw XQException.GetException(errorCode, sourceText);
    }

    /// <summary>
    /// Splits the query part of a URL (everything after the first '?', or the whole
    /// string when there is no '?') into name/value pairs. Values are not URL-decoded.
    /// </summary>
    /// <param name="url">URL or bare query string.</param>
    /// <returns>The parsed pairs; a parameter without '=' is stored with an empty value.</returns>
    protected NameValueCollection GetQueryParameter(string url)
    {
        NameValueCollection pars = new NameValueCollection();
        if (string.IsNullOrEmpty(url))
        {
            return pars;
        }

        string query = url.Substring(url.IndexOf('?') + 1);
        foreach (string pair in query.Split('&'))
        {
            if (pair.Length == 0)
            {
                continue; // skip empty segments such as the one in "a=1&&b=2"
            }

            // Split on the first '=' only, so values containing '=' survive intact.
            int eq = pair.IndexOf('=');
            if (eq < 0)
            {
                pars.Add(pair, "");
            }
            else
            {
                pars.Add(pair.Substring(0, eq), pair.Substring(eq + 1));
            }
        }
        return pars;
    }
}
QQ 討論組廣告群發(fā)工具(已開發(fā)完成)索引
轉載于:https://www.cnblogs.com/Rolends/archive/2012/04/18/2455716.html
總結
以上是生活随笔為你收集整理的关于web自动化操作的分析和基类的实现的全部內容,希望文章能夠幫你解決所遇到的問題。
 
                            
                        - 上一篇: [转][osgearth]版本更新说明
- 下一篇: memcache/memcached/m
