C#爬虫程序如何抓取POST请求的数据? |
|
40分 |
/// <summary>
/// Small helper around <see cref="HttpWebRequest"/> that issues a GET (no post data)
/// or a form-encoded POST (post data supplied) and returns the response body as text.
/// Outcome details of the last call are exposed through the properties
/// (<see cref="strCode"/>, <see cref="strErr"/>, <see cref="cookieGet"/>, ...).
/// </summary>
public class HttpProc
{
    private bool _allowAutoRedirect;
    private CookieCollection _cookieGet;
    private CookieCollection _cookiePost;
    private WebHeaderCollection _customHeaders;
    private Encoding _encoding;
    private Version _httpVersion;
    private IWebProxy _Proxy;
    private string _ResHtml;
    private string _strAcceptType;
    private string _strCode;
    private string _strErr;
    private string _strPostdata;
    private string _strRefUrl;
    private string _strStatusDes;
    private string _strUrl;
    private string _strUserAgent;
    private bool _succeed;
    private int _timeOut;

    /// <summary>
    /// Creates a request helper with UTF-8 encoding, HTTP/1.1, auto-redirect enabled
    /// and an empty custom header collection.
    /// </summary>
    /// <param name="strURL">Target URL.</param>
    /// <param name="PostData">
    /// Form-encoded body. When null or empty the request is sent without a body
    /// (the request method is left at its default).
    /// </param>
    public HttpProc(string strURL, string PostData)
    {
        this._encoding = Encoding.UTF8;
        this._strAcceptType = "*/*";
        this._strUserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.1.4322)";
        this._allowAutoRedirect = true;
        this._httpVersion = HttpVersion.Version11;
        this._customHeaders = new WebHeaderCollection();
        this._strUrl = strURL;
        this._strPostdata = PostData;
    }

    /// <summary>
    /// Builds the <see cref="HttpWebRequest"/> from the current settings and, when post
    /// data is present, writes the body to the request stream.
    /// </summary>
    /// <returns>The configured request, ready for <c>GetResponse</c>.</returns>
    private HttpWebRequest CreateRequest()
    {
        // Formatter is a project helper that is not visible here; presumably it
        // percent-encodes non-ASCII characters in the URL - TODO confirm.
        // NOTE(review): the encoded value is stored back into _strUrl, so repeated
        // calls re-encode it; whether that is idempotent depends on the helper.
        this._strUrl = Formatter.CnUrlEncode(this._strUrl);

        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(this._strUrl);

        // Headers is assigned before the header-backed properties (Referer, Accept,
        // UserAgent) so that those later assignments are not replaced by it.
        if (this._customHeaders != null)
        {
            request.Headers = this._customHeaders;
        }
        request.Headers.Add("Accept-Encoding: gzip, deflate");
        request.CookieContainer = new CookieContainer();
        request.Referer = this._strRefUrl ?? this._strUrl;
        request.Accept = this._strAcceptType;
        request.ProtocolVersion = this._httpVersion;
        request.UserAgent = this._strUserAgent;
        request.AllowAutoRedirect = this._allowAutoRedirect;

        // The Proxy property used to be stored but never applied to the request.
        if (this._Proxy != null)
        {
            request.Proxy = this._Proxy;
        }

        if (this._timeOut > 0)
        {
            request.Timeout = this._timeOut;
            request.ReadWriteTimeout = this._timeOut;
        }

        if (this._cookiePost != null)
        {
            // Cookies are re-homed to the target host so the container accepts them.
            Uri target = new Uri(this._strUrl);
            foreach (Cookie cookie in this._cookiePost)
            {
                cookie.Domain = target.Host;
            }
            request.CookieContainer.Add(this._cookiePost);
        }

        if (!string.IsNullOrEmpty(this._strPostdata))
        {
            request.ContentType = "application/x-www-form-urlencoded";
            request.Method = "POST";
            byte[] payload = this._encoding.GetBytes(this._strPostdata);
            request.ContentLength = payload.Length;
            try
            {
                using (Stream requestStream = request.GetRequestStream())
                {
                    requestStream.Write(payload, 0, payload.Length);
                }
            }
            catch (Exception exception)
            {
                // Best effort, as before: record the failure and let Proc() surface
                // whatever GetResponse reports afterwards.
                this._strErr = exception.Message;
            }
        }

        return request;
    }

    /// <summary>
    /// Wraps the response stream in a decompressor that matches the
    /// <c>Content-Encoding</c> header (gzip or deflate); other encodings are returned as-is.
    /// </summary>
    /// <param name="response">Response whose body should be opened.</param>
    /// <returns>A stream yielding the decoded body bytes.</returns>
    private static Stream OpenBodyStream(HttpWebResponse response)
    {
        Stream body = response.GetResponseStream();
        string contentEncoding = response.Headers["Content-Encoding"];
        if (string.IsNullOrEmpty(contentEncoding))
        {
            return body;
        }

        if (contentEncoding.IndexOf("gzip", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            return new GZipStream(body, CompressionMode.Decompress);
        }

        // The request advertises "deflate", so it has to be decoded as well.
        // NOTE(review): DeflateStream reads raw DEFLATE data; a server that sends a
        // zlib-wrapped body may still fail to decode on older runtimes - confirm.
        if (contentEncoding.IndexOf("deflate", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            return new DeflateStream(body, CompressionMode.Decompress);
        }

        return body;
    }

    /// <summary>
    /// Sends the request and returns the response body decoded with <see cref="encoding"/>.
    /// </summary>
    /// <returns>
    /// The response text; the <c>Location</c> header value for a 302 response (only
    /// observable when auto-redirect is disabled); or an empty string when the request
    /// fails, in which case <see cref="strErr"/> holds the exception message.
    /// </returns>
    public string Proc()
    {
        // Reset per-call outcome so a previous failure does not leak into this call.
        this._succeed = false;
        this._strErr = null;

        HttpWebRequest request = this.CreateRequest();
        HttpWebResponse response = null;
        try
        {
            response = (HttpWebResponse)request.GetResponse();

            using (StreamReader reader = new StreamReader(OpenBodyStream(response), this._encoding))
            {
                this._ResHtml = reader.ReadToEnd();
            }

            // strCode keeps its historical value: the enum member name (e.g. "OK").
            this._strCode = response.StatusCode.ToString();
            this._strStatusDes = response.StatusDescription;

            // The old check compared the enum *name* with "302" and therefore never
            // matched; compare the numeric status instead.
            if ((int)response.StatusCode == 302)
            {
                this._ResHtml = response.Headers["Location"];
            }

            if (response.Cookies.Count > 0)
            {
                this._cookieGet = response.Cookies;
            }

            this._succeed = true;
            return this._ResHtml;
        }
        catch (WebException webException)
        {
            this._strErr = webException.Message;
            // Protocol errors (4xx/5xx) carry their own response, which must be
            // closed to release the underlying connection.
            if (webException.Response != null)
            {
                webException.Response.Close();
            }
            return "";
        }
        catch (Exception exception)
        {
            this._strErr = exception.Message;
            return "";
        }
        finally
        {
            // Metadata has been read above; closing here releases the connection on
            // both the success and the failure path.
            if (response != null)
            {
                response.Close();
            }
        }
    }

    /// <summary>
    /// Parses a <c>name=value; name2=value2</c> cookie string into a collection.
    /// </summary>
    /// <param name="strCookies">Cookie header style string.</param>
    /// <returns>
    /// The parsed cookies, or <c>null</c> when <paramref name="strCookies"/> is null or empty.
    /// Segments without a name or without '=' are skipped.
    /// </returns>
    public static CookieCollection CookiesStrToCollection(string strCookies)
    {
        if (string.IsNullOrEmpty(strCookies))
        {
            return null;
        }

        CookieCollection cookies = new CookieCollection();
        foreach (string pair in strCookies.Split(';'))
        {
            int separator = pair.IndexOf('=');
            if (separator <= 0)
            {
                continue;
            }

            string name = pair.Substring(0, separator).Trim();
            if (name.Length == 0)
            {
                // A whitespace-only name would make the Cookie constructor throw.
                continue;
            }

            string value = pair.Substring(separator + 1).Trim();
            cookies.Add(new Cookie(name, value));
        }

        return cookies;
    }

    /// <summary>
    /// Value assigned to both <c>Timeout</c> and <c>ReadWriteTimeout</c> of the request
    /// when greater than zero; otherwise the request defaults are kept.
    /// </summary>
    public int Timeout
    {
        get { return this._timeOut; }
        set { this._timeOut = value; }
    }

    /// <summary>Whether the request follows redirects automatically (default true).</summary>
    public bool allowAutoRedirect
    {
        get { return this._allowAutoRedirect; }
        set { this._allowAutoRedirect = value; }
    }

    /// <summary>Cookies received with the last response that carried any.</summary>
    public CookieCollection cookieGet
    {
        get { return this._cookieGet; }
        set { this._cookieGet = value; }
    }

    /// <summary>Cookies to send; their Domain is overwritten with the target host.</summary>
    public CookieCollection cookiePost
    {
        get { return this._cookiePost; }
        set { this._cookiePost = value; }
    }

    /// <summary>Extra headers applied to the request before the built-in ones.</summary>
    public WebHeaderCollection customHeaders
    {
        get { return this._customHeaders; }
        set { this._customHeaders = value; }
    }

    /// <summary>Encoding used for the POST body and for decoding the response (default UTF-8).</summary>
    public Encoding encoding
    {
        get { return this._encoding; }
        set { this._encoding = value; }
    }

    /// <summary>HTTP protocol version of the request (default 1.1).</summary>
    public Version httpVersion
    {
        get { return this._httpVersion; }
        set { this._httpVersion = value; }
    }

    /// <summary>Proxy applied to the request when not null.</summary>
    public IWebProxy Proxy
    {
        get { return this._Proxy; }
        set { this._Proxy = value; }
    }

    /// <summary>Body (or redirect location) produced by the last successful call.</summary>
    public string ResHtml
    {
        get { return this._ResHtml; }
    }

    /// <summary>Value of the Accept header (default "*/*").</summary>
    public string strAcceptType
    {
        get { return this._strAcceptType; }
        set { this._strAcceptType = value; }
    }

    /// <summary>
    /// Status of the last response as produced by <c>StatusCode.ToString()</c>,
    /// i.e. the enum member name such as "OK", not the numeric code.
    /// </summary>
    public string strCode
    {
        get { return this._strCode; }
        set { this._strCode = value; }
    }

    /// <summary>Message of the last exception, or null when the last call recorded none.</summary>
    public string strErr
    {
        get { return this._strErr; }
        set { this._strErr = value; }
    }

    /// <summary>Form-encoded POST body; null or empty sends no body.</summary>
    public string strPostdata
    {
        get { return this._strPostdata; }
        set { this._strPostdata = value; }
    }

    /// <summary>Referer header value; falls back to the request URL when null.</summary>
    public string strRefUrl
    {
        get { return this._strRefUrl; }
        set { this._strRefUrl = value; }
    }

    /// <summary>Status description of the last response.</summary>
    public string strStatusDes
    {
        get { return this._strStatusDes; }
        set { this._strStatusDes = value; }
    }

    /// <summary>Request URL (replaced by its encoded form once a request is created).</summary>
    public string strUrl
    {
        get { return this._strUrl; }
        set { this._strUrl = value; }
    }

    /// <summary>User-Agent header value.</summary>
    public string strUserAgent
    {
        get { return this._strUserAgent; }
        set { this._strUserAgent = value; }
    }

    /// <summary>True when the last <see cref="Proc"/> call completed without an exception.</summary>
    public bool succeed
    {
        get { return this._succeed; }
        set { this._succeed = value; }
    }
}