用DOM实现文章采集--采集到网页源码
来源:未知 责任编辑:责任编辑 发表时间:2014-04-20 03:40 点击:次
下面先给出用于采集网页源码的辅助类代码。
[csharp]
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Net;
using System.Text;
namespace TopWinCMS.Common
{
public class NetHelper
{
// Alternative browser-style User-Agent string, kept for reference but currently disabled:
//private string _HTTP_USER_AGENT = "Mozilla/4.0+(compatible;+MSIE+6.0;+Windows+NT+5.2;+SV1;+.NET+CLR+1.1.4322;+.NET+CLR+2.0.50727)";
// Default User-Agent value; read and written through the UserAgent property.
private string _UserAgent = "Googlebot/2.1 (+http://www.google.com/bot.html)";
// Backing field returned by the HttpEncoding property; starts out as null.
private Encoding _HttpEncoding = null;
// NOTE(review): presumably the proxy host name (empty by default) - its use is not visible in this excerpt.
private string _ProxyHost = string.Empty;
// NOTE(review): presumably the proxy port (defaults to 8080) - confirm against the code that reads it.
private int _ProxyInt = 8080;
// NOTE(review): presumably a request timeout in milliseconds (200000) - confirm units where it is consumed.
private int _TimeOut = 200000;
#region 属性
/// <summary>
/// Gets or sets the User-Agent string stored by this helper.
/// </summary>
public string UserAgent
{
    get { return _UserAgent; }
    set { _UserAgent = value; }
}
/// <summary>
/// 设置编码
/// </summary>
public Encoding HttpEncoding
{
get
{
return this._HttpEncoding;
}
[csharp]
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Net;
using System.Text;
namespace TopWinCMS.Common
{
public class NetHelper
{
// Alternative browser-style User-Agent string, kept for reference but currently disabled:
//private string _HTTP_USER_AGENT = "Mozilla/4.0+(compatible;+MSIE+6.0;+Windows+NT+5.2;+SV1;+.NET+CLR+1.1.4322;+.NET+CLR+2.0.50727)";
// Default User-Agent value; read and written through the UserAgent property.
private string _UserAgent = "Googlebot/2.1 (+http://www.google.com/bot.html)";
// Backing field returned by the HttpEncoding property; starts out as null.
private Encoding _HttpEncoding = null;
// NOTE(review): presumably the proxy host name (empty by default) - its use is not visible in this excerpt.
private string _ProxyHost = string.Empty;
// NOTE(review): presumably the proxy port (defaults to 8080) - confirm against the code that reads it.
private int _ProxyInt = 8080;
// NOTE(review): presumably a request timeout in milliseconds (200000) - confirm units where it is consumed.
private int _TimeOut = 200000;
#region 属性
/// <summary>
/// Gets or sets the User-Agent string stored by this helper.
/// </summary>
public string UserAgent
{
    get { return _UserAgent; }
    set { _UserAgent = value; }
}
/// <summary>
/// 设置编码
/// </summary>
public Encoding HttpEncoding
{
get
{
return this._HttpEncoding;
}
相关新闻>>
最新推荐更多>>>
- 发表评论
-
- 最新评论 更多>>