using System.Text.RegularExpressions;
using MSXML2;
/// <summary>
/// Downloads the source of a web page and reduces it to its script-free body markup.
/// </summary>
/// <remarks>
/// The page is fetched with a synchronous GET request through MSXML2.XMLHTTP and the
/// response bytes are decoded as GB2312. The decoded text is then trimmed, cut down to
/// everything from the first <c>&lt;body</c> tag onwards (the whole document is kept when
/// no such tag exists), stripped of <c>&lt;script&gt;</c> elements and line-feed characters,
/// and finally every backslash is replaced with a forward slash.
/// </remarks>
/// <param name="link">URL of the page to download.</param>
/// <returns>
/// The cleaned-up markup, or an empty string when the server returned no response body.
/// </returns>
private string gethtm(string link)
{
    // Start of the <body> element; optional whitespace after '<' is tolerated.
    const string bodyStartPattern = @"<\s*body";

    // A complete <script>...</script> element (lazy, spanning lines) or a single line feed.
    // [\s\S] is used instead of (.|\n) to avoid needless alternation and backtracking.
    const string scriptOrNewlinePattern = @"<\s*script[\s\S]*?<\s*/\s*script\s*>|\n";

    string html;
    MSXML2.XMLHTTP xmlhttp = new MSXML2.XMLHTTP();
    try
    {
        // async = false: send() blocks until the whole response has been received,
        // so no waiting/sleeping is needed between the calls.
        xmlhttp.open("GET", link, false, null, null);
        xmlhttp.send("");

        byte[] responseBytes = (byte[])xmlhttp.responseBody;
        if (responseBytes == null || responseBytes.Length == 0)
        {
            // Nothing to decode; avoid passing null to GetString.
            return string.Empty;
        }

        html = Encoding.GetEncoding("GB2312").GetString(responseBytes).Trim();
    }
    finally
    {
        // Release the underlying COM object deterministically instead of waiting for the GC.
        System.Runtime.InteropServices.Marshal.ReleaseComObject(xmlhttp);
    }

    // Keep everything from the first <body tag to the end of the document.
    // If the page has no <body tag, fall back to the full text rather than throwing.
    Match bodyStart = Regex.Match(html, bodyStartPattern, RegexOptions.IgnoreCase);
    if (bodyStart.Success)
    {
        html = html.Substring(bodyStart.Index);
    }

    // Drop script elements and line feeds, then normalize backslashes to forward slashes.
    html = Regex.Replace(html, scriptOrNewlinePattern, string.Empty, RegexOptions.IgnoreCase);
    return html.Replace("\\", "/");
}