页面爬虫(获取其他页面HTML)加载到自己页面示例

639 查看

代码如下:

//前台
<div id="showIframe"></div>
$(document).ready(function() {
var url = "@Url.Action("GetPageHtml","Catalog")";
$.ajax({
url: url,
type: "POST",
dataType:"json",
data: { url: "http://www.baidu.com" },
error: function () {
alert("bbb");
},
success: function (data) {
$("#showIframe").append(data);
//$("#showIframe div").hide();
//$("#showIframe>#container").show();
//$("#showIframe>#container>#content").show();
//$("#showIframe>#container>#content>.cmsPage").show();
}
});
});
//后台
/// <summary>
/// The essence of the crawler: sends an HTTP GET request to <paramref name="url"/>
/// and returns the whole page's HTML as a JSON-encoded string.
/// </summary>
/// <param name="url">Absolute http/https address of the page to fetch.</param>
/// <returns>
/// A JSON result wrapping the page HTML, or an empty string when the URL is not
/// an absolute http/https URL or the request fails.
/// </returns>
[HttpPost]
public JsonResult GetPageHtml(string url)
{
    string pageinfo = "";

    // Reject null, malformed and non-http(s) input up front. The original cast to
    // HttpWebRequest already failed (and returned "") for any other scheme.
    // NOTE(review): the URL comes from the client, so this endpoint can still be
    // pointed at internal hosts (SSRF); add a host allow-list if that matters.
    System.Uri target;
    if (!System.Uri.TryCreate(url, System.UriKind.Absolute, out target)
        || (target.Scheme != System.Uri.UriSchemeHttp && target.Scheme != System.Uri.UriSchemeHttps))
    {
        return Json(pageinfo);
    }

    try
    {
        HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(target);
        myReq.Accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";
        myReq.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)";

        // Dispose response, stream and reader so the connection is released
        // even when reading throws.
        using (HttpWebResponse myRep = (HttpWebResponse)myReq.GetResponse())
        using (Stream myStream = myRep.GetResponseStream())
        using (StreamReader sr = new StreamReader(myStream, ResolveResponseEncoding(myRep)))
        {
            pageinfo = sr.ReadToEnd();
        }
    }
    catch (System.Exception ex)
    {
        // Deliberately broad: the endpoint's contract is "empty string on any
        // failure". Record the cause instead of swallowing it silently.
        System.Diagnostics.Trace.TraceWarning("GetPageHtml failed for {0}: {1}", target, ex.Message);
        pageinfo = "";
    }
    return Json(pageinfo);
}

// Picks the text encoding for a response: the charset declared in the
// Content-Type header when there is one and it is recognized, otherwise the
// system default encoding (the behavior this action always had).
private static Encoding ResolveResponseEncoding(HttpWebResponse response)
{
    string contentType = response.ContentType;
    string charset = response.CharacterSet;

    // Only trust CharacterSet when the header really carries a charset;
    // NOTE(review): CharacterSet may report a built-in default otherwise — confirm.
    if (!string.IsNullOrEmpty(contentType)
        && contentType.IndexOf("charset", System.StringComparison.OrdinalIgnoreCase) >= 0
        && !string.IsNullOrEmpty(charset))
    {
        try
        {
            return Encoding.GetEncoding(charset.Trim().Trim('"', '\''));
        }
        catch (System.ArgumentException)
        {
            // Unknown charset label: fall back to the default below.
        }
    }
    return Encoding.Default;
}