页面爬虫(获取其他页面HTML)加载到自己页面

当前位置 : 首页 > 网页制作 > CSS > 页面爬虫(获取其他页面HTML)加载到自己页面

页面爬虫(获取其他页面HTML)加载到自己页面

来源: 作者: 时间:2016-01-29 09:12
//前台 <div id="showIframe"></div> $(document).ready(function() { var url = "@Url.Action("GetPageHtml","Catalog")"; $.ajax({ url: url, type: "POST", dataType: "json", data: { url: "ht...

//前台

 <!-- Empty placeholder: the script's AJAX success callback appends the fetched page HTML into this element. -->
 <div id="showIframe"></div>

$(document).ready(function() {

        var url = "@Url.Action("GetPageHtml","Catalog")";
       
        $.ajax({
            url: url,
            type: "POST",
            dataType:"json",
            data: { url: "http://www.baidu.com" },
            error: function () {
                alert("bbb");
            },
            success: function (data) {
                $("#showIframe").append(data);
                //$("#showIframe div").hide();
                //$("#showIframe>#container").show();
                //$("#showIframe>#container>#content").show();
                //$("#showIframe>#container>#content>.cmsPage").show();
            }
        });

 });

 


//后台

//爬虫的本质：向目标 URL 发送请求，并返回整个页面的 HTML

/// <summary>
/// Downloads the page at <paramref name="url"/> and returns its HTML as a JSON string.
/// </summary>
/// <param name="url">Address of the page to fetch (an http/https URL).</param>
/// <returns>
/// The JSON-encoded page HTML, or a JSON-encoded empty string when
/// <paramref name="url"/> is blank or the download fails for any reason.
/// </returns>
[HttpPost]
        public JsonResult GetPageHtml(string url)
        {
            // NOTE(review): the caller fully controls the target URL, so this action can be
            // used to make the server request arbitrary hosts (SSRF). Consider restricting
            // it to an allow-list of hosts before exposing it publicly.
            if (string.IsNullOrWhiteSpace(url))
            {
                // Same result the failed request would produce, without the exception.
                return Json("");
            }

            string pageinfo;
            try
            {
                HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(url);
                myReq.Accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";
                myReq.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)";

                // Dispose the response, stream and reader so the underlying connection
                // is released even when reading fails part-way through.
                using (HttpWebResponse myRep = (HttpWebResponse)myReq.GetResponse())
                using (Stream myStream = myRep.GetResponseStream())
                using (StreamReader sr = new StreamReader(myStream, ResolveResponseEncoding(myRep)))
                {
                    pageinfo = sr.ReadToEnd();
                }
            }
            catch (System.Exception)
            {
                // Deliberate best-effort: any failure (bad URL, non-HTTP scheme, network
                // error, HTTP error status) is reported to the client as an empty page.
                pageinfo = "";
            }
            return Json(pageinfo);
        }

        /// <summary>
        /// Picks the text encoding used to decode <paramref name="response"/>: the charset
        /// declared in its Content-Type header when there is one and it is recognised,
        /// otherwise the system default encoding (the previous behaviour).
        /// </summary>
        private static Encoding ResolveResponseEncoding(HttpWebResponse response)
        {
            string contentType = response.ContentType;
            bool declaresCharset = contentType != null
                && contentType.IndexOf("charset", System.StringComparison.OrdinalIgnoreCase) >= 0;

            if (declaresCharset && !string.IsNullOrWhiteSpace(response.CharacterSet))
            {
                try
                {
                    return Encoding.GetEncoding(response.CharacterSet);
                }
                catch (System.ArgumentException)
                {
                    // Unknown or malformed charset name: fall through to the default.
                }
            }

            return Encoding.Default;
        }


 

Tag:
网友评论

<