本人用 C# 写了一个爬虫,爬到的一个页面是用 JS 做跳转的,获取到的 HTML 页面代码如下:
<!DOCTYPE html> <html> <head> <title>正在跳转至购买页面</title> <meta charset="utf-8" /> <meta http-equiv="pragma" content="no-cache" /> <noscript><meta http-equiv="refresh" content="0; url=/"></noscript> <script> eval(function(p,a,c,k,e,d){e=function(c){return(c<a?"":e(parseInt(c/a)))+((c=c%a)>35?String.fromCharCode(c+29):c.toString(36))};if(!"".replace(/^/,String)){while(c--){d[e(c)]=k[c]||e(c)}k=[function(e){return d[e]}];e=function(){return"\w+"};c=1};while(c--){if(k[c]){p=p.replace(new RegExp("\b"+e(c)+"\b","g"),k[c])}}return p}("6 k(z){4 7,B=J 1m("(^| )"+z+"=([^;]*)(;|$)");8(7=D.1v.1w(B)){d S(7[2])}I{d\"\"}}6 9(c,y,G,9){d c.w(0,y-1)+9+c.w(G,c.1x)}4 u=n.e.h;4 3=k("3");8(3!=\"\"){3=16("("+3+")");f=3.f;b=3.b;8(u.15(f+"/"+b)<0){E=9(u,13,18,f+"/"+b);n.e.1d=E}}(6(){(6(i,s,o,g,r,a,m){i[\"1a\"]=r;i[r]=i[r]||6(){(i[r].q=i[r].q||[]).1f(Y)},i[r].l=1*J W();a=s.X(o),m=s.10(o)[0];a.R=1;a.1b=g;m.1y.1g(a,m)})(n,D,\"1z\",\"//P.1A-F.p/F.1E\",\"5\");4 K=k(\"1B\");4 7=K.1u(\"|\");8(7[1]){5(\"H\",\"C-A-1\",\"x\",{\"1t\":7[1]})}I{5(\"H\",\"C-A-1\",\"x\")}4 j=1s;6 v(){8(j)d;j=1p;e.h=Q}5(\"Z\",\"1o\",e.h);5(\"O\",\"1q\");Q=\"1r://P.N.p/1n/1i/1h/?t=M-L&1k=1l&1C=M-L\";5(\"O\",\"V\",\"直达链接\",\"1c\",\"1e\",{\"19\":\"12\",\"14\":\"17\",\"11\":\"U\",\"T\":\"N.p\",\"1D\":v});1j(v,1F)})()",62,104,"|||zdm_track_info|var|ga|function|arr|if|changeStr||channel|allstr|return|location|source||href||redirected|getCookie|||window||com|||||this_url|redirect|substring|auto|start|name|27058866|reg|UA|document|go_url|analytics|end|create|else|new|cookie_user|20|joyo01y|amazon|send|www|smzdmhref|async|unescape|dimension30|790|event|Date|createElement|arguments|set|getElementsByTagName|dimension1|aa|26|dimension6|indexOf|eval|1515|30|dimension29|GoogleAnalyticsObject|src|ca|replace|ht|push|insertBefore|B003Y3AZVY|product|setTimeout|smid|ATVPDKIKX0DER|RegExp|gp|page|true|pageview|https|false|userId|split|cookie|match|length|parentNode|script|google|user|tag|hitCallback|js|1000".split("|"),0,{})) </script> </head> 
</html>
现在想通过这段代码得到跳转页面的地址,求高手指点!!
已有思路:使用 MSScriptControl.ScriptControl 库直接执行代码中的 JS,但是遇到一个问题:script 代码中使用了 window 对象,本人不知道怎么去构造这个 window 对象。
求高手
解决方案
40
这段跳转代码是加密(混淆)过的,你找一下有没有能解密的 JS 库,有的话就好办了。
60
把这段JS代码解压贴给你:
/*
 * Unpacked, pretty-printed form of the redirect page's inline script.
 * Flow: optionally act on the "zdm_track_info" cookie, queue analytics
 * hits, then navigate to the URL stored in the global `smzdmhref`.
 */

/**
 * Finds a cookie by name in document.cookie.
 *
 * @param {string} name - Cookie name; it is concatenated into a RegExp as-is.
 * @returns {string} The unescape()d cookie value, or "" when nothing matches.
 */
function getCookie(name) {
  var pattern = new RegExp("(^| )" + name + "=([^;]*)(;|$)");
  var found = document.cookie.match(pattern);
  if (!found) {
    return "";
  }
  // Capture group 2 is the text between "name=" and the next ";" (or the end).
  return unescape(found[2]);
}

/**
 * Replaces the characters at 1-based positions `start`..`end` of `allstr`.
 *
 * @param {string} allstr - Source string.
 * @param {number} start - 1-based position of the first replaced character.
 * @param {number} end - 1-based position of the last replaced character.
 * @param {string} changeStr - Replacement text (shadows the function name here).
 * @returns {string} Everything before `start`, then `changeStr`, then everything after `end`.
 */
function changeStr(allstr, start, end, changeStr) {
  return allstr.substring(0, start - 1)
    + changeStr
    + allstr.substring(end, allstr.length);
}

var this_url = window.location.href;
var zdm_track_info = getCookie("zdm_track_info");

if (zdm_track_info != "") {
  // NOTE(review): the cookie text is evaluated as a JavaScript expression.
  zdm_track_info = eval("(" + zdm_track_info + ")");

  // `source`, `channel` and `go_url` are assigned without a declaration,
  // exactly as in the packed original.
  source = zdm_track_info.source;
  channel = zdm_track_info.channel;

  if (this_url.indexOf(source + "/" + channel) < 0) {
    go_url = changeStr(this_url, 26, 30, source + "/" + channel);
    // NOTE(review): this assigns to the `replace` property instead of calling
    // location.replace(go_url); kept as-is to stay faithful to the original.
    window.location.replace = go_url;
  }
}

(function () {
  // Install a queueing stub named "ga" on window and insert an async <script>
  // for analytics.js in front of the first <script> element of the document.
  (function (win, doc, tagName, scriptUrl, fnName, scriptEl, firstEl) {
    win["GoogleAnalyticsObject"] = fnName;
    win[fnName] = win[fnName] || function () {
      (win[fnName].q = win[fnName].q || []).push(arguments);
    };
    win[fnName].l = 1 * new Date();
    scriptEl = doc.createElement(tagName);
    firstEl = doc.getElementsByTagName(tagName)[0];
    scriptEl.async = 1;
    scriptEl.src = scriptUrl;
    firstEl.parentNode.insertBefore(scriptEl, firstEl);
  })(window, document, "script", "//www.google-analytics.com/analytics.js", "ga");

  // The second "|"-separated field of the "user" cookie, when present,
  // is passed to the tracker as userId.
  var userCookie = getCookie("user");
  var userFields = userCookie.split("|");
  if (userFields[1]) {
    ga("create", "UA-27058866-1", "auto", { "userId": userFields[1] });
  } else {
    ga("create", "UA-27058866-1", "auto");
  }

  var hasRedirected = false;

  // Navigates to `smzdmhref` at most once; it is registered both as the
  // hitCallback of the event hit and as a 1000 ms timer below.
  function redirect() {
    if (!hasRedirected) {
      hasRedirected = true;
      location.href = smzdmhref;
    }
  }

  ga("set", "page", location.href);
  ga("send", "pageview");

  // Final redirect target (assigned without a declaration, as in the original).
  smzdmhref = "https://www.amazon.com/gp/product/B003Y3AZVY/?t=joyo01y-20&smid=ATVPDKIKX0DER&tag=joyo01y-20";

  ga("send", "event", "直达链接", "ca", "ht", {
    "dimension29": "aa",
    "dimension6": "1515",
    "dimension1": "790",
    "dimension30": "amazon.com",
    "hitCallback": redirect
  });

  setTimeout(redirect, 1000);
})()
假如是针对性地抓取同类页面,分析这段代码后基本可以得出:跳转地址就是代码中赋给 smzdmhref 的那个固定链接地址;
假如是要获取不同的js代码的跳转地址,可利用C#调用浏览器来执行页面js,例如 WebBrowser控件。