最近在爬取七麦网站的app信息,请求url的参数中有个analysis参数需要破解,它的值是由动态js文件生成的,这就需要调试js代码了。上网找了不少博客,都是看着他们的教程自己搞定的。
https://www.qimai.cn/rank
https://mp.weixin.qq.com/s/1E_ONUnMwTFozd9-dB70Yw
https://lengyue.me/index.php/2018/10/15/qimai/
https://blowingdust.com/encrypted-compression-javascript-analysis.html
很感谢这些大神。
打断点调试js代码对于新手很不友好,很多地方都是自己琢磨的。我调试js断点找加密代码主要是参照上面附上的第二个链接。在此我只附上我写的代码,代码也主要是参考上面博主写的。
# Fetch the qimai.cn rank API for brand ids 0..2, signing every request with an
# `analysis` token computed by the companion JavaScript file (run through execjs).
import json
import os
import random

import execjs
import requests

# Pool of browser User-Agent strings; one is picked at random per run.
user_agent = [
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
    "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
    "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
    "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
    "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
    "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
    "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
    "Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
    "UCWEB7.0.2.37/28/999",
    "NOKIA5700/ UCWEB7.0.2.37/28/999",
    "Openwave/ UCWEB7.0.2.37/28/999",
    "Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999",
    "Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25",
]

headers = {
    "User-Agent": random.choice(user_agent),
    'Origin': 'https://www.qimai.cn',
    'Referer': 'https://www.qimai.cn/rank',
    "Accept": "application/json,text/plain,*/*",
}

params = {
    'brand': 'all',
    'country': 'cn',
    'device': 'iphone',
    'genre': '5000',
    'date': '2019-10-31',
    'page': 2,  # page 1 works as well
}

# Seconds before a request is abandoned; without it requests can block forever.
REQUEST_TIMEOUT = 10


def getanaly(synct, params):
    """Compute the `analysis` tokens by calling `get0analysis` in the JS file.

    :param synct: value of the `synct` cookie returned by the rank page.
    :param params: query parameters that the tokens are derived from.
    :return: whatever `get0analysis` returns; the caller below indexes it
        with brand ids 0, 1 and 2.
    """
    js_path = "demo_peopleapp.js"
    with open(js_path, 'r', encoding='utf8') as f:
        js_content = f.read()
    ctx = execjs.compile(js_content)
    new_pwd = ctx.call("get0analysis", synct, params)
    print(new_pwd)
    return new_pwd


# The rank page is requested only for its cookies. TLS verification is left at
# the default (enabled), matching the API request further down.
resp = requests.get('https://www.qimai.cn/rank', headers=headers, timeout=REQUEST_TIMEOUT)
t = resp.cookies.get_dict()
synct = t.get('synct')  # time value handed out by the site
print(synct)
if synct is None:
    # Fail here instead of passing None into the JS token computation.
    raise RuntimeError("the rank page did not set a 'synct' cookie")

ana = getanaly(synct, params)
for g in range(3):
    params['analysis'] = ana[g]
    url = 'https://api.qimai.cn/rank/indexPlus/brand_id/' + str(g) + '?'
    print(url)
    res = requests.get(url=url, headers=headers, params=params, timeout=REQUEST_TIMEOUT)
    # print(res.text)
    resjson = json.loads(res.text)
    print(resjson)
上面是主Python代码,下面是js代码,解密analysis参数。
由于七麦网站analysis参数的加密js代码会改变,因此本文的代码只能作为参考。我在分析加密的过程中也发现它的加密过程和我参考的博客中的过程有些差异。只要认真分析,自己修改一下就可以了。
// Standalone re-implementation of the site's `analysis` token routines.
// ES5 syntax (var, function declarations) is kept on purpose: the file is
// evaluated through execjs, whose JavaScript runtime is not known here.

// ---- f(k): XOR obfuscation -------------------------------------------------

/**
 * XOR each UTF-16 code unit of `a` with a code unit taken from key `n`.
 * The key position for text index i is (i + 10) % n.length.
 *
 * @param {string} a - text to transform.
 * @param {string} n - key string; must not be empty.
 * @returns {string} transformed text with the same length as `a`.
 */
function C(a, n) {
  var chars = a.split("");
  var keyLength = n.length;
  for (var i = 0; i < chars.length; i++) {
    chars[i] = m(chars[i].charCodeAt(0) ^ n[(i + 10) % keyLength].charCodeAt(0));
  }
  return chars.join("");
}

/**
 * Thin wrapper around String.fromCharCode.
 *
 * @param {number|string} n - code unit; numeric strings such as "0x41" are coerced.
 * @returns {string} one-character string.
 */
function m(n) {
  return String.fromCharCode(n);
}

// ---- f(e): UTF-8 + Base64 encoding -----------------------------------------

/**
 * Base64-encode the UTF-8 bytes of `n`.
 * encodeURIComponent yields %XX escapes for non-ASCII bytes; each escape is
 * turned back into a single character carrying that byte value, and the
 * resulting byte string is Base64-encoded.
 *
 * @param {string} n - text to encode.
 * @returns {string} Base64 text.
 */
function v(n) {
  var byteString = encodeURIComponent(n).replace(/%([0-9A-F]{2})/g, function (match, hex) {
    return m("0x" + hex);
  });
  return n_fun(byteString);
}

/**
 * Base64-encode a byte string (one character per byte).
 *
 * @param {*} t - value whose toString() is the byte string.
 * @returns {string} Base64 text.
 */
function n_fun(t) {
  var bytes = e_from(t.toString(), "binary");
  return q_fromByteArray(bytes); // equivalent to bytes.toString("base64")
}

/**
 * Copy a byte string into a new Uint8Array of the same length.
 *
 * @param {string} t_str - source string.
 * @param {string} b - encoding label; only forwarded to t_write.
 * @returns {Uint8Array} the filled array.
 */
function e_from(t_str, b) {
  var size = t_str.length;
  var buffer = new Uint8Array(size); // was an undeclared (implicit global) `t`
  t_write(buffer, t_str, b, size);
  return buffer;
}

/**
 * Write the low byte of every code unit of `e` into `t`, starting at index 0.
 *
 * @param {Uint8Array} t - destination.
 * @param {string} e - source string.
 * @param {string} b - encoding label; not used by this implementation.
 * @param {number} r - maximum number of bytes to write.
 * @returns {number} number of bytes written.
 */
function t_write(t, e, b, r) {
  return K(W(e), t, 0, r);
}

/**
 * Copy up to `r` items from `t` into `e` beginning at offset `n`, stopping
 * early when either the destination or the source is exhausted.
 *
 * @param {Array<number>} t - source values.
 * @param {Uint8Array|Array<number>} e - destination.
 * @param {number} n - destination offset.
 * @param {number} r - maximum number of items to copy.
 * @returns {number} number of items copied.
 */
function K(t, e, n, r) {
  var copied = 0;
  while (copied < r && copied + n < e.length && copied < t.length) {
    e[copied + n] = t[copied];
    ++copied;
  }
  return copied;
}

/**
 * Convert a string to an array holding the low 8 bits of each code unit.
 *
 * @param {string} t - source string.
 * @returns {Array<number>} values in the range 0..255.
 */
function W(t) {
  var bytes = [];
  for (var index = 0; index < t.length; ++index) {
    bytes.push(255 & t.charCodeAt(index));
  }
  return bytes;
}

// Base64 alphabet, indexed by 6-bit value. Declared explicitly: the original
// assigned to an undeclared name, which throws in strict mode / ES modules.
var l = "A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,0,1,2,3,4,5,6,7,8,9,+,/".split(",");

/**
 * Base64-encode a byte array, including `=` padding.
 *
 * @param {Uint8Array|Array<number>} t - bytes to encode.
 * @returns {string} Base64 text.
 */
function q_fromByteArray(t) {
  var length = t.length;
  var remainder = length % 3;
  var wholeLength = length - remainder;
  var chunkSize = 16383; // multiple of 3, so chunks never split a 3-byte group
  var parts = [];
  for (var start = 0; start < wholeLength; start += chunkSize) {
    var end = start + chunkSize > wholeLength ? wholeLength : start + chunkSize;
    parts.push(s(t, start, end));
  }

  // Encode the 1 or 2 trailing bytes that do not fill a 3-byte group.
  var tail = "";
  var bits;
  if (remainder === 1) {
    bits = t[length - 1];
    tail = l[bits >> 2] + l[bits << 4 & 63] + "==";
  } else if (remainder === 2) {
    bits = (t[length - 2] << 8) + t[length - 1];
    tail = l[bits >> 10] + l[bits >> 4 & 63] + l[bits << 2 & 63] + "=";
  }
  parts.push(tail);
  return parts.join("");
}

/**
 * Base64-encode the bytes t[e..n) three at a time; n - e is expected to be
 * a multiple of 3.
 *
 * @param {Uint8Array|Array<number>} t - bytes.
 * @param {number} e - start index (inclusive).
 * @param {number} n - end index (exclusive).
 * @returns {string} Base64 text for the range.
 */
function s(t, e, n) {
  var pieces = [];
  for (var offset = e; offset < n; offset += 3) {
    var triple = (t[offset] << 16 & 16711680) + (t[offset + 1] << 8 & 65280) + (255 & t[offset + 2]);
    pieces.push(a(triple));
  }
  return pieces.join("");
}

/**
 * Map a 24-bit number to its four Base64 characters.
 *
 * @param {number} t - 24-bit value.
 * @returns {string} four characters from the alphabet.
 */
function a(t) {
  return l[t >> 18 & 63] + l[t >> 12 & 63] + l[t >> 6 & 63] + l[63 & t];
}

// ---- entry point -----------------------------------------------------------

/**
 * Build the `analysis` tokens for brand ids 0, 1 and 2.
 *
 * The parameter values are sorted with the default (string) ordering,
 * concatenated and passed through v(); a path/timestamp/id suffix is appended,
 * the result is XOR-ed with a fixed key by C() and passed through v() again.
 *
 * NOTE(review): the original computed a time offset from `synct`
 * (now - (now - 1000 * synct) - 1515125653845) but never used it; the
 * timestamp field below is the hard-coded literal 57313212470. It looks like
 * the computed offset was meant to go there - confirm against the site's
 * current script before changing it. Output is kept identical to the original.
 *
 * @param {string|number} synct - `synct` cookie value; currently unused.
 * @param {Object} params - request parameters whose values feed the token.
 * @returns {Array<string>} tokens for brand ids 0, 1 and 2, in that order.
 */
function get0analysis(synct, params) {
  var values = [];
  for (var name in params) {
    values.push(params[name]);
  }
  var encodedParams = v(values.sort().join("")); // parameters go through f(e) first
  var xorKey = "00000008d78d46a";

  var analy = [];
  for (var brandId = 0; brandId < 3; brandId++) {
    var plain = encodedParams + '@#/rank/indexPlus/brand_id/' + brandId + '@#57313212470@#' + brandId;
    analy.push(v(C(plain, xorKey)));
  }
  return analy;
}