以下是引用sdta在2021-9-4 17:34:14的发言:
多谢吹版,正如楼上所说,这有点高大上了!
“高大上”,吹下水还可以,其实编程没什么高大上的,编程就是这样子,重在理解和思路,只求达到目的,用什么语言都无所谓。
看了一下页面相关的JS代码,涉及到字符串编码解码的问题,有点复杂,要转为VFP代码可能要花点时间。
试了一下取出相关JS代码在VFP中运行来获取COOKIE的值(忽略域名和时间):
程序代码:
* Main program: downloads every page of a paginated HTML table into cursor tt
* and then queries the cursor.
* Steps: obtain the request cookie via GetUrlCookie, download page 1 to read
* the page count, then fetch pages 2..n with a pause between requests.
DECLARE LONG URLDownloadToFileA IN Urlmon LONG,STRING@,STRING@,LONG,LONG
DECLARE LONG DeleteUrlCacheEntry IN Wininet STRING@
* NOTE(review): the host name is absent from this URL as posted - fill it in before running.
cUrl = "https:///project/buildtablelist/938d3183-00f9-eb11-8e8f-005056b8d0cb"
cookie = GetUrlCookie(cUrl)
cTxt = ""
nPageCount = 0
IF !EMPTY(cookie)
    cTxt = UrlDownload(cUrl, cookie, 1)
    * The page count is taken from the last "page=" link in the first page.
    * When there is no such link (e.g. the download came back empty) the
    * extraction is skipped so STREXTRACT is never asked for occurrence 0.
    nPageLinks = OCCURS("page=", cTxt)
    IF nPageLinks > 0
        nPageCount = VAL(STREXTRACT(cTxt, [page=], [">尾页], nPageLinks))
    ENDIF
ENDIF
IF nPageCount != 0
    CREATE CURSOR tt (楼栋 C(50),房号 C(4),套内面积 c(10),建筑面积 c(10),;
        所在层 C(2),规划用途 C(4),备案总价 C(12),户型 C(12),销售状态 C(8))
    ? "共 "+TRANSFORM(nPageCount)+" 页"
    ? "第 1 页 成 功"
    * Page 1 was downloaded in full, so cut out the table body here.
    GetTable(STREXTRACT(cTxt,[<tbody>],[</tbody>]))
    FOR i = 2 TO nPageCount
        * Without the third argument UrlDownload returns only the <tbody> content.
        cTxt = UrlDownload(cUrl+"?page="+TRANSFORM(i), cookie)
        ? "第 "+TRANSFORM(i)+" 页"+IIF(!EMPTY(cTxt), " 成 功"," 失 败")
        IF !EMPTY(cTxt)
            GetTable(cTxt)
        ENDIF
        INKEY(1) && pause one second between page requests
    ENDFOR
    SELECT * FROM tt
ENDIF
* Single exit point: the DLL declarations are released on every path,
* including the ones where the cookie or the page count could not be obtained.
CLEAR DLLS
RETURN
* UrlDownload: performs a synchronous HTTP GET with the given Cookie header.
* Parameters:
*   cUrl   - address to request
*   Cookie - value sent as the "Cookie" request header
*   nFlags - optional; when non-empty the whole response text is returned,
*            otherwise only the text between <tbody> and </tbody>
* Returns: the response text (or its table body), or "" when the status is
*          not 200 or the request raises an error.
FUNCTION UrlDownload(cUrl, Cookie, nFlags)
LOCAL wh, cResult
cResult = ""
* Callers treat "" as "download failed", so an error raised by the HTTP
* object is mapped to "" instead of aborting the whole run.
* RETURN is not permitted inside TRY...ENDTRY, hence the result variable.
TRY
    wh = CREATEOBJECT("WinHttp.WinHttpRequest.5.1")
    wh.Open("GET", cUrl, 0)
    wh.SetRequestHeader("Cookie", Cookie)
    wh.Send()
    IF wh.Status == 200
        IF !EMPTY(nFlags)
            cResult = wh.ResponseText
        ELSE
            cResult = STREXTRACT(wh.ResponseText,[<tbody>],[</tbody>])
        ENDIF
    ENDIF
CATCH
    cResult = ""
ENDTRY
RETURN cResult
ENDFUNC
* GetTable: appends one record to cursor tt for every <tr>...</tr> row in cTxt.
* Parameters:
*   cTxt - HTML fragment containing the table rows
* Each row supplies nine values. Inside a row, the odd-numbered ">"..."<"
* spans (1, 3, ... 17) are used as the cell texts. Spaces, CR and LF are
* trimmed from both ends of every value.
FUNCTION GetTable(cTxt)
LOCAL nRow, nRowCount, cRow, nCol
LOCAL ARRAY aCell[9]
nRowCount = OCCURS("<tr>", cTxt)
FOR nRow = 1 TO nRowCount
    cRow = STREXTRACT(cTxt, [<tr>], [</tr>], nRow)
    FOR nCol = 1 TO 9
        * Column k is taken from the (2k-1)-th ">"..."<" span of the row.
        aCell[nCol] = ALLTRIM(STREXTRACT(cRow, [>], [<], 2 * nCol - 1), 0h20,0h0D,0h0A)
    ENDFOR
    INSERT INTO tt VALUES (aCell[1], aCell[2], aCell[3], aCell[4], aCell[5],;
        aCell[6], aCell[7], aCell[8], aCell[9])
ENDFOR
ENDFUNC
* GetUrlCookie: builds the cookie string ("name=value") needed to request cUrl.
* Parameters:
*   cUrl - page address; its HTML is downloaded with UrlToStr
* Returns: the value of the JavaScript variable vCookie, or "" when the page
*          could not be downloaded.
* How it works: the script file returned by GetJSCode(cHtml, 7) and the second
* inline <script> block of the page are merged with the snippet below, which
* AES-encrypts c1 with key a1 and IV b1 (CBC, PKCS7) and joins d1, "=" and the
* ciphertext. a1, b1, c1 and d1 are presumably defined by the page's inline
* script - verify against the actual page.
FUNCTION GetUrlCookie(cUrl)
* All working variables are LOCAL so they cannot overwrite same-named
* variables of the caller (the main program also uses cTxt).
LOCAL cHtml, jsCode, cTxt, sc
cHtml = UrlToStr(cUrl)
IF EMPTY(cHtml)
    RETURN ""
ENDIF
jsCode = GetJSCode(cHtml,7) + 0h0D0A
cTxt = STREXTRACT(cHtml,[<script>],[</script>],2)
TEXT TO jsCode TEXTMERGE NOSHOW PRETEXT 7
<<jsCode>>
<<cTxt>>
var h = CryptoJS;
var a = h.enc.Utf8.parse(a1);
var b = h.enc.Utf8.parse(b1);
var c = h.enc.Utf8.parse(c1);
var d = h.AES.encrypt(c, a, { iv: b, mode: CryptoJS.mode.CBC, padding: CryptoJS.pad.Pkcs7 });
var f = d.ciphertext.toString();
var vCookie = d1 + "=" + f;
ENDTEXT
* NOTE(review): this executes JavaScript downloaded from the remote site
* inside the local script engine - only use it with a site you trust.
sc = CREATEOBJECT("ScriptControl")
sc.Language = "JavaScript"
sc.AddCode(jsCode)
RETURN sc.Eval("vCookie")
ENDFUNC
* UrlToStr: downloads cUrl to a temporary file and returns its content.
* Parameters:
*   cUrl - address to download
* Returns: the file content converted with STRCONV(..., 11) (UTF-8 to the
*          current double-byte code page), or "" when the download fails.
* Requires URLDownloadToFileA and DeleteUrlCacheEntry to be DECLAREd by the caller.
FUNCTION UrlToStr(cUrl)
LOCAL cTmpFile, ret
ret = ""
* A unique file in the system temp folder avoids clashes with an existing
* tmp.txt and does not depend on the current directory being writable.
cTmpFile = ADDBS(SYS(2023)) + SYS(2015) + ".txt"
* Drop any cached copy first so this request fetches fresh content.
DeleteUrlCacheEntry(cUrl)
IF URLDownloadToFileA(0, cUrl, cTmpFile, 0, 0) != 0
    MESSAGEBOX("调用UrlToStr失败")
ELSE
    IF FILE(cTmpFile)
        ret = STRCONV(FILETOSTR(cTmpFile), 11)
    ENDIF
ENDIF
* Remove the temporary file on every path where it exists.
IF FILE(cTmpFile)
    DELETE FILE (cTmpFile)
ENDIF
RETURN ret
ENDFUNC
* GetJSCode: downloads the n-th external script referenced by the page.
* Parameters:
*   cHtml - page HTML to search
*   n     - which <script src="..."></script> occurrence to use
*   cHost - optional host name placed between "https://" and the src value
* Returns: the script text as returned by UrlToStr, or "" when the page has
*          no n-th script reference.
* NOTE(review): the posted listing has no host after "https://" (and its
* string literal was unterminated). Pass cHost, or restore the host in the
* literal below, when the src values are site-relative paths.
FUNCTION GetJSCode(cHtml, n, cHost)
LOCAL jsSrc, jsUrl
jsSrc = STREXTRACT(cHtml, [<script src="], ["></script>], n)
IF EMPTY(jsSrc)
    RETURN ""
ENDIF
IF "://" $ jsSrc
    * The src already is a complete URL - use it unchanged.
    jsUrl = jsSrc
ELSE
    jsUrl = "https://" + IIF(VARTYPE(cHost) == "C", cHost, "") + jsSrc
ENDIF
RETURN UrlToStr(jsUrl)
ENDFUNC