通过java获取抖音用户主页信息(2020年9月)
生活随笔
收集整理的這篇文章主要介紹了
通过java获取抖音用户主页信息(2020年9月)
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
通過java獲取抖音用戶主頁信息(2020年9月)
環境準備
本篇文章基於 STS 編輯器、JDK 1.8、Maven 3
項目結構如下圖
執行結果如下圖
html請求工具類
HttpUtils代碼如下
package test1;import org.jsoup.Jsoup; import org.jsoup.nodes.Document;import com.gargoylesoftware.htmlunit.BrowserVersion; import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController; import com.gargoylesoftware.htmlunit.WebClient; import com.gargoylesoftware.htmlunit.html.HtmlPage;/*** <pre>* 使用net.sourceforge.htmlunit獲取完整的html頁面,即完成后臺js代碼的運行* </pre>*/ public class HttpUtils {/*** 請求超時時間,默認30秒*/private int timeout = 30000;/*** 等待異步JS執(zhí)行時間,默認20秒*/private int waitForBackgroundJavaScript = 20000;private static HttpUtils httpUtils;private HttpUtils() {}public static HttpUtils getInstance() {if (httpUtils == null)httpUtils = new HttpUtils();return httpUtils;}public int getTimeout() {return timeout;}/*** 請求超時時間** @param timeout*/public void setTimeout(int timeout) {this.timeout = timeout;}public int getWaitForBackgroundJavaScript() {return waitForBackgroundJavaScript;}/*** 設置獲取完整HTML頁面時等待異步JS執(zhí)行的時間*/public void setWaitForBackgroundJavaScript(int waitForBackgroundJavaScript) {this.waitForBackgroundJavaScript = waitForBackgroundJavaScript;}/*** 將網頁返回為解析后的文檔格式*/public static Document parseHtmlToDoc(String html) throws Exception {return removeHtmlSpace(html);}private static Document removeHtmlSpace(String str) {Document doc = Jsoup.parse(str);String result = doc.html().replace(" ", "");return Jsoup.parse(result);}/*** 獲取頁面文檔字符串(等待異步JS執(zhí)行)*/public String getHtmlPageResponse(String url) throws Exception {String result = null;final WebClient webClient = new WebClient(BrowserVersion.CHROME);webClient.getOptions().setThrowExceptionOnScriptError(false);//當JS執(zhí)行出錯的時候是否拋出異常webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);//當HTTP的狀態(tài)非200時是否拋出異常webClient.getOptions().setActiveXNative(false);webClient.getOptions().setCssEnabled(false);//是否啟用CSSwebClient.getOptions().setJavaScriptEnabled(true); //非常重要,啟用JS,適用于頁面加載后異步調用jswebClient.setAjaxController(new 
NicelyResynchronizingAjaxController());//很重要,設置支持AJAXwebClient.getOptions().setTimeout(timeout);//設置的請求超時時間webClient.setJavaScriptTimeout(timeout);//設置JS執(zhí)行的超時時間HtmlPage page;try {page = webClient.getPage(url);} catch (Exception e) {webClient.close();throw e;}webClient.waitForBackgroundJavaScript(waitForBackgroundJavaScript);//方法阻塞線程result = page.asXml();webClient.close();return result;}/*** 獲取頁面文檔Document對象(等待異步JS執(zhí)行)*/public Document getHtmlPageResponseAsDocument(String url) throws Exception {return parseHtmlToDoc(getHtmlPageResponse(url));} }通過junit的測試類
package test1;import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.junit.Test;public class HttpUtilsTest {private static final String TEST_URL = "用戶主頁url";@Testpublic void testGetHtmlPageResponseAsDocument() {HttpUtils httpUtils = HttpUtils.getInstance();httpUtils.setTimeout(30000);httpUtils.setWaitForBackgroundJavaScript(30000);try {Document document = httpUtils.getHtmlPageResponseAsDocument(TEST_URL);//TODO//System.out.println(document);Element element = document.getElementById("pagelet-user-info");//獲取元素節(jié)點等//System.out.println(element);System.out.println("頭像url:"+element.getElementsByTag("img").attr("src"));System.out.println("昵稱:"+element.getElementsByTag("p").get(0).text());System.out.println(element.getElementsByTag("p").get(1).text());System.out.println("簽名:"+element.getElementsByTag("p").get(2).text());System.out.println("關注:"+element.getElementsByTag("p").get(3).getElementsByAttributeValue("class", "num").get(0).text());System.out.println("粉絲:"+element.getElementsByTag("p").get(3).getElementsByAttributeValue("class", "num").get(1).text());System.out.println("贊:"+element.getElementsByTag("p").get(3).getElementsByAttributeValue("class", "num").get(2).text());System.out.println("作品:"+element.getElementsByAttributeValue("class", "video-tab").first().getElementsByAttributeValue("class", "num").get(0).text());System.out.println("喜歡:"+element.getElementsByAttributeValue("class", "video-tab").first().getElementsByAttributeValue("class", "num").get(1).text());} catch (Exception e) {e.printStackTrace();}}}pom.xml
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.test1</groupId>
    <artifactId>test1</artifactId>
    <version>0.0.1-SNAPSHOT</version>

    <properties>
        <!-- The sources contain non-ASCII string literals; pin the encoding so builds do not depend on the platform default. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- The article targets JDK 1.8. -->
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>

    <dependencies>
        <!-- Headless browser used to load the page and run its JavaScript. -->
        <dependency>
            <groupId>net.sourceforge.htmlunit</groupId>
            <artifactId>htmlunit</artifactId>
            <version>2.27</version>
        </dependency>
        <!-- HTML parser used to query the rendered markup. -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.8.3</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
    </dependencies>
</project>

以上為實現獲取抖音主頁用戶信息的全部內容,隨著抖音的不斷更新,代碼需要及時調整。本文會持續更新。初來乍到,不足之處還請大神指教。謝謝
總結
以上是生活随笔為你收集整理的通过java获取抖音用户主页信息(2020年9月)的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 数据库技术支持文档
- 下一篇: 图像去雾算法(二)基于暗通道先验算法学习