java爬取酷狗榜单歌曲信息并存入数据库
生活随笔
收集整理的這篇文章主要介紹了
java爬取酷狗榜单歌曲信息并存入数据库
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
這里只解析一下代碼,所需工具jsoup、HttpClient
HttpClient獲取html后,用jsoup解析html,再用java來獲取所需要的信息。
之前寫的有點問題,今天改了一下。因為通過hash值拼接的地址是個臨時地址,存在數據庫后一天就失效了,所以我改了一下。先把爬到的歌曲下載到本地,然后上傳到七牛云的對象存儲空間,再返回這個地址。最后把七牛云上的地址存到數據庫中,這樣就是永久的了。
HtmlManage:
package com.after.demo.spider;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Thin wrapper that turns raw HTML text into a jsoup {@link Document}.
 *
 * @author www.xyjz123.xyz
 * @date 2019/4/19 19:06
 */
public class HtmlManage {

    /**
     * Parses the given HTML with {@link Jsoup#parse(String)}.
     *
     * @param html the HTML source to parse
     * @return the parsed document
     */
    public Document manage(String html) {
        return Jsoup.parse(html);
    }
}

HttpGetConnect:
package com.after.demo.spider;import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.http.HttpEntity; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.BasicHttpClientConnectionManager;import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; /*** @author www.xyjz123.xyz* @description* @date 2019/4/19 19:05*/ public class HttpGetConnect {/*** 獲取html內(nèi)容* @param url* @param charsetName UTF-8、GB2312* @return* @throws IOException*/public static String connect(String url,String charsetName) throws IOException{BasicHttpClientConnectionManager connManager = new BasicHttpClientConnectionManager();CloseableHttpClient httpclient = HttpClients.custom().setConnectionManager(connManager).build();String content = "";try{HttpGet httpget = new HttpGet(url);RequestConfig requestConfig = RequestConfig.custom().setSocketTimeout(5000).setConnectTimeout(50000).setConnectionRequestTimeout(50000).build();httpget.setConfig(requestConfig);httpget.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");httpget.setHeader("Accept-Encoding", "gzip,deflate,sdch");httpget.setHeader("Accept-Language", "zh-CN,zh;q=0.8");httpget.setHeader("Connection", "keep-alive");httpget.setHeader("Upgrade-Insecure-Requests", "1");httpget.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36");httpget.setHeader("cache-control", "max-age=0");CloseableHttpResponse response = httpclient.execute(httpget);int status = response.getStatusLine().getStatusCode();if (status >= 200 && status < 300) {HttpEntity entity = 
response.getEntity();InputStream instream = entity.getContent();BufferedReader br = new BufferedReader(new InputStreamReader(instream,charsetName));StringBuffer sbf = new StringBuffer();String line = null;while ((line = br.readLine()) != null){sbf.append(line + "\n");}br.close();content = sbf.toString();} else {content = "";}}catch(Exception e){e.printStackTrace();}finally{httpclient.close();}//log.info("content is " + content);return content;}private static Log log = LogFactory.getLog(HttpGetConnect.class); }MusicController:
package com.after.demo.controller;import com.after.demo.entity.Music; import com.after.demo.service.impl.MusicServiceImpl; import com.after.demo.service.impl.UploadServiceImpl; import com.after.demo.spider.FileDownload; import com.after.demo.spider.HtmlManage; import com.after.demo.spider.HttpGetConnect; import com.after.demo.utils.GetString; import com.after.demo.utils.JsonResult; import com.google.gson.Gson; import com.qiniu.common.QiniuException; import com.qiniu.http.Response; import com.qiniu.storage.model.DefaultPutRet; import io.swagger.annotations.ApiOperation; import net.sf.json.JSONObject; import org.apache.commons.lang3.StringUtils; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RestController;import java.io.File; import java.io.IOException; import java.util.regex.Matcher; import java.util.regex.Pattern;/*** @author www.xyjz123.xyz* @description* @date 2019/4/19 21:18*/ @RestController public class MusicController {@AutowiredMusicServiceImpl musicService;@AutowiredUploadServiceImpl uploadService;public static String FILEPATH = "F:/music/";public static String mp3 = "https://wwwapi.kugou.com/yy/index.php?r=play/getdata&callback=jQuery191027067069941080546_1546235744250&"+ "hash=HASH&album_id=0&_=TIME";public static final String LINK = "https://www.kugou.com/yy/rank/home/PAGE-33164.html?from=rank";@GetMapping("/music/save")@ApiOperation("將酷狗歌單爬取存入數(shù)據(jù)庫(kù)")public JsonResult saveMusic() throws IOException{for(int i = 1 ; i < 10 ; i++){String url = LINK.replace("PAGE", i + "");getTitle(url);}return JsonResult.ok();}@PostMapping("/music/getOne")@ApiOperation("隨機(jī)獲取一首歌")public JsonResult getMusic(){int id = GetString.getId();Music music = 
musicService.getMusicById(id);return JsonResult.ok(music);}public String getTitle(String url) throws IOException {String content = HttpGetConnect.connect(url, "utf-8");HtmlManage html = new HtmlManage();Document doc = html.manage(content);Element ele = doc.getElementsByClass("pc_temp_songlist").get(0);Elements eles = ele.getElementsByTag("li");for(int i = 0 ; i < eles.size() ; i++){Element item = eles.get(i);String title = item.attr("title").trim();String link = item.getElementsByTag("a").first().attr("href");download(link,title);}return null;}public String download(String url,String name) throws IOException{String hash = "";String content = HttpGetConnect.connect(url, "utf-8");HtmlManage html = new HtmlManage();String regEx = "\"hash\":\"[0-9A-Z]+\"";// 編譯正則表達(dá)式Pattern pattern = Pattern.compile(regEx);Matcher matcher = pattern.matcher(content);if (matcher.find()) {hash = matcher.group();hash = hash.replace("\"hash\":\"", "");hash = hash.replace("\"", "");}//爬取歌曲的封面圖Document doc = html.manage(content);Element ele = doc.getElementsByClass("albumImg").get(0);String imgUrl = ele.getElementsByTag("img").attr("src");//利用hash值構(gòu)造歌曲mp3地址String item = mp3.replace("HASH", hash);item = item.replace("TIME", System.currentTimeMillis() + "");String mp = HttpGetConnect.connect(item, "utf-8");mp = mp.substring(mp.indexOf("(") + 1, mp.length() - 3);JSONObject json = JSONObject.fromObject(mp);String playUrl = json.getJSONObject("data").getString("play_url");System.out.println(playUrl);FileDownload fileDownload = new FileDownload();fileDownload.download(playUrl,FILEPATH + name + ".mp3");String src = null;try{File file = new File(FILEPATH + name + ".mp3");Response response = uploadService.uploadFile(file);//解析上傳成功的結(jié)果DefaultPutRet putRet = new Gson().fromJson(response.bodyString(), DefaultPutRet.class);src = "http://www.jie12366.xyz/" + putRet.key;}catch (QiniuException e){e.printStackTrace();}//如果圖片地址或mp3地址為空,則不爬取(歌曲是收費(fèi)的無(wú)法爬取)if (StringUtils.isNotBlank(src) && 
StringUtils.isNotBlank(imgUrl)){musicService.saveMusic(name,imgUrl,src);}return playUrl;} }新增FileDownload類:
package com.after.demo.spider;

import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;

/**
 * Downloads a remote file to the local disk.
 *
 * @author www.xyjz123.xyz
 * @date 2019/4/24 16:37
 */
public class FileDownload {

    /** Number of requests made before giving up on a non-200 answer. */
    private static final int MAX_ATTEMPTS = 3;

    /**
     * Downloads {@code url} to {@code path}, trying up to three times while the server does
     * not answer with status 200.
     *
     * <p>The client, every response and both streams are closed on all paths, so a failed
     * attempt no longer keeps a pooled connection checked out. Failures are printed to stderr
     * and otherwise ignored; callers must check the target file to learn whether the download
     * succeeded.
     *
     * @param url  address of the file
     * @param path target path including the file name
     */
    public void download(String url, String path) {
        RequestConfig requestConfig = RequestConfig.custom()
                .setSocketTimeout(2000)
                .setConnectTimeout(2000)
                .build();
        HttpGet get = new HttpGet(url);
        get.setConfig(requestConfig);

        try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
            for (int attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
                try (CloseableHttpResponse result = httpclient.execute(get)) {
                    if (result.getStatusLine().getStatusCode() != 200 || result.getEntity() == null) {
                        // Leaving the try block closes the response and frees its connection.
                        continue;
                    }
                    try (BufferedInputStream in = new BufferedInputStream(result.getEntity().getContent());
                         BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(new File(path)))) {
                        byte[] buffer = new byte[8192];
                        int len;
                        while ((len = in.read(buffer)) != -1) {
                            out.write(buffer, 0, len);
                        }
                    }
                    break;
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            get.releaseConnection();
        }
    }
}

entity:
package com.after.demo.entity;

import com.gitee.sunchenbin.mybatis.actable.annotation.Column;
import com.gitee.sunchenbin.mybatis.actable.annotation.Table;
import com.gitee.sunchenbin.mybatis.actable.constants.MySqlTypeConstant;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * One song, mapped to the {@code music} table.
 *
 * <p>Accessors and both constructors are generated by Lombok; the all-args constructor takes
 * the fields in declaration order.
 *
 * @author www.xyjz123.xyz
 * @date 2019/4/19 19:20
 */
@Table(name = "music")
@Data
@NoArgsConstructor
@AllArgsConstructor
public class Music {

    /** Primary key, declared as an auto-increment INT column. */
    @Column(name = "id", type = MySqlTypeConstant.INT, length = 5, isKey = true, isAutoIncrement = true)
    private int id;

    /** Song title, declared unique. */
    @Column(name = "name", type = MySqlTypeConstant.VARCHAR, isUnique = true)
    private String name;

    /** Cover image address; the column is declared with length 80. */
    @Column(name = "imgUrl", type = MySqlTypeConstant.VARCHAR, length = 80)
    private String imgUrl;

    /** Address of the audio file. */
    @Column(name = "src", type = MySqlTypeConstant.VARCHAR)
    private String src;
}

mapper:
package com.after.demo.mapper;

import com.after.demo.entity.Music;
import org.apache.ibatis.annotations.Insert;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;

import java.util.List;

/**
 * MyBatis mapper for the {@code music} table.
 *
 * @author www.xyjz123.xyz
 * @date 2019/4/19 21:03
 */
@Mapper
public interface MusicMapper {

    /**
     * Stores one crawled song.
     *
     * <p>The parameters are named explicitly with {@code @Param} so the {@code #{...}}
     * placeholders resolve regardless of whether the project is compiled with
     * {@code -parameters}.
     *
     * @param name   song title
     * @param imgUrl cover image address
     * @param src    address of the audio file
     * @return number of inserted rows
     */
    @Insert("insert into music(name,imgUrl,src) values(#{name},#{imgUrl},#{src})")
    int saveMusic(@Param("name") String name, @Param("imgUrl") String imgUrl, @Param("src") String src);

    /**
     * Loads every song in the table.
     *
     * @return all rows of the music table
     */
    @Select("select * from music")
    List<Music> listMusic();

    /**
     * Loads one song by primary key.
     *
     * @param id primary key of the song
     * @return the matching song, or {@code null} when no row has that id
     */
    @Select("select * from music where id=#{id}")
    Music getMusicById(@Param("id") int id);
}

service:
package com.after.demo.service.impl;import com.after.demo.entity.Music; import com.after.demo.mapper.MusicMapper; import com.after.demo.service.MusicService; import com.after.demo.utils.GetString; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.cache.annotation.CacheConfig; import org.springframework.cache.annotation.Cacheable; import org.springframework.stereotype.Service;import java.util.List;/*** @author www.xyjz123.xyz* @description* @date 2019/4/19 21:09*/ @Service @CacheConfig public class MusicServiceImpl implements MusicService {@AutowiredMusicMapper musicMapper;@Overridepublic int saveMusic(String name, String imgUrl, String src) {List<Music> musicList = musicMapper.listMusic();for (Music music:musicList){if (music.getName().equals(name)){return 0;}}return musicMapper.saveMusic(name,imgUrl,src);}@Override@Cacheable(value = "music")public List<Music> listMusic() {return musicMapper.listMusic();}@Overridepublic Music getMusicById(int id) {int maxSize = GetString.MAXSIZE;if (id <= maxSize){return musicMapper.getMusicById(id);}return null;} }utils:
package com.after.demo.utils;

/**
 * Picks the random song id used by the "get one song" endpoint.
 *
 * @author 熊義杰
 * @date 2019-3-16
 */
public class GetString {

    /** Largest id that may be returned. */
    public static final int MAXSIZE = 165;

    /**
     * Returns a uniformly distributed id in the range {@code 1..MAXSIZE}, both inclusive.
     *
     * <p>The previous formula produced {@code 0..MAXSIZE-1}. The music table's id column is
     * declared auto-increment, so ids are assumed to start at 1; under that assumption id 0
     * never matched a row and id {@code MAXSIZE} was never picked.
     *
     * @return a random id between 1 and {@link #MAXSIZE}
     */
    public static int getId() {
        return (int) (Math.random() * MAXSIZE) + 1;
    }
}

數據庫效果:
隨機獲取一首歌:
總結
以上是生活随笔為你收集整理的java爬取酷狗榜单歌曲信息并存入数据库的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: IBM存储设备配置
- 下一篇: [侠客风云传]游戏攻略