爬虫 spider09——爬取指定数据,去重复,并存储到mysql
生活随笔
收集整理的這篇文章主要介紹了
爬虫 spider09——爬取指定数据,去重复,并存储到mysql
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
爬取指定數據,去重復,并存儲到mysql。
技術棧:Spring Boot + SSM + 定時器(@Scheduled)定時爬取 + Redis 去重 + MyBatis 保存
一、創建父項目Spider
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Parent (aggregator) POM: declares the modules and pins shared dependency versions. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.henu</groupId>
    <artifactId>com.henu.spider</artifactId>
    <packaging>pom</packaging>
    <version>1.0-SNAPSHOT</version>

    <modules>
        <module>spider_provider</module>
    </modules>

    <dependencyManagement>
        <dependencies>
            <!-- Spring Cloud BOM -->
            <dependency>
                <groupId>org.springframework.cloud</groupId>
                <artifactId>spring-cloud-dependencies</artifactId>
                <version>Dalston.SR1</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
            <!-- Spring Boot BOM -->
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-dependencies</artifactId>
                <version>1.5.14.RELEASE</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
            <!-- Druid data source -->
            <dependency>
                <groupId>com.alibaba</groupId>
                <artifactId>druid</artifactId>
                <version>1.0.31</version>
            </dependency>
        </dependencies>
    </dependencyManagement>

    <build>
        <resources>
            <!-- Resources are filtered, so delimited placeholders inside them are substituted at build time. -->
            <resource>
                <directory>src/main/resources</directory>
                <filtering>true</filtering>
            </resource>
        </resources>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-resources-plugin</artifactId>
                <configuration>
                    <delimiters>
                        <!-- Element name follows the plugin documentation (<delimiter>); the value is unchanged. -->
                        <delimiter>$</delimiter>
                    </delimiters>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
二、創建maven項目spider_provider
1、創建表
-- Crawled news items. Column names stay camelCase on purpose: the MyBatis
-- mapper's INSERT statement references appId / updateTime literally.
CREATE TABLE IF NOT EXISTS news (
    id         INT AUTO_INCREMENT PRIMARY KEY,  -- display width (9) dropped: it never limited the stored range
    appId      VARCHAR(32),
    title      VARCHAR(100),
    intro      VARCHAR(1000),
    source     VARCHAR(100),
    url        VARCHAR(500),                    -- widened: the URL is the dedup key and must not be truncated or rejected
    updateTime DATETIME
) DEFAULT CHARSET = utf8mb4;                    -- titles and intros are Chinese text
2、pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Crawler module: web + MyBatis + Redis + HttpClient, inheriting versions from the parent POM. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>com.henu.spider</artifactId>
        <groupId>com.henu</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>spider_provider</artifactId>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>

    <dependencies>
        <!-- Web starter -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <!-- MyBatis starter -->
        <dependency>
            <groupId>org.mybatis.spring.boot</groupId>
            <artifactId>mybatis-spring-boot-starter</artifactId>
            <version>1.2.2</version>
        </dependency>
        <!-- Redis -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-redis</artifactId>
        </dependency>
        <!-- HttpClient -->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.6</version>
        </dependency>
        <!-- MySQL JDBC driver -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.6</version>
            <scope>runtime</scope>
        </dependency>
        <!-- Alibaba Druid data source (this explicit version overrides the one managed by the parent) -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>druid</artifactId>
            <version>1.1.3</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-tomcat</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.tomcat.embed</groupId>
            <artifactId>tomcat-embed-jasper</artifactId>
        </dependency>
        <!-- Hot reload -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-devtools</artifactId>
            <!-- optional=true: not passed on transitively; a project depending on this one must
                 declare devtools itself if it wants it. -->
            <optional>true</optional>
            <!-- The former <scope>true</scope> was removed: "true" is not a valid Maven scope. -->
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.48</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <configuration>
                    <!-- Without fork=true the devtools hot reload does not take effect. -->
                    <fork>true</fork>
                </configuration>
            </plugin>
            <!-- JDK level used to compile the Spring Boot application -->
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
3、application.yml
# Data source configuration
spring:
  application:
    name: spider8001
  datasource:
    type: com.alibaba.druid.pool.DruidDataSource    # data source implementation
    driver-class-name: com.mysql.jdbc.Driver         # same driver class that db.properties names
    # Database "spider"; the encoding parameters match db.properties so Chinese text round-trips.
    url: jdbc:mysql://localhost:3306/spider?useUnicode=true&characterEncoding=utf-8
    username: root
    password: 123456
    # NOTE(review): these keys are named for a DBCP2 pool; with a Druid data source they
    # presumably have no effect - confirm before relying on them.
    dbcp2:
      min-idle: 5            # minimum idle connections kept in the pool
      initial-size: 5        # connections opened at start-up
      max-total: 5           # maximum connections
      max-wait-millis: 200   # maximum wait for a connection, in ms
  # Redis configuration; a RedisTemplate is injected into the code in place of raw Jedis.
  redis:
    host: 127.0.0.1          # Redis server address
    port: 6379               # Redis server port
    database: 0              # Redis database index (default 0)
    timeout: 5000            # connection timeout, in ms
    # The parent POM pins Spring Boot 1.5.x, whose pool keys are spring.redis.pool.*
    # (spring.redis.jedis.pool.* is the 2.x spelling and is ignored on 1.5).
    pool:
      max-active: 50         # maximum connections (negative = unlimited)
      max-wait: 3000         # maximum blocking wait, in ms (negative = unlimited)
      max-idle: 20           # maximum idle connections
      min-idle: 2            # minimum idle connections

# MyBatis configuration
mybatis:
  config-location: classpath:mybatis/mybatisConfig.xml   # MyBatis config file
  type-aliases-package: com.henu.spider.dao.entity       # package scanned for entity aliases
  mapper-locations:
    - classpath:mybatis/mapper/**/*.xml                  # mapper XML files
4、配置文件
mybatisConfig.xml
<?xml version="1.0" encoding="UTF-8" ?>
<!-- Empty MyBatis configuration: present only because application.yml points at it. -->
<!DOCTYPE configuration
        PUBLIC "-//mybatis.org//DTD Config 3.0//EN"
        "http://mybatis.org/dtd/mybatis-3-config.dtd">
<configuration>
</configuration>
db.properties
# JDBC connection settings; one property per line so each key parses on its own.
driverClassName=com.mysql.jdbc.Driver
url=jdbc:mysql://localhost:3306/spider?useUnicode=true&characterEncoding=utf-8
username=root
password=123456
5、啟動類
package com.henu.spider;

import org.mybatis.spring.annotation.MapperScan;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.ComponentScan;

/**
 * Application entry point. Mapper interfaces under
 * {@code com.henu.spider.dao.mapper} are registered through {@link MapperScan}.
 */
@SpringBootApplication
@MapperScan(basePackages = "com.henu.spider.dao.mapper")
public class SpiderApplication8001 {

    /**
     * Boots the Spring context.
     *
     * @param args command-line arguments handed to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication application = new SpringApplication(SpiderApplication8001.class);
        application.run(args);
    }
}
6、配置類
RedisConfigBean
package com.henu.spider.config;

import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.PropertyAccessor;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.springframework.boot.SpringBootConfiguration;
import org.springframework.context.annotation.Bean;
import org.springframework.data.redis.connection.RedisConnectionFactory;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.data.redis.serializer.Jackson2JsonRedisSerializer;
import org.springframework.data.redis.serializer.StringRedisSerializer;

/**
 * Redis configuration: replaces the default RedisTemplate serializers so that keys
 * are stored as plain strings (readable in the Redis store) and values as JSON.
 */
@SpringBootConfiguration
public class RedisConfigBean {

    /**
     * Builds the {@code RedisTemplate} used by the application.
     *
     * @param factory connection factory supplied by Spring
     * @return a template with String key serializers and Jackson JSON value serializers
     */
    @Bean
    @SuppressWarnings("all")
    public RedisTemplate<String, Object> redisTemplate(RedisConnectionFactory factory) {
        // JSON mapper: every member visible, type information written for non-final types.
        // NOTE(review): default typing lets stored JSON pick the class to instantiate;
        // confirm that only trusted writers can reach this Redis instance.
        ObjectMapper mapper = new ObjectMapper();
        mapper.setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.ANY);
        mapper.enableDefaultTyping(ObjectMapper.DefaultTyping.NON_FINAL);

        Jackson2JsonRedisSerializer<Object> jsonSerializer = new Jackson2JsonRedisSerializer<>(Object.class);
        jsonSerializer.setObjectMapper(mapper);
        StringRedisSerializer keySerializer = new StringRedisSerializer();

        RedisTemplate<String, Object> redis = new RedisTemplate<>();
        redis.setConnectionFactory(factory);
        // Keys and hash keys are serialized as strings.
        redis.setKeySerializer(keySerializer);
        redis.setHashKeySerializer(keySerializer);
        // Values and hash values are serialized as JSON through Jackson.
        redis.setValueSerializer(jsonSerializer);
        redis.setHashValueSerializer(jsonSerializer);
        redis.afterPropertiesSet();
        return redis;
    }
}
SpiderConfigBean
package com.henu.spider.config;import com.henu.spider.service.NewsService; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.SpringBootConfiguration; import org.springframework.scheduling.annotation.EnableScheduling; import org.springframework.scheduling.annotation.Scheduled;import java.io.IOException;/*** 配置類:* 1.配置定時器,定時啟動爬蟲爬取數據*/ @SpringBootConfiguration @EnableScheduling//啟用定時器 public class SpiderConfigBean {@Autowiredpublic NewsService newsService;/*** 定時器啟用的方法* @throws IOException*/@Scheduled(cron="*/2 * * * * ?")public void startReadNews() throws IOException {System.out.println("定時器啟動開始爬取......");newsService.readNewsFromWebsite();} }7、映射文件
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper
        PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
        "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.henu.spider.dao.mapper.NewsMapper">

    <!-- Batch insert of crawled news rows: one multi-row INSERT built from the "news" list
         parameter. The list must not be empty, otherwise the statement has no VALUES rows. -->
    <insert id="batchSaveNews">
        INSERT INTO news (appId, title, intro, source, url, updateTime)
        VALUES
        <foreach collection="news" item="item" separator=",">
            (#{item.appId}, #{item.title}, #{item.intro}, #{item.source}, #{item.url}, #{item.updateTime})
        </foreach>
    </insert>
</mapper>
8、實體類
package com.henu.spider.dao.entity;

import com.alibaba.fastjson.annotation.JSONField;

import java.util.Date;

/**
 * Crawled news item.
 * The {@code @JSONField} annotations map JSON property names onto the bean properties.
 */
public class News {

    private String appId;
    private String title;
    private String intro;
    private String source;
    private String url;
    private Date updateTime;

    // ---- appId <-> "app_id" ----

    @JSONField(name = "app_id")
    public String getAppId() {
        return this.appId;
    }

    @JSONField(name = "app_id")
    public void setAppId(String appId) {
        this.appId = appId;
    }

    // ---- title ----

    @JSONField(name = "title")
    public String getTitle() {
        return this.title;
    }

    @JSONField(name = "title")
    public void setTitle(String title) {
        this.title = title;
    }

    // ---- intro ----

    @JSONField(name = "intro")
    public String getIntro() {
        return this.intro;
    }

    @JSONField(name = "intro")
    public void setIntro(String intro) {
        this.intro = intro;
    }

    // ---- source ----

    @JSONField(name = "source")
    public String getSource() {
        return this.source;
    }

    @JSONField(name = "source")
    public void setSource(String source) {
        this.source = source;
    }

    // ---- url ----

    @JSONField(name = "url")
    public String getUrl() {
        return this.url;
    }

    @JSONField(name = "url")
    public void setUrl(String url) {
        this.url = url;
    }

    // ---- updateTime <-> "update_time", formatted as yyyy-MM-dd HH:mm:ss ----

    @JSONField(name = "update_time", format = "yyyy-MM-dd HH:mm:ss")
    public Date getUpdateTime() {
        return this.updateTime;
    }

    @JSONField(name = "update_time", format = "yyyy-MM-dd HH:mm:ss")
    public void setUpdateTime(Date updateTime) {
        this.updateTime = updateTime;
    }

    /**
     * @return a debug representation listing every field
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("News{");
        text.append("appId='").append(appId).append('\'');
        text.append(", title='").append(title).append('\'');
        text.append(", intro='").append(intro).append('\'');
        text.append(", source='").append(source).append('\'');
        text.append(", url='").append(url).append('\'');
        text.append(", updateTime=").append(updateTime);
        text.append('}');
        return text.toString();
    }
}
9、Dao層
package com.henu.spider.dao.mapper;

import com.henu.spider.dao.entity.News;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import java.util.List;

/**
 * Persistence interface for crawled news.
 */
@Mapper
public interface NewsMapper {

    /**
     * Inserts the given news items in one batch.
     *
     * @param newsList items to insert; exposed to the mapper statement under the name "news"
     * @return the int result reported by MyBatis for the insert statement
     */
    int batchSaveNews(@Param("news") List<News> newsList);
}
10、service層
package com.henu.spider.service;import java.io.IOException;/*** 爬蟲業務層接口*/ public interface NewsService {/*** 執行爬取數據任務,由定時器調用*/public void readNewsFromWebsite()throws IOException;} package com.henu.spider.service.impl; import com.alibaba.fastjson.JSON; import com.henu.spider.dao.entity.News; import com.henu.spider.dao.mapper.NewsMapper; import com.henu.spider.service.NewsService; import com.henu.spider.util.NewsCollection; import org.apache.http.HttpEntity; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.data.redis.core.RedisTemplate; import org.springframework.stereotype.Service;import java.io.IOException; import java.util.Iterator; import java.util.List;/*** 爬蟲業務類*/ @Service public class NewsServiceImpl implements NewsService {@SuppressWarnings("SpringJavaAutowiringInspection")//注入dao層對象@Autowiredpublic NewsMapper newsMapper;//注入RedisTempdate對象@Autowiredpublic RedisTemplate redisTemplate;/*** 執行爬取數據任務,由定時器調用*/public void readNewsFromWebsite() throws IOException {//讀和不讀標記 true:爬取 false:停止爬取boolean startFlag=true;int pageNum=1;while(startFlag) {System.out.println("當前爬取第"+pageNum+"頁");String url = "https://pacaio.match.qq.com/irs/rcd?cid=146&token=49cbb2154853ef1a74ff4e53723372ce&ext=ent&page="+pageNum+"&callback=__jp6";CloseableHttpClient closeableHttpClient = HttpClients.createDefault();HttpGet httpGet = new HttpGet(url);//執行請求 user center pageCloseableHttpResponse closeableHttpResponse = closeableHttpClient.execute(httpGet);HttpEntity entity = closeableHttpResponse.getEntity();String content = EntityUtils.toString(entity, "GB2312");String jsonContent = StringToJson(content);//爬取源數據List<News> news = jsonToObject(jsonContent);//判斷爬取結束System.out.println("爬取數據量:" + 
news.size());if(news.size()==0){//沒有數據startFlag=false;}else{//去重之后數據List<News> cleanNews = isExistInRedis(news);System.out.println("有效數據量:" + cleanNews.size());if(cleanNews.size()!=0){//調用dao存儲數據newsMapper.batchSaveNews(cleanNews);}}pageNum++;}}/*** string轉換成json工具方法* @param srcJson* @return*/public String StringToJson(String srcJson){int start=srcJson.indexOf('(')+1;int end=srcJson.lastIndexOf(')');String jsonContent= srcJson.substring(start,end);return jsonContent;}public List<News> jsonToObject(String srcJson){//通過fastjson把json字符串轉換成java對象NewsCollection newsCollection = JSON.parseObject(srcJson, NewsCollection.class);List<News> news = newsCollection.getData();for (News newsMessage : news) {System.out.println(newsMessage);System.out.println("---------------------------------------------------------");}return news;}//判斷redis中是否存在相同地址,不存在則添加到數據庫和同步到redis,否則不添加數據到數據庫public List<News> isExistInRedis(List<News> newsList){//Jedis jedis = JedisPoolUtil.getJedis();Iterator<News> iterator = newsList.iterator();while(iterator.hasNext()){//存入redisNews news=iterator.next();//boolean isExist=jedis.hexists("bigdata:0715:com.henu.spider:news",news.getUrl());//通過redisTemplate判讀url是否存在boolean isExist=redisTemplate.opsForHash().hasKey("bigdata:0715:com.henu.spider:news",news.getUrl());if(!isExist){//不存在System.out.println("不存在:"+news.getUrl());//jedis.hset("bigdata:0715:com.henu.spider:news",news.getUrl(),objectToJson(news));//保存數據到redisredisTemplate.opsForHash().put("bigdata:0715:com.henu.spider:news",news.getUrl(),objectToJson(news));}else{//存在則刪除System.out.println("存在:"+news.getUrl());iterator.remove();}}return newsList;}//news to jsonpublic String objectToJson(Object obj){return JSON.toJSONString(obj);} }11、工具類
NewsCollection
package com.henu.spider.util;

import com.henu.spider.dao.entity.News;
import java.util.List;

/**
 * Wrapper for the crawled JSON document: its {@code data} property holds the news items.
 */
public class NewsCollection {

    // corresponds to the "data" property of the JSON document
    public List<News> data;

    /**
     * @param data the news items to hold
     */
    public void setData(List<News> data) {
        this.data = data;
    }

    /**
     * @return the news items, as last set
     */
    public List<News> getData() {
        return this.data;
    }
}
RedisUtil
package com.henu.spider.util;

import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.util.CollectionUtils;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;

/**
 * Redis helper wrapping a {@link RedisTemplate}.
 *
 * <p>The Spring annotations are left commented out, as in the original, so the class is
 * not a managed bean. Because nothing ever assigned the template, every method used to
 * fail with a NullPointerException; pass the template to the constructor instead.
 */
//@Component
public final class RedisUtil {

    //@Autowired
    private RedisTemplate<String, Object> redisTemplate;

    /** Kept for backward compatibility; the instance is unusable until a template is supplied. */
    public RedisUtil() {
    }

    /**
     * @param redisTemplate template every operation is delegated to
     */
    public RedisUtil(RedisTemplate<String, Object> redisTemplate) {
        this.redisTemplate = redisTemplate;
    }

    // =============================common============================

    /**
     * Sets a time-to-live on a key. Nothing is sent to Redis when {@code time <= 0}.
     *
     * @param key  key
     * @param time time-to-live in seconds
     * @return true unless an exception occurred
     */
    public boolean expire(String key, long time) {
        try {
            if (time > 0) {
                redisTemplate.expire(key, time, TimeUnit.SECONDS);
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Reads the remaining time-to-live of a key.
     *
     * @param key key, must not be null
     * @return remaining time in seconds. NOTE(review): the original comment said 0 means
     *         "never expires"; the Redis TTL command reports negative values for keys
     *         without expiry or missing keys - confirm which applies here.
     */
    public long getExpire(String key) {
        return redisTemplate.getExpire(key, TimeUnit.SECONDS);
    }

    /**
     * Tells whether a key exists.
     *
     * @param key key
     * @return true if present; false if absent or on error
     */
    public boolean hasKey(String key) {
        try {
            return redisTemplate.hasKey(key);
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Deletes one or more keys. Does nothing for a null or empty argument.
     *
     * @param key one or more keys
     */
    @SuppressWarnings("unchecked")
    public void del(String... key) {
        if (key != null && key.length > 0) {
            if (key.length == 1) {
                redisTemplate.delete(key[0]);
            } else {
                redisTemplate.delete(CollectionUtils.arrayToList(key));
            }
        }
    }

    // ============================String=============================

    /**
     * Reads a plain value.
     *
     * @param key key
     * @return the value, or null when {@code key} is null
     */
    public Object get(String key) {
        return key == null ? null : redisTemplate.opsForValue().get(key);
    }

    /**
     * Stores a plain value.
     *
     * @param key   key
     * @param value value
     * @return true on success, false on error
     */
    public boolean set(String key, Object value) {
        try {
            redisTemplate.opsForValue().set(key, value);
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Stores a plain value with a time-to-live.
     *
     * @param key   key
     * @param value value
     * @param time  time-to-live in seconds; when {@code <= 0} the value is stored without expiry
     * @return true on success, false on error
     */
    public boolean set(String key, Object value, long time) {
        try {
            if (time > 0) {
                redisTemplate.opsForValue().set(key, value, time, TimeUnit.SECONDS);
            } else {
                set(key, value);
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Increments a counter.
     *
     * @param key   key
     * @param delta amount to add; negative values are rejected.
     *              NOTE(review): the message says "greater than 0" but 0 is accepted.
     * @return the value after the increment
     */
    public long incr(String key, long delta) {
        if (delta < 0) {
            throw new RuntimeException("遞增因子必須大于0");
        }
        return redisTemplate.opsForValue().increment(key, delta);
    }

    /**
     * Decrements a counter.
     *
     * @param key   key
     * @param delta amount to subtract; negative values are rejected.
     *              NOTE(review): the message says "greater than 0" but 0 is accepted.
     * @return the value after the decrement
     */
    public long decr(String key, long delta) {
        if (delta < 0) {
            throw new RuntimeException("遞減因子必須大于0");
        }
        return redisTemplate.opsForValue().increment(key, -delta);
    }

    // ================================Map=================================

    /**
     * Reads one hash field.
     *
     * @param key  hash key, must not be null
     * @param item field, must not be null
     * @return the field value
     */
    public Object hget(String key, String item) {
        return redisTemplate.opsForHash().get(key, item);
    }

    /**
     * Reads every field of a hash.
     *
     * @param key hash key
     * @return all field/value pairs
     */
    public Map<Object, Object> hmget(String key) {
        return redisTemplate.opsForHash().entries(key);
    }

    /**
     * Writes several hash fields.
     *
     * @param key hash key
     * @param map field/value pairs
     * @return true on success, false on error
     */
    public boolean hmset(String key, Map<String, Object> map) {
        try {
            redisTemplate.opsForHash().putAll(key, map);
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Writes several hash fields, then sets a time-to-live on the hash.
     *
     * @param key  hash key
     * @param map  field/value pairs
     * @param time time-to-live in seconds; ignored when {@code <= 0}
     * @return true on success, false on error
     */
    public boolean hmset(String key, Map<String, Object> map, long time) {
        try {
            redisTemplate.opsForHash().putAll(key, map);
            if (time > 0) {
                expire(key, time);
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Writes one hash field.
     *
     * @param key   hash key
     * @param item  field
     * @param value value
     * @return true on success, false on error
     */
    public boolean hset(String key, String item, Object value) {
        try {
            redisTemplate.opsForHash().put(key, item, value);
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Writes one hash field, then sets a time-to-live on the whole hash.
     *
     * @param key   hash key
     * @param item  field
     * @param value value
     * @param time  time-to-live in seconds; replaces any time-to-live the hash already had
     * @return true on success, false on error
     */
    public boolean hset(String key, String item, Object value, long time) {
        try {
            redisTemplate.opsForHash().put(key, item, value);
            if (time > 0) {
                expire(key, time);
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Deletes hash fields.
     *
     * @param key  hash key, must not be null
     * @param item one or more fields, must not be null
     */
    public void hdel(String key, Object... item) {
        redisTemplate.opsForHash().delete(key, item);
    }

    /**
     * Tells whether a hash field exists.
     *
     * @param key  hash key, must not be null
     * @param item field, must not be null
     * @return true if the field exists
     */
    public boolean hHasKey(String key, String item) {
        return redisTemplate.opsForHash().hasKey(key, item);
    }

    /**
     * Increments a hash field.
     *
     * @param key  hash key
     * @param item field
     * @param by   amount to add
     * @return the value after the increment
     */
    public double hincr(String key, String item, double by) {
        return redisTemplate.opsForHash().increment(key, item, by);
    }

    /**
     * Decrements a hash field.
     *
     * @param key  hash key
     * @param item field
     * @param by   amount to subtract
     * @return the value after the decrement
     */
    public double hdecr(String key, String item, double by) {
        return redisTemplate.opsForHash().increment(key, item, -by);
    }

    // ============================set=============================

    /**
     * Reads every member of a set.
     *
     * @param key set key
     * @return the members, or null on error
     */
    public Set<Object> sGet(String key) {
        try {
            return redisTemplate.opsForSet().members(key);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Tells whether a value is a member of a set.
     *
     * @param key   set key
     * @param value value to look for
     * @return true if it is a member; false if not or on error
     */
    public boolean sHasKey(String key, Object value) {
        try {
            return redisTemplate.opsForSet().isMember(key, value);
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Adds values to a set.
     *
     * @param key    set key
     * @param values one or more values
     * @return number of values added; 0 on error
     */
    public long sSet(String key, Object... values) {
        try {
            return redisTemplate.opsForSet().add(key, values);
        } catch (Exception e) {
            e.printStackTrace();
            return 0;
        }
    }

    /**
     * Adds values to a set, then sets a time-to-live on the set.
     *
     * @param key    set key
     * @param time   time-to-live in seconds; ignored when {@code <= 0}
     * @param values one or more values
     * @return number of values added; 0 on error
     */
    public long sSetAndTime(String key, long time, Object... values) {
        try {
            Long count = redisTemplate.opsForSet().add(key, values);
            if (time > 0) {
                expire(key, time);
            }
            return count;
        } catch (Exception e) {
            e.printStackTrace();
            return 0;
        }
    }

    /**
     * Reads the size of a set.
     *
     * @param key set key
     * @return number of members; 0 on error
     */
    public long sGetSetSize(String key) {
        try {
            return redisTemplate.opsForSet().size(key);
        } catch (Exception e) {
            e.printStackTrace();
            return 0;
        }
    }

    /**
     * Removes values from a set.
     *
     * @param key    set key
     * @param values one or more values
     * @return number of values removed; 0 on error
     */
    public long setRemove(String key, Object... values) {
        try {
            Long count = redisTemplate.opsForSet().remove(key, values);
            return count;
        } catch (Exception e) {
            e.printStackTrace();
            return 0;
        }
    }

    // ===============================list=================================

    /**
     * Reads a range of a list.
     *
     * @param key   list key
     * @param start first index
     * @param end   last index; 0 to -1 selects every element
     * @return the elements, or null on error
     */
    public List<Object> lGet(String key, long start, long end) {
        try {
            return redisTemplate.opsForList().range(key, start, end);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads the length of a list.
     *
     * @param key list key
     * @return number of elements; 0 on error
     */
    public long lGetListSize(String key) {
        try {
            return redisTemplate.opsForList().size(key);
        } catch (Exception e) {
            e.printStackTrace();
            return 0;
        }
    }

    /**
     * Reads one list element by index.
     *
     * @param key   list key
     * @param index index; 0 is the head, 1 the second element, and so on; -1 is the tail,
     *              -2 the element before it, and so on
     * @return the element, or null on error
     */
    public Object lGetIndex(String key, long index) {
        try {
            return redisTemplate.opsForList().index(key, index);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Appends one value to the tail of a list.
     *
     * @param key   list key
     * @param value value
     * @return true on success, false on error
     */
    public boolean lSet(String key, Object value) {
        try {
            redisTemplate.opsForList().rightPush(key, value);
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Appends one value to the tail of a list, then sets a time-to-live on the list.
     *
     * @param key   list key
     * @param value value
     * @param time  time-to-live in seconds; ignored when {@code <= 0}
     * @return true on success, false on error
     */
    public boolean lSet(String key, Object value, long time) {
        try {
            redisTemplate.opsForList().rightPush(key, value);
            if (time > 0) {
                expire(key, time);
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Passes the given list to the list operations' {@code rightPushAll}.
     *
     * @param key   list key
     * @param value list handed to rightPushAll
     * @return true on success, false on error
     */
    public boolean lSet(String key, List<Object> value) {
        try {
            redisTemplate.opsForList().rightPushAll(key, value);
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Passes the given list to {@code rightPushAll}, then sets a time-to-live on the list.
     *
     * @param key   list key
     * @param value list handed to rightPushAll
     * @param time  time-to-live in seconds; ignored when {@code <= 0}
     * @return true on success, false on error
     */
    public boolean lSet(String key, List<Object> value, long time) {
        try {
            redisTemplate.opsForList().rightPushAll(key, value);
            if (time > 0) {
                expire(key, time);
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Replaces the list element at an index.
     *
     * @param key   list key
     * @param index index
     * @param value new value
     * @return true on success, false on error
     */
    public boolean lUpdateIndex(String key, long index, Object value) {
        try {
            redisTemplate.opsForList().set(key, index, value);
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Removes occurrences of a value from a list.
     *
     * @param key   list key
     * @param count how many occurrences to remove
     * @param value value to remove
     * @return number of elements removed; 0 on error
     */
    public long lRemove(String key, long count, Object value) {
        try {
            Long remove = redisTemplate.opsForList().remove(key, count, value);
            return remove;
        } catch (Exception e) {
            e.printStackTrace();
            return 0;
        }
    }
}
?
結果展示:
?
?
持續進展!!!
?
總結
以上是生活随笔為你收集整理的爬虫 spider09——爬取指定数据,去重复,并存储到mysql的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 爬虫 spider08——爬取腾讯娱乐新
- 下一篇: 如何优化网站?