1、36壁纸网爬虫持久到数据库代码优化
This commit is contained in:
parent
8f36a77d2b
commit
7c2e1b684d
|
|
@ -397,9 +397,9 @@ export default {
|
|||
this.queryParams.endCreateTime = this.daterangeCreateTime[1];
|
||||
}
|
||||
listWord(this.queryParams).then(response => {
|
||||
this.loading = false;
|
||||
this.wordList = response.rows;
|
||||
this.total = response.total;
|
||||
this.loading = false;
|
||||
});
|
||||
},
|
||||
// 取消按钮
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package com.xjs._36wallpaper.task;
|
||||
|
||||
import com.xjs._36wallpaper.service._36wallpaperService;
|
||||
import com.xjs._36wallpaper.webmagic._36wallpaperProcessor;
|
||||
import com.xjs.annotation.ReptileLog;
|
||||
import lombok.extern.log4j.Log4j2;
|
||||
|
|
@ -20,6 +21,9 @@ public class _36wallpaperTask {
|
|||
@Autowired
|
||||
private _36wallpaperProcessor wallpaperProcessor;
|
||||
|
||||
@Autowired
|
||||
private _36wallpaperService wallpaperService;
|
||||
|
||||
|
||||
/**
|
||||
* 提供定时任务调取
|
||||
|
|
@ -27,7 +31,12 @@ public class _36wallpaperTask {
|
|||
*/
|
||||
@ReptileLog(name = "36壁纸网", url = _36_WALLPAPER_URL)
|
||||
public Long reptileWallpaper() {
|
||||
return wallpaperProcessor.run();
|
||||
Long run = wallpaperProcessor.run();
|
||||
//删除重复数据
|
||||
int count = wallpaperService.deleteRepeatData();
|
||||
log.info("36壁纸删除重复数据数:" + count);
|
||||
|
||||
return run;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,37 @@
|
|||
package com.xjs._36wallpaper.webmagic;
|
||||
|
||||
import cn.hutool.core.collection.CollUtil;
|
||||
import com.xjs._36wallpaper.pojo._36wallpaper;
|
||||
import com.xjs._36wallpaper.service._36wallpaperService;
|
||||
import lombok.extern.log4j.Log4j2;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Component;
|
||||
import us.codecraft.webmagic.ResultItems;
|
||||
import us.codecraft.webmagic.Task;
|
||||
import us.codecraft.webmagic.pipeline.Pipeline;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* webmagic持久化保存
|
||||
* @author xiejs
|
||||
* @since 2022-02-20
|
||||
*/
|
||||
@Component
|
||||
@Log4j2
|
||||
public class _36wallpaperPipeline implements Pipeline {
|
||||
|
||||
@Autowired
|
||||
private _36wallpaperService wallpaperService;
|
||||
|
||||
@Override
|
||||
public void process(ResultItems resultItems, Task task) {
|
||||
|
||||
List<_36wallpaper> wallpaperData = resultItems.get("_36wallpaperData");
|
||||
|
||||
if (CollUtil.isNotEmpty(wallpaperData)) {
|
||||
wallpaperService.saveBatch(wallpaperData, 25);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -2,7 +2,6 @@ package com.xjs._36wallpaper.webmagic;
|
|||
|
||||
import com.ruoyi.common.redis.service.RedisService;
|
||||
import com.xjs._36wallpaper.pojo._36wallpaper;
|
||||
import com.xjs._36wallpaper.service._36wallpaperService;
|
||||
import lombok.extern.log4j.Log4j2;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
|
@ -44,6 +43,9 @@ public class _36wallpaperProcessor implements PageProcessor {
|
|||
private static final String headerValue = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36";
|
||||
|
||||
|
||||
@Autowired
|
||||
private _36wallpaperPipeline wallpaperPipeline;
|
||||
|
||||
private static RedisService redisService;
|
||||
|
||||
@Autowired
|
||||
|
|
@ -52,12 +54,12 @@ public class _36wallpaperProcessor implements PageProcessor {
|
|||
}
|
||||
|
||||
|
||||
private static _36wallpaperService wallpaperService;
|
||||
/*private static _36wallpaperService wallpaperService;
|
||||
|
||||
@Autowired
|
||||
public void setWallpaperService(_36wallpaperService wallpaperService) {
|
||||
_36wallpaperProcessor.wallpaperService = wallpaperService;
|
||||
}
|
||||
}*/
|
||||
|
||||
/**
|
||||
* 图片保存到磁盘的路径
|
||||
|
|
@ -159,8 +161,11 @@ public class _36wallpaperProcessor implements PageProcessor {
|
|||
}
|
||||
}
|
||||
|
||||
//持久化
|
||||
wallpaperService.saveBatch(wallpapers, 25);
|
||||
//持久化 --使用Pipeline实现持久化了
|
||||
//wallpaperService.saveBatch(wallpapers, 25);
|
||||
|
||||
//暂时保存到内存中,后续实现Pipeline接口保存到数据库
|
||||
page.putField("_36wallpaperData",wallpapers);
|
||||
|
||||
//循环次数存入redis中
|
||||
Integer count = redisService.getCacheObject(REPTILE_COUNT);
|
||||
|
|
@ -191,11 +196,10 @@ public class _36wallpaperProcessor implements PageProcessor {
|
|||
public Long run() {
|
||||
Spider.create(new _36wallpaperProcessor()).addUrl(_36_WALLPAPER_URL).thread(20)
|
||||
.setScheduler(new QueueScheduler().setDuplicateRemover(new BloomFilterDuplicateRemover(110000)))
|
||||
.addPipeline(wallpaperPipeline)
|
||||
.run();
|
||||
|
||||
//删除重复数据
|
||||
int count = wallpaperService.deleteRepeatData();
|
||||
log.info("36壁纸删除重复数据数:" + count);
|
||||
|
||||
|
||||
//从redis中获取循环次数
|
||||
Integer cache = redisService.getCacheObject(REPTILE_COUNT);
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ public class reptileLogAspect {
|
|||
* 持久化保存数据
|
||||
*/
|
||||
private void saveData(WebmagicLog webmagicLog) {
|
||||
if (webmagicLog.getComplexRate() != null && webmagicLog.getComplexRate() == 0L) {
|
||||
if (webmagicLog.getComplexRate() != null && webmagicLog.getComplexRate() == 0L || webmagicLog.getComplexRate() == 1L) {
|
||||
webmagicLog.setStatus(ERROR);
|
||||
} else {
|
||||
webmagicLog.setStatus(SUCCESS);
|
||||
|
|
|
|||
Loading…
Reference in New Issue