parent
3cbfd09956
commit
ede181d001
|
|
@ -3,6 +3,7 @@ package com.xjs.business.log;
|
|||
import com.ruoyi.common.core.constant.ServiceNameConstants;
|
||||
import com.ruoyi.common.core.domain.R;
|
||||
import com.xjs.business.log.domain.ApiLog;
|
||||
import com.xjs.business.log.domain.WebmagicLog;
|
||||
import com.xjs.business.log.factory.RemoteLogFactory;
|
||||
import org.springframework.cloud.openfeign.FeignClient;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
|
|
@ -20,4 +21,8 @@ public interface RemoteLogFeign {
|
|||
|
||||
@PostMapping("/apilog/forPRC")
|
||||
R<Object> saveApiLog(@RequestBody ApiLog apiLog);
|
||||
|
||||
|
||||
@PostMapping("reptileLog/saveForPRC")
|
||||
public R<Object> saveReptileLog(@RequestBody WebmagicLog webmagicLog);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,39 @@
|
|||
package com.xjs.business.log.domain;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Date;
|
||||
|
||||
/**
 * Crawler log entity (transfer object sent to the log service through the Feign client).
 *
 * @author xiejs
 * @since 2022-02-17
 */
|
||||
@Data
|
||||
public class WebmagicLog implements Serializable {
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
/** Primary key. */
|
||||
private Long id;
|
||||
|
||||
/** Crawler name. */
|
||||
private String name;
|
||||
|
||||
/** Crawler address (URL). */
|
||||
private String url;
|
||||
|
||||
/**
 * Complexity.
 * NOTE(review): the logging aspect stores the advised method's Long return value here
 * (the crawler tasks return a loop counter) - confirm this is the intended meaning.
 */
|
||||
private Long complexRate;
|
||||
|
||||
/**
 * Execution status.
 * NOTE(review): presumably 1 = success, 2 = failure, as in the service-side entity's
 * Excel mapping - verify against the status constants in ReqConst.
 */
private Integer status;
|
||||
|
||||
/** Time spent on the request, in milliseconds. */
|
||||
private Long requestTime;
|
||||
|
||||
/** Creation time. NOTE(review): appears to be filled in by the log service on insert - confirm. */
private Date createTime;
|
||||
|
||||
}
|
||||
|
|
@ -1,8 +1,9 @@
|
|||
package com.xjs.business.log.factory;
|
||||
|
||||
import com.ruoyi.common.core.domain.R;
|
||||
import com.xjs.business.api.factory.RemoteTranDictFactory;
|
||||
import com.xjs.business.log.RemoteLogFeign;
|
||||
import com.xjs.business.log.domain.ApiLog;
|
||||
import com.xjs.business.log.domain.WebmagicLog;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.cloud.openfeign.FallbackFactory;
|
||||
|
|
@ -19,7 +20,18 @@ public class RemoteLogFactory implements FallbackFactory<RemoteLogFeign> {
|
|||
|
||||
@Override
|
||||
public RemoteLogFeign create(Throwable cause) {
|
||||
log.error("日志模块服务添加调用失败:{}", cause.getMessage());
|
||||
return apiLog -> R.fail("日志模块服务添加调用失败" + cause.getMessage());
|
||||
return new RemoteLogFeign() {
|
||||
@Override
|
||||
public R<Object> saveApiLog(ApiLog apiLog) {
|
||||
log.error("日志模块api日志服务添加调用失败");
|
||||
return R.fail("日志模块api日志服务添加调用失败" + cause.getMessage());
|
||||
}
|
||||
|
||||
@Override
|
||||
public R<Object> saveReptileLog(WebmagicLog webmagicLog) {
|
||||
log.error("日志模块爬虫日志服务添加调用失败");
|
||||
return R.fail("日志模块爬虫日志服务添加调用失败" + cause.getMessage());
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -82,6 +82,8 @@
|
|||
<el-table-column label="文案标签" align="center" prop="type" :show-overflow-tooltip="true" width="120px"/>
|
||||
<el-table-column label="文案主题" align="center" prop="theme" :show-overflow-tooltip="true" width="200px"/>
|
||||
<el-table-column label="文案内容" align="center" prop="content" :show-overflow-tooltip="true"/>
|
||||
<el-table-column label="创建时间" align="center" prop="createTime" width="250px" :show-overflow-tooltip="true">
|
||||
</el-table-column>
|
||||
<el-table-column label="操作" align="center" class-name="small-padding fixed-width" width="150px">
|
||||
<template slot-scope="scope">
|
||||
<el-button
|
||||
|
|
|
|||
|
|
@ -0,0 +1,25 @@
|
|||
package com.xjs.annotation;
|
||||
|
||||
import java.lang.annotation.*;
|
||||
|
||||
/**
 * Custom annotation that marks a crawler ("reptile") method whose execution should be logged.
 *
 * <p>The attributes describe the crawler being run; the crawler-log aspect reads them when it
 * builds the log record for the annotated method.
 *
 * <p>NOTE(review): {@link ElementType#PARAMETER} is an allowed target, but the aspect's
 * pointcut only matches annotated methods - confirm whether parameter usage is intended.
 *
 * @author xiejs
 * @since 2022-02-17
 */
@Documented
@Retention(RetentionPolicy.RUNTIME)
@Target({ ElementType.PARAMETER, ElementType.METHOD })
public @interface ReptileLog {

    /**
     * Name of the crawler.
     *
     * @return the crawler name, empty by default
     */
    String name() default "";

    /**
     * URL requested by the crawler.
     *
     * @return the request URL, empty by default
     */
    String url() default "";

}
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
package com.xjs.reptileLog.controller;
|
||||
|
||||
import com.ruoyi.common.core.domain.R;
|
||||
import com.xjs.reptileLog.domain.WebmagicLog;
|
||||
import com.xjs.reptileLog.service.WebmagicLogService;
|
||||
import io.swagger.annotations.Api;
|
||||
import io.swagger.annotations.ApiOperation;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
/**
|
||||
* 爬虫日志控制器
|
||||
* @author xiejs
|
||||
* @since 2022-02-17
|
||||
*/
|
||||
@RestController
|
||||
@RequestMapping("reptileLog")
|
||||
@Api(tags = "业务模块-爬虫日志")
|
||||
public class WebmagicLogController {
|
||||
|
||||
@Autowired
|
||||
private WebmagicLogService webmagicLogService;
|
||||
|
||||
|
||||
|
||||
//-----------------------内部调用rpc------------------------
|
||||
|
||||
@PostMapping("saveForPRC")
|
||||
@ApiOperation("供AOP切面RPC远程调用")
|
||||
public R<Object> saveReptileLog(@RequestBody WebmagicLog webmagicLog) {
|
||||
boolean save = webmagicLogService.save(webmagicLog);
|
||||
return save?R.ok():R.fail();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
package com.xjs.reptileLog.domain;
|
||||
|
||||
import com.baomidou.mybatisplus.annotation.FieldFill;
|
||||
import com.baomidou.mybatisplus.annotation.TableField;
|
||||
import com.ruoyi.common.core.annotation.Excel;
|
||||
import lombok.Data;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Date;
|
||||
|
||||
/**
 * Crawler log entity (service-side record persisted by the log module).
 *
 * @author xiejs
 * @since 2022-02-17
 */
|
||||
@Data
|
||||
public class WebmagicLog implements Serializable {
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
/** Primary key. */
|
||||
private Long id;
|
||||
|
||||
/** Crawler name. */
|
||||
@Excel(name = "爬虫名称")
|
||||
private String name;
|
||||
|
||||
/** Crawler address (URL). */
|
||||
@Excel(name = "爬虫地址")
|
||||
private String url;
|
||||
|
||||
/**
 * Complexity.
 * NOTE(review): presumably the loop counter reported by the crawler task - confirm with the caller.
 */
|
||||
@Excel(name = "复杂度")
|
||||
private Long complexRate;
|
||||
|
||||
|
||||
|
||||
/** Execution result; the Excel converter expression maps 1 to success and 2 to failure. */
@Excel(name = "执行结果",readConverterExp = "1=成功,2=失败")
|
||||
private Integer status;
|
||||
|
||||
/** Time spent on the request, in milliseconds. */
|
||||
@Excel(name = "请求耗费时间")
|
||||
private Long requestTime;
|
||||
|
||||
/** Creation time; declared as an insert-time auto-fill field (FieldFill.INSERT). */
@Excel(name = "创建时间" ,dateFormat = "yyyy-MM-dd HH:mm:ss")
|
||||
@TableField(fill = FieldFill.INSERT)
|
||||
private Date createTime;
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
package com.xjs.reptileLog.mapper;
|
||||
|
||||
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
|
||||
import com.xjs.reptileLog.domain.WebmagicLog;
|
||||
|
||||
/**
 * MyBatis-Plus mapper for {@link WebmagicLog}.
 * Declares no methods of its own; everything comes from {@link BaseMapper}.
 *
 * @author xiejs
 * @since 2022-02-17
 */
|
||||
public interface WebmagicLogMapper extends BaseMapper<WebmagicLog> {
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
package com.xjs.reptileLog.service;
|
||||
|
||||
import com.baomidou.mybatisplus.extension.service.IService;
|
||||
import com.xjs.reptileLog.domain.WebmagicLog;
|
||||
|
||||
/**
 * Crawler log service interface.
 * Declares no methods of its own; everything comes from {@link IService}.
 *
 * @author xiejs
 * @since 2022-02-17
 */
|
||||
public interface WebmagicLogService extends IService<WebmagicLog> {
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
package com.xjs.reptileLog.service.impl;
|
||||
|
||||
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
|
||||
import com.xjs.reptileLog.domain.WebmagicLog;
|
||||
import com.xjs.reptileLog.mapper.WebmagicLogMapper;
|
||||
import com.xjs.reptileLog.service.WebmagicLogService;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
 * Crawler log service implementation.
 * Adds no behavior of its own; it relies on {@link ServiceImpl} bound to
 * {@link WebmagicLogMapper} and {@link WebmagicLog}.
 *
 * @author xiejs
 * @since 2022-02-17
 */
|
||||
@Service
|
||||
public class WebmagicLogServiceImpl extends ServiceImpl<WebmagicLogMapper, WebmagicLog> implements WebmagicLogService {
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,123 @@
|
|||
package com.xjs.common.aop;
|
||||
|
||||
import cn.hutool.core.date.DateUtil;
|
||||
import com.ruoyi.common.core.domain.R;
|
||||
import com.xjs.annotation.ReptileLog;
|
||||
import com.xjs.business.log.RemoteLogFeign;
|
||||
import com.xjs.business.log.domain.WebmagicLog;
|
||||
import lombok.extern.log4j.Log4j2;
|
||||
import org.aspectj.lang.ProceedingJoinPoint;
|
||||
import org.aspectj.lang.Signature;
|
||||
import org.aspectj.lang.annotation.Around;
|
||||
import org.aspectj.lang.annotation.Aspect;
|
||||
import org.aspectj.lang.annotation.Pointcut;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
import java.lang.annotation.Annotation;
|
||||
import java.lang.reflect.Method;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.temporal.ChronoUnit;
|
||||
|
||||
import static com.xjs.consts.ReqConst.ERROR;
|
||||
import static com.xjs.consts.ReqConst.SUCCESS;
|
||||
|
||||
/**
|
||||
* 爬虫日志切面类
|
||||
*
|
||||
* @author xiejs
|
||||
* @since 2022-02-17
|
||||
*/
|
||||
@Component
|
||||
@Aspect
|
||||
@Log4j2
|
||||
public class reptileLogAspect {
|
||||
|
||||
@Resource
|
||||
private RemoteLogFeign remoteLogFeign;
|
||||
|
||||
/**
|
||||
* 声明AOP签名
|
||||
*/
|
||||
@Pointcut("@annotation(com.xjs.annotation.ReptileLog)")
|
||||
public void pointcut() {
|
||||
}
|
||||
|
||||
/**
|
||||
* 环绕切入
|
||||
*/
|
||||
@Around("pointcut()")
|
||||
public Object doAround(ProceedingJoinPoint joinPoint) throws Throwable {
|
||||
Object obj = null;
|
||||
try {
|
||||
//切入前-----
|
||||
LocalDateTime localDateTime1 = DateUtil.date().toLocalDateTime();
|
||||
|
||||
obj = joinPoint.proceed();
|
||||
|
||||
//切入后-----
|
||||
LocalDateTime localDateTime2 = DateUtil.date().toLocalDateTime();
|
||||
long between = ChronoUnit.MILLIS.between(localDateTime1, localDateTime2);
|
||||
log.info("调用爬虫接口耗费时间:{}ms", between);
|
||||
|
||||
this.handle(joinPoint, between, obj);
|
||||
} catch (Throwable e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
return obj;
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理切面逻辑
|
||||
*
|
||||
* @param joinPoint 切入点
|
||||
* @param between 请求时长
|
||||
* @param obj 返回值
|
||||
*/
|
||||
private void handle(ProceedingJoinPoint joinPoint, Long between, Object obj) {
|
||||
//获取目标类名及方法名
|
||||
Signature signature = joinPoint.getSignature();
|
||||
String method = signature.getName();
|
||||
Class aClass = signature.getDeclaringType();
|
||||
Method[] methods = aClass.getMethods();
|
||||
|
||||
//根据目标的方法名判断当前方法
|
||||
for (Method thisMethod : methods) {
|
||||
if (method.equals(thisMethod.getName())) {
|
||||
Annotation[] declaredAnnotations = thisMethod.getDeclaredAnnotations();
|
||||
for (Annotation annotation : declaredAnnotations) {
|
||||
if (annotation instanceof ReptileLog) {
|
||||
String name = ((ReptileLog) annotation).name();
|
||||
String url = ((ReptileLog) annotation).url();
|
||||
|
||||
WebmagicLog webmagicLog = new WebmagicLog();
|
||||
webmagicLog.setName(name);
|
||||
webmagicLog.setUrl(url);
|
||||
webmagicLog.setRequestTime(between);
|
||||
if (obj instanceof Long) {
|
||||
webmagicLog.setComplexRate((Long) obj);
|
||||
}
|
||||
this.saveData(webmagicLog);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* 持久化保存数据
|
||||
*/
|
||||
private void saveData(WebmagicLog webmagicLog) {
|
||||
if (webmagicLog.getComplexRate() != null && webmagicLog.getComplexRate() == 0L) {
|
||||
webmagicLog.setStatus(ERROR);
|
||||
} else {
|
||||
webmagicLog.setStatus(SUCCESS);
|
||||
}
|
||||
R<Object> r = remoteLogFeign.saveReptileLog(webmagicLog);
|
||||
log.info(r.getMsg());
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -49,8 +49,8 @@ public class CopyWritingNetworkController extends MyBaseController {
|
|||
@GetMapping("taskForPRC")
|
||||
@ApiOperation("供定时任务服务RPC远程调用")
|
||||
public R copyWritingNetworkTaskForPRC() {
|
||||
copyWritingNetworkTask.reptileCopyWriting();
|
||||
return R.ok();
|
||||
Long count = copyWritingNetworkTask.reptileCopyWriting();
|
||||
return R.ok(count);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package com.xjs.copywritingNetwork.task;
|
||||
|
||||
import com.xjs.annotation.ReptileLog;
|
||||
import com.xjs.common.util.HttpUtils;
|
||||
import com.xjs.copywritingNetwork.pojo.CopyWritingNetwork;
|
||||
import com.xjs.copywritingNetwork.service.CopyWritingNetworkService;
|
||||
|
|
@ -39,25 +40,33 @@ public class CopyWritingNetworkTask {
|
|||
|
||||
private static final Pattern pattern = Pattern.compile(NUMBER_REGEX);
|
||||
|
||||
public void reptileCopyWriting() {
|
||||
@ReptileLog(name = "文案网", url = URL)
|
||||
public Long reptileCopyWriting() {
|
||||
//定义循环次数计时器
|
||||
Long count = 0L;
|
||||
|
||||
try {
|
||||
|
||||
String html = httpUtils.doGetHtml(URL);
|
||||
|
||||
Document document = Jsoup.parse(html);
|
||||
|
||||
this.parseHtmlGetUrl(document);
|
||||
count = this.parseHtmlGetUrl(document, count);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage());
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析html获取url
|
||||
*
|
||||
* @param document
|
||||
* @param document dom
|
||||
* @param count 循环次数
|
||||
* @return 返回循环次数
|
||||
*/
|
||||
private void parseHtmlGetUrl(Document document) {
|
||||
private Long parseHtmlGetUrl(Document document, Long count) {
|
||||
Elements zyzt = document.getElementsByClass("zyzt");
|
||||
|
||||
Map<String, String> map = new HashMap<>();
|
||||
|
|
@ -67,19 +76,24 @@ public class CopyWritingNetworkTask {
|
|||
String text = elementA.text();
|
||||
String href = elementA.attr("href");
|
||||
map.put(text, href);
|
||||
|
||||
//计数
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
this.parseHtmlGetCopyWriting(map);
|
||||
return this.parseHtmlGetCopyWriting(map, count);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析html获取文案内容并持久化
|
||||
*
|
||||
* @param map 存放了url和名称
|
||||
* @param map 存放了url和名称
|
||||
* @param count 循环次数
|
||||
* @return 返回循环次数
|
||||
*/
|
||||
private void parseHtmlGetCopyWriting(Map<String, String> map) {
|
||||
private Long parseHtmlGetCopyWriting(Map<String, String> map, Long count) {
|
||||
ArrayList<CopyWritingNetwork> copyWritingNetworks = new ArrayList<>();
|
||||
|
||||
for (Map.Entry<String, String> entry : map.entrySet()) {
|
||||
|
|
@ -110,6 +124,9 @@ public class CopyWritingNetworkTask {
|
|||
if (StringUtils.isNotEmpty(content) && !matches) {
|
||||
copyWritingNetworks.add(copyWritingNetwork);
|
||||
}
|
||||
|
||||
//计数
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -119,6 +136,7 @@ public class CopyWritingNetworkTask {
|
|||
int i = copyWritingNetworkService.deleteRepeatData();
|
||||
log.info("删除文案网数据重复数:" + i);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -45,8 +45,8 @@ public class SinaNewsController extends MyBaseController {
|
|||
@GetMapping("taskForPRC")
|
||||
@ApiOperation("供定时任务服务RPC远程调用")
|
||||
public R sinaTaskForPRC() {
|
||||
sinaNewsTask.reptileSinaNews();
|
||||
return R.ok();
|
||||
Long count = sinaNewsTask.reptileSinaNews();
|
||||
return R.ok(count);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package com.xjs.sina.task;
|
|||
|
||||
import cn.hutool.core.collection.CollUtil;
|
||||
import com.ruoyi.common.core.utils.StringUtils;
|
||||
import com.xjs.annotation.ReptileLog;
|
||||
import com.xjs.common.util.HttpUtils;
|
||||
import com.xjs.sina.pojo.SinaNews;
|
||||
import com.xjs.sina.service.SinaNewsService;
|
||||
|
|
@ -32,25 +33,31 @@ public class SinaNewsTask {
|
|||
|
||||
public static final String URL = "https://news.sina.com.cn/";
|
||||
|
||||
public void reptileSinaNews() {
|
||||
@ReptileLog(name = "新浪新闻", url = URL)
|
||||
public Long reptileSinaNews() {
|
||||
//定义循环次数计时器
|
||||
Long count = 0L;
|
||||
|
||||
try {
|
||||
|
||||
String html = httpUtils.doGetHtml(URL);
|
||||
|
||||
Document document = Jsoup.parse(html);
|
||||
|
||||
this.parse(document);
|
||||
count = this.parse(document,count);
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage());
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析dom
|
||||
*
|
||||
* @param document dom
|
||||
* @param count 循环次数
|
||||
*/
|
||||
private void parse(Document document) {
|
||||
private Long parse(Document document,Long count) {
|
||||
try {
|
||||
//获取子链接
|
||||
Elements nav_mod_1 = document.getElementsByClass("nav-mod-1");
|
||||
|
|
@ -69,13 +76,17 @@ public class SinaNewsTask {
|
|||
for (Map.Entry<String, String> entry : entrySet) {
|
||||
String html = httpUtils.doGetHtml(entry.getValue());
|
||||
Document docChild = Jsoup.parse(html);
|
||||
this.parseChile(docChild, entry.getKey());
|
||||
|
||||
//计数
|
||||
count++;
|
||||
|
||||
count =this.parseChile(docChild, entry.getKey(),count);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
log.error(e.getMessage());
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -84,7 +95,7 @@ public class SinaNewsTask {
|
|||
* @param docChild 子
|
||||
* @param key key
|
||||
*/
|
||||
private void parseChile(Document docChild, String key) {
|
||||
private Long parseChile(Document docChild, String key,Long count) {
|
||||
try {
|
||||
Elements a = docChild.getElementsByTag("a");
|
||||
ArrayList<String> link = new ArrayList<>();
|
||||
|
|
@ -163,15 +174,21 @@ public class SinaNewsTask {
|
|||
sinaNewsList.add(sinaNews);
|
||||
}
|
||||
}
|
||||
|
||||
//计数
|
||||
count++;
|
||||
|
||||
sinaNewsService.saveBatch(sinaNewsList, 30);
|
||||
|
||||
//删除重复
|
||||
int count = sinaNewsService.deleteRepeatData();
|
||||
log.info("重复数据为:{}", count);
|
||||
int num = sinaNewsService.deleteRepeatData();
|
||||
log.info("重复数据为:{}", num);
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
log.error(e.getMessage());
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue