parent
899de00d9f
commit
cf8b312c88
|
|
@ -1,9 +1,9 @@
|
|||
package com.xjs.consts;
|
||||
|
||||
/**
|
||||
* api预警处理常量
|
||||
* @author xiejs
|
||||
* @since 2022-01-07
|
||||
* @desc api预警处理常量
|
||||
* @create 2022-01-07
|
||||
*/
|
||||
public class ApiWarnHandleConst {
|
||||
//已处理
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
package com.xjs.consts;
|
||||
|
||||
/**
|
||||
* 各个数据平台常量类
|
||||
* @author xiejs
|
||||
* @since 2021-12-28
|
||||
* @desc 各个数据平台常量类
|
||||
* @create 2021-12-28
|
||||
*/
|
||||
public class CopyWritingConst {
|
||||
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
package com.xjs.consts;
|
||||
|
||||
/**
|
||||
* 英语单词常量类
|
||||
* @author xiejs
|
||||
* @since 2021-12-31
|
||||
* @desc 英语单词常量类
|
||||
* @create 2021-12-31
|
||||
*/
|
||||
public class EnglishWordConst {
|
||||
|
||||
|
|
|
|||
|
|
@ -27,9 +27,4 @@ public class RegexConst {
|
|||
* IPv4 address regex (the pattern below matches dotted-quad IPv4 only; it does not match IPv6)
|
||||
*/
|
||||
public static final String IP_REGEX ="^((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)$";
|
||||
|
||||
/**
|
||||
* Digit-only validation regex (note: "[0-9]*" also matches the empty string)
|
||||
*/
|
||||
public static final String NUMBER_REGEX= "[0-9]*";
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,24 +0,0 @@
|
|||
package com.xjs.consts;
|
||||
|
||||
/**
|
||||
* Constant class holding the website URLs used by the crawler (web-scraping) tasks.
|
||||
* @author xiejs
|
||||
* @since 2022-02-16
|
||||
*/
|
||||
public class ReptileUrlConst {
|
||||
|
||||
/**
|
||||
* Sina News website URL.
|
||||
*/
|
||||
public static final String SINA_NEWS_URL = "https://news.sina.com.cn/";
|
||||
|
||||
/**
|
||||
* Copywriting network (wenanwang) website URL.
|
||||
*/
|
||||
public static final String COPY_WRITING_NETWORK_URL = "https://www.wenanwang.com/";
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -1,9 +1,9 @@
|
|||
package com.xjs.consts;
|
||||
|
||||
/**
|
||||
* 请求是否成功常量
|
||||
* @author xiejs
|
||||
* @since 2021-12-26
|
||||
* @desc 请求是否成功常量
|
||||
* @create 2021-12-26
|
||||
*/
|
||||
public class ReqConst {
|
||||
public static final Integer SUCCESS = 1;
|
||||
|
|
|
|||
|
|
@ -18,9 +18,6 @@ import java.util.HashMap;
|
|||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static com.xjs.consts.RegexConst.NUMBER_REGEX;
|
||||
import static com.xjs.consts.ReptileUrlConst.COPY_WRITING_NETWORK_URL;
|
||||
|
||||
/**
|
||||
* 文案网爬虫任务 url:https://www.wenanwang.com/
|
||||
*
|
||||
|
|
@ -37,21 +34,23 @@ public class CopyWritingNetworkTask {
|
|||
private CopyWritingNetworkService copyWritingNetworkService;
|
||||
|
||||
|
||||
private static final Pattern pattern = Pattern.compile(NUMBER_REGEX);
|
||||
public static final String URL = "https://www.wenanwang.com/";
|
||||
|
||||
@Scheduled(fixedDelay = 1000 * 5 * 60 * 10)
|
||||
private static Pattern pattern = Pattern.compile("[0-9]*");
|
||||
|
||||
@Scheduled(fixedDelay = 1000 * 5)
|
||||
public void reptileCopyWriting() {
|
||||
try {
|
||||
String html = httpUtils.doGetHtml(COPY_WRITING_NETWORK_URL);
|
||||
String html = httpUtils.doGetHtml(URL);
|
||||
|
||||
Document document = Jsoup.parse(html);
|
||||
|
||||
this.parseHtmlGetUrl(document);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
}finally {
|
||||
int i = copyWritingNetworkService.deleteRepeatData();
|
||||
log.info("删除文案网数据重复数:" + i);
|
||||
log.info("删除文案网数据重复数:"+i);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -93,7 +92,7 @@ public class CopyWritingNetworkTask {
|
|||
for (Element element : a) {
|
||||
|
||||
String href = element.attr("href");
|
||||
String newUrl = COPY_WRITING_NETWORK_URL + href;
|
||||
String newUrl = URL + href;
|
||||
|
||||
String cw = httpUtils.doGetHtml(newUrl);
|
||||
Document cwDocument = Jsoup.parse(cw);
|
||||
|
|
|
|||
|
|
@ -16,8 +16,6 @@ import org.springframework.stereotype.Component;
|
|||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static com.xjs.consts.ReptileUrlConst.SINA_NEWS_URL;
|
||||
|
||||
/**
|
||||
* 新浪新闻爬虫任务
|
||||
* @author xiejs
|
||||
|
|
@ -32,12 +30,11 @@ public class SinaNewsTask {
|
|||
@Autowired
|
||||
private SinaNewsService sinaNewsService;
|
||||
|
||||
|
||||
|
||||
public void reptileSinaNews() {
|
||||
try {
|
||||
String url = "https://news.sina.com.cn/";
|
||||
|
||||
String html = httpUtils.doGetHtml(SINA_NEWS_URL);
|
||||
String html = httpUtils.doGetHtml(url);
|
||||
|
||||
Document document = Jsoup.parse(html);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue