Revert "1、爬虫任务类中url常量抽出一个常量类"

This reverts commit 899de00d9f.
This commit is contained in:
xjs 2022-02-17 08:45:42 +08:00
parent 899de00d9f
commit cf8b312c88
8 changed files with 18 additions and 51 deletions

View File

@ -1,9 +1,9 @@
package com.xjs.consts; package com.xjs.consts;
/** /**
* api预警处理常量
* @author xiejs * @author xiejs
* @since 2022-01-07 * @desc api预警处理常量
* @create 2022-01-07
*/ */
public class ApiWarnHandleConst { public class ApiWarnHandleConst {
//已处理 //已处理

View File

@ -1,9 +1,9 @@
package com.xjs.consts; package com.xjs.consts;
/** /**
* 各个数据平台常量类
* @author xiejs * @author xiejs
* @since 2021-12-28 * @desc 各个数据平台常量类
* @create 2021-12-28
*/ */
public class CopyWritingConst { public class CopyWritingConst {

View File

@ -1,9 +1,9 @@
package com.xjs.consts; package com.xjs.consts;
/** /**
* 英语单词常量类
* @author xiejs * @author xiejs
* @since 2021-12-31 * @desc 英语单词常量类
* @create 2021-12-31
*/ */
public class EnglishWordConst { public class EnglishWordConst {

View File

@ -27,9 +27,4 @@ public class RegexConst {
* ip地址v4v6正则 * ip地址v4v6正则
*/ */
public static final String IP_REGEX ="^((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)$"; public static final String IP_REGEX ="^((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)$";
/**
* 数字校验正则
*/
public static final String NUMBER_REGEX= "[0-9]*";
} }

View File

@ -1,24 +0,0 @@
package com.xjs.consts;
/**
 * Constants for the URLs targeted by the crawler tasks.
 *
 * <p>This class only holds constants; it is {@code final} and cannot be instantiated.
 *
 * @author xiejs
 * @since 2022-02-16
 */
public final class ReptileUrlConst {

    /**
     * Sina News website.
     */
    public static final String SINA_NEWS_URL = "https://news.sina.com.cn/";

    /**
     * Copywriting site (wenanwang.com).
     */
    public static final String COPY_WRITING_NETWORK_URL = "https://www.wenanwang.com/";

    /**
     * Prevents instantiation: there is no instance state, only static constants.
     */
    private ReptileUrlConst() {
    }
}

View File

@ -1,9 +1,9 @@
package com.xjs.consts; package com.xjs.consts;
/** /**
* 请求是否成功常量
* @author xiejs * @author xiejs
* @since 2021-12-26 * @desc 请求是否成功常量
* @create 2021-12-26
*/ */
public class ReqConst { public class ReqConst {
public static final Integer SUCCESS = 1; public static final Integer SUCCESS = 1;

View File

@ -18,9 +18,6 @@ import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import static com.xjs.consts.RegexConst.NUMBER_REGEX;
import static com.xjs.consts.ReptileUrlConst.COPY_WRITING_NETWORK_URL;
/** /**
* 文案网爬虫任务 url:https://www.wenanwang.com/ * 文案网爬虫任务 url:https://www.wenanwang.com/
* *
@ -37,21 +34,23 @@ public class CopyWritingNetworkTask {
private CopyWritingNetworkService copyWritingNetworkService; private CopyWritingNetworkService copyWritingNetworkService;
private static final Pattern pattern = Pattern.compile(NUMBER_REGEX); public static final String URL = "https://www.wenanwang.com/";
@Scheduled(fixedDelay = 1000 * 5 * 60 * 10) private static Pattern pattern = Pattern.compile("[0-9]*");
@Scheduled(fixedDelay = 1000 * 5)
public void reptileCopyWriting() { public void reptileCopyWriting() {
try { try {
String html = httpUtils.doGetHtml(COPY_WRITING_NETWORK_URL); String html = httpUtils.doGetHtml(URL);
Document document = Jsoup.parse(html); Document document = Jsoup.parse(html);
this.parseHtmlGetUrl(document); this.parseHtmlGetUrl(document);
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
} finally { }finally {
int i = copyWritingNetworkService.deleteRepeatData(); int i = copyWritingNetworkService.deleteRepeatData();
log.info("删除文案网数据重复数:" + i); log.info("删除文案网数据重复数:"+i);
} }
} }
@ -93,7 +92,7 @@ public class CopyWritingNetworkTask {
for (Element element : a) { for (Element element : a) {
String href = element.attr("href"); String href = element.attr("href");
String newUrl = COPY_WRITING_NETWORK_URL + href; String newUrl = URL + href;
String cw = httpUtils.doGetHtml(newUrl); String cw = httpUtils.doGetHtml(newUrl);
Document cwDocument = Jsoup.parse(cw); Document cwDocument = Jsoup.parse(cw);

View File

@ -16,8 +16,6 @@ import org.springframework.stereotype.Component;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import static com.xjs.consts.ReptileUrlConst.SINA_NEWS_URL;
/** /**
* 新浪新闻爬虫任务 * 新浪新闻爬虫任务
* @author xiejs * @author xiejs
@ -32,12 +30,11 @@ public class SinaNewsTask {
@Autowired @Autowired
private SinaNewsService sinaNewsService; private SinaNewsService sinaNewsService;
public void reptileSinaNews() { public void reptileSinaNews() {
try { try {
String url = "https://news.sina.com.cn/";
String html = httpUtils.doGetHtml(SINA_NEWS_URL); String html = httpUtils.doGetHtml(url);
Document document = Jsoup.parse(html); Document document = Jsoup.parse(html);