parent
636ee63f59
commit
bf1561d092
|
|
@ -23,7 +23,7 @@ import reactor.core.publisher.Mono;
|
|||
|
||||
/**
|
||||
* 网关鉴权
|
||||
*
|
||||
*
|
||||
* @author ruoyi
|
||||
*/
|
||||
@Component
|
||||
|
|
@ -51,6 +51,10 @@ public class AuthFilter implements GlobalFilter, Ordered
|
|||
{
|
||||
return chain.filter(exchange);
|
||||
}
|
||||
|
||||
// todo 自定义注解跳过登录验证
|
||||
|
||||
|
||||
String token = getToken(request);
|
||||
if (StringUtils.isEmpty(token))
|
||||
{
|
||||
|
|
@ -132,4 +136,4 @@ public class AuthFilter implements GlobalFilter, Ordered
|
|||
{
|
||||
return -200;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -52,9 +52,9 @@ public class ReptileConst {
|
|||
public static final String ZOL_PHONE_URL= "https://detail.zol.com.cn";
|
||||
|
||||
/**
|
||||
* 2048社区rul
|
||||
* 2048 community URL -- https://vb.haowenzhi.com/2048/ -- https://bbs9.qs2m.live/2048/
|
||||
*/
|
||||
public static final String Y_2048_COMMUNITY_URL = "https://bbs9.qs2m.live/2048/";
|
||||
public static final String Y_2048_COMMUNITY_URL = "https://vb.haowenzhi.com/2048/";
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -19,13 +19,16 @@ public class WeiXinConst {
|
|||
|
||||
public static final String REDIS_KEY_OFFICIAL = "sys_config:xjs.webmagic.official_accounts";
|
||||
|
||||
public static final String REDIS_KEY_Y_2048 = "sys_config:xjs.webmagic.y2048";
|
||||
|
||||
/**
|
||||
* 系统配置表中的key
|
||||
*/
|
||||
public static final String CONFIG_KEY = "xjs.webmagic.wechatPicture";
|
||||
|
||||
public static final String CONFIG_KEY_OFFICIAL = "xjs.webmagic.official_accounts";
|
||||
|
||||
public static final String CONFIG_KEY_OFFICIAL = "xjs:webmagic:official_accounts";
|
||||
public static final String CONFIG_KEY_Y_2048 = "xjs.webmagic.y2048";
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,28 @@
|
|||
package com.xjs.y2048community.consts;
|
||||
|
||||
/**
 * Crawler initialisation constants.
 *
 * <p>Pure constants holder: it is final and has a private constructor so it can
 * neither be instantiated nor subclassed.
 *
 * @author xiejs
 * @since 2022-06-18
 */
public final class InitConst {

    /**
     * Switch value: off.
     */
    public static final String OFF = "off";

    /**
     * Switch value: on.
     */
    public static final String ON = "on";

    /**
     * Control switch: {@code true} means on, {@code false} means off.
     */
    public static final Boolean CONTROL = true;

    private InitConst() {
        // constants only, never instantiated
    }
}
|
||||
|
|
@ -64,322 +64,9 @@
|
|||
</div>
|
||||
<div class="main-wrap">
|
||||
<div id="main">
|
||||
<style type="text/css">.warning a:hover {
|
||||
background: #F00;
|
||||
color: #fff;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
.sigline {
|
||||
margin-top: 10px;
|
||||
margin-left: 15px;
|
||||
background: url(images/wind/read/sigline.gif) right bottom no-repeat;
|
||||
margin-right: 30em;
|
||||
height: 5px;
|
||||
}
|
||||
|
||||
.signature {
|
||||
padding: 10px 15px 0;
|
||||
height: expression(this.scrollHeight>parseInt(this.currentStyle.maxHeight)?this.currentStyle.maxHeight:"auto");
|
||||
}
|
||||
|
||||
.user-pic img {
|
||||
border: 1px solid #d5e6ed;
|
||||
background: #fff;
|
||||
padding: 3px;
|
||||
}
|
||||
|
||||
.small {
|
||||
font-size: 12px
|
||||
}
|
||||
|
||||
.middle {
|
||||
font-size: 14px
|
||||
}
|
||||
|
||||
.big {
|
||||
font-size: 18px
|
||||
}
|
||||
|
||||
.dig {
|
||||
font-size: 12px;
|
||||
background-color: #ffffee;
|
||||
background-position: -22px -57px;
|
||||
padding: 0 .4em 0 1.6em;
|
||||
cursor: pointer;
|
||||
color: #666;
|
||||
border: 1px solid #ffd0a8;
|
||||
height: 18px;
|
||||
line-height: 18px;
|
||||
margin: 12px 5px 10px 15px;
|
||||
}
|
||||
|
||||
.dig:hover {
|
||||
text-decoration: none;
|
||||
border: 1px solid #f5a25c;
|
||||
color: #ff6600;
|
||||
}
|
||||
|
||||
.readbot {
|
||||
padding: 0px;
|
||||
}
|
||||
|
||||
.readbot a {
|
||||
list-style: none;
|
||||
padding: 0 0 0 1.5em;
|
||||
margin: 0;
|
||||
float: left;
|
||||
cursor: pointer;
|
||||
background: url(images/wind/read/yin.gif) no-repeat;
|
||||
width: 3.5em;
|
||||
height: 16px;
|
||||
}
|
||||
|
||||
.readbot .r-quote:hover {
|
||||
background-position: 0 0;
|
||||
}
|
||||
|
||||
.readbot .r-reply:hover {
|
||||
background-position: 0 -20px;
|
||||
}
|
||||
|
||||
.readbot .r-score:hover {
|
||||
background-position: 0 -40px;
|
||||
}
|
||||
|
||||
.readbot .r-keep:hover {
|
||||
background-position: 0 -60px;
|
||||
}
|
||||
|
||||
.readbot .r-recommend:hover {
|
||||
background-position: 0 -160px;
|
||||
}
|
||||
|
||||
.readbot .r-report:hover {
|
||||
background-position: 0 -200px;
|
||||
}
|
||||
|
||||
.readbot .r-quote {
|
||||
background-position: 0 -80px;
|
||||
}
|
||||
|
||||
.readbot .r-reply {
|
||||
background-position: 0 -100px;
|
||||
}
|
||||
|
||||
.readbot .r-score {
|
||||
background-position: 0 -120px;
|
||||
}
|
||||
|
||||
.readbot .r-keep {
|
||||
background-position: 0 -140px;
|
||||
}
|
||||
|
||||
.readbot .r-recommend {
|
||||
background-position: 0 -180px;
|
||||
}
|
||||
|
||||
.readbot .r-report {
|
||||
background-position: 0 -220px;
|
||||
}
|
||||
|
||||
.down {
|
||||
background: url(images/post/down.gif) 5px center no-repeat;
|
||||
padding: 5px 5px 5px 30px;
|
||||
border: #c5d8e8 1px solid;
|
||||
margin: 0 1em 0 0;
|
||||
line-height: 40px;
|
||||
}
|
||||
|
||||
.img-50 {
|
||||
width: 48px;
|
||||
height: 48px;
|
||||
}
|
||||
|
||||
.cates {
|
||||
margin: 0 0 10px;
|
||||
}
|
||||
|
||||
.cates .cate-list li {
|
||||
padding: 2px 0 1px 10px;
|
||||
font-weight: 500;
|
||||
color: #444444;
|
||||
list-style: none;
|
||||
}
|
||||
|
||||
.cates .cate-list em {
|
||||
font-style: normal;
|
||||
width: 100px;
|
||||
float: left;
|
||||
}
|
||||
|
||||
.cates .cate-list cite {
|
||||
font-style: normal;
|
||||
}
|
||||
|
||||
.cates input {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.cates .w {
|
||||
margin-right: 10px;
|
||||
}
|
||||
|
||||
.cates .two {
|
||||
background: #f3f9fb;
|
||||
}
|
||||
|
||||
.app-post {
|
||||
background: url(images/app-post.png);
|
||||
display: block;
|
||||
float: left;
|
||||
color: #ffffff;
|
||||
width: 78px;
|
||||
height: 26px;
|
||||
line-height: 25px;
|
||||
padding-left: 17px;
|
||||
}
|
||||
|
||||
.app-post:hover {
|
||||
text-decoration: none;
|
||||
background-position: 0 -50px;
|
||||
}
|
||||
|
||||
.flash {
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.flash img {
|
||||
height: 100%;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.flash ul {
|
||||
position: absolute;
|
||||
right: 8px;
|
||||
bottom: 8px;
|
||||
z-index: 3;
|
||||
}
|
||||
|
||||
.flash ul li {
|
||||
list-style: none;
|
||||
float: left;
|
||||
width: 18px;
|
||||
height: 13px;
|
||||
line-height: 13px;
|
||||
text-align: center;
|
||||
margin-left: 2px;
|
||||
background: #ffffff;
|
||||
}
|
||||
|
||||
.flash ul li a {
|
||||
display: block;
|
||||
width: 18px;
|
||||
height: 13px;
|
||||
font-size: 10px;
|
||||
font-family: Tahoma;
|
||||
color: #333333;
|
||||
}
|
||||
|
||||
.flash ul li a:hover, .flash ul li a.sel {
|
||||
color: #fff;
|
||||
text-decoration: none;
|
||||
background: #ffa900;
|
||||
}
|
||||
|
||||
.score {
|
||||
margin: 10px 15px;
|
||||
padding-top: 10px;
|
||||
border-top: 1px dashed #d5e6ed;
|
||||
line-height: 22px;
|
||||
}
|
||||
|
||||
.score td {
|
||||
padding: 2px 10px 2px 5px;
|
||||
}
|
||||
|
||||
.score th {
|
||||
padding: 6px 0 0;
|
||||
}
|
||||
|
||||
.score tr:hover {
|
||||
background: #f3f9fb;
|
||||
}
|
||||
|
||||
.score a {
|
||||
color: #3366cc;
|
||||
}</style>
|
||||
<div id="breadCrumb" class="cc"><a href="index.php" title="人人为我 我为人人">人人为我 我为人人</a>»<a
|
||||
href="thread.php?fid=7">图片专区</a>»<a href="thread.php?fid=27">高跟絲襪</a>»<a
|
||||
href="read.php?tid=6523984">诱惑黑丝玉足5【10P】</a><a href="read.php?fid=27&tid=6523984&toread=1"><font
|
||||
color="red"> (转到动态网页)</font></a> »
|
||||
</div>
|
||||
<style>.TOP_PD {
|
||||
width: 100%;
|
||||
text-align: center;
|
||||
display: inline-block !important
|
||||
}
|
||||
|
||||
.TOP_PD2 {
|
||||
width: 100%;
|
||||
text-align: center;
|
||||
display: inline-block !important
|
||||
}
|
||||
|
||||
.PD_TAC_BOX {
|
||||
display: inline-block;
|
||||
max-width: 100%;
|
||||
margin: 0 auto
|
||||
}
|
||||
|
||||
.PD_TAC_BOX li {
|
||||
padding: 0;
|
||||
width: auto;
|
||||
min-width: 100px;
|
||||
height: 36px;
|
||||
float: left;
|
||||
text-align: center;
|
||||
list-style-type: none;
|
||||
margin: 0px !important
|
||||
}
|
||||
|
||||
.PD_TAC_BOX {
|
||||
display: inline-block;
|
||||
max-width: 100%;
|
||||
margin: 0 auto;
|
||||
float: left
|
||||
}
|
||||
|
||||
.PD_TAC_BOX li a {
|
||||
font-size: 20px;
|
||||
font-weight: bold
|
||||
}</style>
|
||||
<div class="tac" style="margin:.5em 0 0;">
|
||||
<div class="TOP_PD">
|
||||
<div class="PD_TAC_BOX">
|
||||
<li><a href="/htm/a1.htm" target="_BLANK" style="color: red">百家美女赌场</a> </li>
|
||||
<li><a href="/htm/a4.htm" target="_BLANK" style="color: blue">重金担保皇冠</a> </li>
|
||||
<li><a href="/htm/a8.htm" target="_BLANK" style="color: green">信誉凤凰娱乐</a> </li>
|
||||
<li><a href="/htm/a5.htm" target="_BLANK" style="color: red">澳门皇冠赌场</a> </li>
|
||||
<li><a href="/htm/a6.htm" target="_BLANK" style="color: green">逢赌必赢棋牌</a> </li>
|
||||
<li><a href="/htm/a7.htm" target="_BLANK" style="color: red">王者信誉棋牌</a> </li>
|
||||
<li><a href="/htm/b4.htm" target="_BLANK" style="color: blue">开元棋牌官网</a> </li>
|
||||
<li><a href="/htm/b1.htm" target="_BLANK" style="color: green">澳门威尼斯人</a> </li>
|
||||
<li><a href="/htm/sg.htm" target="_BLANK" style="color: green">博弈专区棋牌</a> </li>
|
||||
</div>
|
||||
</div>
|
||||
<div class="TOP_PD2">
|
||||
<div class="PD_TAC_BOX">
|
||||
<li><a href="/htm/b9.htm" target="_BLANK" style="color: red">威尼斯人赌场</a> </li>
|
||||
<li><a href="/htm/a2.htm" target="_BLANK" style="color: blue">澳门葡京赌场</a> </li>
|
||||
<li><a href="/htm/b5.htm" target="_BLANK" style="color: red">9 1 福利视频</a> </li>
|
||||
<li><a href="/htm/b2.htm" target="_BLANK" style="color: green">银河老牌赌城</a> </li>
|
||||
<li><a href="/htm/b6.htm" target="_BLANK" style="color: red">同城在线约炮</a> </li>
|
||||
<li><a href="/htm/b3.htm" target="_BLANK" style="color: blue">加人约炮大群</a> </li>
|
||||
<li><a href="/htm/b7.htm" target="_BLANK" style="color: green">兼职少妇学生</a> </li>
|
||||
<li><a href="/htm/b10.htm" target="_BLANK" style="color: red">鲍鱼聚合直播</a> </li>
|
||||
<li><a href="/htm/c1.htm" target="_BLANK" style="color: blue">世界杯定投站</a></li>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<div class="t3"><span class="fr" style="margin-left:.5em"><a href="post.php?fid=27"><img
|
||||
|
|
@ -458,27 +145,8 @@
|
|||
.apd > a {
|
||||
margin-right: 10px
|
||||
}</style>
|
||||
<div class="apd"><a href="/htm/b9.htm" target="_blank"><font size="4"
|
||||
color="blue">威尼斯人</font></a><a
|
||||
href="/htm/a1.htm" target="_blank"><font size="4" color="red">亚博赌场</font></a><a
|
||||
href="/htm/b2.htm" target="_blank"><font size="4" color="blue">银河贵宾会</font></a><a
|
||||
href="/htm/b5.htm" target="_blank"><font size="4" color="red">91原创视频</font></a><a
|
||||
href="/htm/b7.htm" target="_blank"><font size="4" color="blue">同城约炮</font></a><a
|
||||
href="/htm/a4.htm" target="_blank"><font size="4" color="red">皇冠体育</font></a><a
|
||||
href="/htm/a8.htm" target="_blank"><font size="4" color="green">凤凰娱乐</font></a><a
|
||||
href="/htm/b10.htm" target="_blank"><font size="4" color="red">聚合直播</font></a><a
|
||||
href="/htm/sg.htm" target="_blank"><font size="4" color="red">牛牛三公</font></a><br><br><a
|
||||
href="/htm/a6.htm" target="_blank"><font size="4" color="red">必赢棋牌</font></a><a
|
||||
href="/htm/b4.htm" target="_blank"><font size="4" color="blue">开元棋牌</font></a><a
|
||||
href="/htm/a5.htm" target="_blank"><font size="4" color="green">皇冠赌场</font></a><a
|
||||
href="/htm/a7.htm" target="_blank"><font size="4" color="red">王者棋牌</font></a><a
|
||||
href="/htm/a2.htm" target="_blank"><font size="4" color="blue">澳门新葡京</font></a><a
|
||||
href="/htm/b6.htm" target="_blank"><font size="4" color="green">约炮大群</font></a><a
|
||||
href="/htm/b3.htm" target="_blank"><font size="4" color="red">兼职少妇</font></a><a
|
||||
href="/htm/b1.htm" target="_blank"><font size="4" color="blue">真人娱乐</font></a><a
|
||||
href="/htm/c1.htm" target="_blank"><font size="4" color="red">必博体育</font></a>
|
||||
<div class="apd">
|
||||
<div class="tpc_content">
|
||||
<div id="p_tpc" class="c"></div>
|
||||
<div class="f14" id="read_tpc">
|
||||
<ignore_js_op class="att_img"><img id="aimg_r2dflf1"
|
||||
src="https://img.picelsb.com/i/2022/06/16/r2dflf.jpg"
|
||||
|
|
@ -395,7 +395,6 @@
|
|||
<tr align="center" class="tr3 t_one">
|
||||
<td><a title="开放主题" href="state/p/27/2206/6523984.html" target="_blank">⊙</a></td>
|
||||
<td class="tal" id="td_6523984">[06-16]
|
||||
|
||||
<a href="state/p/27/2206/6523984.html" target="_blank" id="a_ajax_6523984" class="subject">诱惑黑丝玉足5【10P】</a>
|
||||
</td>
|
||||
<td class="tal y-style"><a href="u.php?action=show&uid=2783914" class="bl">丝情话欲</a>
|
||||
|
|
@ -503,7 +502,8 @@
|
|||
|
||||
<a href="state/p/27/2206/6520570.html" target="_blank" id="a_ajax_6520570" class="subject">丝袜美脚3【12p】</a>
|
||||
</td>
|
||||
<td class="tal y-style"><a href="u.php?action=show&uid=2831127" class="bl">稳定控制</a>
|
||||
<td class="tal y-style">
|
||||
<a href="u.php?action=show&uid=2831127" class="bl">稳定控制</a>
|
||||
<div class="f10 gray">2022-06-16</div>
|
||||
</td>
|
||||
<td class="tal y-style f10 gray"><span class="s3">0</span></td>
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
<div id="breadCrumb" class="cc">
|
||||
|
||||
<a href="https://vb.haowenzhi.com/2048/index.php?m=bbs" title="人人为我 我为人人">人人为我 我为人人</a> »
|
||||
<a href="thread.php?fid-273.html">美图秀秀</a> »
|
||||
<a href="thread.php?fid-277.html">COSPLAY</a>
|
||||
<span id="shortcut">
|
||||
<a style="cursor:pointer;" onclick="javascript:shortCut();" title="将本版块添加到我的书签"><img src="images/wind/thread/cancel.gif"
|
||||
align="absbottom"
|
||||
style="margin-bottom:2px;"/></a>
|
||||
</span>
|
||||
</div>
|
||||
|
|
@ -1,22 +1,197 @@
|
|||
package com.xjs.y2048community.webmagic;
|
||||
|
||||
import cn.hutool.core.collection.CollUtil;
|
||||
import cn.hutool.core.date.DatePattern;
|
||||
import cn.hutool.core.date.DateUtil;
|
||||
import cn.hutool.core.util.RandomUtil;
|
||||
import com.ruoyi.common.core.constant.HttpStatus;
|
||||
import com.ruoyi.common.core.utils.StringUtils;
|
||||
import com.ruoyi.common.redis.service.RedisService;
|
||||
import com.ruoyi.system.api.RemoteConfigService;
|
||||
import com.xjs.common.util.WeiXinUtils;
|
||||
import com.xjs.utils.RandomUtils;
|
||||
import com.xjs.weixin.consts.WeiXinConst;
|
||||
import com.xjs.y2048community.consts.InitConst;
|
||||
import lombok.extern.log4j.Log4j2;
|
||||
import org.apache.http.HttpResponse;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.impl.client.CloseableHttpClient;
|
||||
import org.apache.http.impl.client.HttpClients;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Component;
|
||||
import us.codecraft.webmagic.ResultItems;
|
||||
import us.codecraft.webmagic.Task;
|
||||
import us.codecraft.webmagic.pipeline.Pipeline;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
import static com.xjs.weixin.consts.WeiXinConst.*;
|
||||
|
||||
/**
|
||||
* 2048爬虫数据处理
|
||||
*
|
||||
* @author xiejs
|
||||
* @since 2022-06-17
|
||||
*/
|
||||
@Component
|
||||
@Log4j2
|
||||
public class Y2048communityPipeline implements Pipeline {
|
||||
@Autowired
|
||||
private RedisService redisService;
|
||||
@Resource
|
||||
private RemoteConfigService remoteConfigService;
|
||||
|
||||
|
||||
@Override
|
||||
public void process(ResultItems resultItems, Task task) {
|
||||
|
||||
List<String> srcs = resultItems.get("srcs");
|
||||
|
||||
String title = resultItems.get("title");
|
||||
|
||||
String type = resultItems.get("type");
|
||||
|
||||
if (CollUtil.isNotEmpty(srcs) && StringUtils.isNotEmpty(title) && StringUtils.isNotEmpty(type)) {
|
||||
String appendPath = this.getAppendPath(title, type);
|
||||
|
||||
File file = new File(appendPath);
|
||||
if (file.exists()) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (String link : srcs) {
|
||||
InputStream inputStream = null;
|
||||
|
||||
// 创建GET请求
|
||||
CloseableHttpClient httpClient = HttpClients.createDefault();
|
||||
HttpGet httpGet = null;
|
||||
try {
|
||||
httpGet = new HttpGet(link);
|
||||
HttpResponse response = httpClient.execute(httpGet);
|
||||
if (response.getStatusLine().getStatusCode() == HttpStatus.SUCCESS) {
|
||||
inputStream = response.getEntity().getContent();
|
||||
|
||||
//文件小于30kb则不写入
|
||||
long contentLength = response.getEntity().getContentLength();
|
||||
long kb = contentLength / 1024;
|
||||
if (SIZE_KB > kb) {
|
||||
continue;
|
||||
}
|
||||
|
||||
//拼接文件后缀
|
||||
String suffix;
|
||||
if (link.contains(JPEG)) {
|
||||
suffix = JPEG;
|
||||
} else if (link.contains(JPG)) {
|
||||
suffix = JPG;
|
||||
} else if (link.contains(PNG)) {
|
||||
suffix = PNG;
|
||||
} else if (link.contains(GIF)) {
|
||||
continue;
|
||||
} else {
|
||||
suffix = JPG;
|
||||
}
|
||||
|
||||
String chars = "ABCDEFGHIZKLMNOPQRSTUVWXYZ";
|
||||
char c = chars.charAt((int) (Math.random() * 1));
|
||||
|
||||
|
||||
String fileName = RandomUtils.randomZm() + RandomUtil.randomLong(100000, 1000000) + DOT + suffix;
|
||||
|
||||
this.downloadPicture(inputStream, this.getPath(), fileName, title, type);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
try {
|
||||
if (httpGet != null) {
|
||||
httpGet.clone();
|
||||
}
|
||||
} catch (CloneNotSupportedException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
try {
|
||||
httpClient.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
try {
|
||||
if (inputStream != null) {
|
||||
inputStream.close();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.error(e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* 链接url下载图片
|
||||
*
|
||||
* @param inputStream 输入流
|
||||
* @param path 磁盘地址
|
||||
* @param fileName 文件名称
|
||||
* @param title 标题名称
|
||||
*/
|
||||
private void downloadPicture(InputStream inputStream, String path, String fileName, String title, String type) {
|
||||
String appendPath = this.getAppendPath(title, type);
|
||||
WeiXinUtils.downloadPicture(inputStream, path, fileName, title, appendPath);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 从缓存 -> 数据库 -> 内存 中获取磁盘地址
|
||||
*
|
||||
* @return 地址
|
||||
*/
|
||||
private String getPath() {
|
||||
//磁盘路径
|
||||
String path;
|
||||
//判断redis中是否存在
|
||||
Boolean hasKey = redisService.hasKey(REDIS_KEY_Y_2048);
|
||||
if (hasKey) {
|
||||
path = redisService.getCacheObject(REDIS_KEY_Y_2048);
|
||||
} else {
|
||||
String data = remoteConfigService.getConfigKeyForRPC(CONFIG_KEY_Y_2048).getData();
|
||||
if (StringUtils.isNotEmpty(data)) {
|
||||
path = data;
|
||||
} else {
|
||||
path = WeiXinConst.PATH;
|
||||
}
|
||||
}
|
||||
return path;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取拼接后的磁盘路径
|
||||
*
|
||||
* @param title 拼接的最后的文件夹
|
||||
* @return str
|
||||
*/
|
||||
private String getAppendPath(String title, String type) {
|
||||
title = WeiXinUtils.filterTitle(title);
|
||||
|
||||
String path = this.getPath() + File.separator + DateUtil.format(new Date(),
|
||||
DatePattern.NORM_MONTH_PATTERN) + File.separator + type + File.separator + title;
|
||||
|
||||
if (InitConst.CONTROL) {
|
||||
path = this.getPath() + File.separator + type;
|
||||
}
|
||||
|
||||
return path;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,11 @@
|
|||
package com.xjs.y2048community.webmagic;
|
||||
|
||||
import cn.hutool.core.collection.CollUtil;
|
||||
import cn.hutool.core.date.DateUtil;
|
||||
import com.ruoyi.common.core.utils.StringUtils;
|
||||
import com.ruoyi.common.redis.service.RedisService;
|
||||
import com.xjs.consts.ReptileConst;
|
||||
import com.xjs.y2048community.consts.InitConst;
|
||||
import lombok.extern.log4j.Log4j2;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
|
@ -10,7 +14,9 @@ import us.codecraft.webmagic.Site;
|
|||
import us.codecraft.webmagic.processor.PageProcessor;
|
||||
import us.codecraft.webmagic.selector.Selectable;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import static com.xjs.consts.RedisConst.REPTILE_2048_COMMUNITY_COUNT;
|
||||
|
|
@ -18,6 +24,7 @@ import static com.xjs.consts.ReptileConst.Y_2048_COMMUNITY_URL;
|
|||
|
||||
/**
|
||||
* 社区 爬虫处理
|
||||
*
|
||||
* @author xiejs
|
||||
* @since 2022-06-17
|
||||
*/
|
||||
|
|
@ -31,6 +38,13 @@ public class Y2048communityProcessor implements PageProcessor {
|
|||
@Override
|
||||
public void process(Page page) {
|
||||
|
||||
// 开关获取所有页面
|
||||
|
||||
// 根据每天日期获取最新
|
||||
|
||||
// todo 网络情况好的时候爬取所有页面
|
||||
|
||||
|
||||
try {
|
||||
Integer count = redisService.getCacheObject(REPTILE_2048_COMMUNITY_COUNT);
|
||||
if (count == null) {
|
||||
|
|
@ -38,34 +52,34 @@ public class Y2048communityProcessor implements PageProcessor {
|
|||
}
|
||||
|
||||
List<Selectable> trs = page.getHtml().css("#content .tr3").nodes();
|
||||
for (Selectable tr : trs) {
|
||||
List<String> allText = tr.css("th > span > a", "text").all();
|
||||
|
||||
for (String text : allText) {
|
||||
if ("COSPLAY".equalsIgnoreCase(text)) {
|
||||
String href = tr.css("th > span > a", "href").get();
|
||||
if (CollUtil.isNotEmpty(trs)) {
|
||||
for (Selectable tr : trs) {
|
||||
List<Selectable> selectables = tr.css("th > span > a").nodes();
|
||||
|
||||
//获取需要爬取的路径
|
||||
page.addTargetRequest(Y_2048_COMMUNITY_URL+href);
|
||||
for (Selectable selectable : selectables) {
|
||||
if (selectable.css("a", "text").get().equals("COSPLAY") ||
|
||||
selectable.css("a", "text").get().equals("高跟絲襪")
|
||||
) {
|
||||
|
||||
//String href = selectable.css("a", "href").get();
|
||||
|
||||
String href = selectable.links().get();
|
||||
|
||||
//获取需要爬取的路径
|
||||
page.addTargetRequest(href);
|
||||
}
|
||||
}
|
||||
if ("高跟絲襪".equalsIgnoreCase(text)) {
|
||||
String href = tr.css("th > span > a", "href").get();
|
||||
|
||||
//获取需要爬取的路径
|
||||
page.addTargetRequest(Y_2048_COMMUNITY_URL+href);
|
||||
}
|
||||
|
||||
count += selectables.size();
|
||||
|
||||
}
|
||||
|
||||
count += allText.size();
|
||||
|
||||
}
|
||||
|
||||
this.handlerListPage(page, count);
|
||||
|
||||
|
||||
|
||||
|
||||
redisService.setCacheObject(REPTILE_2048_COMMUNITY_COUNT, count);
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage());
|
||||
} finally {
|
||||
|
|
@ -74,24 +88,145 @@ public class Y2048communityProcessor implements PageProcessor {
|
|||
}
|
||||
|
||||
/**
|
||||
* 处理列表页面
|
||||
* @param page 页面
|
||||
* 处理列表页面
|
||||
*
|
||||
* @param page 页面
|
||||
* @param count 总数
|
||||
*/
|
||||
private void handlerListPage(Page page, Integer count) {
|
||||
page.getHtml().css("#ajaxtable > .tr3 > .tal >");
|
||||
try {
|
||||
String div = page.getHtml().css("#ajaxtable").get();
|
||||
|
||||
String bread = page.getHtml().css("#breadCrumb").get();
|
||||
|
||||
//获取所有分页页面
|
||||
if (InitConst.CONTROL) {
|
||||
if (StringUtils.isNotEmpty(bread)) {
|
||||
if ((bread.contains("COSPLAY") || bread.contains("高跟絲襪")) && !page.getUrl().get().contains("-page-")) {
|
||||
String pages = page.getHtml().css(".pagesone > span", "text").get();
|
||||
if (StringUtils.isNotEmpty(pages)) {
|
||||
if (pages.contains("/")) {
|
||||
String[] split = pages.split("/");
|
||||
String num = split[split.length - 1];
|
||||
|
||||
int numInt = Integer.parseInt(num);
|
||||
for (int i = 2; i <= numInt; i++) {
|
||||
String url = page.getUrl().get();
|
||||
String[] splitUrl = url.split(".html");
|
||||
//String newUrl = splitUrl[0] + "-page-" + i + ".html";
|
||||
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append(splitUrl[0]).append("-page-").append(i).append(".html");
|
||||
|
||||
page.addTargetRequest(sb.toString());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (StringUtils.isNotEmpty(bread)) {
|
||||
if ((bread.contains("COSPLAY") || bread.contains("高跟絲襪")) && !page.getUrl().get().contains("-page-")) {
|
||||
String pages = page.getHtml().css(".pagesone > span", "text").get();
|
||||
if (StringUtils.isNotEmpty(pages)) {
|
||||
String url = page.getUrl().get();
|
||||
String[] splitUrl = url.split(".html");
|
||||
List<String> asList = Arrays.asList(splitUrl[0] + "-page-2.html", splitUrl[0] + "-page-3.html");
|
||||
page.addTargetRequests(asList);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (StringUtils.isNotEmpty(div)) {
|
||||
|
||||
/*List<Selectable> subjects = page.getHtml().css("#ajaxtable .tr3 .tal").nodes();
|
||||
|
||||
for (int i = 0; i < subjects.size(); i++) {
|
||||
if (i <= 18) {
|
||||
continue;
|
||||
}
|
||||
|
||||
List<String> hrefs = subjects.get(i).css(".subject", "href").all();
|
||||
|
||||
//subjects.get(i).css()
|
||||
|
||||
List<String> collect = hrefs.stream().map(href -> Y_2048_COMMUNITY_URL + href).collect(Collectors.toList());
|
||||
|
||||
count += collect.size();
|
||||
|
||||
page.addTargetRequests(collect);
|
||||
}*/
|
||||
|
||||
List<Selectable> trs = page.getHtml().css("#ajaxtable .tr3 ").nodes();
|
||||
|
||||
for (Selectable tr : trs) {
|
||||
String date = tr.css("td:nth-child(3) div", "text").get();
|
||||
|
||||
//不是当天的数据页面跳出
|
||||
if (StringUtils.isNotEmpty(date) && !InitConst.CONTROL) {
|
||||
if (!DateUtil.today().equals(date)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
String href = tr.css("td .subject", "href").get();
|
||||
if (StringUtils.isNotEmpty(href)) {
|
||||
String url = Y_2048_COMMUNITY_URL + href;
|
||||
page.addTargetRequest(url);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
//this.handlerDetailPage(page, count);
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理详情页面
|
||||
*
|
||||
* @param page 页面
|
||||
* @param count 总数
|
||||
*/
|
||||
private void handlerDetailPage(Page page, Integer count) {
|
||||
try {
|
||||
String div = page.getHtml().css("#read_tpc").get();
|
||||
|
||||
if (StringUtils.isNotEmpty(div)) {
|
||||
//获取图片链接
|
||||
List<String> srcs = page.getHtml().css("#read_tpc > .att_img > img", "src").all();
|
||||
page.putField("srcs", srcs);
|
||||
|
||||
//获取标题
|
||||
String title = page.getHtml().css("#subject_tpc", "text").get();
|
||||
page.putField("title", title);
|
||||
|
||||
//获取分类
|
||||
String type = Optional.ofNullable(page.getHtml().css("#breadCrumb > a:nth-child(3)", "text").get()).orElse("未知");
|
||||
page.putField("type", type);
|
||||
}
|
||||
|
||||
} finally {
|
||||
redisService.setCacheObject(REPTILE_2048_COMMUNITY_COUNT, count);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Site getSite() {
|
||||
return Site.me()
|
||||
.addHeader(ReptileConst.headerKey, ReptileConst.headerValue)
|
||||
.addHeader("Connection", "close")
|
||||
.setCharset("utf8")//设置字符编码
|
||||
.setTimeOut(5000)//设置超时时间
|
||||
.setRetrySleepTime(500)//设置重试间隔时间
|
||||
.setCycleRetryTimes(5)//设置重试次数
|
||||
.setTimeOut(10000)//设置超时时间
|
||||
.setRetrySleepTime(100)//设置重试间隔时间
|
||||
.setCycleRetryTimes(2)//设置重试次数
|
||||
.setSleepTime(10)//设置两个页面之间的间隔时间
|
||||
;
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue