1、实现中关村(ZOL)手机所有页面数据的爬取,并保存到数据库
This commit is contained in:
parent
1a5624533f
commit
f22a31a333
|
|
@ -1,6 +1,8 @@
|
||||||
package com.xjs.zol.webmagic;
|
package com.xjs.zol.webmagic;
|
||||||
|
|
||||||
|
import cn.hutool.core.collection.CollUtil;
|
||||||
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
|
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
|
||||||
|
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
|
||||||
import com.xjs.zol.pojo.ZolPhone;
|
import com.xjs.zol.pojo.ZolPhone;
|
||||||
import com.xjs.zol.service.ZolPhoneService;
|
import com.xjs.zol.service.ZolPhoneService;
|
||||||
import lombok.extern.log4j.Log4j2;
|
import lombok.extern.log4j.Log4j2;
|
||||||
|
|
@ -31,12 +33,22 @@ public class ZolPhonePipeline implements Pipeline {
|
||||||
@Override
|
@Override
|
||||||
public void process(ResultItems resultItems, Task task) {
|
public void process(ResultItems resultItems, Task task) {
|
||||||
List<ZolPhone> zolPhoneList = resultItems.get("zolPhoneList");
|
List<ZolPhone> zolPhoneList = resultItems.get("zolPhoneList");
|
||||||
|
if (CollUtil.isNotEmpty(zolPhoneList)) {
|
||||||
//循环遍历集合,当对象的名称在数据库为空才插入数据
|
//循环遍历集合,当对象的名称在数据库为空才插入数据
|
||||||
for (ZolPhone zolPhone : zolPhoneList) {
|
for (ZolPhone zolPhone : zolPhoneList) {
|
||||||
ZolPhone dbData = zolPhoneService.getOne(new LambdaQueryWrapper<ZolPhone>()
|
ZolPhone dbData = zolPhoneService.getOne(new LambdaQueryWrapper<ZolPhone>()
|
||||||
.eq(ZolPhone::getPhoneName, zolPhone.getPhoneName()), false);
|
.eq(ZolPhone::getPhoneName, zolPhone.getPhoneName()), false);
|
||||||
if (Objects.isNull(dbData)) {
|
if (Objects.isNull(dbData)) {
|
||||||
zolPhoneService.save(zolPhone);
|
zolPhoneService.save(zolPhone);
|
||||||
|
} else {
|
||||||
|
//当前值与数据库热度值不相等的情况下更新数据库
|
||||||
|
if (zolPhone.getHeat().compareTo(dbData.getHeat()) != 0) {
|
||||||
|
zolPhoneService.update(new LambdaUpdateWrapper<ZolPhone>()
|
||||||
|
.eq(ZolPhone::getPhoneName, zolPhone.getPhoneName())
|
||||||
|
.set(ZolPhone::getHeat, zolPhone.getHeat()));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,10 @@
|
||||||
package com.xjs.zol.webmagic;
|
package com.xjs.zol.webmagic;
|
||||||
|
|
||||||
|
import com.ruoyi.common.core.utils.StringUtils;
|
||||||
import com.ruoyi.common.redis.service.RedisService;
|
import com.ruoyi.common.redis.service.RedisService;
|
||||||
import com.xjs.zol.pojo.ZolPhone;
|
import com.xjs.zol.pojo.ZolPhone;
|
||||||
import lombok.extern.log4j.Log4j2;
|
import lombok.extern.log4j.Log4j2;
|
||||||
|
import org.apache.commons.lang3.math.NumberUtils;
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
import us.codecraft.webmagic.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
|
|
@ -12,6 +14,7 @@ import us.codecraft.webmagic.selector.Selectable;
|
||||||
|
|
||||||
import java.math.BigDecimal;
|
import java.math.BigDecimal;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
|
@ -27,6 +30,8 @@ import static com.xjs.consts.RedisConst.REPTILE_ZOL_PHONE_COUNT;
|
||||||
@Component
|
@Component
|
||||||
public class ZolPhoneProcessor implements PageProcessor {
|
public class ZolPhoneProcessor implements PageProcessor {
|
||||||
|
|
||||||
|
public static final String URL = "https://detail.zol.com.cn/";
|
||||||
|
|
||||||
@Autowired
|
@Autowired
|
||||||
private RedisService redisService;
|
private RedisService redisService;
|
||||||
|
|
||||||
|
|
@ -38,6 +43,13 @@ public class ZolPhoneProcessor implements PageProcessor {
|
||||||
if (count == null) {
|
if (count == null) {
|
||||||
count = 0;
|
count = 0;
|
||||||
}
|
}
|
||||||
|
//获取其他页面放入队列中
|
||||||
|
//等待爬虫的页面后缀
|
||||||
|
String html_href = page.getHtml().css(".page-box > .pagebar > .next", "href").get();
|
||||||
|
|
||||||
|
Thread.sleep(100);
|
||||||
|
|
||||||
|
page.addTargetRequests(Collections.singletonList(html_href));
|
||||||
|
|
||||||
List<ZolPhone> zolPhoneList = new ArrayList<>();
|
List<ZolPhone> zolPhoneList = new ArrayList<>();
|
||||||
|
|
||||||
|
|
@ -55,7 +67,7 @@ public class ZolPhoneProcessor implements PageProcessor {
|
||||||
//获取手机的详情页面url
|
//获取手机的详情页面url
|
||||||
String href = li.css("li > .pic", "href").get();
|
String href = li.css("li > .pic", "href").get();
|
||||||
|
|
||||||
zolPhone.setDetailPage("https://detail.zol.com.cn/" + href);
|
zolPhone.setDetailPage(URL + href);
|
||||||
|
|
||||||
//获取手机的名称
|
//获取手机的名称
|
||||||
String phoneName = li.css("li > h3 > a", "text").get();
|
String phoneName = li.css("li > h3 > a", "text").get();
|
||||||
|
|
@ -68,14 +80,30 @@ public class ZolPhoneProcessor implements PageProcessor {
|
||||||
//获取手机的参考价
|
//获取手机的参考价
|
||||||
String price = li.css("li > .price-row .price-type", "text").get();
|
String price = li.css("li > .price-row .price-type", "text").get();
|
||||||
//排除无用数据
|
//排除无用数据
|
||||||
if ("概念产品".equals(price)) {
|
if (StringUtils.isNotBlank(price)) {
|
||||||
|
//检查是否是数字
|
||||||
|
boolean creatable = NumberUtils.isCreatable(price);
|
||||||
|
if (creatable) {
|
||||||
|
zolPhone.setPrice(new BigDecimal(price));
|
||||||
|
} else {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
zolPhone.setPrice(new BigDecimal(price));
|
|
||||||
|
|
||||||
//获取手机的评分
|
//获取手机的评分
|
||||||
String heat = li.css("li > .comment-row > .score", "text").get();
|
String heat = li.css("li > .comment-row > .score", "text").get();
|
||||||
|
if (StringUtils.isNotBlank(heat)) {
|
||||||
|
boolean creatable = NumberUtils.isCreatable(price);
|
||||||
|
if (creatable) {
|
||||||
zolPhone.setHeat(new BigDecimal(heat));
|
zolPhone.setHeat(new BigDecimal(heat));
|
||||||
|
} else {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
//获取手机图片的地址
|
//获取手机图片的地址
|
||||||
String picture = li.css("li > .pic > img", ".src").get();
|
String picture = li.css("li > .pic > img", ".src").get();
|
||||||
|
|
@ -87,7 +115,7 @@ public class ZolPhoneProcessor implements PageProcessor {
|
||||||
count++;
|
count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
page.putField("zolPhoneList",zolPhoneList);
|
page.putField("zolPhoneList", zolPhoneList);
|
||||||
|
|
||||||
redisService.setCacheObject(REPTILE_ZOL_PHONE_COUNT, count);
|
redisService.setCacheObject(REPTILE_ZOL_PHONE_COUNT, count);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue