From a77c80d0236b8ba35feebcfe25d1251ec5eae7e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?old=E6=98=93?= <156663459@qq.com> Date: Tue, 9 Jul 2024 10:34:50 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=8A=93=E5=8F=96=E8=B5=84?= =?UTF-8?q?=E8=AE=AF=E6=8A=A5=E9=94=99bug=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../HostedService/JobTimedSpiderService.cs | 3 +-- New_College.Tasks/HtmlAgSpider/HtmlAgHelper.cs | 17 +++++++++++------ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/New_College.Tasks/HostedService/JobTimedSpiderService.cs b/New_College.Tasks/HostedService/JobTimedSpiderService.cs index 5ef3f47..de4a8ac 100644 --- a/New_College.Tasks/HostedService/JobTimedSpiderService.cs +++ b/New_College.Tasks/HostedService/JobTimedSpiderService.cs @@ -52,8 +52,7 @@ namespace New_College.Tasks ProvinceCode = "370000", CreateId = 1, CreateTime = c.pubtime, - - // Detail = c.detail, + Detail = c.detail, CoverImg = "https://static-data.ycymedu.com/static/newstop.png", OrderSort = 0, IsDelete = false, diff --git a/New_College.Tasks/HtmlAgSpider/HtmlAgHelper.cs b/New_College.Tasks/HtmlAgSpider/HtmlAgHelper.cs index 4feafa4..6db2dbd 100644 --- a/New_College.Tasks/HtmlAgSpider/HtmlAgHelper.cs +++ b/New_College.Tasks/HtmlAgSpider/HtmlAgHelper.cs @@ -1,5 +1,6 @@ using Aliyun.OSS.Model; using HtmlAgilityPack; +using New_College.Common.Helper; using System; using System.Collections.Generic; using System.Linq; @@ -26,18 +27,22 @@ namespace New_College.Tasks } listurls.ForEach(url => { + var doc = webClient.Load(url); var inntertitle = doc.DocumentNode.SelectSingleNode("//*[@id=\"form1\"]/div[6]/div[2]/h3").InnerText; var author = doc.DocumentNode.SelectSingleNode("//*[@id=\"form1\"]/div[6]/div[2]/em").InnerText.Split("作者:")[1].Split(" ")[0]; var createtime = doc.DocumentNode.SelectSingleNode("//*[@id=\"form1\"]/div[6]/div[2]/em").InnerText.Split("发布时间:")[1]; var innerhtml = doc.DocumentNode.SelectSingleNode("//*[@id=\"form1\"]/div[6]/div[2]/div").InnerHtml; - list.Add(new NewsModels() + if (!HtmlHelper.ReplaceHtmlTag(innerhtml).Contains("浏览器")) { - title = inntertitle, - author = author, - pubtime = Convert.ToDateTime(createtime), - detail = innerhtml.Replace("src=\"", "src=\"https://www.sdzk.cn").Replace("href=\"", "href=\"https://www.sdzk.cn") - }); + list.Add(new NewsModels() + { + title = inntertitle, + author = author, + pubtime = Convert.ToDateTime(createtime), + detail = innerhtml.Replace("src=\"", "src=\"https://www.sdzk.cn").Replace("href=\"", "href=\"https://www.sdzk.cn") + }); + } }); return list; }