From edc4cf4526e595b12db61210842cb0444ed18bf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?old=E6=98=93?= <156663459@qq.com> Date: Mon, 3 Mar 2025 18:13:39 +0800 Subject: [PATCH] bug fixed --- .../HtmlAgSpider/NationWideNewsAgHelper.cs | 64 +++++++++++-------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/New_College.Tasks/HtmlAgSpider/NationWideNewsAgHelper.cs b/New_College.Tasks/HtmlAgSpider/NationWideNewsAgHelper.cs index bd0fed9..5e15b64 100644 --- a/New_College.Tasks/HtmlAgSpider/NationWideNewsAgHelper.cs +++ b/New_College.Tasks/HtmlAgSpider/NationWideNewsAgHelper.cs @@ -20,6 +20,7 @@ namespace New_College.Tasks private string itemUrl = "https://gaokao.chsi.com.cn"; public List HtmlCreatePageData(string provinceCode) { + HtmlWeb webClient = new HtmlWeb(); var apiUrl = "https://gaokao.chsi.com.cn/wap/news/search/5018267?ps=20&ss="; @@ -34,41 +35,48 @@ namespace New_College.Tasks var resultlist = JsonSerializer.Deserialize(jsonData); if (resultlist.msg.Any()) { - resultlist.msg.ForEach(o => + try { - Thread.Sleep(100); - string newsUrl = $"{itemUrl}{o.uri}"; - HtmlDocument doc = webClient.Load(newsUrl); - var inntertitle = doc.DocumentNode.SelectSingleNode("//*[@id=\"app\"]/div[2]/h2").InnerText; - var author = doc.DocumentNode.SelectSingleNode("//*[@id=\"app\"]/div[3]/div[1]/span[2]").InnerText.Replace("来源:", ""); - var createtime = doc.DocumentNode.SelectSingleNode("//*[@id=\"app\"]/div[3]/div[1]/span[1]").InnerText; - foreach (var linkNode in doc.DocumentNode.SelectNodes("//a[@href]") ?? new HtmlNodeCollection(null)) + resultlist.msg.ForEach(o => { - string href = linkNode.GetAttributeValue("href", ""); - if (!string.IsNullOrEmpty(href) && href.StartsWith("/")) + Thread.Sleep(100); + string newsUrl = $"{itemUrl}{o.uri}"; + HtmlDocument doc = webClient.Load(newsUrl); + var inntertitle = doc.DocumentNode.SelectSingleNode("//*[@id=\"app\"]/div[2]/h2").InnerText; + var author = doc.DocumentNode.SelectSingleNode("//*[@id=\"app\"]/div[3]/div[1]/span[2]").InnerText.Replace("来源:", ""); + var createtime = doc.DocumentNode.SelectSingleNode("//*[@id=\"app\"]/div[3]/div[1]/span[1]").InnerText; + foreach (var linkNode in doc.DocumentNode.SelectNodes("//a[@href]") ?? new HtmlNodeCollection(null)) { - string fullUrl = "https://gaokao.chsi.com.cn" + href; - linkNode.SetAttributeValue("href", fullUrl); + string href = linkNode.GetAttributeValue("href", ""); + if (!string.IsNullOrEmpty(href) && href.StartsWith("/")) + { + string fullUrl = "https://gaokao.chsi.com.cn" + href; + linkNode.SetAttributeValue("href", fullUrl); + } } - } - var innerhtml = doc.DocumentNode.SelectSingleNode("//*[@id=\"article_dnull\"]").InnerHtml; - //if (!HtmlHelper.ReplaceHtmlTag(innerhtml).Contains("浏览器")) - //{ + var innerhtml = doc.DocumentNode.SelectSingleNode("//*[@id=\"article_dnull\"]").InnerHtml; + //if (!HtmlHelper.ReplaceHtmlTag(innerhtml).Contains("浏览器")) + //{ - if (innerhtml.Contains(".pdf") || innerhtml.Contains(".doc") || innerhtml.Contains(".docx") || innerhtml.Contains(".xls") || innerhtml.Contains(".xlsx")) - { - innerhtml = $"{innerhtml}\n若有附件详情,请至本省招生考试院下载附件!!!"; - } + if (innerhtml.Contains(".pdf") || innerhtml.Contains(".doc") || innerhtml.Contains(".docx") || innerhtml.Contains(".xls") || innerhtml.Contains(".xlsx")) + { + innerhtml = $"{innerhtml}\n若有附件详情,请至本省招生考试院下载附件!!!"; + } - list.Add(new NewsModels() - { - title = inntertitle, - author = author, - pubtime = Convert.ToDateTime(createtime), - detail = Regex.Replace(innerhtml, @"src=""(?!https?:\/\/)(.*?)""", @"src=""https://gaokao.chsi.com.cn$1""") + list.Add(new NewsModels() + { + title = inntertitle, + author = author, + pubtime = Convert.ToDateTime(createtime), + detail = Regex.Replace(innerhtml, @"src=""(?!https?:\/\/)(.*?)""", @"src=""https://gaokao.chsi.com.cn$1""") + }); + // } }); - // } - }); + } + catch (Exception ex) + { + Console.WriteLine(ex.Message); + } } } else