bug fixed

develop
old易 2025-03-03 18:13:39 +08:00
parent 76ba3d23ba
commit edc4cf4526
1 changed file with 36 additions and 28 deletions

View File

@@ -20,6 +20,7 @@ namespace New_College.Tasks
private string itemUrl = "https://gaokao.chsi.com.cn"; private string itemUrl = "https://gaokao.chsi.com.cn";
public List<NewsModels> HtmlCreatePageData(string provinceCode) public List<NewsModels> HtmlCreatePageData(string provinceCode)
{ {
HtmlWeb webClient = new HtmlWeb(); HtmlWeb webClient = new HtmlWeb();
var apiUrl = "https://gaokao.chsi.com.cn/wap/news/search/5018267?ps=20&ss="; var apiUrl = "https://gaokao.chsi.com.cn/wap/news/search/5018267?ps=20&ss=";
@@ -34,41 +35,48 @@ namespace New_College.Tasks
var resultlist = JsonSerializer.Deserialize<GaokaoObject>(jsonData); var resultlist = JsonSerializer.Deserialize<GaokaoObject>(jsonData);
if (resultlist.msg.Any()) if (resultlist.msg.Any())
{ {
resultlist.msg.ForEach(o => try
{ {
Thread.Sleep(100); resultlist.msg.ForEach(o =>
string newsUrl = $"{itemUrl}{o.uri}";
HtmlDocument doc = webClient.Load(newsUrl);
var inntertitle = doc.DocumentNode.SelectSingleNode("//*[@id=\"app\"]/div[2]/h2").InnerText;
var author = doc.DocumentNode.SelectSingleNode("//*[@id=\"app\"]/div[3]/div[1]/span[2]").InnerText.Replace("来源:", "");
var createtime = doc.DocumentNode.SelectSingleNode("//*[@id=\"app\"]/div[3]/div[1]/span[1]").InnerText;
foreach (var linkNode in doc.DocumentNode.SelectNodes("//a[@href]") ?? new HtmlNodeCollection(null))
{ {
string href = linkNode.GetAttributeValue("href", ""); Thread.Sleep(100);
if (!string.IsNullOrEmpty(href) && href.StartsWith("/")) string newsUrl = $"{itemUrl}{o.uri}";
HtmlDocument doc = webClient.Load(newsUrl);
var inntertitle = doc.DocumentNode.SelectSingleNode("//*[@id=\"app\"]/div[2]/h2").InnerText;
var author = doc.DocumentNode.SelectSingleNode("//*[@id=\"app\"]/div[3]/div[1]/span[2]").InnerText.Replace("来源:", "");
var createtime = doc.DocumentNode.SelectSingleNode("//*[@id=\"app\"]/div[3]/div[1]/span[1]").InnerText;
foreach (var linkNode in doc.DocumentNode.SelectNodes("//a[@href]") ?? new HtmlNodeCollection(null))
{ {
string fullUrl = "https://gaokao.chsi.com.cn" + href; string href = linkNode.GetAttributeValue("href", "");
linkNode.SetAttributeValue("href", fullUrl); if (!string.IsNullOrEmpty(href) && href.StartsWith("/"))
{
string fullUrl = "https://gaokao.chsi.com.cn" + href;
linkNode.SetAttributeValue("href", fullUrl);
}
} }
} var innerhtml = doc.DocumentNode.SelectSingleNode("//*[@id=\"article_dnull\"]").InnerHtml;
var innerhtml = doc.DocumentNode.SelectSingleNode("//*[@id=\"article_dnull\"]").InnerHtml; //if (!HtmlHelper.ReplaceHtmlTag(innerhtml).Contains("浏览器"))
//if (!HtmlHelper.ReplaceHtmlTag(innerhtml).Contains("浏览器")) //{
//{
if (innerhtml.Contains(".pdf") || innerhtml.Contains(".doc") || innerhtml.Contains(".docx") || innerhtml.Contains(".xls") || innerhtml.Contains(".xlsx")) if (innerhtml.Contains(".pdf") || innerhtml.Contains(".doc") || innerhtml.Contains(".docx") || innerhtml.Contains(".xls") || innerhtml.Contains(".xlsx"))
{ {
innerhtml = $"{innerhtml}\n若有附件详情请至本省招生考试院下载附件!!!"; innerhtml = $"{innerhtml}\n若有附件详情请至本省招生考试院下载附件!!!";
} }
list.Add(new NewsModels() list.Add(new NewsModels()
{ {
title = inntertitle, title = inntertitle,
author = author, author = author,
pubtime = Convert.ToDateTime(createtime), pubtime = Convert.ToDateTime(createtime),
detail = Regex.Replace(innerhtml, @"src=""(?!https?:\/\/)(.*?)""", @"src=""https://gaokao.chsi.com.cn$1""") detail = Regex.Replace(innerhtml, @"src=""(?!https?:\/\/)(.*?)""", @"src=""https://gaokao.chsi.com.cn$1""")
});
// }
}); });
// } }
}); catch (Exception ex)
{
Console.WriteLine(ex.Message);
}
} }
} }
else else