diff --git a/New_College.Tasks/HtmlAgSpider/NationWideNewsAgHelper.cs b/New_College.Tasks/HtmlAgSpider/NationWideNewsAgHelper.cs
index bd0fed9..5e15b64 100644
--- a/New_College.Tasks/HtmlAgSpider/NationWideNewsAgHelper.cs
+++ b/New_College.Tasks/HtmlAgSpider/NationWideNewsAgHelper.cs
@@ -20,6 +20,7 @@ namespace New_College.Tasks
private string itemUrl = "https://gaokao.chsi.com.cn";
public List HtmlCreatePageData(string provinceCode)
{
+
HtmlWeb webClient = new HtmlWeb();
var apiUrl = "https://gaokao.chsi.com.cn/wap/news/search/5018267?ps=20&ss=";
@@ -34,41 +35,48 @@ namespace New_College.Tasks
var resultlist = JsonSerializer.Deserialize(jsonData);
if (resultlist.msg.Any())
{
- resultlist.msg.ForEach(o =>
+ try
{
- Thread.Sleep(100);
- string newsUrl = $"{itemUrl}{o.uri}";
- HtmlDocument doc = webClient.Load(newsUrl);
- var inntertitle = doc.DocumentNode.SelectSingleNode("//*[@id=\"app\"]/div[2]/h2").InnerText;
- var author = doc.DocumentNode.SelectSingleNode("//*[@id=\"app\"]/div[3]/div[1]/span[2]").InnerText.Replace("来源:", "");
- var createtime = doc.DocumentNode.SelectSingleNode("//*[@id=\"app\"]/div[3]/div[1]/span[1]").InnerText;
- foreach (var linkNode in doc.DocumentNode.SelectNodes("//a[@href]") ?? new HtmlNodeCollection(null))
+ resultlist.msg.ForEach(o =>
{
- string href = linkNode.GetAttributeValue("href", "");
- if (!string.IsNullOrEmpty(href) && href.StartsWith("/"))
+ Thread.Sleep(100);
+ string newsUrl = $"{itemUrl}{o.uri}";
+ HtmlDocument doc = webClient.Load(newsUrl);
+ var inntertitle = doc.DocumentNode.SelectSingleNode("//*[@id=\"app\"]/div[2]/h2").InnerText;
+ var author = doc.DocumentNode.SelectSingleNode("//*[@id=\"app\"]/div[3]/div[1]/span[2]").InnerText.Replace("来源:", "");
+ var createtime = doc.DocumentNode.SelectSingleNode("//*[@id=\"app\"]/div[3]/div[1]/span[1]").InnerText;
+ foreach (var linkNode in doc.DocumentNode.SelectNodes("//a[@href]") ?? new HtmlNodeCollection(null))
{
- string fullUrl = "https://gaokao.chsi.com.cn" + href;
- linkNode.SetAttributeValue("href", fullUrl);
+ string href = linkNode.GetAttributeValue("href", "");
+ if (!string.IsNullOrEmpty(href) && href.StartsWith("/"))
+ {
+ string fullUrl = "https://gaokao.chsi.com.cn" + href;
+ linkNode.SetAttributeValue("href", fullUrl);
+ }
}
- }
- var innerhtml = doc.DocumentNode.SelectSingleNode("//*[@id=\"article_dnull\"]").InnerHtml;
- //if (!HtmlHelper.ReplaceHtmlTag(innerhtml).Contains("浏览器"))
- //{
+ var innerhtml = doc.DocumentNode.SelectSingleNode("//*[@id=\"article_dnull\"]").InnerHtml;
+ //if (!HtmlHelper.ReplaceHtmlTag(innerhtml).Contains("浏览器"))
+ //{
- if (innerhtml.Contains(".pdf") || innerhtml.Contains(".doc") || innerhtml.Contains(".docx") || innerhtml.Contains(".xls") || innerhtml.Contains(".xlsx"))
- {
- innerhtml = $"{innerhtml}\n若有附件详情,请至本省招生考试院下载附件!!!";
- }
+ if (innerhtml.Contains(".pdf") || innerhtml.Contains(".doc") || innerhtml.Contains(".docx") || innerhtml.Contains(".xls") || innerhtml.Contains(".xlsx"))
+ {
+ innerhtml = $"{innerhtml}\n若有附件详情,请至本省招生考试院下载附件!!!";
+ }
- list.Add(new NewsModels()
- {
- title = inntertitle,
- author = author,
- pubtime = Convert.ToDateTime(createtime),
- detail = Regex.Replace(innerhtml, @"src=""(?!https?:\/\/)(.*?)""", @"src=""https://gaokao.chsi.com.cn$1""")
+ list.Add(new NewsModels()
+ {
+ title = inntertitle,
+ author = author,
+ pubtime = Convert.ToDateTime(createtime),
+ detail = Regex.Replace(innerhtml, @"src=""(?!https?:\/\/)(.*?)""", @"src=""https://gaokao.chsi.com.cn$1""")
+ });
+ // }
});
- // }
- });
+ }
+ catch (Exception ex)
+ {
+ Console.WriteLine(ex.Message);
+ }
}
}
else