using Aliyun.OSS.Model;
using HtmlAgilityPack;
using New_College.Common.Helper;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace New_College.Tasks
{
    /// <summary>
    /// Scrapes the news list at www.sdzk.cn (BCID=2) with HtmlAgilityPack and
    /// turns each linked article page into a <see cref="NewsModels"/> instance.
    /// </summary>
    public class HtmlAgHelper
    {
        /// <summary>Scheme and host that relative links on the site are resolved against.</summary>
        private const string SiteRoot = "https://www.sdzk.cn";

        /// <summary>Address of the news list page that is scraped for article links.</summary>
        private const string NewsListUrl = SiteRoot + "/NewsList.aspx?BCID=2";

        /// <summary>XPath selecting the list items of the news list page.</summary>
        private const string ListItemXPath =
            "//*[@id=\"ctl00_ContentPlaceHolder1_ctl00_ContentPlaceHolder1_RadListView1Panel\"]/ul/li";

        /// <summary>XPath of the container holding title (h3), meta line (em) and body (div) of an article page.</summary>
        private const string ArticleXPath = "//*[@id=\"form1\"]/div[6]/div[2]";

        /// <summary>Label that precedes the author name in the article meta line.</summary>
        private const string AuthorLabel = "作者:";

        /// <summary>Label that precedes the publication time in the article meta line.</summary>
        private const string PublishTimeLabel = "发布时间:";

        /// <summary>Articles whose tag-stripped body contains this word are skipped.</summary>
        private const string SkipKeyword = "浏览器";

        /// <summary>Matches double-quoted <c>src="..."</c> and <c>href="..."</c> attribute assignments.</summary>
        private static readonly Regex UrlAttributeRegex =
            new Regex("(?<attr>src|href)=\"(?<url>[^\"]*)\"", RegexOptions.Compiled);

        /// <summary>Matches a leading URI scheme such as <c>http:</c>, <c>mailto:</c> or <c>data:</c>.</summary>
        private static readonly Regex SchemeRegex =
            new Regex(@"^[A-Za-z][A-Za-z0-9+.\-]*:", RegexOptions.Compiled);

        /// <summary>
        /// Downloads the news list page, follows every article link found in it and
        /// collects the parsed articles.
        /// </summary>
        /// <returns>
        /// The successfully parsed articles; an empty list when the list page contains no
        /// matching items. Articles whose page lacks the expected structure, whose date
        /// cannot be parsed, or whose text contains the skip keyword are omitted.
        /// </returns>
        public List<NewsModels> HtmlCreatePageData()
        {
            var articles = new List<NewsModels>();
            var web = new HtmlWeb();

            HtmlDocument listDocument = web.Load(NewsListUrl);
            HtmlNodeCollection listItems = listDocument.DocumentNode.SelectNodes(ListItemXPath);
            if (listItems == null)
            {
                // SelectNodes yields null (not an empty collection) when nothing matches.
                return articles;
            }

            var articleUrls = new List<string>();
            foreach (HtmlNode item in listItems)
            {
                // The link is expected to be the first child of the list item.
                string href = item.FirstChild?.Attributes["href"]?.Value;
                if (string.IsNullOrEmpty(href))
                {
                    continue;
                }

                string articleUrl = string.Format("https://www.sdzk.cn/{0}", href);
                Console.WriteLine(string.Format("{0}|{1}", articleUrl, item.InnerText));
                articleUrls.Add(articleUrl);
            }

            foreach (string articleUrl in articleUrls)
            {
                NewsModels article = TryParseArticle(web, articleUrl);
                if (article != null)
                {
                    articles.Add(article);
                }
            }

            return articles;
        }

        /// <summary>Loads one article page and extracts title, author, publication time and body.</summary>
        /// <param name="web">Loader used to download the page.</param>
        /// <param name="url">Absolute address of the article page.</param>
        /// <returns>The parsed article, or <c>null</c> when the page is skipped.</returns>
        private static NewsModels TryParseArticle(HtmlWeb web, string url)
        {
            HtmlNode root = web.Load(url).DocumentNode;
            HtmlNode titleNode = root.SelectSingleNode(ArticleXPath + "/h3");
            HtmlNode metaNode = root.SelectSingleNode(ArticleXPath + "/em");
            HtmlNode bodyNode = root.SelectSingleNode(ArticleXPath + "/div");
            if (titleNode == null || metaNode == null || bodyNode == null)
            {
                Console.WriteLine(string.Format("Skipped {0}: unexpected page structure", url));
                return null;
            }

            string bodyHtml = bodyNode.InnerHtml;
            // NOTE(review): ReplaceHtmlTag is a project helper, presumably stripping markup so
            // the keyword is only searched in visible text — confirm against HtmlHelper.
            if (HtmlHelper.ReplaceHtmlTag(bodyHtml).Contains(SkipKeyword))
            {
                return null;
            }

            string metaText = metaNode.InnerText;
            string authorPart = SegmentAfter(metaText, AuthorLabel);
            string timePart = SegmentAfter(metaText, PublishTimeLabel);
            if (authorPart == null || timePart == null)
            {
                Console.WriteLine(string.Format("Skipped {0}: author or publish time label missing", url));
                return null;
            }

            // Same current-culture parsing as Convert.ToDateTime, but without throwing.
            if (!DateTime.TryParse(timePart, out DateTime publishedAt))
            {
                Console.WriteLine(string.Format("Skipped {0}: unparseable publish time '{1}'", url, timePart));
                return null;
            }

            return new NewsModels
            {
                title = titleNode.InnerText,
                // The author name ends at the first space after the label.
                author = authorPart.Split(" ")[0],
                pubtime = publishedAt,
                detail = ToAbsoluteUrls(bodyHtml)
            };
        }

        /// <summary>Returns the text between the first and second occurrence of <paramref name="label"/>.</summary>
        /// <param name="text">Text to search.</param>
        /// <param name="label">Marker that introduces the wanted segment.</param>
        /// <returns>The segment following the label, or <c>null</c> when the label is absent.</returns>
        private static string SegmentAfter(string text, string label)
        {
            string[] parts = text.Split(label);
            return parts.Length > 1 ? parts[1] : null;
        }

        /// <summary>Rewrites relative <c>src</c>/<c>href</c> values in an HTML fragment to absolute site URLs.</summary>
        /// <param name="html">HTML fragment taken from an article page.</param>
        /// <returns>The fragment with relative links prefixed by the site root.</returns>
        private static string ToAbsoluteUrls(string html)
        {
            return UrlAttributeRegex.Replace(
                html,
                match => match.Groups["attr"].Value + "=\"" + MakeAbsolute(match.Groups["url"].Value) + "\"");
        }

        /// <summary>Prefixes a relative URL with the site root; other values are returned unchanged.</summary>
        /// <param name="url">Raw attribute value.</param>
        /// <returns>An absolute URL for relative input, otherwise the input itself.</returns>
        private static string MakeAbsolute(string url)
        {
            string candidate = url.TrimStart();

            // Empty values, fragments, protocol-relative URLs and anything carrying its own
            // scheme (http:, mailto:, data:, javascript:) must not be prefixed.
            if (candidate.Length == 0
                || candidate.StartsWith("#", StringComparison.Ordinal)
                || candidate.StartsWith("//", StringComparison.Ordinal)
                || SchemeRegex.IsMatch(candidate))
            {
                return url;
            }

            return candidate.StartsWith("/", StringComparison.Ordinal)
                ? SiteRoot + candidate
                : SiteRoot + "/" + candidate;
        }
    }

    /// <summary>Data holder for one scraped news article.</summary>
    public class NewsModels
    {
        /// <summary>Article headline.</summary>
        public string title { get; set; }

        /// <summary>Author name.</summary>
        public string author { get; set; }

        /// <summary>Short summary; not populated by <see cref="HtmlAgHelper"/>.</summary>
        public string summary { get; set; }

        /// <summary>Publication time.</summary>
        public DateTime pubtime { get; set; }

        /// <summary>Article body as an HTML fragment.</summary>
        public string detail { get; set; }
    }
}