NewGaoKaoApi/New_Spider/HtmlAgNewsHelper.cs

45 lines
1.6 KiB
C#

using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace New_Spider
{
/// <summary>
/// Downloads the news list page at <see cref="NewsListUrl"/> with HtmlAgilityPack,
/// collects the article links it contains and then loads every linked article page
/// to extract its title, author, publish time and body HTML.
/// </summary>
public class HtmlAgNewsHelper
{
    /// <summary>Prefix prepended to the relative <c>href</c> values found on the list page.</summary>
    private const string SiteRoot = "https://www.sdzk.cn/";

    /// <summary>Address of the list page that is scraped for article links.</summary>
    private const string NewsListUrl = "https://www.sdzk.cn/NewsList.aspx?BCID=2";

    /// <summary>XPath selecting one <c>li</c> element per entry on the list page.</summary>
    private const string ListItemXPath = "//*[@id=\"ctl00_ContentPlaceHolder1_ctl00_ContentPlaceHolder1_RadListView1Panel\"]/ul/li";

    /// <summary>XPath of the container that holds the title, meta line and body of an article page.</summary>
    private const string ArticleXPath = "//*[@id=\"form1\"]/div[6]/div[2]";

    /// <summary>
    /// Loads the list page, prints one <c>url|text</c> line per entry to the console and
    /// then visits each collected article URL to extract its fields.
    /// </summary>
    /// <remarks>
    /// The method is synchronous: it was previously declared <c>async void</c> without any
    /// <c>await</c>, which only hid exceptions from the caller. Network failures raised by
    /// <c>HtmlWeb.Load</c> now propagate to the caller. List entries or article pages that
    /// do not match the expected markup are skipped with a message on standard error
    /// instead of aborting the whole run.
    /// NOTE(review): the extracted article fields are currently neither stored nor returned.
    /// </remarks>
    public void HtmlCreatePageData()
    {
        HtmlWeb webClient = new HtmlWeb();
        HtmlDocument listPage = webClient.Load(NewsListUrl);

        // SelectNodes returns null (not an empty collection) when nothing matches.
        HtmlNodeCollection categoryNodeList = listPage.DocumentNode.SelectNodes(ListItemXPath);
        if (categoryNodeList == null)
        {
            Console.Error.WriteLine($"No list entries found at {NewsListUrl}");
            return;
        }

        var listurls = new List<string>();
        foreach (HtmlNode item in categoryNodeList)
        {
            var href = FindHref(item);
            if (href == null)
            {
                // Entry without a link (e.g. a separator row): nothing to visit.
                continue;
            }

            var url = SiteRoot + href;
            Console.WriteLine($"{url}|{item.InnerText}");
            listurls.Add(url);
        }

        foreach (var url in listurls)
        {
            HtmlDocument page = webClient.Load(url);
            HtmlNode titleNode = page.DocumentNode.SelectSingleNode(ArticleXPath + "/h3");
            HtmlNode metaNode = page.DocumentNode.SelectSingleNode(ArticleXPath + "/em");
            HtmlNode bodyNode = page.DocumentNode.SelectSingleNode(ArticleXPath + "/div");
            if (titleNode == null || metaNode == null || bodyNode == null)
            {
                Console.Error.WriteLine($"Unexpected page layout, skipped: {url}");
                continue;
            }

            // The <em> element carries both the author and the publish time; read it once.
            var metaText = metaNode.InnerText;
            var authorPart = SegmentAfter(metaText, "作者:");
            var createtime = SegmentAfter(metaText, "发布时间:");
            if (authorPart == null || createtime == null)
            {
                Console.Error.WriteLine($"Author/time markers missing, skipped: {url}");
                continue;
            }

            var inntertitle = titleNode.InnerText;
            // The author name ends at the first "&nbsp;" entity that follows it.
            var author = SplitOn(authorPart, "&nbsp;")[0];
            var innerhtml = bodyNode.InnerHtml;
        }
    }

    /// <summary>
    /// Returns the <c>href</c> value of the first direct child of <paramref name="item"/>
    /// that has one, or <c>null</c> when no child carries an <c>href</c> attribute.
    /// </summary>
    private static string FindHref(HtmlNode item)
    {
        // The first child may be a whitespace text node, so do not assume index 0 is the anchor.
        return item.ChildNodes
            .Select(child => child.GetAttributeValue("href", null))
            .FirstOrDefault(value => value != null);
    }

    /// <summary>
    /// Returns the segment of <paramref name="text"/> that follows the first occurrence of
    /// <paramref name="marker"/> (up to the next occurrence, if any), or <c>null</c> when the
    /// marker does not occur.
    /// </summary>
    private static string SegmentAfter(string text, string marker)
    {
        string[] parts = SplitOn(text, marker);
        return parts.Length > 1 ? parts[1] : null;
    }

    /// <summary>Splits <paramref name="text"/> on a whole-string separator, keeping empty entries.</summary>
    private static string[] SplitOn(string text, string separator)
    {
        return text.Split(new[] { separator }, StringSplitOptions.None);
    }
}
}