diff --git a/New_College.Common/Helper/HtmlHelper.cs b/New_College.Common/Helper/HtmlHelper.cs
index 5d02c33..7a6e1b4 100644
--- a/New_College.Common/Helper/HtmlHelper.cs
+++ b/New_College.Common/Helper/HtmlHelper.cs
@@ -17,7 +17,7 @@
if (length > 0 && strText.Length > length)
return strText.Substring(0, length);
- return strText;
+ return strText.Trim();
}
#endregion
}
diff --git a/New_College.Extensions/ServiceExtensions/JobSetup.cs b/New_College.Extensions/ServiceExtensions/JobSetup.cs
index f88d7fa..4bb2240 100644
--- a/New_College.Extensions/ServiceExtensions/JobSetup.cs
+++ b/New_College.Extensions/ServiceExtensions/JobSetup.cs
@@ -16,6 +16,7 @@ namespace New_College.Extensions
//services.AddHostedService();
//services.AddHostedService();
+ services.AddHostedService();
services.AddHostedService();
services.AddSingleton();
services.AddTransient();//Job使用瞬时依赖注入
diff --git a/New_College.Tasks/HostedService/JobTimedSpiderService.cs b/New_College.Tasks/HostedService/JobTimedSpiderService.cs
new file mode 100644
index 0000000..3756d63
--- /dev/null
+++ b/New_College.Tasks/HostedService/JobTimedSpiderService.cs
@@ -0,0 +1,116 @@
+using New_College.Common.Helper;
+using Microsoft.Extensions.Hosting;
+using System;
+using System.Threading;
+using System.Threading.Tasks;
+using New_College.IServices;
+using System.Linq;
+using New_College.Model.Models;
+using System.Collections.Generic;
+
+namespace New_College.Tasks
+{
+    /// <summary>
+    /// Hosted service that crawls the news list on a timer and stores the articles that are not saved yet.
+    /// </summary>
+    public class JobTimedSpiderService : IHostedService, IDisposable
+    {
+        // Time between two crawls. The original literal, 60 * 60 * 8 seconds, is eight hours
+        // (its comment said two hours); the value itself is unchanged.
+        private static readonly TimeSpan CrawlInterval = TimeSpan.FromHours(8);
+
+        private readonly ID_NewsInfoServices newsInfoServices;
+        private Timer _timer;
+
+        /// <summary>Creates the job with the service used to query and insert news rows.</summary>
+        public JobTimedSpiderService(ID_NewsInfoServices d_NewsInfoServices)
+        {
+            newsInfoServices = d_NewsInfoServices;
+        }
+
+        /// <summary>Starts the timer: the first crawl fires immediately, then once per interval.</summary>
+        public Task StartAsync(CancellationToken cancellationToken)
+        {
+            Console.WriteLine("Job spider is starting.");
+
+            _timer = new Timer(DoWork, null, TimeSpan.Zero, CrawlInterval);
+            return Task.CompletedTask;
+        }
+
+        // Timer callback. The task is discarded on purpose: CrawlAsync handles every exception itself.
+        private void DoWork(object state)
+        {
+            _ = CrawlAsync();
+        }
+
+        // Scrapes the list page and inserts unseen articles one after another. The earlier
+        // List.ForEach(async ...) produced async-void lambdas: the inserts ran concurrently on one
+        // service instance, their failures could not be observed, and the success line was printed
+        // before any insert had finished.
+        private async Task CrawlAsync()
+        {
+            try
+            {
+                var list = new HtmlAgHelper().HtmlCreatePageData();
+                foreach (var c in list)
+                {
+                    // Only articles published in the current year or later are imported.
+                    if (c.pubtime.Year <= DateTime.Now.Year - 1)
+                    {
+                        continue;
+                    }
+
+                    // Use one trimmed title for lookup and insert; a padded scraped title would
+                    // never equal the trimmed column and the article would be re-added on every run.
+                    var title = c.title?.Trim();
+                    var newsinfo = await newsInfoServices.Query(e => e.Title.Trim() == title);
+                    if (newsinfo.Any())
+                    {
+                        continue;
+                    }
+
+                    // Run ReplaceHtmlTag once instead of three times per article.
+                    var plainText = HtmlHelper.ReplaceHtmlTag(c.detail);
+                    await newsInfoServices.Add(new Model.Models.D_NewsInfo()
+                    {
+                        Author = c.author,
+                        CategoryId = 1,
+                        CreateBy = "spdier",
+                        CreateId = 1,
+                        CreateTime = c.pubtime,
+                        Detail = c.detail,
+                        CoverImg = "https://static-data.ycymedu.com/static/newstop.png",
+                        OrderSort = 0,
+                        IsDelete = false,
+                        Title = title,
+                        Summary = plainText.Length > 200 ? plainText.Substring(0, 200) : plainText
+                    });
+                }
+
+                ConsoleHelper.WriteWarningLine($"Job spider success: {DateTime.Now}-{list.Count}");
+            }
+            catch (Exception ex)
+            {
+                // An exception that escapes a timer callback terminates the host process;
+                // report it and let the next tick retry.
+                ConsoleHelper.WriteWarningLine($"Job spider failed: {DateTime.Now}-{ex.Message}");
+            }
+        }
+
+        /// <summary>Stops further ticks; a crawl that is already running is not interrupted.</summary>
+        public Task StopAsync(CancellationToken cancellationToken)
+        {
+            Console.WriteLine("Job spider is stopping.");
+
+            _timer?.Change(Timeout.Infinite, 0);
+
+            return Task.CompletedTask;
+        }
+
+        /// <summary>Releases the timer.</summary>
+        public void Dispose()
+        {
+            _timer?.Dispose();
+        }
+    }
+}
diff --git a/New_College.Tasks/HtmlAgSpider/HtmlAgHelper.cs b/New_College.Tasks/HtmlAgSpider/HtmlAgHelper.cs
new file mode 100644
index 0000000..9a7576c
--- /dev/null
+++ b/New_College.Tasks/HtmlAgSpider/HtmlAgHelper.cs
@@ -0,0 +1,124 @@
+using Aliyun.OSS.Model;
+using HtmlAgilityPack;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace New_College.Tasks
+{
+    /// <summary>
+    /// Scrapes the news list of www.sdzk.cn and the article page behind every list entry.
+    /// </summary>
+    public class HtmlAgHelper
+    {
+        private const string SiteRoot = "https://www.sdzk.cn/";
+        private const string ListUrl = "https://www.sdzk.cn/NewsList.aspx?BCID=2";
+        private const string ListItemXPath = "//*[@id=\"ctl00_ContentPlaceHolder1_ctl00_ContentPlaceHolder1_RadListView1Panel\"]/ul/li";
+        // Container that holds the title (h3), the author/date line (em) and the body (div) of an article.
+        private const string ArticleXPath = "//*[@id=\"form1\"]/div[6]/div[2]";
+
+        /// <summary>
+        /// Downloads the list page, follows every entry and returns the articles that could be parsed.
+        /// </summary>
+        /// <returns>Parsed articles in list order; empty when the list markup is not found.</returns>
+        public List<NewsModels> HtmlCreatePageData()
+        {
+            var list = new List<NewsModels>();
+            HtmlWeb webClient = new HtmlWeb();
+            HtmlDocument listPage = webClient.Load(ListUrl);
+
+            // SelectNodes returns null, not an empty collection, when nothing matches.
+            HtmlNodeCollection categoryNodeList = listPage.DocumentNode.SelectNodes(ListItemXPath);
+            if (categoryNodeList == null)
+            {
+                return list;
+            }
+
+            foreach (var item in categoryNodeList)
+            {
+                // The link is expected to be the first child of the <li>; skip entries without one.
+                var href = item.FirstChild?.Attributes["href"]?.Value;
+                if (string.IsNullOrEmpty(href))
+                {
+                    continue;
+                }
+
+                var url = SiteRoot + href;
+                Console.WriteLine(url + "|" + item.InnerText);
+
+                try
+                {
+                    var article = ParseArticle(webClient.Load(url));
+                    if (article == null)
+                    {
+                        Console.WriteLine($"Skip {url}: unexpected page layout");
+                        continue;
+                    }
+
+                    list.Add(article);
+                }
+                catch (Exception ex)
+                {
+                    // One unreachable or malformed page must not discard the articles already collected.
+                    Console.WriteLine($"Skip {url}: {ex.Message}");
+                }
+            }
+
+            return list;
+        }
+
+        // Extracts one article from its page; returns null when the expected markup or date is missing.
+        private static NewsModels ParseArticle(HtmlDocument page)
+        {
+            var titleNode = page.DocumentNode.SelectSingleNode(ArticleXPath + "/h3");
+            var metaNode = page.DocumentNode.SelectSingleNode(ArticleXPath + "/em");
+            var bodyNode = page.DocumentNode.SelectSingleNode(ArticleXPath + "/div");
+            if (titleNode == null || metaNode == null || bodyNode == null)
+            {
+                return null;
+            }
+
+            // The <em> line carries both values: the text after "作者:" up to the next space is the
+            // author, the text after "发布时间:" is the publication time.
+            var meta = metaNode.InnerText;
+            var authorParts = meta.Split("作者:");
+            var timeParts = meta.Split("发布时间:");
+            if (authorParts.Length < 2 || timeParts.Length < 2)
+            {
+                return null;
+            }
+
+            // Same current-culture parsing as Convert.ToDateTime, without throwing on bad input.
+            if (!DateTime.TryParse(timeParts[1], out var pubtime))
+            {
+                return null;
+            }
+
+            return new NewsModels()
+            {
+                title = titleNode.InnerText,
+                author = authorParts[1].Split(" ")[0],
+                pubtime = pubtime,
+                detail = bodyNode.InnerHtml
+            };
+        }
+    }
+
+    /// <summary>One scraped news article.</summary>
+    public class NewsModels
+    {
+        /// <summary>Inner text of the article's h3 element.</summary>
+        public string title { get; set; }
+
+        /// <summary>Author name taken from the article's em line.</summary>
+        public string author { get; set; }
+
+        /// <summary>Publication time parsed from the article's em line.</summary>
+        public DateTime pubtime { get; set; }
+
+        /// <summary>Inner HTML of the article body.</summary>
+        public string detail { get; set; }
+    }
+}
diff --git a/New_College.Tasks/New_College.Tasks.csproj b/New_College.Tasks/New_College.Tasks.csproj
index 9bf2dfe..64f00c4 100644
--- a/New_College.Tasks/New_College.Tasks.csproj
+++ b/New_College.Tasks/New_College.Tasks.csproj
@@ -5,6 +5,7 @@
+
diff --git a/New_Spider/HtmlAgNewsHelper.cs b/New_Spider/HtmlAgNewsHelper.cs
new file mode 100644
index 0000000..8a1dad8
--- /dev/null
+++ b/New_Spider/HtmlAgNewsHelper.cs
@@ -0,0 +1,78 @@
+using HtmlAgilityPack;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace New_Spider
+{
+    /// <summary>
+    /// Console prototype of the news spider: walks the news list of www.sdzk.cn and parses every article page.
+    /// </summary>
+    public class HtmlAgNewsHelper
+    {
+        /// <summary>
+        /// Prints every list entry, then loads each article page and extracts title, author, time and body.
+        /// </summary>
+        /// <remarks>
+        /// The method awaits nothing, so it is declared as a plain synchronous <c>void</c> rather than
+        /// <c>async void</c>; callers invoke it exactly as before.
+        /// </remarks>
+        public void HtmlCreatePageData()
+        {
+            HtmlWeb webClient = new HtmlWeb();
+            HtmlDocument listPage = webClient.Load("https://www.sdzk.cn/NewsList.aspx?BCID=2");
+
+            // SelectNodes returns null, not an empty collection, when nothing matches.
+            HtmlNodeCollection categoryNodeList = listPage.DocumentNode.SelectNodes("//*[@id=\"ctl00_ContentPlaceHolder1_ctl00_ContentPlaceHolder1_RadListView1Panel\"]/ul/li");
+            if (categoryNodeList == null)
+            {
+                Console.WriteLine("News list not found.");
+                return;
+            }
+
+            var listurls = new List<string>();
+            foreach (var item in categoryNodeList)
+            {
+                // The link is expected to be the first child of the <li>; skip entries without one.
+                var href = item.FirstChild?.Attributes["href"]?.Value;
+                if (string.IsNullOrEmpty(href))
+                {
+                    continue;
+                }
+
+                Console.WriteLine(string.Format("https://www.sdzk.cn/{0}|{1}", href, item.InnerText));
+                listurls.Add(string.Format("https://www.sdzk.cn/{0}", href));
+            }
+
+            foreach (var url in listurls)
+            {
+                var page = webClient.Load(url);
+                var titleNode = page.DocumentNode.SelectSingleNode("//*[@id=\"form1\"]/div[6]/div[2]/h3");
+                var metaNode = page.DocumentNode.SelectSingleNode("//*[@id=\"form1\"]/div[6]/div[2]/em");
+                var bodyNode = page.DocumentNode.SelectSingleNode("//*[@id=\"form1\"]/div[6]/div[2]/div");
+                if (titleNode == null || metaNode == null || bodyNode == null)
+                {
+                    Console.WriteLine($"Skip {url}: unexpected page layout");
+                    continue;
+                }
+
+                var meta = metaNode.InnerText;
+                var authorParts = meta.Split("作者:");
+                var timeParts = meta.Split("发布时间:");
+                if (authorParts.Length < 2 || timeParts.Length < 2)
+                {
+                    Console.WriteLine($"Skip {url}: author/time line not recognised");
+                    continue;
+                }
+
+                // NOTE(review): the extracted values are not consumed anywhere yet; this helper only parses.
+                var inntertitle = titleNode.InnerText;
+                var author = authorParts[1].Split(" ")[0];
+                var createtime = timeParts[1];
+                var innerhtml = bodyNode.InnerHtml;
+            }
+        }
+    }
+}
diff --git a/New_Spider/New_Spider.csproj b/New_Spider/New_Spider.csproj
index 91f8ed1..df01b65 100644
--- a/New_Spider/New_Spider.csproj
+++ b/New_Spider/New_Spider.csproj
@@ -9,7 +9,7 @@
-
+
diff --git a/New_Spider/Program.cs b/New_Spider/Program.cs
index 4be4f6c..ea59bf0 100644
--- a/New_Spider/Program.cs
+++ b/New_Spider/Program.cs
@@ -6,10 +6,12 @@ using System.Text.RegularExpressions;
//HtmlAgHelper htmlAgHelper = new HtmlAgHelper();
//htmlAgHelper.HtmlCreatePageData();
-HtmlAgMajorHelper agMajorHelper = new HtmlAgMajorHelper();
+//HtmlAgMajorHelper agMajorHelper = new HtmlAgMajorHelper();
//agMajorHelper.DownloadTypeListFile();
-agMajorHelper.DownloadChildTypeListFile();
+//agMajorHelper.DownloadChildTypeListFile();
+HtmlAgNewsHelper agNewsHelper = new HtmlAgNewsHelper();
+agNewsHelper.HtmlCreatePageData();
Console.Read();