增加定时更新资讯-目前单个山东省份

develop
old易 2024-03-06 15:13:59 +08:00
parent ec3d1e7923
commit 9aadb08d45
8 changed files with 187 additions and 4 deletions

View File

@ -17,7 +17,7 @@
if (length > 0 && strText.Length > length)
return strText.Substring(0, length);
return strText;
return strText.Trim();
}
#endregion
}

View File

@ -16,6 +16,7 @@ namespace New_College.Extensions
//services.AddHostedService<Job1TimedService>();
//services.AddHostedService<Job2TimedService>();
services.AddHostedService<JobTimedSpiderService>();
services.AddHostedService<UserBaseSettingJobTimedService>();
services.AddSingleton<IJobFactory, JobFactory>();
services.AddTransient<Job_Blogs_Quartz>();//Job使用瞬时依赖注入

View File

@ -0,0 +1,79 @@
using New_College.Common.Helper;
using Microsoft.Extensions.Hosting;
using System;
using System.Threading;
using System.Threading.Tasks;
using New_College.IServices;
using System.Linq;
using New_College.Model.Models;
using System.Collections.Generic;
namespace New_College.Tasks
{
/// <summary>
/// Hosted background service that periodically scrapes news articles via
/// <see cref="HtmlAgHelper"/> and stores the ones that are not yet present
/// through <see cref="ID_NewsInfoServices"/>.
/// </summary>
public class JobTimedSpiderService : IHostedService, IDisposable
{
    private Timer _timer;

    /// <summary>
    /// Service used to query existing news entries and add new ones.
    /// </summary>
    private readonly ID_NewsInfoServices newsInfoServices;

    // Dependencies can be injected here.
    public JobTimedSpiderService(ID_NewsInfoServices d_NewsInfoServices)
    {
        newsInfoServices = d_NewsInfoServices;
    }

    /// <summary>
    /// Starts the timer: the first run fires immediately, then every 8 hours.
    /// </summary>
    public Task StartAsync(CancellationToken cancellationToken)
    {
        Console.WriteLine("Job spider is starting.");
        _timer = new Timer(DoWork, null, TimeSpan.Zero,
            TimeSpan.FromSeconds(60 * 60 * 8)); // every 8 hours
        return Task.CompletedTask;
    }

    /// <summary>
    /// Timer callback. Delegates to <see cref="DoWorkAsync"/>, which handles all
    /// of its own exceptions, so the returned task never faults.
    /// </summary>
    private void DoWork(object state)
    {
        _ = DoWorkAsync();
    }

    /// <summary>
    /// Scrapes the news list and inserts every article that is recent enough and
    /// whose title is not stored yet. Items are processed one after another so
    /// the injected service is never used concurrently, and the summary line is
    /// only written after all items were handled.
    /// </summary>
    private async Task DoWorkAsync()
    {
        try
        {
            HtmlAgHelper agHelper = new HtmlAgHelper();
            var list = agHelper.HtmlCreatePageData();
            foreach (var c in list)
            {
                try
                {
                    await SaveIfNewAsync(c);
                }
                catch (Exception ex)
                {
                    // One failing article must not abort the remaining ones.
                    Console.WriteLine($"Job spider item failed {DateTime.Now}: {ex}");
                }
            }
            ConsoleHelper.WriteWarningLine($"Job spider success {DateTime.Now}-{list.Count}");
        }
        catch (Exception ex)
        {
            // An unhandled exception on a timer thread would bring the host down.
            Console.WriteLine($"Job spider failed {DateTime.Now}: {ex}");
        }
    }

    /// <summary>
    /// Inserts a single scraped article unless it is too old or an entry with the
    /// same (trimmed) title already exists.
    /// </summary>
    private async Task SaveIfNewAsync(NewsModels c)
    {
        // Only keep articles published in the current year (same rule as before).
        if (c.pubtime.Year <= DateTime.Now.Year - 1)
        {
            return;
        }

        // Compare trimmed against trimmed; otherwise stray whitespace in the scraped
        // title defeats the duplicate check and the article is inserted on every run.
        var title = c.title?.Trim();
        if (string.IsNullOrEmpty(title))
        {
            return;
        }

        var newsinfo = await newsInfoServices.Query(e => e.Title.Trim() == title);
        if (newsinfo.Any())
        {
            return;
        }

        // Strip the HTML once and reuse the result for the summary.
        var plainText = HtmlHelper.ReplaceHtmlTag(c.detail);
        await newsInfoServices.Add(new Model.Models.D_NewsInfo()
        {
            Author = c.author,
            CategoryId = 1,
            CreateBy = "spdier",
            CreateId = 1,
            CreateTime = c.pubtime,
            Detail = c.detail,
            CoverImg = "https://static-data.ycymedu.com/static/newstop.png",
            OrderSort = 0,
            IsDelete = false,
            Title = title,
            Summary = plainText.Length > 200 ? plainText.Substring(0, 200) : plainText
        });
    }

    /// <summary>
    /// Stops the timer from firing again.
    /// </summary>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        Console.WriteLine("Job spider is stopping.");
        _timer?.Change(Timeout.Infinite, 0);
        return Task.CompletedTask;
    }

    public void Dispose()
    {
        _timer?.Dispose();
    }
}
}

View File

@ -0,0 +1,56 @@
using Aliyun.OSS.Model;
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace New_College.Tasks
{
/// <summary>
/// Scrapes the news list at https://www.sdzk.cn/NewsList.aspx?BCID=2 and the
/// detail page of every listed article.
/// </summary>
public class HtmlAgHelper
{
    /// <summary>
    /// Loads the news list page, follows every article link and parses title,
    /// author, publish time and HTML body of each article.
    /// </summary>
    /// <returns>
    /// The successfully parsed articles. Articles whose page cannot be loaded or
    /// does not have the expected structure are logged and skipped; the list is
    /// empty when the list page contains no matching entries.
    /// </returns>
    public List<NewsModels> HtmlCreatePageData()
    {
        var list = new List<NewsModels>();
        HtmlWeb webClient = new HtmlWeb();
        HtmlDocument listDoc = webClient.Load("https://www.sdzk.cn/NewsList.aspx?BCID=2");
        HtmlNodeCollection categoryNodeList = listDoc.DocumentNode.SelectNodes("//*[@id=\"ctl00_ContentPlaceHolder1_ctl00_ContentPlaceHolder1_RadListView1Panel\"]/ul/li");
        // SelectNodes returns null (not an empty collection) when nothing matches.
        if (categoryNodeList == null)
        {
            return list;
        }

        var listurls = new List<string>();
        foreach (var item in categoryNodeList)
        {
            // The link is expected to be the first child of the <li>.
            var href = item.ChildNodes.FirstOrDefault()?.Attributes["href"]?.Value;
            if (string.IsNullOrEmpty(href))
            {
                continue;
            }
            var url = string.Format("https://www.sdzk.cn/{0}", href);
            Console.WriteLine(string.Format("{0}|{1}", url, item.InnerText));
            listurls.Add(url);
        }

        foreach (var url in listurls)
        {
            try
            {
                var article = ParseArticle(webClient, url);
                if (article != null)
                {
                    list.Add(article);
                }
            }
            catch (Exception ex)
            {
                // A single unreachable or malformed page must not abort the whole run.
                Console.WriteLine($"Failed to scrape {url}: {ex.Message}");
            }
        }
        return list;
    }

    /// <summary>
    /// Downloads one article page and extracts its fields; returns null when the
    /// page does not contain the expected nodes or metadata.
    /// </summary>
    private static NewsModels ParseArticle(HtmlWeb webClient, string url)
    {
        var root = webClient.Load(url).DocumentNode;
        var titleNode = root.SelectSingleNode("//*[@id=\"form1\"]/div[6]/div[2]/h3");
        var metaNode = root.SelectSingleNode("//*[@id=\"form1\"]/div[6]/div[2]/em");
        var bodyNode = root.SelectSingleNode("//*[@id=\"form1\"]/div[6]/div[2]/div");
        if (titleNode == null || metaNode == null || bodyNode == null)
        {
            Console.WriteLine($"Unexpected page structure, skipped: {url}");
            return null;
        }

        // The <em> text holds both the author and the publish time.
        var meta = metaNode.InnerText;
        var authorParts = meta.Split("作者:");
        var timeParts = meta.Split("发布时间:");
        if (authorParts.Length < 2 || timeParts.Length < 2
            || !DateTime.TryParse(timeParts[1], out var pubtime))
        {
            Console.WriteLine($"Unexpected article metadata, skipped: {url}");
            return null;
        }

        return new NewsModels()
        {
            title = titleNode.InnerText.Trim(),
            author = authorParts[1].Split("&nbsp;")[0],
            pubtime = pubtime,
            detail = bodyNode.InnerHtml
        };
    }
}
/// <summary>
/// One news article as produced by HtmlAgHelper.HtmlCreatePageData.
/// </summary>
public class NewsModels
{
/// <summary>Article title, taken from the heading of the article page.</summary>
public string title { get; set; }
/// <summary>Author name, extracted from the metadata line of the article page.</summary>
public string author { get; set; }
/// <summary>Publish time, parsed from the metadata line of the article page.</summary>
public DateTime pubtime { get; set; }
/// <summary>Article body as raw HTML.</summary>
public string detail { get; set; }
}
}

View File

@ -5,6 +5,7 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="HtmlAgilityPack" Version="1.11.59" />
<PackageReference Include="Quartz" Version="3.0.7" />
</ItemGroup>

View File

@ -0,0 +1,44 @@
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace New_Spider
{
/// <summary>
/// Console-side scraper for the news list at
/// https://www.sdzk.cn/NewsList.aspx?BCID=2.
/// </summary>
public class HtmlAgNewsHelper
{
    /// <summary>
    /// Loads the news list page, prints every article link and then loads and
    /// parses each article page. The method is synchronous: it was declared
    /// <c>async void</c> without awaiting anything, which only hid exceptions
    /// from the caller.
    /// </summary>
    public void HtmlCreatePageData()
    {
        HtmlWeb webClient = new HtmlWeb();
        HtmlDocument listDoc = webClient.Load("https://www.sdzk.cn/NewsList.aspx?BCID=2");
        HtmlNodeCollection categoryNodeList = listDoc.DocumentNode.SelectNodes("//*[@id=\"ctl00_ContentPlaceHolder1_ctl00_ContentPlaceHolder1_RadListView1Panel\"]/ul/li");
        // SelectNodes returns null (not an empty collection) when nothing matches.
        if (categoryNodeList == null)
        {
            return;
        }

        var listurls = new List<string>();
        foreach (var item in categoryNodeList)
        {
            // The link is expected to be the first child of the <li>.
            var href = item.ChildNodes.FirstOrDefault()?.Attributes["href"]?.Value;
            if (string.IsNullOrEmpty(href))
            {
                continue;
            }
            var url = string.Format("https://www.sdzk.cn/{0}", href);
            Console.WriteLine(string.Format("{0}|{1}", url, item.InnerText));
            listurls.Add(url);
        }

        foreach (var url in listurls)
        {
            var root = webClient.Load(url).DocumentNode;
            // The <em> text holds both the author and the publish time.
            var meta = root.SelectSingleNode("//*[@id=\"form1\"]/div[6]/div[2]/em").InnerText;
            // NOTE(review): the parsed values below are not stored or returned yet.
            var inntertitle = root.SelectSingleNode("//*[@id=\"form1\"]/div[6]/div[2]/h3").InnerText;
            var author = meta.Split("作者:")[1].Split("&nbsp;")[0];
            var createtime = meta.Split("发布时间:")[1];
            var innerhtml = root.SelectSingleNode("//*[@id=\"form1\"]/div[6]/div[2]/div").InnerHtml;
        }
    }
}
}

View File

@ -9,7 +9,7 @@
<ItemGroup>
<PackageReference Include="HtmlAgilityPack" Version="1.11.53" />
<PackageReference Include="Newtonsoft.Json" Version="12.0.3" />
</ItemGroup>
<ItemGroup>

View File

@ -6,10 +6,12 @@ using System.Text.RegularExpressions;
//HtmlAgHelper htmlAgHelper = new HtmlAgHelper();
//htmlAgHelper.HtmlCreatePageData();
HtmlAgMajorHelper agMajorHelper = new HtmlAgMajorHelper();
//HtmlAgMajorHelper agMajorHelper = new HtmlAgMajorHelper();
//agMajorHelper.DownloadTypeListFile();
agMajorHelper.DownloadChildTypeListFile();
//agMajorHelper.DownloadChildTypeListFile();
HtmlAgNewsHelper agNewsHelper = new HtmlAgNewsHelper();
agNewsHelper.HtmlCreatePageData();
Console.Read();