增加定时更新资讯-目前单个山东省份
parent
ec3d1e7923
commit
9aadb08d45
|
|
@ -17,7 +17,7 @@
|
|||
if (length > 0 && strText.Length > length)
|
||||
return strText.Substring(0, length);
|
||||
|
||||
return strText;
|
||||
return strText.Trim();
|
||||
}
|
||||
#endregion
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ namespace New_College.Extensions
|
|||
|
||||
//services.AddHostedService<Job1TimedService>();
|
||||
//services.AddHostedService<Job2TimedService>();
|
||||
services.AddHostedService<JobTimedSpiderService>();
|
||||
services.AddHostedService<UserBaseSettingJobTimedService>();
|
||||
services.AddSingleton<IJobFactory, JobFactory>();
|
||||
services.AddTransient<Job_Blogs_Quartz>();//Job使用瞬时依赖注入
|
||||
|
|
|
|||
|
|
@ -0,0 +1,79 @@
|
|||
using New_College.Common.Helper;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using New_College.IServices;
|
||||
using System.Linq;
|
||||
using New_College.Model.Models;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace New_College.Tasks
|
||||
{
|
||||
/// <summary>
/// Hosted background service that periodically scrapes news articles with
/// <see cref="HtmlAgHelper"/> and inserts the ones that are not stored yet
/// through <see cref="ID_NewsInfoServices"/>.
/// </summary>
public class JobTimedSpiderService : IHostedService, IDisposable
{
    /// <summary>Time between two scraping runs.</summary>
    private static readonly TimeSpan Interval = TimeSpan.FromHours(8);

    /// <summary>Maximum number of characters kept in the generated summary.</summary>
    private const int SummaryMaxLength = 200;

    private Timer _timer;

    /// <summary>
    /// Service used to query existing news records and to add new ones.
    /// </summary>
    private readonly ID_NewsInfoServices newsInfoServices;

    // Dependencies can be injected here.
    // NOTE(review): a hosted service lives for the whole application lifetime;
    // confirm that ID_NewsInfoServices is safe to hold for that long.
    public JobTimedSpiderService(ID_NewsInfoServices d_NewsInfoServices)
    {
        newsInfoServices = d_NewsInfoServices;
    }

    /// <summary>
    /// Starts the timer: the first run happens immediately, later runs every 8 hours.
    /// </summary>
    public Task StartAsync(CancellationToken cancellationToken)
    {
        Console.WriteLine("Job spider is starting.");

        _timer = new Timer(DoWork, null, TimeSpan.Zero, Interval);
        return Task.CompletedTask;
    }

    /// <summary>
    /// Timer callback. A timer callback cannot be awaited, so the import is started
    /// fire-and-forget; <see cref="ImportNewsAsync"/> handles its own exceptions.
    /// </summary>
    private void DoWork(object state)
    {
        _ = ImportNewsAsync();
    }

    /// <summary>
    /// Scrapes the articles and stores every one that was published in the current
    /// year (or later) and whose title is not stored yet. Articles are processed one
    /// after another so the success line is only written once all inserts are done.
    /// </summary>
    private async Task ImportNewsAsync()
    {
        try
        {
            var agHelper = new HtmlAgHelper();
            var list = agHelper.HtmlCreatePageData();

            foreach (var c in list)
            {
                // Same cut-off as before, checked first so old articles cost no query.
                if (c.pubtime.Year <= DateTime.Now.Year - 1)
                {
                    continue;
                }

                // Trim the scraped title as well; the stored title is trimmed in the
                // comparison, so an untrimmed value here could never match.
                var title = (c.title ?? string.Empty).Trim();

                var newsinfo = await newsInfoServices.Query(e => e.Title.Trim() == title);
                if (newsinfo.Any())
                {
                    continue;
                }

                // Strip the markup once and reuse the result for the summary.
                var plainText = HtmlHelper.ReplaceHtmlTag(c.detail);

                await newsInfoServices.Add(new Model.Models.D_NewsInfo()
                {
                    Author = c.author,
                    CategoryId = 1,
                    // NOTE(review): spelling kept as-is because it is a stored value; confirm before correcting.
                    CreateBy = "spdier",
                    CreateId = 1,
                    CreateTime = c.pubtime,
                    Detail = c.detail,
                    CoverImg = "https://static-data.ycymedu.com/static/newstop.png",
                    OrderSort = 0,
                    IsDelete = false,
                    Title = title,
                    Summary = plainText.Length > SummaryMaxLength
                        ? plainText.Substring(0, SummaryMaxLength)
                        : plainText
                });
            }

            ConsoleHelper.WriteWarningLine($"Job spider success: {DateTime.Now}-{list.Count}");
        }
        catch (Exception ex)
        {
            // An exception escaping a timer callback would take the process down;
            // report it and wait for the next tick instead.
            ConsoleHelper.WriteWarningLine($"Job spider failed: {DateTime.Now}-{ex}");
        }
    }

    /// <summary>
    /// Stops the timer from firing again.
    /// </summary>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        Console.WriteLine("Job spider is stopping.");

        _timer?.Change(Timeout.Infinite, 0);

        return Task.CompletedTask;
    }

    /// <summary>
    /// Releases the timer.
    /// </summary>
    public void Dispose()
    {
        _timer?.Dispose();
    }
}
|
||||
}
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
using Aliyun.OSS.Model;
|
||||
using HtmlAgilityPack;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace New_College.Tasks
|
||||
{
|
||||
/// <summary>
/// Scrapes the news list page of www.sdzk.cn and every article page it links to.
/// </summary>
public class HtmlAgHelper
{
    /// <summary>
    /// Loads the news list page, follows each list entry and returns one
    /// <see cref="NewsModels"/> per article that could be parsed.
    /// </summary>
    /// <returns>
    /// The parsed articles; empty when the list page contains no matching entries.
    /// Articles that fail to load or do not have the expected markup are skipped
    /// and reported on the console.
    /// </returns>
    public List<NewsModels> HtmlCreatePageData()
    {
        var list = new List<NewsModels>();
        HtmlWeb webClient = new HtmlWeb();
        HtmlDocument listDoc = webClient.Load("https://www.sdzk.cn/NewsList.aspx?BCID=2");

        // SelectNodes returns null (not an empty collection) when nothing matches.
        HtmlNodeCollection categoryNodeList = listDoc.DocumentNode.SelectNodes("//*[@id=\"ctl00_ContentPlaceHolder1_ctl00_ContentPlaceHolder1_RadListView1Panel\"]/ul/li");
        if (categoryNodeList == null)
        {
            return list;
        }

        var listurls = new List<string>();
        foreach (var item in categoryNodeList)
        {
            // The link is expected to be the first child of the <li>; skip entries without one.
            var href = item.FirstChild?.Attributes["href"]?.Value;
            if (string.IsNullOrEmpty(href))
            {
                continue;
            }

            Console.WriteLine(string.Format("https://www.sdzk.cn/{0}|{1}", href, item.InnerText));
            listurls.Add(string.Format("https://www.sdzk.cn/{0}", href));
        }

        foreach (var url in listurls)
        {
            try
            {
                var article = ParseArticle(webClient.Load(url));
                if (article == null)
                {
                    Console.WriteLine($"Job spider skipped (unexpected page layout): {url}");
                    continue;
                }

                list.Add(article);
            }
            catch (Exception ex)
            {
                // One unreachable article must not discard the ones already scraped.
                Console.WriteLine($"Job spider skipped ({ex.Message}): {url}");
            }
        }

        return list;
    }

    /// <summary>
    /// Extracts title, author, publish time and HTML body from one article page.
    /// </summary>
    /// <returns>The parsed article, or <c>null</c> when an expected element or field is missing.</returns>
    private static NewsModels ParseArticle(HtmlDocument articleDoc)
    {
        var root = articleDoc.DocumentNode;

        // SelectSingleNode returns null when the XPath does not match.
        var titleNode = root.SelectSingleNode("//*[@id=\"form1\"]/div[6]/div[2]/h3");
        var metaNode = root.SelectSingleNode("//*[@id=\"form1\"]/div[6]/div[2]/em");
        var bodyNode = root.SelectSingleNode("//*[@id=\"form1\"]/div[6]/div[2]/div");
        if (titleNode == null || metaNode == null || bodyNode == null)
        {
            return null;
        }

        // The <em> element carries both the author and the publish time as labelled text.
        var meta = metaNode.InnerText;
        var authorParts = meta.Split("作者:");
        var timeParts = meta.Split("发布时间:");
        if (authorParts.Length < 2 || timeParts.Length < 2)
        {
            return null;
        }

        if (!DateTime.TryParse(timeParts[1], out var pubtime))
        {
            return null;
        }

        return new NewsModels()
        {
            title = titleNode.InnerText,
            author = authorParts[1].Split(" ")[0],
            pubtime = pubtime,
            detail = bodyNode.InnerHtml
        };
    }
}
|
||||
|
||||
/// <summary>
/// Plain data holder for one scraped news article.
/// </summary>
public class NewsModels
{
    /// <summary>Title text of the article.</summary>
    public string title { get; set; }

    /// <summary>Author name of the article.</summary>
    public string author { get; set; }

    /// <summary>Publish time of the article.</summary>
    public DateTime pubtime { get; set; }

    /// <summary>Article body as HTML.</summary>
    public string detail { get; set; }
}
|
||||
|
||||
}
|
||||
|
|
@ -5,6 +5,7 @@
|
|||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="HtmlAgilityPack" Version="1.11.59" />
|
||||
<PackageReference Include="Quartz" Version="3.0.7" />
|
||||
</ItemGroup>
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,44 @@
|
|||
using HtmlAgilityPack;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace New_Spider
|
||||
{
|
||||
/// <summary>
/// Console-side prototype of the www.sdzk.cn news scraper: lists the article
/// links and extracts the fields of every linked article.
/// </summary>
public class HtmlAgNewsHelper
{
    /// <summary>
    /// Loads the news list page, prints every article link and parses each article page.
    /// Runs synchronously; the method contains no awaits, so it is not declared async.
    /// </summary>
    public void HtmlCreatePageData()
    {
        HtmlWeb webClient = new HtmlWeb();
        HtmlDocument listDoc = webClient.Load("https://www.sdzk.cn/NewsList.aspx?BCID=2");

        // SelectNodes returns null (not an empty collection) when nothing matches.
        HtmlNodeCollection categoryNodeList = listDoc.DocumentNode.SelectNodes("//*[@id=\"ctl00_ContentPlaceHolder1_ctl00_ContentPlaceHolder1_RadListView1Panel\"]/ul/li");
        if (categoryNodeList == null)
        {
            Console.WriteLine("No news list entries found.");
            return;
        }

        var listurls = new List<string>();
        foreach (var item in categoryNodeList)
        {
            // The link is expected to be the first child of the <li>; skip entries without one.
            var href = item.FirstChild?.Attributes["href"]?.Value;
            if (string.IsNullOrEmpty(href))
            {
                continue;
            }

            Console.WriteLine(string.Format("https://www.sdzk.cn/{0}|{1}", href, item.InnerText));
            listurls.Add(string.Format("https://www.sdzk.cn/{0}", href));
        }

        foreach (var url in listurls)
        {
            var articleRoot = webClient.Load(url).DocumentNode;

            // SelectSingleNode returns null when the XPath does not match.
            var titleNode = articleRoot.SelectSingleNode("//*[@id=\"form1\"]/div[6]/div[2]/h3");
            var metaNode = articleRoot.SelectSingleNode("//*[@id=\"form1\"]/div[6]/div[2]/em");
            var bodyNode = articleRoot.SelectSingleNode("//*[@id=\"form1\"]/div[6]/div[2]/div");
            if (titleNode == null || metaNode == null || bodyNode == null)
            {
                Console.WriteLine($"Skipped (unexpected page layout): {url}");
                continue;
            }

            // The <em> element carries both the author and the publish time as labelled text.
            var meta = metaNode.InnerText;
            var authorParts = meta.Split("作者:");
            var timeParts = meta.Split("发布时间:");
            if (authorParts.Length < 2 || timeParts.Length < 2)
            {
                Console.WriteLine($"Skipped (author or publish time missing): {url}");
                continue;
            }

            // NOTE(review): the extracted values are not consumed anywhere yet.
            var inntertitle = titleNode.InnerText;
            var author = authorParts[1].Split(" ")[0];
            var createtime = timeParts[1];
            var innerhtml = bodyNode.InnerHtml;
        }
    }
}
|
||||
}
|
||||
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="HtmlAgilityPack" Version="1.11.53" />
|
||||
<PackageReference Include="Newtonsoft.Json" Version="12.0.3" />
|
||||
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
|
|
|||
|
|
@ -6,10 +6,12 @@ using System.Text.RegularExpressions;
|
|||
//HtmlAgHelper htmlAgHelper = new HtmlAgHelper();
|
||||
|
||||
//htmlAgHelper.HtmlCreatePageData();
|
||||
HtmlAgMajorHelper agMajorHelper = new HtmlAgMajorHelper();
|
||||
//HtmlAgMajorHelper agMajorHelper = new HtmlAgMajorHelper();
|
||||
//agMajorHelper.DownloadTypeListFile();
|
||||
agMajorHelper.DownloadChildTypeListFile();
|
||||
//agMajorHelper.DownloadChildTypeListFile();
|
||||
HtmlAgNewsHelper agNewsHelper = new HtmlAgNewsHelper();
|
||||
|
||||
agNewsHelper.HtmlCreatePageData();
|
||||
|
||||
Console.Read();
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue