增加定时更新资讯-目前单个山东省份
parent
ec3d1e7923
commit
9aadb08d45
|
|
@ -17,7 +17,7 @@
|
||||||
if (length > 0 && strText.Length > length)
|
if (length > 0 && strText.Length > length)
|
||||||
return strText.Substring(0, length);
|
return strText.Substring(0, length);
|
||||||
|
|
||||||
return strText;
|
return strText.Trim();
|
||||||
}
|
}
|
||||||
#endregion
|
#endregion
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ namespace New_College.Extensions
|
||||||
|
|
||||||
//services.AddHostedService<Job1TimedService>();
|
//services.AddHostedService<Job1TimedService>();
|
||||||
//services.AddHostedService<Job2TimedService>();
|
//services.AddHostedService<Job2TimedService>();
|
||||||
|
services.AddHostedService<JobTimedSpiderService>();
|
||||||
services.AddHostedService<UserBaseSettingJobTimedService>();
|
services.AddHostedService<UserBaseSettingJobTimedService>();
|
||||||
services.AddSingleton<IJobFactory, JobFactory>();
|
services.AddSingleton<IJobFactory, JobFactory>();
|
||||||
services.AddTransient<Job_Blogs_Quartz>();//Job使用瞬时依赖注入
|
services.AddTransient<Job_Blogs_Quartz>();//Job使用瞬时依赖注入
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,79 @@
|
||||||
|
using New_College.Common.Helper;
|
||||||
|
using Microsoft.Extensions.Hosting;
|
||||||
|
using System;
|
||||||
|
using System.Threading;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
using New_College.IServices;
|
||||||
|
using System.Linq;
|
||||||
|
using New_College.Model.Models;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
|
||||||
|
namespace New_College.Tasks
|
||||||
|
{
|
||||||
|
/// <summary>
/// Hosted background service that periodically scrapes news articles with
/// <see cref="HtmlAgHelper"/> and stores the ones that are not yet present
/// through <see cref="ID_NewsInfoServices"/>.
/// </summary>
public class JobTimedSpiderService : IHostedService, IDisposable
{
    /// <summary>Time between two spider runs (the first run starts immediately).</summary>
    private static readonly TimeSpan SpiderInterval = TimeSpan.FromHours(8);

    /// <summary>Maximum number of characters kept for the plain-text summary.</summary>
    private const int SummaryMaxLength = 200;

    private Timer _timer;

    /// <summary>
    /// Service used to query and insert news records.
    /// </summary>
    // NOTE(review): this instance is captured for the whole lifetime of the hosted
    // service; confirm that its registered lifetime allows that.
    private ID_NewsInfoServices newsInfoServices;

    /// <summary>
    /// Creates the service; dependencies are supplied by the DI container.
    /// </summary>
    /// <param name="d_NewsInfoServices">News persistence service.</param>
    public JobTimedSpiderService(ID_NewsInfoServices d_NewsInfoServices)
    {
        newsInfoServices = d_NewsInfoServices;
    }

    /// <summary>
    /// Starts the timer: one run right away, then one every <see cref="SpiderInterval"/>.
    /// </summary>
    public Task StartAsync(CancellationToken cancellationToken)
    {
        Console.WriteLine("Job spider is starting.");

        _timer = new Timer(DoWork, null, TimeSpan.Zero, SpiderInterval);
        return Task.CompletedTask;
    }

    /// <summary>
    /// Timer callback. Delegates to <see cref="RunSpiderAsync"/>, which handles its
    /// own exceptions, so the returned task can be safely discarded here.
    /// </summary>
    private void DoWork(object state)
    {
        _ = RunSpiderAsync();
    }

    /// <summary>
    /// Performs one scrape-and-store pass. Articles are processed one after another
    /// and every database call is awaited, so the final log line is only written
    /// once all inserts have finished. Any failure is logged instead of escaping
    /// from the timer thread.
    /// </summary>
    private async Task RunSpiderAsync()
    {
        try
        {
            var agHelper = new HtmlAgHelper();
            var list = agHelper.HtmlCreatePageData();

            foreach (var c in list)
            {
                // Only articles published in the current year (or later) are imported.
                if (c.pubtime.Year <= DateTime.Now.Year - 1)
                {
                    continue;
                }

                // Compare trimmed against trimmed; otherwise a scraped title with
                // surrounding whitespace never matches and is inserted on every run.
                var title = (c.title ?? string.Empty).Trim();
                var newsinfo = await newsInfoServices.Query(e => e.Title.Trim() == title);
                if (newsinfo.Any())
                {
                    continue;
                }

                var plainText = HtmlHelper.ReplaceHtmlTag(c.detail);
                var summary = plainText.Length > SummaryMaxLength
                    ? plainText.Substring(0, SummaryMaxLength)
                    : plainText;

                await newsInfoServices.Add(new Model.Models.D_NewsInfo()
                {
                    Author = c.author,
                    CategoryId = 1,
                    // Spelling kept as-is: existing rows were written with this value.
                    CreateBy = "spdier",
                    CreateId = 1,
                    CreateTime = c.pubtime,
                    Detail = c.detail,
                    CoverImg = "https://static-data.ycymedu.com/static/newstop.png",
                    OrderSort = 0,
                    IsDelete = false,
                    Title = title,
                    Summary = summary
                });
            }

            ConsoleHelper.WriteWarningLine($"Job spider success: {DateTime.Now}-{list.Count}");
        }
        catch (Exception ex)
        {
            // An unhandled exception on a timer thread would terminate the process.
            ConsoleHelper.WriteWarningLine($"Job spider failed: {DateTime.Now}-{ex}");
        }
    }

    /// <summary>
    /// Stops the timer so that no further runs are scheduled.
    /// </summary>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        Console.WriteLine("Job spider is stopping.");

        _timer?.Change(Timeout.Infinite, 0);

        return Task.CompletedTask;
    }

    /// <summary>
    /// Releases the timer.
    /// </summary>
    public void Dispose()
    {
        _timer?.Dispose();
    }
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,56 @@
|
||||||
|
using Aliyun.OSS.Model;
|
||||||
|
using HtmlAgilityPack;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace New_College.Tasks
|
||||||
|
{
|
||||||
|
/// <summary>
/// Scrapes the news list page of www.sdzk.cn and the article pages it links to.
/// </summary>
public class HtmlAgHelper
{
    private const string SiteRoot = "https://www.sdzk.cn/";
    private const string ListPageUrl = "https://www.sdzk.cn/NewsList.aspx?BCID=2";
    private const string ListItemXPath = "//*[@id=\"ctl00_ContentPlaceHolder1_ctl00_ContentPlaceHolder1_RadListView1Panel\"]/ul/li";
    private const string ArticleRootXPath = "//*[@id=\"form1\"]/div[6]/div[2]";
    private const string AuthorLabel = "作者:";
    private const string PubTimeLabel = "发布时间:";

    /// <summary>
    /// Loads the news list page, follows every article link found on it and
    /// returns the parsed articles.
    /// </summary>
    /// <returns>
    /// One <see cref="NewsModels"/> per article that could be loaded and parsed.
    /// Articles whose page cannot be loaded or does not have the expected layout
    /// are logged to the console and skipped. The list is empty when the list
    /// page contains no matching entries.
    /// </returns>
    public List<NewsModels> HtmlCreatePageData()
    {
        var list = new List<NewsModels>();
        HtmlWeb webClient = new HtmlWeb();
        HtmlDocument listDoc = webClient.Load(ListPageUrl);

        // SelectNodes returns null (not an empty collection) when nothing matches.
        HtmlNodeCollection categoryNodeList = listDoc.DocumentNode.SelectNodes(ListItemXPath);
        if (categoryNodeList == null)
        {
            Console.WriteLine($"No news entries found on {ListPageUrl}");
            return list;
        }

        var listurls = new List<string>();
        foreach (var item in categoryNodeList)
        {
            // The link is expected to be the first child of the list item.
            var href = item.FirstChild?.GetAttributeValue("href", string.Empty);
            if (string.IsNullOrEmpty(href))
            {
                continue;
            }

            var url = SiteRoot + href.TrimStart('/');
            Console.WriteLine($"{url}|{item.InnerText}");
            listurls.Add(url);
        }

        foreach (var url in listurls)
        {
            try
            {
                var article = ReadArticle(webClient, url);
                if (article == null)
                {
                    Console.WriteLine($"Skipped article with unexpected layout: {url}");
                    continue;
                }

                list.Add(article);
            }
            catch (Exception ex)
            {
                // One broken or unreachable article must not abort the whole run.
                Console.WriteLine($"Failed to load article {url}: {ex.Message}");
            }
        }

        return list;
    }

    /// <summary>
    /// Loads one article page and extracts title, author, publish time and body HTML.
    /// </summary>
    /// <returns>The parsed article, or <c>null</c> when an expected node, label or
    /// a parsable publish time is missing.</returns>
    private static NewsModels ReadArticle(HtmlWeb webClient, string url)
    {
        var root = webClient.Load(url).DocumentNode;
        var titleNode = root.SelectSingleNode(ArticleRootXPath + "/h3");
        var metaNode = root.SelectSingleNode(ArticleRootXPath + "/em");
        var bodyNode = root.SelectSingleNode(ArticleRootXPath + "/div");
        if (titleNode == null || metaNode == null || bodyNode == null)
        {
            return null;
        }

        // The <em> line carries both labels: author first, then the publish time.
        var metaText = metaNode.InnerText;
        var authorParts = metaText.Split(AuthorLabel);
        var timeParts = metaText.Split(PubTimeLabel);
        if (authorParts.Length < 2 || timeParts.Length < 2)
        {
            return null;
        }

        // Same parsing rules as Convert.ToDateTime (current culture), without throwing.
        if (!DateTime.TryParse(timeParts[1], out var pubtime))
        {
            return null;
        }

        return new NewsModels()
        {
            title = titleNode.InnerText.Trim(),
            author = authorParts[1].Split(" ")[0],
            pubtime = pubtime,
            detail = bodyNode.InnerHtml
        };
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Plain data holder for one scraped news article.
/// </summary>
public class NewsModels
{
    /// <summary>Article headline.</summary>
    public string title { get; set; }

    /// <summary>Author name as found on the article page.</summary>
    public string author { get; set; }

    /// <summary>Publish time of the article.</summary>
    public DateTime pubtime { get; set; }

    /// <summary>Article body as an HTML fragment.</summary>
    public string detail { get; set; }
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -5,6 +5,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
<PackageReference Include="HtmlAgilityPack" Version="1.11.59" />
|
||||||
<PackageReference Include="Quartz" Version="3.0.7" />
|
<PackageReference Include="Quartz" Version="3.0.7" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,44 @@
|
||||||
|
using HtmlAgilityPack;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace New_Spider
|
||||||
|
{
|
||||||
|
/// <summary>
/// Console-side helper that walks the news list of www.sdzk.cn and parses every
/// linked article page, printing what it finds.
/// </summary>
public class HtmlAgNewsHelper
{
    private const string SiteRoot = "https://www.sdzk.cn/";
    private const string ListPageUrl = "https://www.sdzk.cn/NewsList.aspx?BCID=2";
    private const string ListItemXPath = "//*[@id=\"ctl00_ContentPlaceHolder1_ctl00_ContentPlaceHolder1_RadListView1Panel\"]/ul/li";
    private const string ArticleRootXPath = "//*[@id=\"form1\"]/div[6]/div[2]";

    /// <summary>
    /// Loads the list page, prints every article link, then loads each article and
    /// prints its title, author, publish time and body size. Entries that do not
    /// have the expected layout are reported and skipped.
    /// </summary>
    /// <remarks>
    /// The method is fully synchronous; it was previously declared <c>async void</c>
    /// without awaiting anything.
    /// </remarks>
    public void HtmlCreatePageData()
    {
        HtmlWeb webClient = new HtmlWeb();
        HtmlDocument listDoc = webClient.Load(ListPageUrl);

        // SelectNodes returns null (not an empty collection) when nothing matches.
        HtmlNodeCollection categoryNodeList = listDoc.DocumentNode.SelectNodes(ListItemXPath);
        if (categoryNodeList == null)
        {
            Console.WriteLine($"No news entries found on {ListPageUrl}");
            return;
        }

        var listurls = new List<string>();
        foreach (var item in categoryNodeList)
        {
            // The link is expected to be the first child of the list item.
            var href = item.FirstChild?.GetAttributeValue("href", string.Empty);
            if (string.IsNullOrEmpty(href))
            {
                continue;
            }

            var url = SiteRoot + href.TrimStart('/');
            Console.WriteLine($"{url}|{item.InnerText}");
            listurls.Add(url);
        }

        foreach (var url in listurls)
        {
            var root = webClient.Load(url).DocumentNode;
            var titleNode = root.SelectSingleNode(ArticleRootXPath + "/h3");
            var metaNode = root.SelectSingleNode(ArticleRootXPath + "/em");
            var bodyNode = root.SelectSingleNode(ArticleRootXPath + "/div");
            if (titleNode == null || metaNode == null || bodyNode == null)
            {
                Console.WriteLine($"Skipped article with unexpected layout: {url}");
                continue;
            }

            // The <em> line carries both labels: author first, then the publish time.
            var metaText = metaNode.InnerText;
            var authorParts = metaText.Split("作者:");
            var timeParts = metaText.Split("发布时间:");
            if (authorParts.Length < 2 || timeParts.Length < 2)
            {
                Console.WriteLine($"Skipped article without author/time line: {url}");
                continue;
            }

            var inntertitle = titleNode.InnerText;
            var author = authorParts[1].Split(" ")[0];
            var createtime = timeParts[1];
            var innerhtml = bodyNode.InnerHtml;

            Console.WriteLine($"{inntertitle}|{author}|{createtime}|{innerhtml.Length}");
        }
    }
}
|
||||||
|
}
|
||||||
|
|
@ -9,7 +9,7 @@
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<PackageReference Include="HtmlAgilityPack" Version="1.11.53" />
|
<PackageReference Include="HtmlAgilityPack" Version="1.11.53" />
|
||||||
<PackageReference Include="Newtonsoft.Json" Version="12.0.3" />
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
|
|
||||||
|
|
@ -6,10 +6,12 @@ using System.Text.RegularExpressions;
|
||||||
//HtmlAgHelper htmlAgHelper = new HtmlAgHelper();
|
//HtmlAgHelper htmlAgHelper = new HtmlAgHelper();
|
||||||
|
|
||||||
//htmlAgHelper.HtmlCreatePageData();
|
//htmlAgHelper.HtmlCreatePageData();
|
||||||
HtmlAgMajorHelper agMajorHelper = new HtmlAgMajorHelper();
|
//HtmlAgMajorHelper agMajorHelper = new HtmlAgMajorHelper();
|
||||||
//agMajorHelper.DownloadTypeListFile();
|
//agMajorHelper.DownloadTypeListFile();
|
||||||
agMajorHelper.DownloadChildTypeListFile();
|
//agMajorHelper.DownloadChildTypeListFile();
|
||||||
|
HtmlAgNewsHelper agNewsHelper = new HtmlAgNewsHelper();
|
||||||
|
|
||||||
|
agNewsHelper.HtmlCreatePageData();
|
||||||
|
|
||||||
Console.Read();
|
Console.Read();
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue