NewGaoKaoApi/New_College.Tasks/HtmlAgSpider/NationWideNewsAgHelper.cs

103 lines
4.0 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

using Aliyun.OSS;
using HtmlAgilityPack;
using Microsoft.AspNetCore.Mvc.RazorPages;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
using System.Text.Json;
using New_College.Common.Helper;
using System.Threading;
namespace New_College.Tasks
{
/// <summary>
/// Fetches the news list for a province from the gaokao.chsi.com.cn search endpoint,
/// then loads every listed article page and extracts its title, source, publication
/// time and body HTML.
/// </summary>
public class NationWideNewsAgHelper
{
    /// <summary>Scheme and host prepended to site-relative article, link and resource URLs.</summary>
    private const string SiteRoot = "https://gaokao.chsi.com.cn";

    /// <summary>Search endpoint; the province filter value is appended directly after <c>ss=</c>.</summary>
    private const string SearchApiUrl = SiteRoot + "/wap/news/search/5018267?ps=20&ss=";

    /// <summary>
    /// Substrings whose presence in the article body triggers the attachment notice.
    /// ".doc" and ".xls" also match ".docx" and ".xlsx".
    /// </summary>
    private static readonly string[] AttachmentExtensions = { ".pdf", ".doc", ".xls" };

    /// <summary>
    /// Single shared client: creating and disposing an HttpClient per call can exhaust
    /// sockets under load.
    /// </summary>
    private static readonly HttpClient SharedHttpClient = new HttpClient();

    /// <summary>
    /// Downloads the news list for <paramref name="provinceCode"/> and scrapes each article.
    /// </summary>
    /// <param name="provinceCode">
    /// Province code; every occurrence of "0000" is removed before it is sent as the
    /// search filter (presumably turning a 6-digit code into its 2-digit prefix - confirm with callers).
    /// </param>
    /// <returns>
    /// The scraped articles. Empty when the list request fails, when the payload carries
    /// no items, or when no article page could be parsed. Articles whose page is missing
    /// an expected element, whose date cannot be parsed, or whose text contains "浏览器"
    /// are skipped.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="provinceCode"/> is null.</exception>
    public List<NewsModels> HtmlCreatePageData(string provinceCode)
    {
        if (provinceCode == null)
        {
            throw new ArgumentNullException(nameof(provinceCode));
        }

        var list = new List<NewsModels>();
        // _t carries the current Unix time (presumably to defeat caching - not verifiable from here).
        var baseUrl = $"{SearchApiUrl}{provinceCode.Replace("0000", "")}&_t={DateTimeOffset.Now.ToUnixTimeSeconds()}";

        // NOTE: the method is synchronous by contract, so the async calls are still
        // blocked on with .Result, exactly as before (failures surface as AggregateException).
        using var response = SharedHttpClient.GetAsync(baseUrl).Result;
        if (!response.IsSuccessStatusCode)
        {
            Console.WriteLine($"Failed to fetch page {baseUrl}, Status Code: {response.StatusCode}");
            return list;
        }

        var jsonData = response.Content.ReadAsStringAsync().Result;
        Console.WriteLine(jsonData);

        var payload = JsonSerializer.Deserialize<GaokaoObject>(jsonData);
        if (payload?.msg == null)
        {
            // A "null" document or a payload without "msg" means there is nothing to scrape.
            return list;
        }

        var webClient = new HtmlWeb();
        foreach (var item in payload.msg)
        {
            // Short pause before every article request to avoid hammering the site.
            Thread.Sleep(100);

            if (string.IsNullOrEmpty(item?.uri))
            {
                continue;
            }

            var news = LoadArticle(webClient, $"{SiteRoot}{item.uri}");
            if (news != null)
            {
                list.Add(news);
            }
        }

        return list;
    }

    /// <summary>
    /// Loads one article page and converts it to a <c>NewsModels</c> instance,
    /// or returns null when the page should be skipped.
    /// </summary>
    private static NewsModels LoadArticle(HtmlWeb webClient, string newsUrl)
    {
        HtmlNode root = webClient.Load(newsUrl).DocumentNode;

        var titleNode = root.SelectSingleNode("//*[@id=\"app\"]/div[2]/h2");
        var timeNode = root.SelectSingleNode("//*[@id=\"app\"]/div[3]/div[1]/span[1]");
        var authorNode = root.SelectSingleNode("//*[@id=\"app\"]/div[3]/div[1]/span[2]");
        var articleNode = root.SelectSingleNode("//*[@id=\"article_dnull\"]");

        // SelectSingleNode returns null when nothing matches; skip the article instead of
        // letting one unexpected layout abort the whole batch with a NullReferenceException.
        if (titleNode == null || timeNode == null || authorNode == null || articleNode == null)
        {
            Console.WriteLine($"Skipped {newsUrl}: expected page elements were not found");
            return null;
        }

        // Rewrite links and embedded resources in the DOM before the body is serialized,
        // so only site-relative values get the host prefix and absolute URLs stay intact.
        AbsolutizeAttribute(root, "//a[@href]", "href");
        AbsolutizeAttribute(root, "//*[@src]", "src");

        var bodyHtml = articleNode.InnerHtml;
        if (HtmlHelper.ReplaceHtmlTag(bodyHtml).Contains("浏览器"))
        {
            // Pages whose text mentions "浏览器" (browser) are filtered out.
            return null;
        }

        if (AttachmentExtensions.Any(ext => bodyHtml.Contains(ext)))
        {
            bodyHtml = $"{bodyHtml}\n若有附件详情请至本省招生考试院下载附件!!!";
        }

        // Same culture-sensitive parsing as Convert.ToDateTime, but without throwing.
        if (!DateTime.TryParse(timeNode.InnerText, out var publishedAt))
        {
            Console.WriteLine($"Skipped {newsUrl}: unrecognized publication time '{timeNode.InnerText}'");
            return null;
        }

        return new NewsModels()
        {
            title = titleNode.InnerText,
            author = authorNode.InnerText.Replace("来源:", ""),
            pubtime = publishedAt,
            detail = bodyHtml
        };
    }

    /// <summary>
    /// Makes the given attribute absolute on every node matched by <paramref name="xpath"/>.
    /// </summary>
    private static void AbsolutizeAttribute(HtmlNode root, string xpath, string attributeName)
    {
        var nodes = root.SelectNodes(xpath);
        if (nodes == null)
        {
            // SelectNodes returns null (not an empty collection) when nothing matches.
            return;
        }

        foreach (var node in nodes)
        {
            var current = node.GetAttributeValue(attributeName, "");
            var absolute = ToAbsoluteUrl(current);
            if (absolute != current)
            {
                node.SetAttributeValue(attributeName, absolute);
            }
        }
    }

    /// <summary>
    /// Converts a root-relative ("/path") or protocol-relative ("//host/path") URL to an
    /// absolute https URL. Any other value (already absolute, fragment, empty, ...) is
    /// returned unchanged.
    /// </summary>
    private static string ToAbsoluteUrl(string url)
    {
        if (string.IsNullOrEmpty(url))
        {
            return url;
        }

        if (url.StartsWith("//", StringComparison.Ordinal))
        {
            return "https:" + url;
        }

        if (url.StartsWith("/", StringComparison.Ordinal))
        {
            return SiteRoot + url;
        }

        return url;
    }
}
}
/// <summary>
/// Root object of the JSON document returned by the news search endpoint.
/// Property names are lower-case so they bind to the JSON keys as-is.
/// </summary>
public class GaokaoObject
{
    /// <summary>
    /// The listed news items. Defaults to an empty list so that a payload without a
    /// "msg" key does not leave consumers with a null collection.
    /// </summary>
    public List<Msg> msg { get; set; } = new List<Msg>();

    /// <summary>Flag sent alongside the list (presumably a success indicator - confirm against the API).</summary>
    public bool flag { get; set; }
}
/// <summary>
/// One entry of the news list returned by the search endpoint.
/// Property names are lower-case so they bind to the JSON keys as-is.
/// </summary>
public class Msg
{
    /// <summary>Headline of the news item.</summary>
    public string title { get; set; }

    /// <summary>Alternative headline field (presumably a shortened form of the title - confirm against the API).</summary>
    public string truncTitle { get; set; }

    /// <summary>Site-relative address of the article; the scraper appends it to the site root to build the page URL.</summary>
    public string uri { get; set; }

    /// <summary>Date text supplied with the list entry (format not established by this file).</summary>
    public string displayDate { get; set; }
}