// 103 lines · 4.0 KiB · C#
using Aliyun.OSS;
|
||
using HtmlAgilityPack;
|
||
using Microsoft.AspNetCore.Mvc.RazorPages;
|
||
using System;
|
||
using System.Collections.Generic;
|
||
using System.IO;
|
||
using System.Linq;
|
||
using System.Net.Http;
|
||
using System.Text;
|
||
using System.Threading.Tasks;
|
||
using System.Text.Json;
|
||
using New_College.Common.Helper;
|
||
using System.Threading;
|
||
namespace New_College.Tasks
{
    /// <summary>
    /// Fetches the news list for a province from the gaokao.chsi.com.cn search
    /// endpoint, loads each listed article page and converts it into a
    /// <see cref="NewsModels"/> entry.
    /// </summary>
    public class NationWideNewsAgHelper
    {
        /// <summary>Scheme and host that site-relative links are resolved against.</summary>
        private const string SiteRoot = "https://gaokao.chsi.com.cn";

        /// <summary>Search endpoint; the province filter is appended to the trailing <c>ss=</c> parameter.</summary>
        private const string SearchApiUrl = SiteRoot + "/wap/news/search/5018267?ps=20&ss=";

        /// <summary>File extensions whose presence in the article HTML triggers the attachment notice.</summary>
        private static readonly string[] AttachmentExtensions = { ".pdf", ".doc", ".xls" };

        // One shared client for the lifetime of the process: creating a new
        // HttpClient per call can exhaust sockets under repeated use.
        private static readonly HttpClient SharedHttpClient = new HttpClient();

        /// <summary>
        /// Downloads the news list for <paramref name="provinceCode"/> and scrapes every listed article.
        /// </summary>
        /// <param name="provinceCode">
        /// Province code; every occurrence of <c>"0000"</c> is removed before it is sent as the <c>ss</c> query value.
        /// </param>
        /// <returns>
        /// The scraped articles. Empty when the list request does not return a success status
        /// or the response contains no items. Articles whose page lacks the expected nodes,
        /// whose publish time cannot be parsed, or whose text contains "浏览器" are left out.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="provinceCode"/> is null.</exception>
        public List<NewsModels> HtmlCreatePageData(string provinceCode)
        {
            if (provinceCode == null)
            {
                throw new ArgumentNullException(nameof(provinceCode));
            }

            var list = new List<NewsModels>();
            // The _t timestamp makes every request URL unique (presumably a cache-buster).
            var baseUrl = $"{SearchApiUrl}{provinceCode.Replace("0000", "")}&_t={DateTimeOffset.Now.ToUnixTimeSeconds()}";

            // The method is synchronous by contract, so the task is blocked on here;
            // .Result is kept so callers observe the same exception wrapping as before.
            using var response = SharedHttpClient.GetAsync(baseUrl).Result;
            if (!response.IsSuccessStatusCode)
            {
                Console.WriteLine($"Failed to fetch page {baseUrl}, Status Code: {response.StatusCode}");
                return list;
            }

            var jsonData = response.Content.ReadAsStringAsync().Result;
            Console.WriteLine(jsonData);

            var resultlist = JsonSerializer.Deserialize<GaokaoObject>(jsonData);
            if (resultlist?.msg == null || resultlist.msg.Count == 0)
            {
                return list;
            }

            var webClient = new HtmlWeb();
            foreach (var item in resultlist.msg)
            {
                if (item == null || string.IsNullOrEmpty(item.uri))
                {
                    continue;
                }

                // Short pause before each article request (presumably to go easy on the remote site).
                Thread.Sleep(100);

                var news = TryLoadArticle(webClient, $"{SiteRoot}{item.uri}");
                if (news != null)
                {
                    list.Add(news);
                }
            }

            return list;
        }

        /// <summary>
        /// Loads one article page and maps it to a <see cref="NewsModels"/>;
        /// returns null when the page cannot be converted.
        /// </summary>
        private static NewsModels TryLoadArticle(HtmlWeb webClient, string newsUrl)
        {
            HtmlDocument doc = webClient.Load(newsUrl);
            var root = doc.DocumentNode;

            var titleNode = root.SelectSingleNode("//*[@id=\"app\"]/div[2]/h2");
            var authorNode = root.SelectSingleNode("//*[@id=\"app\"]/div[3]/div[1]/span[2]");
            var timeNode = root.SelectSingleNode("//*[@id=\"app\"]/div[3]/div[1]/span[1]");
            var bodyNode = root.SelectSingleNode("//*[@id=\"article_dnull\"]");

            // SelectSingleNode yields null when nothing matches; a page with a
            // different layout is skipped instead of aborting the whole run.
            if (titleNode == null || authorNode == null || timeNode == null || bodyNode == null)
            {
                Console.WriteLine($"Skipped {newsUrl}: expected article nodes were not found");
                return null;
            }

            if (!DateTime.TryParse(timeNode.InnerText, out var pubTime))
            {
                Console.WriteLine($"Skipped {newsUrl}: cannot parse publish time '{timeNode.InnerText}'");
                return null;
            }

            // Make site-relative links and resources absolute on the DOM, so that
            // URLs which are already absolute are left untouched.
            RewriteAttribute(bodyNode, "href", ".//a[@href]");
            RewriteAttribute(bodyNode, "src", ".//*[@src]");

            var innerhtml = bodyNode.InnerHtml;
            if (HtmlHelper.ReplaceHtmlTag(innerhtml).Contains("浏览器"))
            {
                return null;
            }

            // ".doc" and ".xls" also cover ".docx" and ".xlsx".
            if (AttachmentExtensions.Any(ext => innerhtml.IndexOf(ext, StringComparison.OrdinalIgnoreCase) >= 0))
            {
                innerhtml = $"{innerhtml}\n若有附件详情,请至本省招生考试院下载附件!!!";
            }

            return new NewsModels()
            {
                title = titleNode.InnerText,
                author = authorNode.InnerText.Replace("来源:", ""),
                pubtime = pubTime,
                detail = innerhtml
            };
        }

        /// <summary>
        /// Replaces <paramref name="attribute"/> with its absolute form on every node
        /// under <paramref name="scope"/> selected by <paramref name="xpath"/>.
        /// </summary>
        private static void RewriteAttribute(HtmlNode scope, string attribute, string xpath)
        {
            // SelectNodes returns null (not an empty collection) when nothing matches.
            foreach (var node in scope.SelectNodes(xpath) ?? Enumerable.Empty<HtmlNode>())
            {
                var current = node.GetAttributeValue(attribute, "");
                var absolute = ToAbsoluteUrl(current);
                if (!string.Equals(current, absolute, StringComparison.Ordinal))
                {
                    node.SetAttributeValue(attribute, absolute);
                }
            }
        }

        /// <summary>
        /// Prefixes root-relative URLs ("/x") with the site root and protocol-relative
        /// URLs ("//host/x") with "https:"; every other value is returned unchanged.
        /// </summary>
        private static string ToAbsoluteUrl(string url)
        {
            if (string.IsNullOrEmpty(url) || url[0] != '/')
            {
                return url;
            }

            if (url.StartsWith("//", StringComparison.Ordinal))
            {
                return "https:" + url;
            }

            return SiteRoot + url;
        }
    }
}
|
||
|
||
/// <summary>
/// Envelope that the news search response is deserialized into by
/// <c>NationWideNewsAgHelper.HtmlCreatePageData</c>.
/// Member names are lowercase, presumably to match the JSON keys exactly — confirm against the API.
/// </summary>
public class GaokaoObject
{
    /// <summary>
    /// News entries in the response. Starts as an empty list so that a response
    /// without a <c>msg</c> key does not leave this property null.
    /// </summary>
    public List<Msg> msg { get; set; } = new List<Msg>();

    /// <summary>Flag carried by the response; presumably a success indicator — not read anywhere in this file.</summary>
    public bool flag { get; set; }
}
|
||
|
||
/// <summary>
/// One news entry inside <see cref="GaokaoObject.msg"/>.
/// Member names are lowercase, presumably to match the JSON keys exactly — confirm against the API.
/// </summary>
public class Msg
{
    /// <summary>Title of the entry as delivered by the list response.</summary>
    public string title { get; set; }

    /// <summary>Presumably a shortened form of <see cref="title"/>; not read in this file.</summary>
    public string truncTitle { get; set; }

    /// <summary>Site-relative path of the article; the scraper appends it to the site root to build the page URL.</summary>
    public string uri { get; set; }

    /// <summary>Date text delivered with the entry; not read in this file.</summary>
    public string displayDate { get; set; }
}
|