using Aliyun.OSS;
using HtmlAgilityPack;
using Microsoft.AspNetCore.Mvc.RazorPages;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
using System.Text.Json;
using New_College.Common.Helper;
using System.Threading;

namespace New_College.Tasks
{
    /// <summary>
    /// Fetches the news list for a province from gaokao.chsi.com.cn and scrapes
    /// each listed article page into a <c>NewsModels</c> instance.
    /// </summary>
    public class NationWideNewsAgHelper
    {
        /// <summary>Scheme and host that site-relative links are resolved against.</summary>
        private const string BaseUrl = "https://gaokao.chsi.com.cn";

        /// <summary>News search endpoint; the province filter value is appended after <c>ss=</c>.</summary>
        private const string SearchApiUrl = "https://gaokao.chsi.com.cn/wap/news/search/5018267?ps=20&ss=";

        /// <summary>Pause between two article downloads, in milliseconds.</summary>
        private const int DelayBetweenArticlesMs = 100;

        // One shared client for the process: creating an HttpClient per call can
        // exhaust sockets under load.
        private static readonly HttpClient SharedHttpClient = new HttpClient();

        /// <summary>
        /// Downloads the news list for <paramref name="provinceCode"/> and scrapes every listed article.
        /// </summary>
        /// <param name="provinceCode">
        /// Province code; every occurrence of <c>"0000"</c> is removed before it is sent to the search endpoint.
        /// </param>
        /// <returns>
        /// The scraped articles. Empty when the list request fails, when the list is empty,
        /// or when no article page could be parsed. Never <c>null</c>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="provinceCode"/> is <c>null</c>.</exception>
        public List<NewsModels> HtmlCreatePageData(string provinceCode)
        {
            if (provinceCode == null)
            {
                throw new ArgumentNullException(nameof(provinceCode));
            }

            var list = new List<NewsModels>();

            // "_t" carries the current Unix time in seconds, making each request URL unique.
            var requestUrl = $"{SearchApiUrl}{provinceCode.Replace("0000", "")}&_t={DateTimeOffset.Now.ToUnixTimeSeconds()}";

            // The method signature is synchronous, so the async HTTP calls are blocked on here.
            using var response = SharedHttpClient.GetAsync(requestUrl).Result;
            if (!response.IsSuccessStatusCode)
            {
                Console.WriteLine($"Failed to fetch page {requestUrl}, Status Code: {response.StatusCode}");
                return list;
            }

            var jsonData = response.Content.ReadAsStringAsync().Result;
            Console.WriteLine(jsonData);

            var result = JsonSerializer.Deserialize<GaokaoObject>(jsonData);
            if (result?.msg == null || result.msg.Count == 0)
            {
                return list;
            }

            var webClient = new HtmlWeb();
            foreach (var item in result.msg)
            {
                // Throttle the article downloads.
                Thread.Sleep(DelayBetweenArticlesMs);

                if (item == null || string.IsNullOrEmpty(item.uri))
                {
                    continue;
                }

                var news = ScrapeArticle(webClient, $"{BaseUrl}{item.uri}");
                if (news != null)
                {
                    list.Add(news);
                }
            }

            return list;
        }

        /// <summary>
        /// Loads one article page and extracts title, source, publication time and body HTML.
        /// Returns <c>null</c> (after logging) when the page does not contain the expected nodes
        /// or its publication time cannot be parsed.
        /// </summary>
        private static NewsModels ScrapeArticle(HtmlWeb webClient, string newsUrl)
        {
            HtmlDocument doc = webClient.Load(newsUrl);
            HtmlNode root = doc.DocumentNode;

            // SelectSingleNode returns null when nothing matches, so every result is checked below.
            HtmlNode titleNode = root.SelectSingleNode("//*[@id=\"app\"]/div[2]/h2");
            HtmlNode sourceNode = root.SelectSingleNode("//*[@id=\"app\"]/div[3]/div[1]/span[2]");
            HtmlNode timeNode = root.SelectSingleNode("//*[@id=\"app\"]/div[3]/div[1]/span[1]");
            HtmlNode bodyNode = root.SelectSingleNode("//*[@id=\"article_dnull\"]");

            if (titleNode == null || sourceNode == null || timeNode == null || bodyNode == null)
            {
                Console.WriteLine($"Skipping {newsUrl}: expected article nodes were not found");
                return null;
            }

            // Same current-culture parsing as Convert.ToDateTime(string), without throwing on bad input.
            if (!DateTime.TryParse(timeNode.InnerText, out DateTime publishedAt))
            {
                Console.WriteLine($"Skipping {newsUrl}: cannot parse publication time '{timeNode.InnerText}'");
                return null;
            }

            // Only the article body is returned, so only its links and resources need rewriting.
            MakeSiteRelativeUrlsAbsolute(bodyNode, ".//a[@href]", "href");
            MakeSiteRelativeUrlsAbsolute(bodyNode, ".//*[@src]", "src");

            string bodyHtml = bodyNode.InnerHtml;

            // ".doc" and ".xls" also match ".docx" and ".xlsx".
            if (bodyHtml.Contains(".pdf") || bodyHtml.Contains(".doc") || bodyHtml.Contains(".xls"))
            {
                bodyHtml = $"{bodyHtml}\n若有附件详情,请至本省招生考试院下载附件!!!";
            }

            return new NewsModels()
            {
                title = titleNode.InnerText,
                author = sourceNode.InnerText.Replace("来源:", ""),
                pubtime = publishedAt,
                detail = bodyHtml
            };
        }

        /// <summary>
        /// Rewrites <paramref name="attributeName"/> on every node matched by <paramref name="xpath"/>
        /// below <paramref name="scope"/>: values starting with "/" get <see cref="BaseUrl"/> prepended,
        /// values starting with "//" get the "https:" scheme. All other values are left untouched,
        /// so already-absolute and <c>data:</c> URLs are not corrupted.
        /// </summary>
        private static void MakeSiteRelativeUrlsAbsolute(HtmlNode scope, string xpath, string attributeName)
        {
            HtmlNodeCollection nodes = scope.SelectNodes(xpath);
            if (nodes == null)
            {
                return;
            }

            foreach (HtmlNode node in nodes)
            {
                string value = node.GetAttributeValue(attributeName, "");
                if (string.IsNullOrEmpty(value))
                {
                    continue;
                }

                if (value.StartsWith("//", StringComparison.Ordinal))
                {
                    node.SetAttributeValue(attributeName, "https:" + value);
                }
                else if (value.StartsWith("/", StringComparison.Ordinal))
                {
                    node.SetAttributeValue(attributeName, BaseUrl + value);
                }
            }
        }
    }
}

/// <summary>
/// JSON envelope that the news search response is deserialized into.
/// </summary>
public class GaokaoObject
{
    /// <summary>News list entries; may be <c>null</c> if the payload omits the property.</summary>
    public List<Msg> msg { get; set; }

    /// <summary>Flag carried by the response; not read by the scraper.</summary>
    public bool flag { get; set; }
}

/// <summary>
/// One entry of the news list. Only <see cref="uri"/> is read by the scraper.
/// </summary>
public class Msg
{
    public string title { get; set; }

    public string truncTitle { get; set; }

    /// <summary>Site-relative path of the article page; appended to the site base URL.</summary>
    public string uri { get; set; }

    public string displayDate { get; set; }
}