using Aliyun.OSS;
using HtmlAgilityPack;
using Microsoft.AspNetCore.Mvc.RazorPages;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
using System.Text.Json;
using New_College.Common.Helper;
using System.Threading;
using System.Text.RegularExpressions;
namespace New_College.Tasks
{
/// <summary>
/// Fetches the news list for a province from gaokao.chsi.com.cn and loads each
/// listed article page, converting it into a <c>NewsModels</c> entry.
/// </summary>
public class NationWideNewsAgHelper
{
    /// <summary>Scheme and host that root-relative links on the site are resolved against.</summary>
    private const string SiteRoot = "https://gaokao.chsi.com.cn";

    /// <summary>News search endpoint; the province filter value is appended after <c>ss=</c>.</summary>
    private const string ListApiUrl = "https://gaokao.chsi.com.cn/wap/news/search/5018267?ps=20&ss=";

    /// <summary>
    /// Substrings that mark an attachment link in the article body.
    /// ".doc" also matches ".docx" and ".xls" also matches ".xlsx".
    /// </summary>
    private static readonly string[] AttachmentMarkers = { ".pdf", ".doc", ".xls" };

    /// <summary>Matches every <c>src="..."</c> attribute whose value does not start with http:// or https://.</summary>
    private static readonly Regex NonAbsoluteSrc =
        new Regex(@"src=""(?!https?:\/\/)(.*?)""", RegexOptions.Compiled);

    /// <summary>Shared client; creating one per call needlessly churns sockets.</summary>
    private static readonly HttpClient Http = new HttpClient();

    /// <summary>
    /// Downloads the news list for <paramref name="provinceCode"/> and then every article it references.
    /// </summary>
    /// <param name="provinceCode">
    /// Province code; every occurrence of "0000" is removed before it is sent as the <c>ss</c> query value.
    /// </param>
    /// <returns>
    /// One entry per article that could be loaded and parsed. Articles that fail are logged to the
    /// console and skipped. The list is empty when the list request is unsuccessful or returns no entries.
    /// </returns>
    public List<NewsModels> HtmlCreatePageData(string provinceCode)
    {
        var list = new List<NewsModels>();
        // "_t" carries the current Unix time in seconds.
        var baseUrl = $"{ListApiUrl}{provinceCode.Replace("0000", "")}&_t={DateTimeOffset.Now.ToUnixTimeSeconds()}";

        // The method is synchronous by signature, so the async HTTP calls are blocked on.
        using var response = Http.GetAsync(baseUrl).Result;
        if (!response.IsSuccessStatusCode)
        {
            Console.WriteLine($"Failed to fetch page {baseUrl}, Status Code: {response.StatusCode}");
            return list;
        }

        var jsonData = response.Content.ReadAsStringAsync().Result;
        Console.WriteLine(jsonData);

        // Deserialize returns null for a JSON "null" payload, and msg stays null when the key is absent.
        var resultlist = JsonSerializer.Deserialize<GaokaoObject>(jsonData);
        if (resultlist?.msg == null || resultlist.msg.Count == 0)
        {
            return list;
        }

        var webClient = new HtmlWeb();
        foreach (var item in resultlist.msg)
        {
            // Small pause between article requests.
            Thread.Sleep(100);
            try
            {
                list.Add(LoadArticle(webClient, item));
            }
            catch (Exception ex)
            {
                // Isolate failures per article so one bad page does not discard the remaining ones.
                Console.WriteLine($"Failed to load article {item?.uri}: {ex.Message}");
            }
        }

        return list;
    }

    /// <summary>Loads one article page and extracts its title, source, publish time and body HTML.</summary>
    private static NewsModels LoadArticle(HtmlWeb webClient, Msg item)
    {
        HtmlDocument doc = webClient.Load($"{SiteRoot}{item.uri}");
        HtmlNode root = doc.DocumentNode;

        var title = RequireNode(root, "//*[@id=\"app\"]/div[2]/h2").InnerText;
        var author = RequireNode(root, "//*[@id=\"app\"]/div[3]/div[1]/span[2]").InnerText.Replace("来源:", "");
        var createtime = RequireNode(root, "//*[@id=\"app\"]/div[3]/div[1]/span[1]").InnerText;

        // Links must be rewritten before the body HTML is read so the rewritten hrefs are included.
        AbsolutizeLinks(root);

        var innerhtml = RequireNode(root, "//*[@id=\"article_dnull\"]").InnerHtml;
        if (AttachmentMarkers.Any(marker => innerhtml.Contains(marker)))
        {
            innerhtml = $"{innerhtml}\n若有附件详情,请至本省招生考试院下载附件!!!";
        }

        return new NewsModels()
        {
            title = title,
            author = author,
            // Parsed with the current culture, as before; the site's date format is not visible here.
            pubtime = Convert.ToDateTime(createtime),
            detail = AbsolutizeSources(innerhtml)
        };
    }

    /// <summary>Returns the node at <paramref name="xpath"/>, or throws naming the XPath when it is missing.</summary>
    private static HtmlNode RequireNode(HtmlNode root, string xpath)
    {
        return root.SelectSingleNode(xpath)
            ?? throw new InvalidOperationException($"Expected node not found: {xpath}");
    }

    /// <summary>Rewrites root-relative and protocol-relative anchor hrefs in the document to absolute URLs.</summary>
    private static void AbsolutizeLinks(HtmlNode root)
    {
        // SelectNodes yields null (not an empty collection) when nothing matches.
        var anchors = root.SelectNodes("//a[@href]");
        if (anchors == null)
        {
            return;
        }

        foreach (var anchor in anchors)
        {
            string href = anchor.GetAttributeValue("href", "");
            if (href.StartsWith("//", StringComparison.Ordinal))
            {
                // Protocol-relative: already names a host, so only the scheme is missing.
                anchor.SetAttributeValue("href", "https:" + href);
            }
            else if (href.StartsWith("/", StringComparison.Ordinal))
            {
                anchor.SetAttributeValue("href", SiteRoot + href);
            }
        }
    }

    /// <summary>
    /// Prefixes every <c>src</c> value that is not already http(s) with the site root;
    /// protocol-relative values get only the <c>https:</c> scheme.
    /// </summary>
    private static string AbsolutizeSources(string html)
    {
        // NOTE(review): other non-http schemes (e.g. data:) and path-relative values are still
        // prefixed with the site root, as in the previous implementation.
        return NonAbsoluteSrc.Replace(html, match =>
        {
            string src = match.Groups[1].Value;
            string prefix = src.StartsWith("//", StringComparison.Ordinal) ? "https:" : SiteRoot;
            return $"src=\"{prefix}{src}\"";
        });
    }
}
}
/// <summary>
/// Top-level shape of the JSON returned by the news search request.
/// Property names are lowercase on purpose: System.Text.Json matches property
/// names case-sensitively by default, so they must equal the JSON keys.
/// </summary>
public class GaokaoObject
{
    /// <summary>News entries in the response; each one's <c>uri</c> is used to load the article page.</summary>
    public List<Msg> msg { get; set; }

    /// <summary>
    /// Boolean flag from the response. Not read by the code in this file —
    /// presumably a success indicator; confirm against the API.
    /// </summary>
    public bool flag { get; set; }
}
/// <summary>
/// One news entry inside <c>GaokaoObject.msg</c>.
/// Of these properties, only <c>uri</c> is read by the code in this file.
/// </summary>
public class Msg
{
/// <summary>Entry title as returned by the list request (not read in this file).</summary>
public string title { get; set; }
/// <summary>Presumably a shortened form of the title — confirm against the API (not read in this file).</summary>
public string truncTitle { get; set; }
/// <summary>Path of the article page; appended to the site root to build the URL that is loaded.</summary>
public string uri { get; set; }
/// <summary>Presumably the date shown in the list, kept as a raw string — confirm format against the API (not read in this file).</summary>
public string displayDate { get; set; }
}