using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
using Aliyun.OSS;
using HtmlAgilityPack;
using NPOI.Util.ArrayExtensions;
namespace New_Spider
{
/// <summary>
/// Crawls college data from the xuexingaokao API and stores the raw JSON responses
/// as objects in an Aliyun OSS bucket.
/// </summary>
/// <remarks>
/// OSS credentials are read from the <c>OSS_ACCESS_KEY_ID</c> and <c>OSS_ACCESS_KEY_SECRET</c>
/// environment variables. A key pair used to be committed in this file; it must be treated
/// as compromised and rotated.
/// All crawls are best-effort: a failing page or item is logged to the console and skipped.
/// </remarks>
public class XueHtmlAgHelper
{
    // Number of list pages to crawl; the list endpoint is queried with perpage=10.
    private const int TotalPages = 287;
    private const string OssEndpoint = "https://oss-cn-shanghai.aliyuncs.com";
    // Public HTTP address of the bucket, used to read back objects uploaded by earlier crawls.
    private const string StaticBaseUrl = "https://static-data-ycymedu.oss-cn-shanghai.aliyuncs.com/";
    private const string ApiBaseUrl = "http://api.xuexingaokao.com/api/college/base_college/";
    private const string AccessKeyIdVariable = "OSS_ACCESS_KEY_ID";
    private const string AccessKeySecretVariable = "OSS_ACCESS_KEY_SECRET";

    private readonly string bucketName = "static-data-ycymedu";
    private readonly string filePrefix = "college-data/page-";

    // Created on first use so that constructing the helper never throws.
    private OssClient ossClient;

    /// <summary>
    /// Downloads every page of the college list API and uploads each page to OSS
    /// as <c>college-data/page-{page}.json</c>.
    /// </summary>
    /// <returns>A task that completes when all pages have been attempted.</returns>
    /// <exception cref="InvalidOperationException">The OSS credential environment variables are not set.</exception>
    public async Task HtmlCreatePageData()
    {
        // Fail fast on missing credentials instead of logging the same error once per page.
        GetOssClient();
        using var httpClient = new HttpClient();
        for (int page = 1; page <= TotalPages; page++)
        {
            string url = $"{ApiBaseUrl}index?area=&type=&nature=&grade=&is_bz=&cname=&page={page}&perpage=10&timestamp={UnixNow()}";
            Console.WriteLine($"Fetching page {page}...");
            try
            {
                await FetchAndUploadAsync(httpClient, url, $"{filePrefix}{page}.json", page).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Error fetching or uploading page {page}: {ex.Message}");
            }
        }
    }

    /// <summary>
    /// For every college on the stored list pages, downloads the <c>collegeDetail</c> response
    /// and uploads it as <c>college-detail/{id}.json</c>.
    /// </summary>
    /// <returns>A task that completes when all colleges have been attempted.</returns>
    /// <exception cref="InvalidOperationException">The OSS credential environment variables are not set.</exception>
    public async Task GetDataDetail()
    {
        await CrawlCollegeEndpointAsync("collegeDetail", "college-detail/", "Fetching", "detailgourl", 0).ConfigureAwait(false);
    }

    /// <summary>
    /// For every college on the stored list pages, downloads the <c>recruitList</c> response
    /// and uploads it as <c>college-recruitlist/{id}.json</c>, pausing 200 ms before each request.
    /// </summary>
    /// <exception cref="InvalidOperationException">The OSS credential environment variables are not set.</exception>
    public void GetDataRecruitList()
    {
        // The signature is synchronous, so block once here; the helpers never capture a
        // synchronization context (ConfigureAwait(false)), which keeps this wait deadlock-free.
        CrawlCollegeEndpointAsync("recruitList", "college-recruitlist/", "recruitListUrl", "recruitListUrl", 200)
            .GetAwaiter().GetResult();
    }

    /// <summary>
    /// For every college on the stored list pages, downloads the <c>subjectIntroduce</c> response
    /// and uploads it as <c>college-subjectIntroduce/{id}.json</c>, pausing 200 ms before each request.
    /// </summary>
    /// <exception cref="InvalidOperationException">The OSS credential environment variables are not set.</exception>
    public void GetDataSubjectIntroduce()
    {
        CrawlCollegeEndpointAsync("subjectIntroduce", "college-subjectIntroduce/", "subjectIntroduceurl", "subjectIntroduceurl", 200)
            .GetAwaiter().GetResult();
    }

    /// <summary>
    /// For every college on the stored list pages, downloads the <c>faculty</c> response
    /// and uploads it as <c>college-faculty/{id}.json</c>, pausing 300 ms before each request.
    /// </summary>
    /// <exception cref="InvalidOperationException">The OSS credential environment variables are not set.</exception>
    public void GetDataFaculty()
    {
        CrawlCollegeEndpointAsync("faculty", "college-faculty/", "Fetching", "facultyurl", 300)
            .GetAwaiter().GetResult();
    }

    /// <summary>
    /// For every college, reads the previously stored <c>college-recruitlist/{id}.json</c> object,
    /// then downloads the <c>recruitDetail</c> response of every recruit entry in it and uploads
    /// it as <c>college-recruitdetail/{recruitId}.json</c>, pausing 150 ms before each request.
    /// </summary>
    /// <exception cref="InvalidOperationException">The OSS credential environment variables are not set.</exception>
    public void GetDataRecruitDetail()
    {
        ForEachCollegeAsync("recruitListUrl", MirrorRecruitDetailsAsync).GetAwaiter().GetResult();
    }

    /// <summary>Returns the shared OSS client, creating it from environment credentials on first use.</summary>
    private OssClient GetOssClient()
    {
        if (ossClient != null)
        {
            return ossClient;
        }

        string accessKeyId = Environment.GetEnvironmentVariable(AccessKeyIdVariable);
        string accessKeySecret = Environment.GetEnvironmentVariable(AccessKeySecretVariable);
        if (string.IsNullOrWhiteSpace(accessKeyId) || string.IsNullOrWhiteSpace(accessKeySecret))
        {
            throw new InvalidOperationException(
                $"Set the {AccessKeyIdVariable} and {AccessKeySecretVariable} environment variables before uploading to OSS.");
        }

        ossClient = new OssClient(OssEndpoint, accessKeyId, accessKeySecret);
        return ossClient;
    }

    /// <summary>Current Unix time in seconds, sent as the <c>timestamp</c> query parameter.</summary>
    private static long UnixNow() => DateTimeOffset.UtcNow.ToUnixTimeSeconds();

    /// <summary>
    /// GETs <paramref name="url"/> and, on a success status, uploads the body to OSS under
    /// <paramref name="objectName"/>. Exceptions are left to the caller to log.
    /// </summary>
    private async Task FetchAndUploadAsync(HttpClient httpClient, string url, string objectName, int page)
    {
        using var response = await httpClient.GetAsync(url).ConfigureAwait(false);
        if (!response.IsSuccessStatusCode)
        {
            Console.WriteLine($"Failed to fetch page {page}, Status Code: {response.StatusCode}");
            return;
        }

        string json = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
        using var stream = new MemoryStream(Encoding.UTF8.GetBytes(json));
        GetOssClient().PutObject(bucketName, objectName, stream);
        Console.WriteLine($"Uploaded page {page} to OSS as {objectName}");
    }

    /// <summary>
    /// Reads one stored list page back from the bucket and returns its college rows;
    /// returns an empty array when the page is missing, not flagged with <c>code == 1</c>, or has no rows.
    /// </summary>
    private async Task<Datum[]> LoadCollegePageAsync(HttpClient httpClient, int page)
    {
        string url = $"{StaticBaseUrl}{filePrefix}{page}.json";
        using var response = await httpClient.GetAsync(url).ConfigureAwait(false);
        if (!response.IsSuccessStatusCode)
        {
            Console.WriteLine($"Skipping page {page}, Status Code: {response.StatusCode}");
            return Array.Empty<Datum>();
        }

        string json = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
        universitylistobject listing = JsonSerializer.Deserialize<universitylistobject>(json);
        if (listing == null || listing.code != 1 || listing.data == null || listing.data.data == null)
        {
            return Array.Empty<Datum>();
        }

        return listing.data.data;
    }

    /// <summary>
    /// Walks all stored list pages and invokes <paramref name="handleCollege"/> for each college,
    /// sequentially. A page that cannot be loaded is logged and skipped.
    /// </summary>
    private async Task ForEachCollegeAsync(string pageLogLabel, Func<HttpClient, Datum, int, Task> handleCollege)
    {
        // Fail fast on missing credentials before any network traffic.
        GetOssClient();
        using var httpClient = new HttpClient();
        for (int page = 1; page <= TotalPages; page++)
        {
            Console.WriteLine($"{pageLogLabel} page {page}...");
            Datum[] colleges;
            try
            {
                colleges = await LoadCollegePageAsync(httpClient, page).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Error loading stored page {page}: {ex.Message}");
                continue;
            }

            foreach (Datum college in colleges)
            {
                if (college == null)
                {
                    continue;
                }

                await handleCollege(httpClient, college, page).ConfigureAwait(false);
            }
        }
    }

    /// <summary>
    /// Shared body of the per-college crawls: requests <c>{endpoint}?college_id={id}</c> for each
    /// college and stores the response as <c>{objectPrefix}{id}.json</c>.
    /// </summary>
    private Task CrawlCollegeEndpointAsync(string endpoint, string objectPrefix, string pageLogLabel, string itemLogLabel, int delayMilliseconds)
    {
        return ForEachCollegeAsync(pageLogLabel, async (httpClient, college, page) =>
        {
            if (delayMilliseconds > 0)
            {
                // Throttle requests against the API.
                await Task.Delay(delayMilliseconds).ConfigureAwait(false);
            }

            string url = $"{ApiBaseUrl}{endpoint}?college_id={college.id}&timestamp={UnixNow()}";
            Console.WriteLine($"{itemLogLabel} {college.id}...");
            string objectName = $"{objectPrefix}{college.id}.json";
            try
            {
                await FetchAndUploadAsync(httpClient, url, objectName, page).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Error fetching or uploading {objectName}: {ex.Message}");
            }
        });
    }

    /// <summary>
    /// Loads the stored recruit list of one college and mirrors the detail document of each entry.
    /// </summary>
    private async Task MirrorRecruitDetailsAsync(HttpClient httpClient, Datum college, int page)
    {
        string objectUrl = $"{StaticBaseUrl}college-recruitlist/{college.id}.json";
        Console.WriteLine($"down page {objectUrl}");

        IEnumerable<Recruit> recruits;
        try
        {
            using var listResponse = await httpClient.GetAsync(objectUrl).ConfigureAwait(false);
            if (!listResponse.IsSuccessStatusCode)
            {
                return;
            }

            string listJson = await listResponse.Content.ReadAsStringAsync().ConfigureAwait(false);
            RecruitRootobject root = JsonSerializer.Deserialize<RecruitRootobject>(listJson);
            if (root == null || root.code != 1 || root.data == null || root.data.recruit == null)
            {
                return;
            }

            recruits = root.data.recruit;
        }
        catch (Exception exx)
        {
            Console.WriteLine(exx.Message);
            Console.WriteLine($"{objectUrl}解析失败");
            return;
        }

        foreach (Recruit recruit in recruits)
        {
            // Throttle requests against the API.
            await Task.Delay(150).ConfigureAwait(false);
            try
            {
                string url = $"{ApiBaseUrl}recruitDetail?recruit_id={recruit.id}&timestamp={UnixNow()}";
                await FetchAndUploadAsync(httpClient, url, $"college-recruitdetail/{recruit.id}.json", page).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }
    }
}
}
/// <summary>
/// Top-level response envelope with a status code, a message and a paged <see cref="Data"/> payload.
/// </summary>
/// <remarks>
/// NOTE(review): this looks like the shape of the stored college list pages — the crawler checks
/// <c>code == 1</c> and iterates <c>data.data</c> — but the deserialization target type is not
/// visible at the call sites; confirm.
/// </remarks>
public class universitylistobject
{
// Status code; the crawler only processes responses where this equals 1.
public int code { get; set; }
public string msg { get; set; }
// Paged payload carrying the rows.
public Data data { get; set; }
}
/// <summary>
/// Payload of <see cref="universitylistobject"/>: paging fields, request parameters,
/// filter option lists and the rows themselves.
/// </summary>
public class Data
{
public int total { get; set; }
// Paging fields are typed as strings here; presumably that matches the JSON — TODO confirm.
public string has_next { get; set; }
public string page { get; set; }
public string perpage { get; set; }
// NOTE(review): the leading underscore presumably avoids the C# keyword `params`. With default
// System.Text.Json name matching this binds to a JSON key named "_params" — confirm the real key.
public Params _params { get; set; }
public Paramslist paramsList { get; set; }
// Rows of the page; the crawler iterates these and reads each row's id.
public Datum[] data { get; set; }
}
/// <summary>
/// String-typed parameter set whose names match the query-string parameters the crawler sends
/// to the list endpoint (area, type, nature, grade, is_bz, cname, page, perpage, timestamp).
/// </summary>
/// <remarks>Presumably the server's echo of the request — TODO confirm.</remarks>
public class Params
{
public string area { get; set; }
public string type { get; set; }
public string nature { get; set; }
public string grade { get; set; }
public string is_bz { get; set; }
public string cname { get; set; }
public string page { get; set; }
public string perpage { get; set; }
public string timestamp { get; set; }
}
/// <summary>
/// Container for five option arrays: areas, types, natures, "xingzhi" and classes.
/// </summary>
/// <remarks>Presumably the selectable filter values of the list endpoint — TODO confirm.</remarks>
public class Paramslist
{
public Arealist[] arealist { get; set; }
public Typelist[] typelist { get; set; }
public Naturelist[] naturelist { get; set; }
public Xingzhilist[] xingzhilist { get; set; }
public Classlist[] classlist { get; set; }
}
/// <summary>
/// Area entry with province/city fields, a display text and nested <see cref="Child"/> entries.
/// </summary>
public class Arealist
{
public int id { get; set; }
public string area { get; set; }
public string province { get; set; }
public string city { get; set; }
// Integer creation time; presumably a Unix timestamp — TODO confirm.
public int create_time { get; set; }
public string text { get; set; }
// Nested entries with the same fields as this class, minus further children.
public Child[] children { get; set; }
}
/// <summary>
/// Nested area entry used by <see cref="Arealist.children"/>; same fields as
/// <see cref="Arealist"/> without a further level of children.
/// </summary>
public class Child
{
public int id { get; set; }
public string area { get; set; }
public string province { get; set; }
public string city { get; set; }
// Integer creation time; presumably a Unix timestamp — TODO confirm.
public int create_time { get; set; }
public string text { get; set; }
}
/// <summary>
/// Option entry of <see cref="Paramslist.typelist"/>: id, type name, creation time and list order.
/// </summary>
public class Typelist
{
public int id { get; set; }
public string type_name { get; set; }
public int create_time { get; set; }
// Presumably the display sort position — TODO confirm.
public int list_order { get; set; }
}
/// <summary>
/// Option entry of <see cref="Paramslist.naturelist"/>: id, name, creation time, list order and a status-like flag.
/// </summary>
public class Naturelist
{
public int id { get; set; }
public string nature_name { get; set; }
public int create_time { get; set; }
public int list_order { get; set; }
// NOTE(review): "staue" looks like a misspelling of "status", but the name presumably mirrors
// the JSON key — do not rename without checking the payload.
public int staue { get; set; }
}
/// <summary>
/// Option entry of <see cref="Paramslist.xingzhilist"/>; declares the same properties as <see cref="Naturelist"/>.
/// </summary>
public class Xingzhilist
{
public int id { get; set; }
public string nature_name { get; set; }
public int create_time { get; set; }
public int list_order { get; set; }
// NOTE(review): "staue" presumably mirrors the JSON key — do not rename without checking the payload.
public int staue { get; set; }
}
/// <summary>
/// Option entry of <see cref="Paramslist.classlist"/>; declares the same properties as <see cref="Naturelist"/>.
/// </summary>
public class Classlist
{
public int id { get; set; }
public string nature_name { get; set; }
public int create_time { get; set; }
public int list_order { get; set; }
// NOTE(review): "staue" presumably mirrors the JSON key — do not rename without checking the payload.
public int staue { get; set; }
}
/// <summary>
/// One row of <see cref="Data.data"/>: a college with its names, ranking fields and a nested <see cref="Detail"/>.
/// </summary>
public class Datum
{
// The crawler reads an `id` from each row to build the per-college request URLs
// (college_id=...) and the OSS object names ({prefix}{id}.json).
public int id { get; set; }
public string college_name { get; set; }
public string college_name_display { get; set; }
public string rk_ranking_2024 { get; set; }
public int rk_ranking_list { get; set; }
public string college_class { get; set; }
public string college_nature { get; set; }
public Detail detail { get; set; }
public string rk_ranking { get; set; }
}
/// <summary>
/// Detail record nested in <see cref="Datum"/>: logo, classification, counts, rankings and rate fields of a college.
/// </summary>
public class Detail
{
public int id { get; set; }
public string college_name { get; set; }
public string college_logo { get; set; }
// Typed as object; presumably the JSON value type varies between responses — TODO confirm.
public object ex_rate { get; set; }
public string college_class { get; set; }
public string college_create_time { get; set; }
public string master_num { get; set; }
public string doctor_num { get; set; }
public string college_nature { get; set; }
public string college_type { get; set; }
public string college_area { get; set; }
public string college_competent { get; set; }
public string[] college_grade { get; set; }
// Typed as object; presumably the JSON value type varies between responses — TODO confirm.
public object wsl_ranking { get; set; }
public string rk_ranking { get; set; }
public string enrollment_rate_in { get; set; }
public string enrollment_rate_out { get; set; }
public string job_report { get; set; }
}
/// <summary>
/// Top-level response envelope with a status code, a message and a <see cref="RecruitData"/> payload.
/// </summary>
/// <remarks>
/// NOTE(review): this looks like the shape of the stored recruit-list objects — the crawler checks
/// <c>code == 1</c> and iterates <c>data.recruit</c> — but the deserialization target type is not
/// visible at the call site; confirm.
/// </remarks>
public class RecruitRootobject
{
// Status code; the crawler only processes responses where this equals 1.
public int code { get; set; }
public string msg { get; set; }
public RecruitData data { get; set; }
}
/// <summary>
/// Payload of <see cref="RecruitRootobject"/>: a college summary plus its recruit entries.
/// </summary>
public class RecruitData
{
    /// <summary>Summary of the college the recruit entries belong to.</summary>
    public Tag tag { get; set; }

    /// <summary>
    /// Recruit entries; the crawler iterates these and reads each entry's <c>id</c>.
    /// The element type was missing from the declaration, which does not compile.
    /// </summary>
    public List<Recruit> recruit { get; set; }
}
/// <summary>
/// College summary nested in <see cref="RecruitData"/>. Declares the same property names as
/// <see cref="Detail"/>, but with a string <c>id</c>, a string <c>ex_rate</c> and an int <c>wsl_ranking</c>.
/// </summary>
public class Tag
{
// String here, whereas Detail.id is an int; presumably matches this endpoint's JSON — TODO confirm.
public string id { get; set; }
public string college_name { get; set; }
public string college_logo { get; set; }
public string ex_rate { get; set; }
public string college_class { get; set; }
public string college_create_time { get; set; }
public string master_num { get; set; }
public string doctor_num { get; set; }
public string college_nature { get; set; }
public string college_type { get; set; }
public string college_area { get; set; }
public string college_competent { get; set; }
public string[] college_grade { get; set; }
public int wsl_ranking { get; set; }
public string rk_ranking { get; set; }
public string enrollment_rate_in { get; set; }
public string enrollment_rate_out { get; set; }
public string job_report { get; set; }
}
/// <summary>
/// One recruit entry: id, owning college id, year, college name and title.
/// </summary>
public class Recruit
{
// The crawler reads an `id` from each recruit entry to build the recruitDetail request
// (recruit_id=...) and the OSS object name college-recruitdetail/{id}.json.
public int id { get; set; }
public int base_college_id { get; set; }
public string year { get; set; }
public string college_name { get; set; }
public string title { get; set; }
}