bug fixed
parent
b2e5fd81a2
commit
75e2ba93cd
|
|
@ -23,7 +23,13 @@ using System.Text.RegularExpressions;
|
||||||
// secret_key = "AeHT1p4GzDYZuaBLZYOBa0r2Npp22uWv"
|
// secret_key = "AeHT1p4GzDYZuaBLZYOBa0r2Npp22uWv"
|
||||||
//}, "https://api.xiaoe-tech.com/");
|
//}, "https://api.xiaoe-tech.com/");
|
||||||
//Console.WriteLine(result.ToJson());
|
//Console.WriteLine(result.ToJson());
|
||||||
Console.Read();
|
|
||||||
|
|
||||||
// See https://aka.ms/new-console-template for more information
|
// See https://aka.ms/new-console-template for more information
|
||||||
Console.WriteLine("Hello, World!");
|
|
||||||
|
|
||||||
|
XueHtmlAgHelper xueHtmlAg = new XueHtmlAgHelper();
|
||||||
|
//xueHtmlAg.GetDataRecruitDetail();
|
||||||
|
Console.WriteLine("success!");
|
||||||
|
Console.Read();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,537 @@
|
||||||
|
using Aliyun.OSS;
|
||||||
|
using HtmlAgilityPack;
|
||||||
|
using NPOI.Util.ArrayExtensions;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Net.Http;
|
||||||
|
using System.Text;
|
||||||
|
using System.Text.Json;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace New_Spider
|
||||||
|
{
|
||||||
|
public class XueHtmlAgHelper
|
||||||
|
{
|
||||||
|
// Name of the OSS bucket that receives every JSON document uploaded by this class.
private readonly string bucketName = "static-data-ycymedu";

// Object-key prefix of the stored college list pages; keys are "{filePrefix}{page}.json".
private readonly string filePrefix = "college-data/page-";

// OSS client shared by every method of this class.
// SECURITY: the access key pair must never live in source control. It is read from the
// OSS_ACCESS_KEY_ID / OSS_ACCESS_KEY_SECRET environment variables; any key pair that was
// previously committed must be treated as leaked and rotated.
private readonly OssClient ossClient = new OssClient(
    "https://oss-cn-shanghai.aliyuncs.com",
    Environment.GetEnvironmentVariable("OSS_ACCESS_KEY_ID")
        ?? throw new InvalidOperationException("Environment variable OSS_ACCESS_KEY_ID is not set."),
    Environment.GetEnvironmentVariable("OSS_ACCESS_KEY_SECRET")
        ?? throw new InvalidOperationException("Environment variable OSS_ACCESS_KEY_SECRET is not set."));
|
||||||
|
|
||||||
|
/// <summary>
/// Downloads every page of the xuexingaokao college list API and stores each raw JSON
/// response in OSS as "{filePrefix}{page}.json".
/// </summary>
/// <remarks>
/// A page that fails (non-success HTTP status or any exception) is reported on the console
/// and the loop continues with the next page.
/// </remarks>
/// <returns>A task that completes once all pages have been attempted.</returns>
public async Task HtmlCreatePageData()
{
    // API configuration.
    string baseUrl = "http://api.xuexingaokao.com/api/college/base_college/index";
    int totalPages = 287;

    using var httpClient = new HttpClient();
    for (int page = 1; page <= totalPages; page++)
    {
        try
        {
            // Build the page URL; the current Unix time is sent as the "timestamp" parameter.
            var url = $"{baseUrl}?area=&type=&nature=&grade=&is_bz=&cname=&page={page}&perpage=10&timestamp={DateTimeOffset.Now.ToUnixTimeSeconds()}";
            Console.WriteLine($"Fetching page {page}...");

            // Await instead of blocking on .Result: this method is declared async.
            using var response = await httpClient.GetAsync(url);
            if (!response.IsSuccessStatusCode)
            {
                Console.WriteLine($"Failed to fetch page {page}, Status Code: {response.StatusCode}");
                continue;
            }

            var jsonData = await response.Content.ReadAsStringAsync();

            // Object key of this page inside the bucket.
            string objectName = $"{filePrefix}{page}.json";

            // PutObject is the synchronous upload call of the OSS SDK.
            using var stream = new MemoryStream(Encoding.UTF8.GetBytes(jsonData));
            ossClient.PutObject(bucketName, objectName, stream);

            Console.WriteLine($"Uploaded page {page} to OSS as {objectName}");
        }
        catch (Exception ex)
        {
            Console.WriteLine($"Error fetching or uploading page {page}: {ex.Message}");
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>
/// Downloads the detail JSON of every college and stores it in OSS as
/// "college-detail/{id}.json".
/// </summary>
/// <remarks>
/// College ids are read from the list pages stored in OSS under "{filePrefix}{page}.json".
/// A list page is only used when its <c>code</c> equals 1. Failures of a single page or a
/// single college are reported on the console and do not stop the remaining work.
/// </remarks>
/// <returns>A task that completes once all pages have been attempted.</returns>
public async Task GetDataDetail()
{
    int totalPages = 287;
    string baseUrl = "https://static-data-ycymedu.oss-cn-shanghai.aliyuncs.com/";
    string detailUrl = "http://api.xuexingaokao.com/api/college/base_college/collegeDetail?college_id=";
    string filePrefix2 = "college-detail/";

    using var httpClient = new HttpClient();
    for (int page = 1; page <= totalPages; page++)
    {
        // URL of the stored list page.
        var url = $"{baseUrl}{filePrefix}{page}.json";
        Console.WriteLine($"Fetching page {page}...");
        try
        {
            using var pageResponse = await httpClient.GetAsync(url);
            if (!pageResponse.IsSuccessStatusCode)
            {
                Console.WriteLine($"Failed to fetch page {page}, Status Code: {pageResponse.StatusCode}");
                continue;
            }

            var listPage = JsonSerializer.Deserialize<universitylistobject>(
                await pageResponse.Content.ReadAsStringAsync());
            // Skip pages that did not parse, report a non-1 code, or carry no college array.
            if (listPage?.code != 1 || listPage.data?.data == null)
            {
                continue;
            }

            foreach (var college in listPage.data.data)
            {
                try
                {
                    // Detail URL; the current Unix time is sent as the "timestamp" parameter.
                    var detailgourl = $"{detailUrl}{college.id}&timestamp={DateTimeOffset.Now.ToUnixTimeSeconds()}";
                    Console.WriteLine($"detailgourl {college.id}...");

                    using var response = await httpClient.GetAsync(detailgourl);
                    if (!response.IsSuccessStatusCode)
                    {
                        Console.WriteLine($"Failed to fetch detail {college.id} (page {page}), Status Code: {response.StatusCode}");
                        continue;
                    }

                    var jsonData = await response.Content.ReadAsStringAsync();
                    string objectName = $"{filePrefix2}{college.id}.json";

                    // PutObject is the synchronous upload call of the OSS SDK.
                    using var stream = new MemoryStream(Encoding.UTF8.GetBytes(jsonData));
                    ossClient.PutObject(bucketName, objectName, stream);
                    Console.WriteLine($"Uploaded page {page} to OSS as {objectName}");
                }
                catch (Exception itemEx)
                {
                    Console.WriteLine($"Error fetching or uploading detail {college.id} (page {page}): {itemEx.Message}");
                }
            }
        }
        catch (Exception pageEx)
        {
            Console.WriteLine($"Error processing page {page}: {pageEx.Message}");
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>
/// Downloads the recruit list of every college and stores it in OSS as
/// "college-recruitlist/{id}.json".
/// </summary>
/// <remarks>
/// College ids are read from the list pages stored in OSS under "{filePrefix}{page}.json".
/// A list page is only used when its <c>code</c> equals 1. Failures of a single page or a
/// single college are reported on the console and do not stop the remaining work.
/// </remarks>
public void GetDataRecruitList()
{
    int totalPages = 287;
    string baseUrl = "https://static-data-ycymedu.oss-cn-shanghai.aliyuncs.com/";
    string recruitListUrl = "http://api.xuexingaokao.com/api/college/base_college/recruitList?college_id=";
    string filePrefix2 = "college-recruitlist/";

    using var httpClient = new HttpClient();
    for (int page = 1; page <= totalPages; page++)
    {
        // URL of the stored list page.
        var url = $"{baseUrl}{filePrefix}{page}.json";
        Console.WriteLine($"recruitListUrl page {page}...");
        try
        {
            // GetAwaiter().GetResult() blocks like .Result but surfaces the original exception
            // instead of an AggregateException, so the logged message is meaningful.
            using var pageResponse = httpClient.GetAsync(url).GetAwaiter().GetResult();
            if (!pageResponse.IsSuccessStatusCode)
            {
                Console.WriteLine($"Failed to fetch page {page}, Status Code: {pageResponse.StatusCode}");
                continue;
            }

            var listPage = JsonSerializer.Deserialize<universitylistobject>(
                pageResponse.Content.ReadAsStringAsync().GetAwaiter().GetResult());
            // Skip pages that did not parse, report a non-1 code, or carry no college array.
            if (listPage?.code != 1 || listPage.data?.data == null)
            {
                continue;
            }

            foreach (var college in listPage.data.data)
            {
                // Fixed pause before every request — presumably to throttle the crawl.
                Thread.Sleep(200);
                try
                {
                    // Request URL; the current Unix time is sent as the "timestamp" parameter.
                    var gourl = $"{recruitListUrl}{college.id}&timestamp={DateTimeOffset.Now.ToUnixTimeSeconds()}";
                    Console.WriteLine($"recruitListUrl {college.id}...");

                    using var response = httpClient.GetAsync(gourl).GetAwaiter().GetResult();
                    if (!response.IsSuccessStatusCode)
                    {
                        Console.WriteLine($"Failed to fetch recruit list {college.id} (page {page}), Status Code: {response.StatusCode}");
                        continue;
                    }

                    var jsonData = response.Content.ReadAsStringAsync().GetAwaiter().GetResult();
                    string objectName = $"{filePrefix2}{college.id}.json";

                    // PutObject is the synchronous upload call of the OSS SDK.
                    using var stream = new MemoryStream(Encoding.UTF8.GetBytes(jsonData));
                    ossClient.PutObject(bucketName, objectName, stream);
                    Console.WriteLine($"Uploaded page {page} to OSS as {objectName}");
                }
                catch (Exception itemEx)
                {
                    Console.WriteLine($"Error fetching or uploading recruit list {college.id} (page {page}): {itemEx.Message}");
                }
            }
        }
        catch (Exception pageEx)
        {
            Console.WriteLine($"Error processing page {page}: {pageEx.Message}");
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>
/// Downloads the subject introduction of every college and stores it in OSS as
/// "college-subjectIntroduce/{id}.json".
/// </summary>
/// <remarks>
/// College ids are read from the list pages stored in OSS under "{filePrefix}{page}.json".
/// A list page is only used when its <c>code</c> equals 1. Failures of a single page or a
/// single college are reported on the console and do not stop the remaining work.
/// </remarks>
public void GetDataSubjectIntroduce()
{
    int totalPages = 287;
    string baseUrl = "https://static-data-ycymedu.oss-cn-shanghai.aliyuncs.com/";
    string subjectIntroduceurl = "http://api.xuexingaokao.com/api/college/base_college/subjectIntroduce?college_id=";
    string filePrefix2 = "college-subjectIntroduce/";

    using var httpClient = new HttpClient();
    for (int page = 1; page <= totalPages; page++)
    {
        // URL of the stored list page.
        var url = $"{baseUrl}{filePrefix}{page}.json";
        Console.WriteLine($"subjectIntroduceurl page {page}...");
        try
        {
            // GetAwaiter().GetResult() blocks like .Result but surfaces the original exception
            // instead of an AggregateException, so the logged message is meaningful.
            using var pageResponse = httpClient.GetAsync(url).GetAwaiter().GetResult();
            if (!pageResponse.IsSuccessStatusCode)
            {
                Console.WriteLine($"Failed to fetch page {page}, Status Code: {pageResponse.StatusCode}");
                continue;
            }

            var listPage = JsonSerializer.Deserialize<universitylistobject>(
                pageResponse.Content.ReadAsStringAsync().GetAwaiter().GetResult());
            // Skip pages that did not parse, report a non-1 code, or carry no college array.
            if (listPage?.code != 1 || listPage.data?.data == null)
            {
                continue;
            }

            foreach (var college in listPage.data.data)
            {
                // Fixed pause before every request — presumably to throttle the crawl.
                Thread.Sleep(200);
                try
                {
                    // Request URL; the current Unix time is sent as the "timestamp" parameter.
                    var gourl = $"{subjectIntroduceurl}{college.id}&timestamp={DateTimeOffset.Now.ToUnixTimeSeconds()}";
                    Console.WriteLine($"subjectIntroduceurl {college.id}...");

                    using var response = httpClient.GetAsync(gourl).GetAwaiter().GetResult();
                    if (!response.IsSuccessStatusCode)
                    {
                        Console.WriteLine($"Failed to fetch subject introduction {college.id} (page {page}), Status Code: {response.StatusCode}");
                        continue;
                    }

                    var jsonData = response.Content.ReadAsStringAsync().GetAwaiter().GetResult();
                    string objectName = $"{filePrefix2}{college.id}.json";

                    // PutObject is the synchronous upload call of the OSS SDK.
                    using var stream = new MemoryStream(Encoding.UTF8.GetBytes(jsonData));
                    ossClient.PutObject(bucketName, objectName, stream);
                    Console.WriteLine($"Uploaded page {page} to OSS as {objectName}");
                }
                catch (Exception itemEx)
                {
                    Console.WriteLine($"Error fetching or uploading subject introduction {college.id} (page {page}): {itemEx.Message}");
                }
            }
        }
        catch (Exception pageEx)
        {
            Console.WriteLine($"Error processing page {page}: {pageEx.Message}");
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>
/// Downloads the faculty data of every college and stores it in OSS as
/// "college-faculty/{id}.json".
/// </summary>
/// <remarks>
/// College ids are read from the list pages stored in OSS under "{filePrefix}{page}.json".
/// A list page is only used when its <c>code</c> equals 1. Failures of a single page or a
/// single college are reported on the console and do not stop the remaining work.
/// </remarks>
public void GetDataFaculty()
{
    int totalPages = 287;
    string baseUrl = "https://static-data-ycymedu.oss-cn-shanghai.aliyuncs.com/";
    string facturl = "http://api.xuexingaokao.com/api/college/base_college/faculty?college_id=";
    string filePrefix2 = "college-faculty/";

    using var httpClient = new HttpClient();
    for (int page = 1; page <= totalPages; page++)
    {
        // URL of the stored list page.
        var url = $"{baseUrl}{filePrefix}{page}.json";
        Console.WriteLine($"Fetching page {page}...");
        try
        {
            // GetAwaiter().GetResult() blocks like .Result but surfaces the original exception
            // instead of an AggregateException, so the logged message is meaningful.
            using var pageResponse = httpClient.GetAsync(url).GetAwaiter().GetResult();
            if (!pageResponse.IsSuccessStatusCode)
            {
                Console.WriteLine($"Failed to fetch page {page}, Status Code: {pageResponse.StatusCode}");
                continue;
            }

            var listPage = JsonSerializer.Deserialize<universitylistobject>(
                pageResponse.Content.ReadAsStringAsync().GetAwaiter().GetResult());
            // Skip pages that did not parse, report a non-1 code, or carry no college array.
            if (listPage?.code != 1 || listPage.data?.data == null)
            {
                continue;
            }

            foreach (var college in listPage.data.data)
            {
                // Fixed pause before every request — presumably to throttle the crawl.
                Thread.Sleep(300);
                try
                {
                    // Request URL; the current Unix time is sent as the "timestamp" parameter.
                    var gourl = $"{facturl}{college.id}&timestamp={DateTimeOffset.Now.ToUnixTimeSeconds()}";
                    Console.WriteLine($"facultyurl {college.id}...");

                    using var response = httpClient.GetAsync(gourl).GetAwaiter().GetResult();
                    if (!response.IsSuccessStatusCode)
                    {
                        Console.WriteLine($"Failed to fetch faculty {college.id} (page {page}), Status Code: {response.StatusCode}");
                        continue;
                    }

                    var jsonData = response.Content.ReadAsStringAsync().GetAwaiter().GetResult();
                    string objectName = $"{filePrefix2}{college.id}.json";

                    // PutObject is the synchronous upload call of the OSS SDK.
                    using var stream = new MemoryStream(Encoding.UTF8.GetBytes(jsonData));
                    ossClient.PutObject(bucketName, objectName, stream);
                    Console.WriteLine($"Uploaded page {page} to OSS as {objectName}");
                }
                catch (Exception itemEx)
                {
                    Console.WriteLine($"Error fetching or uploading faculty {college.id} (page {page}): {itemEx.Message}");
                }
            }
        }
        catch (Exception pageEx)
        {
            Console.WriteLine($"Error processing page {page}: {pageEx.Message}");
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>
/// Downloads the detail of every recruit (admission brochure) entry and stores it in OSS as
/// "college-recruitdetail/{recruitId}.json".
/// </summary>
/// <remarks>
/// For each college found on the stored list pages ("{filePrefix}{page}.json"), the recruit
/// list previously stored as "college-recruitlist/{collegeId}.json" is downloaded and parsed;
/// every entry of its <c>data.recruit</c> collection is then fetched from the recruitDetail
/// API. Documents are only used when their <c>code</c> equals 1. Failures of a single page,
/// college or recruit entry are reported on the console and do not stop the remaining work.
/// </remarks>
public void GetDataRecruitDetail()
{
    int totalPages = 287;
    string baseUrl = "https://static-data-ycymedu.oss-cn-shanghai.aliyuncs.com/";
    string recruitUrl = "http://api.xuexingaokao.com/api/college/base_college/recruitDetail?recruit_id=";
    string filePrefix2 = "college-recruitlist/";
    string filePrefix3 = "college-recruitdetail/";

    using var httpClient = new HttpClient();
    for (int page = 1; page <= totalPages; page++)
    {
        // URL of the stored list page.
        var url = $"{baseUrl}{filePrefix}{page}.json";
        Console.WriteLine($"recruitListUrl page {page}...");
        try
        {
            // GetAwaiter().GetResult() blocks like .Result but surfaces the original exception
            // instead of an AggregateException, so the logged message is meaningful.
            using var pageResponse = httpClient.GetAsync(url).GetAwaiter().GetResult();
            if (!pageResponse.IsSuccessStatusCode)
            {
                Console.WriteLine($"Failed to fetch page {page}, Status Code: {pageResponse.StatusCode}");
                continue;
            }

            var listPage = JsonSerializer.Deserialize<universitylistobject>(
                pageResponse.Content.ReadAsStringAsync().GetAwaiter().GetResult());
            // Skip pages that did not parse, report a non-1 code, or carry no college array.
            if (listPage?.code != 1 || listPage.data?.data == null)
            {
                continue;
            }

            foreach (var college in listPage.data.data)
            {
                // Stored recruit list of this college.
                string objectUrl = $"{baseUrl}{filePrefix2}{college.id}.json";
                Console.WriteLine($"down page {objectUrl}");
                try
                {
                    using var responseitem = httpClient.GetAsync(objectUrl).GetAwaiter().GetResult();
                    if (!responseitem.IsSuccessStatusCode)
                    {
                        continue;
                    }

                    var items = JsonSerializer.Deserialize<RecruitRootobject>(
                        responseitem.Content.ReadAsStringAsync().GetAwaiter().GetResult());
                    // Skip documents that did not parse, report a non-1 code, or list no entries.
                    if (items?.code != 1 || items.data?.recruit == null)
                    {
                        continue;
                    }

                    foreach (var recruit in items.data.recruit)
                    {
                        // Fixed pause before every request — presumably to throttle the crawl.
                        Thread.Sleep(150);
                        try
                        {
                            // Request URL; the current Unix time is sent as the "timestamp" parameter.
                            var itemGoUrl = $"{recruitUrl}{recruit.id}&timestamp={DateTimeOffset.Now.ToUnixTimeSeconds()}";
                            using var response = httpClient.GetAsync(itemGoUrl).GetAwaiter().GetResult();
                            if (!response.IsSuccessStatusCode)
                            {
                                Console.WriteLine($"Failed to fetch recruit detail {recruit.id} (page {page}), Status Code: {response.StatusCode}");
                                continue;
                            }

                            var jsonData = response.Content.ReadAsStringAsync().GetAwaiter().GetResult();
                            string objectName = $"{filePrefix3}{recruit.id}.json";

                            // PutObject is the synchronous upload call of the OSS SDK.
                            using var stream = new MemoryStream(Encoding.UTF8.GetBytes(jsonData));
                            ossClient.PutObject(bucketName, objectName, stream);
                            Console.WriteLine($"Uploaded page {page} to OSS as {objectName}");
                        }
                        catch (Exception ex)
                        {
                            Console.WriteLine($"Error fetching or uploading recruit detail {recruit.id}: {ex.Message}");
                        }
                    }
                }
                catch (Exception exx)
                {
                    // Download or parse of the stored recruit list failed for this college.
                    Console.WriteLine(exx.Message);
                    Console.WriteLine($"{objectUrl}解析失败");
                }
            }
        }
        catch (Exception pageEx)
        {
            Console.WriteLine($"Error processing page {page}: {pageEx.Message}");
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>
/// Root object of one stored college list page ("college-data/page-{n}.json").
/// Property names are lower-case because they are matched against the JSON keys as-is.
/// </summary>
public class universitylistobject
{
    /// <summary>Status code of the response; the crawler only processes pages whose code is 1.</summary>
    public int code { get; set; }

    /// <summary>Message accompanying <see cref="code"/>.</summary>
    public string msg { get; set; }

    /// <summary>Paging information and the colleges of this page.</summary>
    public Data data { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Payload of a college list page: paging fields, the request parameters, the filter option
/// lists and the colleges themselves.
/// </summary>
public class Data
{
    /// <summary>Value of the "total" key.</summary>
    public int total { get; set; }

    /// <summary>Value of the "has_next" key, kept as text.</summary>
    public string has_next { get; set; }

    /// <summary>Value of the "page" key, kept as text.</summary>
    public string page { get; set; }

    /// <summary>Value of the "perpage" key, kept as text.</summary>
    public string perpage { get; set; }

    // NOTE(review): this property is named "_params", so System.Text.Json only fills it from a
    // JSON key spelled "_params". If the API key is actually "params" (a C# keyword, which would
    // explain the underscore) a [JsonPropertyName("params")] attribute is needed — confirm
    // against a stored page before changing.
    /// <summary>Request parameters of the page.</summary>
    public Params _params { get; set; }

    /// <summary>Option lists (areas, types, natures, ...) delivered with the page.</summary>
    public Paramslist paramsList { get; set; }

    /// <summary>The colleges of this page; the crawler iterates this array and uses each <c>id</c>.</summary>
    public Datum[] data { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Request parameters of a college list query. The property names match the query-string
/// parameters of the list URL (area, type, nature, grade, is_bz, cname, page, perpage, timestamp).
/// </summary>
public class Params
{
    public string area { get; set; }

    public string type { get; set; }

    public string nature { get; set; }

    public string grade { get; set; }

    public string is_bz { get; set; }

    public string cname { get; set; }

    public string page { get; set; }

    public string perpage { get; set; }

    public string timestamp { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Option lists delivered with a college list page, one array per JSON key.
/// </summary>
public class Paramslist
{
    /// <summary>Entries of the "arealist" key.</summary>
    public Arealist[] arealist { get; set; }

    /// <summary>Entries of the "typelist" key.</summary>
    public Typelist[] typelist { get; set; }

    /// <summary>Entries of the "naturelist" key.</summary>
    public Naturelist[] naturelist { get; set; }

    /// <summary>Entries of the "xingzhilist" key.</summary>
    public Xingzhilist[] xingzhilist { get; set; }

    /// <summary>Entries of the "classlist" key.</summary>
    public Classlist[] classlist { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// One entry of the "arealist" option list; carries nested <see cref="Child"/> entries.
/// </summary>
public class Arealist
{
    public int id { get; set; }

    public string area { get; set; }

    public string province { get; set; }

    public string city { get; set; }

    /// <summary>Value of the "create_time" key (presumably a Unix timestamp — confirm).</summary>
    public int create_time { get; set; }

    public string text { get; set; }

    /// <summary>Nested entries of the "children" key.</summary>
    public Child[] children { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// One nested entry of <c>Arealist.children</c>; same fields as its parent minus the children.
/// </summary>
public class Child
{
    public int id { get; set; }

    public string area { get; set; }

    public string province { get; set; }

    public string city { get; set; }

    /// <summary>Value of the "create_time" key (presumably a Unix timestamp — confirm).</summary>
    public int create_time { get; set; }

    public string text { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// One entry of the "typelist" option list.
/// </summary>
public class Typelist
{
    public int id { get; set; }

    public string type_name { get; set; }

    /// <summary>Value of the "create_time" key (presumably a Unix timestamp — confirm).</summary>
    public int create_time { get; set; }

    public int list_order { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// One entry of the "naturelist" option list.
/// </summary>
public class Naturelist
{
    public int id { get; set; }

    public string nature_name { get; set; }

    /// <summary>Value of the "create_time" key (presumably a Unix timestamp — confirm).</summary>
    public int create_time { get; set; }

    public int list_order { get; set; }

    // The spelling "staue" is the JSON key and must not be corrected here
    // (looks like a misspelt "status" on the API side — confirm).
    public int staue { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// One entry of the "xingzhilist" option list.
/// </summary>
public class Xingzhilist
{
    public int id { get; set; }

    public string nature_name { get; set; }

    /// <summary>Value of the "create_time" key (presumably a Unix timestamp — confirm).</summary>
    public int create_time { get; set; }

    public int list_order { get; set; }

    // The spelling "staue" is the JSON key and must not be corrected here
    // (looks like a misspelt "status" on the API side — confirm).
    public int staue { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// One entry of the "classlist" option list.
/// </summary>
public class Classlist
{
    public int id { get; set; }

    public string nature_name { get; set; }

    /// <summary>Value of the "create_time" key (presumably a Unix timestamp — confirm).</summary>
    public int create_time { get; set; }

    public int list_order { get; set; }

    // The spelling "staue" is the JSON key and must not be corrected here
    // (looks like a misspelt "status" on the API side — confirm).
    public int staue { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// One college of a list page. The crawler uses <see cref="id"/> as the <c>college_id</c>
/// query parameter and as the file name of the per-college objects it uploads.
/// </summary>
public class Datum
{
    /// <summary>College id.</summary>
    public int id { get; set; }

    public string college_name { get; set; }

    public string college_name_display { get; set; }

    public string rk_ranking_2024 { get; set; }

    public int rk_ranking_list { get; set; }

    public string college_class { get; set; }

    public string college_nature { get; set; }

    /// <summary>Nested detail record of the college.</summary>
    public Detail detail { get; set; }

    public string rk_ranking { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Detail record nested in each college of a list page (<c>Datum.detail</c>).
/// </summary>
public class Detail
{
    public int id { get; set; }

    public string college_name { get; set; }

    public string college_logo { get; set; }

    // Declared as object, so System.Text.Json accepts any JSON value here and stores it as a
    // JsonElement (presumably because the API does not use one fixed type — confirm).
    public object ex_rate { get; set; }

    public string college_class { get; set; }

    public string college_create_time { get; set; }

    public string master_num { get; set; }

    public string doctor_num { get; set; }

    public string college_nature { get; set; }

    public string college_type { get; set; }

    public string college_area { get; set; }

    public string college_competent { get; set; }

    public string[] college_grade { get; set; }

    // Declared as object for the same reason as ex_rate.
    public object wsl_ranking { get; set; }

    public string rk_ranking { get; set; }

    public string enrollment_rate_in { get; set; }

    public string enrollment_rate_out { get; set; }

    public string job_report { get; set; }
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>
/// Root object of a stored recruit list document ("college-recruitlist/{collegeId}.json").
/// </summary>
public class RecruitRootobject
{
    /// <summary>Status code of the response; the crawler only processes documents whose code is 1.</summary>
    public int code { get; set; }

    /// <summary>Message accompanying <see cref="code"/>.</summary>
    public string msg { get; set; }

    /// <summary>College tag plus the recruit entries.</summary>
    public RecruitData data { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Payload of a recruit list document.
/// </summary>
public class RecruitData
{
    /// <summary>College record delivered under the "tag" key.</summary>
    public Tag tag { get; set; }

    /// <summary>Recruit entries; the crawler fetches the detail of each entry by its <c>id</c>.</summary>
    public List<Recruit> recruit { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// College record delivered under the "tag" key of a recruit list document. It has the same
/// field names as <c>Detail</c>, but here <c>id</c> and <c>ex_rate</c> are strings and
/// <c>wsl_ranking</c> is an int.
/// </summary>
public class Tag
{
    public string id { get; set; }

    public string college_name { get; set; }

    public string college_logo { get; set; }

    public string ex_rate { get; set; }

    public string college_class { get; set; }

    public string college_create_time { get; set; }

    public string master_num { get; set; }

    public string doctor_num { get; set; }

    public string college_nature { get; set; }

    public string college_type { get; set; }

    public string college_area { get; set; }

    public string college_competent { get; set; }

    public string[] college_grade { get; set; }

    public int wsl_ranking { get; set; }

    public string rk_ranking { get; set; }

    public string enrollment_rate_in { get; set; }

    public string enrollment_rate_out { get; set; }

    public string job_report { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// One entry of a college's recruit list. The crawler uses <see cref="id"/> as the
/// <c>recruit_id</c> query parameter and as the file name of the uploaded detail object.
/// </summary>
public class Recruit
{
    /// <summary>Recruit entry id.</summary>
    public int id { get; set; }

    public int base_college_id { get; set; }

    public string year { get; set; }

    public string college_name { get; set; }

    public string title { get; set; }
}
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace New_Spider.xuexingaokao
|
||||||
|
{
|
||||||
|
/// <summary>
/// Empty placeholder type; it declares no members.
/// </summary>
internal class XueHtmlDto
{
}
|
||||||
|
}
|
||||||
|
|
@ -7,7 +7,7 @@ using System.Text;
|
||||||
using System.Text.Json;
|
using System.Text.Json;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
namespace New_Spider
|
namespace New_Spider.阳光高考
|
||||||
{
|
{
|
||||||
public class HtmlAgHelper
|
public class HtmlAgHelper
|
||||||
{
|
{
|
||||||
|
|
@ -37,7 +37,7 @@ namespace New_Spider
|
||||||
|
|
||||||
Console.WriteLine(string.Format("item: a:https://gaokao.chsi.com.cn/{0};name:{1}", item.Attributes["href"].Value, item.InnerText));
|
Console.WriteLine(string.Format("item: a:https://gaokao.chsi.com.cn/{0};name:{1}", item.Attributes["href"].Value, item.InnerText));
|
||||||
|
|
||||||
HtmlUniversityAgResolve(doc, webClient, item, item.InnerText.Trim(),jsonobjs);
|
HtmlUniversityAgResolve(doc, webClient, item, item.InnerText.Trim(), jsonobjs);
|
||||||
}
|
}
|
||||||
// Console.WriteLine(k);
|
// Console.WriteLine(k);
|
||||||
Thread.Sleep(1000);
|
Thread.Sleep(1000);
|
||||||
|
|
@ -62,7 +62,7 @@ namespace New_Spider
|
||||||
/// <param name="htmlWeb"></param>
|
/// <param name="htmlWeb"></param>
|
||||||
/// <param name="htmlNode"></param>
|
/// <param name="htmlNode"></param>
|
||||||
/// <param name="name"></param>
|
/// <param name="name"></param>
|
||||||
public void HtmlUniversityAgResolve(HtmlDocument htmldoc, HtmlWeb htmlWeb, HtmlNode htmlNode,string name, List<JsonObj> jsons)
|
public void HtmlUniversityAgResolve(HtmlDocument htmldoc, HtmlWeb htmlWeb, HtmlNode htmlNode, string name, List<JsonObj> jsons)
|
||||||
{
|
{
|
||||||
htmldoc = htmlWeb.Load(string.Format("https://gaokao.chsi.com.cn/{0}", htmlNode.Attributes["href"].Value));
|
htmldoc = htmlWeb.Load(string.Format("https://gaokao.chsi.com.cn/{0}", htmlNode.Attributes["href"].Value));
|
||||||
|
|
||||||
|
|
@ -9,7 +9,7 @@ using System.Text;
|
||||||
using System.Text.RegularExpressions;
|
using System.Text.RegularExpressions;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
namespace New_Spider
|
namespace New_Spider.阳光高考
|
||||||
{
|
{
|
||||||
public class HtmlAgMajorHelper
|
public class HtmlAgMajorHelper
|
||||||
{
|
{
|
||||||
|
|
@ -47,8 +47,8 @@ namespace New_Spider
|
||||||
{
|
{
|
||||||
|
|
||||||
var basejson = UniversityTypeRelsove.GetFileJson(AppDomain.CurrentDomain.SetupInformation.ApplicationBase + "Files/1070.json");
|
var basejson = UniversityTypeRelsove.GetFileJson(AppDomain.CurrentDomain.SetupInformation.ApplicationBase + "Files/1070.json");
|
||||||
// var jsontext2 = Regex.Unescape(jsontext.Remove(jsontext.Length - 1, 1).Remove(0, 1));
|
// var jsontext2 = Regex.Unescape(jsontext.Remove(jsontext.Length - 1, 1).Remove(0, 1));
|
||||||
|
|
||||||
var jsons = JsonConvert.DeserializeObject<Rootobject>(basejson);
|
var jsons = JsonConvert.DeserializeObject<Rootobject>(basejson);
|
||||||
string pathFile = AppDomain.CurrentDomain.SetupInformation.ApplicationBase + "Files/Type/";
|
string pathFile = AppDomain.CurrentDomain.SetupInformation.ApplicationBase + "Files/Type/";
|
||||||
//判断文件夹是否存在
|
//判断文件夹是否存在
|
||||||
|
|
@ -61,18 +61,18 @@ namespace New_Spider
|
||||||
var list = jsons.msg.ToList();
|
var list = jsons.msg.ToList();
|
||||||
list.ForEach(c =>
|
list.ForEach(c =>
|
||||||
{
|
{
|
||||||
|
|
||||||
wc.DownloadFile(string.Format("https://gaokao.chsi.com.cn/zyk/zybk/xkCategory/{0}?_t=1694748927326", c.key), string.Format(pathFile + "{0}.json", c.key));
|
wc.DownloadFile(string.Format("https://gaokao.chsi.com.cn/zyk/zybk/xkCategory/{0}?_t=1694748927326", c.key), string.Format(pathFile + "{0}.json", c.key));
|
||||||
});
|
});
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// 下载三级分类
|
/// 下载三级分类
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public void DownloadChildTypeListFile()
|
public void DownloadChildTypeListFile()
|
||||||
{
|
{
|
||||||
|
|
||||||
string[] files = Directory.GetFiles(AppDomain.CurrentDomain.SetupInformation.ApplicationBase + "Files/Type/");
|
string[] files = Directory.GetFiles(AppDomain.CurrentDomain.SetupInformation.ApplicationBase + "Files/Type/");
|
||||||
foreach (string itemfile in files)
|
foreach (string itemfile in files)
|
||||||
{
|
{
|
||||||
|
|
@ -8,7 +8,7 @@ using System.Net;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
namespace New_Spider
|
namespace New_Spider.阳光高考
|
||||||
{
|
{
|
||||||
public class HtmlAgNewsHelper
|
public class HtmlAgNewsHelper
|
||||||
{
|
{
|
||||||
|
|
@ -54,8 +54,8 @@ namespace New_Spider
|
||||||
options.AddArgument("--headless");
|
options.AddArgument("--headless");
|
||||||
options.AddArgument("--no-sandbox");
|
options.AddArgument("--no-sandbox");
|
||||||
options.AddArgument("--disable-dev-shm-usage");
|
options.AddArgument("--disable-dev-shm-usage");
|
||||||
String driverExecutableFileName = "chromedriver";
|
string driverExecutableFileName = "chromedriver";
|
||||||
String driverPath = "D:\\一草一木教育\\newgaokao\\New_Spider\\bin\\Debug\\net6.0\\";
|
string driverPath = "D:\\一草一木教育\\newgaokao\\New_Spider\\bin\\Debug\\net6.0\\";
|
||||||
ChromeDriverService service = ChromeDriverService.CreateDefaultService(driverPath, driverExecutableFileName);
|
ChromeDriverService service = ChromeDriverService.CreateDefaultService(driverPath, driverExecutableFileName);
|
||||||
var driver = new ChromeDriver(service, options);
|
var driver = new ChromeDriver(service, options);
|
||||||
|
|
||||||
|
|
@ -66,7 +66,7 @@ namespace New_Spider
|
||||||
Thread.Sleep(1000); // 这里使用简单的等待,更好的做法是使用 WebDriverWait
|
Thread.Sleep(1000); // 这里使用简单的等待,更好的做法是使用 WebDriverWait
|
||||||
|
|
||||||
// 获取动态内容
|
// 获取动态内容
|
||||||
var content = driver.FindElement(By.XPath("//*[@id=\"app\"]/div[2]/div[2]"));
|
var content = driver.FindElement(By.XPath("//*[@id=\"app\"]/div[2]/div[2]"));
|
||||||
|
|
||||||
|
|
||||||
// 也可以进一步获取子节点的子节点
|
// 也可以进一步获取子节点的子节点
|
||||||
|
|
@ -75,8 +75,8 @@ namespace New_Spider
|
||||||
{
|
{
|
||||||
var grandChild = grandChildElement.FindElement(By.TagName("span"));
|
var grandChild = grandChildElement.FindElement(By.TagName("span"));
|
||||||
var grandChildText = grandChild.Text;
|
var grandChildText = grandChild.Text;
|
||||||
var titletext= grandChildElement.FindElement(By.TagName("a")).Text;
|
var titletext = grandChildElement.FindElement(By.TagName("a")).Text;
|
||||||
var aurl= grandChildElement.FindElement(By.TagName("a")).GetAttribute("href");
|
var aurl = grandChildElement.FindElement(By.TagName("a")).GetAttribute("href");
|
||||||
Console.WriteLine($"Grandchild Element Text: {grandChildText}");
|
Console.WriteLine($"Grandchild Element Text: {grandChildText}");
|
||||||
}
|
}
|
||||||
|
|
||||||
Loading…
Reference in New Issue