120 lines
3.8 KiB
C#
120 lines
3.8 KiB
C#
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using HtmlAgilityPack;
using New_College.Common.Helper;
using Newtonsoft.Json;
|
|
|
|
namespace New_Spider
|
|
{
|
|
public class HtmlAgMajorHelper
|
|
{
|
|
|
|
/// <summary>
/// Downloads the top-level category list (<c>mlCategory/1070</c>) to
/// <c>Files/1070.json</c> under the application base directory.
/// </summary>
/// <remarks>
/// The <c>Files/</c> folder is created if it is missing. Download progress is
/// written to the console. If the download has not finished after ten seconds
/// it is cancelled.
/// This method is <c>async void</c>, so callers cannot observe its outcome;
/// a timeout or a <see cref="WebException"/> is therefore reported on the
/// error console instead of being allowed to escape as an unobserved exception.
/// </remarks>
public async void HtmlCreatePageData()
{
    string pathFile = AppDomain.CurrentDomain.SetupInformation.ApplicationBase + "Files/";

    // CreateDirectory does nothing when the folder already exists,
    // so a separate Directory.Exists check is unnecessary.
    Directory.CreateDirectory(pathFile);

    using (WebClient wc = new WebClient())
    {
        wc.DownloadProgressChanged += (sender, args) => Console.WriteLine(args.ProgressPercentage + "% complete");

        try
        {
            Task download = wc.DownloadFileTaskAsync("https://gaokao.chsi.com.cn/zyk/zybk/mlCategory/1070?_t=1694748266332", pathFile + "1070.json");

            // Race the download against a ten-second timer; cancel only when
            // the timer wins, so a finished download is never touched.
            Task finished = await Task.WhenAny(download, Task.Delay(10000));
            if (finished != download)
            {
                wc.CancelAsync();
            }

            // Await the download itself so that cancellation or a network
            // error surfaces here, inside the try block.
            await download;
        }
        catch (OperationCanceledException)
        {
            Console.Error.WriteLine("Download of 1070.json was cancelled after exceeding the 10 second limit.");
        }
        catch (WebException ex)
        {
            Console.Error.WriteLine("Download of 1070.json failed: " + ex.Message);
        }
    }
}
|
|
|
|
|
|
/// <summary>
/// Downloads the second-level category lists.
/// </summary>
/// <remarks>
/// Loads <c>Files/1070.json</c> (under the application base directory) through
/// <c>UniversityTypeRelsove.GetFileJson</c>, deserializes it into a
/// <c>Rootobject</c>, and for every entry of its <c>msg</c> array downloads
/// <c>xkCategory/{key}</c> to <c>Files/Type/{key}.json</c>.
/// The <c>Files/Type/</c> folder is created if it is missing. When the file
/// deserializes to nothing or has no <c>msg</c> array, nothing is downloaded;
/// entries that are null or have an empty key are skipped.
/// </remarks>
public void DownloadTypeListFile()
{
    string baseDirectory = AppDomain.CurrentDomain.SetupInformation.ApplicationBase;

    var basejson = UniversityTypeRelsove.GetFileJson(baseDirectory + "Files/1070.json");
    var jsons = JsonConvert.DeserializeObject<Rootobject>(basejson);

    string pathFile = baseDirectory + "Files/Type/";

    // CreateDirectory does nothing when the folder already exists.
    Directory.CreateDirectory(pathFile);

    if (jsons == null || jsons.msg == null)
    {
        return;
    }

    using (WebClient wc = new WebClient())
    {
        foreach (var c in jsons.msg)
        {
            if (c == null || string.IsNullOrEmpty(c.key))
            {
                continue;
            }

            string url = string.Format("https://gaokao.chsi.com.cn/zyk/zybk/xkCategory/{0}?_t=1694748927326", c.key);

            // Build the target path by concatenation: the directory must never be
            // used as a composite format string, because a '{' or '}' in the
            // application path would make string.Format throw or mangle the path.
            string target = pathFile + c.key + ".json";

            wc.DownloadFile(url, target);
        }
    }
}
|
|
|
|
/// <summary>
/// Downloads the third-level category lists.
/// </summary>
/// <remarks>
/// For every file in <c>Files/Type/</c> (under the application base directory)
/// the content is loaded through <c>UniversityTypeRelsove.GetFileJson</c>,
/// deserialized into a <c>Rootobject</c>, and for every entry of its
/// <c>msg</c> array <c>specialityesByCategory/{key}</c> is downloaded to
/// <c>Files/TypeChild/{key}.json</c>. The thread sleeps 300 ms after each
/// processed input file. Input files that deserialize to nothing or have no
/// <c>msg</c> array are skipped, as are entries that are null or have an empty key.
/// </remarks>
/// <exception cref="DirectoryNotFoundException">
/// Thrown by <see cref="Directory.GetFiles(string)"/> when <c>Files/Type/</c> does not exist.
/// </exception>
public void DownloadChildTypeListFile()
{
    string baseDirectory = AppDomain.CurrentDomain.SetupInformation.ApplicationBase;

    string[] files = Directory.GetFiles(baseDirectory + "Files/Type/");

    // The output folder is the same for every input file, so resolve and
    // create it once instead of on every loop iteration.
    string pathFile = baseDirectory + "Files/TypeChild/";
    Directory.CreateDirectory(pathFile);

    // One client is reused for all requests and disposed when the run ends.
    using (WebClient wc = new WebClient())
    {
        foreach (string itemfile in files)
        {
            var basejson = UniversityTypeRelsove.GetFileJson(itemfile);
            var jsons = JsonConvert.DeserializeObject<Rootobject>(basejson);

            if (jsons == null || jsons.msg == null)
            {
                // Nothing usable in this file; move on rather than abort the whole run.
                continue;
            }

            foreach (var c in jsons.msg)
            {
                if (c == null || string.IsNullOrEmpty(c.key))
                {
                    continue;
                }

                string url = string.Format("https://gaokao.chsi.com.cn/zyk/zybk/specialityesByCategory/{0}?_t=1694748927326", c.key);

                // Concatenate instead of using the directory as a format string:
                // braces in the application path would break string.Format.
                string target = pathFile + c.key + ".json";

                wc.DownloadFile(url, target);
            }

            // Pause between input files, as the original did.
            Thread.Sleep(300);
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Deserialization target for the category JSON files read by this helper.
/// Member names are lower-case to match the property names used when deserializing.
/// </summary>
public class Rootobject
{
    /// <summary>
    /// The entries contained in the document; the download methods iterate
    /// these and use each entry's <c>key</c>.
    /// </summary>
    public Msg[] msg { get; set; }

    /// <summary>
    /// Boolean flag carried by the document.
    /// NOTE(review): presumably a success indicator; it is not read anywhere in this file.
    /// </summary>
    public bool flag { get; set; }
}
|
|
|
|
/// <summary>
/// A single entry of a <c>Rootobject.msg</c> array.
/// </summary>
public class Msg
{
    /// <summary>
    /// Identifier of the entry; the download methods insert it into the
    /// request URL and use it as the output file name.
    /// </summary>
    public string key { get; set; }

    /// <summary>
    /// Name of the entry.
    /// NOTE(review): presumably the display name of the category; it is not read in this file.
    /// </summary>
    public string name { get; set; }
}
|
|
|
|
|
|
}
|
|
}
|