feat:爬虫数据部分提交
parent
51c8c9a526
commit
9edc8a7aa2
|
|
@ -1,4 +1,5 @@
|
||||||
using Microsoft.AspNetCore.Mvc;
|
using Microsoft.AspNetCore.Mvc;
|
||||||
|
using New_College.Common.Helper;
|
||||||
using New_College.IServices;
|
using New_College.IServices;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
|
@ -24,6 +25,7 @@ namespace New_College.Controllers
|
||||||
[HttpGet]
|
[HttpGet]
|
||||||
public IActionResult Get()
|
public IActionResult Get()
|
||||||
{
|
{
|
||||||
|
|
||||||
return Ok();
|
return Ok();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -35,7 +37,9 @@ namespace New_College.Controllers
|
||||||
public Task<bool> Post()
|
public Task<bool> Post()
|
||||||
{
|
{
|
||||||
// return t_EnrollmentPlane.Import();
|
// return t_EnrollmentPlane.Import();
|
||||||
return t_EnrollmentPlane.Importuniverbaseinfo();
|
// return t_EnrollmentPlane.Importuniverbaseinfo();
|
||||||
|
|
||||||
|
return t_EnrollmentPlane.universitydetailupdate();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -527,7 +527,7 @@
|
||||||
</member>
|
</member>
|
||||||
<member name="P:New_College.Model.Models.D_University.Nature">
|
<member name="P:New_College.Model.Models.D_University.Nature">
|
||||||
<summary>
|
<summary>
|
||||||
办学性质 1、公办,2、民办
|
办学性质 1、公办,2、民办,3中外合作,4 港澳台
|
||||||
</summary>
|
</summary>
|
||||||
</member>
|
</member>
|
||||||
<member name="P:New_College.Model.Models.D_University.Ascription">
|
<member name="P:New_College.Model.Models.D_University.Ascription">
|
||||||
|
|
@ -4799,6 +4799,11 @@
|
||||||
是否双一流
|
是否双一流
|
||||||
</summary>
|
</summary>
|
||||||
</member>
|
</member>
|
||||||
|
<member name="P:New_College.Model.ViewModels.UniversityResult.QJJH">
|
||||||
|
<summary>
|
||||||
|
强基
|
||||||
|
</summary>
|
||||||
|
</member>
|
||||||
<member name="P:New_College.Model.ViewModels.UniversityResult.Logo">
|
<member name="P:New_College.Model.ViewModels.UniversityResult.Logo">
|
||||||
<summary>
|
<summary>
|
||||||
学校logo
|
学校logo
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,88 @@
|
||||||
|
using Newtonsoft.Json;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.IO;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
|
namespace New_College.Common.Helper
|
||||||
|
{
|
||||||
|
/// <summary>
/// Helpers for translating university type codes and for reading JSON text files.
/// </summary>
public static class UniversityTypeRelsove
{
    // Display names indexed by type code:
    // 0 comprehensive, 1 science and engineering, 2 medical, 3 military, 4 language,
    // 5 teacher training, 6 finance and economics, 7 politics and law, 8 ethnic,
    // 9 agriculture and forestry, 10 arts, 11 sports, 12 other.
    private static readonly string[] TypeNames =
    {
        "综合类",
        "理工类",
        "医学类",
        "军事类",
        "语言类",
        "师范类",
        "财经类",
        "政法类",
        "民族类",
        "农林类",
        "艺术类",
        "体育类",
        "其他",
    };

    /// <summary>
    /// Converts a numeric university type code into its display name.
    /// </summary>
    /// <param name="type">Type code; the known codes are 0 through 12.</param>
    /// <returns>
    /// The display name for the code, or an empty string when the code is not one of the known values.
    /// </returns>
    public static string GetType(int type)
    {
        bool isKnownCode = type >= 0 && type < TypeNames.Length;
        return isKnownCode ? TypeNames[type] : string.Empty;
    }

    /// <summary>
    /// Reads the whole content of a text file, decoding it as UTF-8.
    /// </summary>
    /// <param name="filepath">Path of the file to read.</param>
    /// <returns>The complete file content as a string.</returns>
    public static string GetFileJson(string filepath)
    {
        // FileShare.ReadWrite lets the read succeed while another handle still has the file open for writing.
        using (var stream = new FileStream(filepath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
        using (var reader = new StreamReader(stream, Encoding.UTF8))
        {
            return reader.ReadToEnd();
        }
    }
}
|
||||||
|
}
|
||||||
|
|
@ -14,7 +14,7 @@ namespace New_College.IServices
|
||||||
public interface IT_EnrollmentPlaneServices : IBaseServices<T_EnrollmentPlane>
|
public interface IT_EnrollmentPlaneServices : IBaseServices<T_EnrollmentPlane>
|
||||||
{
|
{
|
||||||
Task<bool> Import();
|
Task<bool> Import();
|
||||||
|
Task<bool> universitydetailupdate();
|
||||||
Task<bool> Importuniverbaseinfo();
|
Task<bool> Importuniverbaseinfo();
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// 根据大学或者专业获取招生计划
|
/// 根据大学或者专业获取招生计划
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,7 @@ namespace New_College.Model.Models
|
||||||
public string Build_Date { get; set; }
|
public string Build_Date { get; set; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// 办学性质 1、公办,2、民办
|
/// 办学性质 1、公办,2、民办,3中外合作,4 港澳台
|
||||||
/// </summary>
|
/// </summary>
|
||||||
[SugarColumn(IsNullable = true)]
|
[SugarColumn(IsNullable = true)]
|
||||||
public int Nature { get; set; }
|
public int Nature { get; set; }
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,11 @@ namespace New_College.Model.ViewModels
|
||||||
/// 是否双一流
|
/// 是否双一流
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public bool? Syl { get; set; }
|
public bool? Syl { get; set; }
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// 强基
|
||||||
|
/// </summary>
|
||||||
|
public bool? QJJH { get; set; }
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// 学校logo
|
/// 学校logo
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
|
|
||||||
|
|
@ -304,6 +304,7 @@ namespace New_College.Services
|
||||||
Syl = university.Syl == 1 ? true : false,
|
Syl = university.Syl == 1 ? true : false,
|
||||||
Nhef = university.Nhef == 1 ? true : false,
|
Nhef = university.Nhef == 1 ? true : false,
|
||||||
Sff = university.Sff == 1 ? true : false,
|
Sff = university.Sff == 1 ? true : false,
|
||||||
|
QJJH = university.QJJH == 1 ? true : false,
|
||||||
UniversityType = university.Type,
|
UniversityType = university.Type,
|
||||||
SubjectLevel = university.Subject_Level,
|
SubjectLevel = university.Subject_Level,
|
||||||
Imglist = imgs,
|
Imglist = imgs,
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ using System.Threading.Tasks;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using New_College.IRepository;
|
using New_College.IRepository;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
|
using New_College.Common.Helper;
|
||||||
|
|
||||||
namespace New_College.Services
|
namespace New_College.Services
|
||||||
{
|
{
|
||||||
|
|
@ -161,14 +162,14 @@ namespace New_College.Services
|
||||||
{
|
{
|
||||||
Id = c.Id,
|
Id = c.Id,
|
||||||
Name = c.Name,
|
Name = c.Name,
|
||||||
Nature = c.Nature == 1 ? "公立" : "私立",
|
Nature = c.Nature == 0 ? "公办" : c.Nature == 1 ? "民办" : c.Nature == 2 ? "中外合作" : "港澳台合作",
|
||||||
AscriptionName = string.IsNullOrWhiteSpace(c.AscriptionName) ? "-" : c.AscriptionName,
|
AscriptionName = string.IsNullOrWhiteSpace(c.AscriptionName) ? "-" : c.AscriptionName,
|
||||||
Rank = c.Rank,
|
Rank = c.Rank,
|
||||||
AreaName = c.Area_Name,
|
AreaName = c.Area_Name,
|
||||||
Syl = c.Syl == 1 ? "是" : "否",
|
Syl = c.Syl == 1 ? "是" : "否",
|
||||||
Nhef = c.Nhef == 1 ? "是" : "否",
|
Nhef = c.Nhef == 1 ? "是" : "否",
|
||||||
Sff = c.Sff == 1 ? "是" : "否",
|
Sff = c.Sff == 1 ? "是" : "否",
|
||||||
UniversityType = c.Type == 0 ? "综合" : c.Type == 1 ? "理工" : "医学类",
|
UniversityType = UniversityTypeRelsove.GetType(c.Type),
|
||||||
BuildDate = c.Build_Date,
|
BuildDate = c.Build_Date,
|
||||||
SubjectLevel = c.Subject_Level == 1 ? "本科" : c.Subject_Level == 2 ? "专科" : "-",
|
SubjectLevel = c.Subject_Level == 1 ? "本科" : c.Subject_Level == 2 ? "专科" : "-",
|
||||||
AcademicianCount = c.Academician_Count <= 0 ? "-" : c.Academician_Count.ToString(),
|
AcademicianCount = c.Academician_Count <= 0 ? "-" : c.Academician_Count.ToString(),
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,10 @@ using New_College.Model.ViewModels.Result;
|
||||||
using New_College.Model.ViewModels.Query;
|
using New_College.Model.ViewModels.Query;
|
||||||
using New_College.Model;
|
using New_College.Model;
|
||||||
using System.Text.RegularExpressions;
|
using System.Text.RegularExpressions;
|
||||||
|
using New_College.Common.Helper;
|
||||||
|
using Newtonsoft.Json;
|
||||||
|
using System.IO;
|
||||||
|
using System.Threading;
|
||||||
|
|
||||||
namespace New_College.Services
|
namespace New_College.Services
|
||||||
{
|
{
|
||||||
|
|
@ -51,6 +55,54 @@ namespace New_College.Services
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>
/// Back-fills empty university descriptions from the crawled JSON files
/// (0.json, 20.json, ... 2800.json) in the fixed import directory.
/// A university row is updated only when a crawled record with the same (trimmed) name
/// has a non-blank description and the stored description is still blank.
/// </summary>
/// <returns><c>true</c> once every file has been processed and every update has completed.</returns>
public async Task<bool> universitydetailupdate()
{
    var universitylist = await this.d_University.Query(x => x.IsDelete == false);

    for (int offset = 0; offset <= 2800; offset += 20)
    {
        var jsontext = UniversityTypeRelsove.GetFileJson(string.Format(@"D:\\jsondoc\\2023-09-14\\{0}.json", offset));

        // The first and last characters are dropped and the remainder is unescaped before parsing.
        // NOTE(review): presumably each file holds the JSON array wrapped in a JSON string literal
        // (outer quotes plus escaped content) - confirm against the code that writes these files.
        var jsontext2 = Regex.Unescape(jsontext.Remove(jsontext.Length - 1, 1).Remove(0, 1));
        var result = JsonConvert.DeserializeObject<List<Class1>>(jsontext2);
        if (result == null)
        {
            continue;
        }

        // A plain foreach with awaits replaces List.ForEach(async ...): the async lambda there was
        // async void, so updates were fire-and-forget and the method could return before they finished.
        foreach (var record in result)
        {
            // Same per-record pause as before, without blocking the thread.
            await Task.Delay(100);

            if (record == null || record.name == null || string.IsNullOrWhiteSpace(record.description))
            {
                continue;
            }

            var recordName = record.name.Trim();
            var defaultinfo = universitylist.FirstOrDefault(u => u.Name == recordName);
            if (defaultinfo == null || !string.IsNullOrWhiteSpace(defaultinfo.Description))
            {
                continue;
            }

            defaultinfo.Description = record.description;
            defaultinfo.ModifyTime = DateTime.Now;
            await this.d_University.Update(defaultinfo);
        }
    }

    return true;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>
/// One record deserialized from a crawled university JSON file.
/// Property names are lower-case because they mirror the JSON keys.
/// </summary>
public class Class1
{
    /// <summary>Identifier of the record as stored in the JSON file.</summary>
    public string id { get; set; }

    /// <summary>University name; matched against the stored university name after trimming.</summary>
    public string name { get; set; }

    /// <summary>University description text copied into the stored university row.</summary>
    public string description { get; set; }
}
|
||||||
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
///
|
///
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
|
@ -97,7 +149,7 @@ namespace New_College.Services
|
||||||
single.Syl = issyl == "双一流" ? 1 : 0;
|
single.Syl = issyl == "双一流" ? 1 : 0;
|
||||||
single.Subject_Level = schooltype == "本科" ? 0 : 1;
|
single.Subject_Level = schooltype == "本科" ? 0 : 1;
|
||||||
single.ModifyTime = DateTime.Now;
|
single.ModifyTime = DateTime.Now;
|
||||||
await this.d_University.Update(single);
|
await this.d_University.Update(single);
|
||||||
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||||
# Visual Studio Version 16
|
# Visual Studio Version 17
|
||||||
VisualStudioVersion = 16.0.30114.105
|
VisualStudioVersion = 17.5.33627.172
|
||||||
MinimumVisualStudioVersion = 10.0.40219.1
|
MinimumVisualStudioVersion = 10.0.40219.1
|
||||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "New_College.Api", "New_College.Api\New_College.Api.csproj", "{6F47A41A-085E-4422-BB73-5A2CBAA07D9F}"
|
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "New_College.Api", "New_College.Api\New_College.Api.csproj", "{6F47A41A-085E-4422-BB73-5A2CBAA07D9F}"
|
||||||
EndProject
|
EndProject
|
||||||
|
|
@ -27,6 +27,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "New_College.ConsoleApp", "N
|
||||||
EndProject
|
EndProject
|
||||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "New_College.AdminMvc", "New_College.AdminMvc\New_College.AdminMvc.csproj", "{06D885F3-6352-4BF6-B826-DEA742DFFBD7}"
|
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "New_College.AdminMvc", "New_College.AdminMvc\New_College.AdminMvc.csproj", "{06D885F3-6352-4BF6-B826-DEA742DFFBD7}"
|
||||||
EndProject
|
EndProject
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "New_Spider", "New_Spider\New_Spider.csproj", "{E23857BF-DFBB-49DD-A86A-1B2932F6D33B}"
|
||||||
|
EndProject
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
Debug|Any CPU = Debug|Any CPU
|
Debug|Any CPU = Debug|Any CPU
|
||||||
|
|
@ -81,6 +83,10 @@ Global
|
||||||
{06D885F3-6352-4BF6-B826-DEA742DFFBD7}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
{06D885F3-6352-4BF6-B826-DEA742DFFBD7}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
{06D885F3-6352-4BF6-B826-DEA742DFFBD7}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
{06D885F3-6352-4BF6-B826-DEA742DFFBD7}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
{06D885F3-6352-4BF6-B826-DEA742DFFBD7}.Release|Any CPU.Build.0 = Release|Any CPU
|
{06D885F3-6352-4BF6-B826-DEA742DFFBD7}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{E23857BF-DFBB-49DD-A86A-1B2932F6D33B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{E23857BF-DFBB-49DD-A86A-1B2932F6D33B}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{E23857BF-DFBB-49DD-A86A-1B2932F6D33B}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{E23857BF-DFBB-49DD-A86A-1B2932F6D33B}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
EndGlobalSection
|
EndGlobalSection
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
HideSolutionNode = FALSE
|
HideSolutionNode = FALSE
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,172 @@
|
||||||
|
using HtmlAgilityPack;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Net;
|
||||||
|
using System.Text;
|
||||||
|
using System.Text.Json;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace New_Spider
|
||||||
|
{
|
||||||
|
/// <summary>
/// Crawls the university list on gaokao.chsi.com.cn with HtmlAgilityPack: list page, then each
/// school page, then the linked introduction page whose text is collected as the description.
/// </summary>
public class HtmlAgHelper
{
    // Format string for site-relative links; {0} is the href taken from the page.
    private const string SiteUrlFormat = "https://gaokao.chsi.com.cn/{0}";

    // Format string for a list page; {0} is the start offset of the page.
    private const string ListPageUrlFormat = "https://gaokao.chsi.com.cn/sch/search--ss-on,option-qg,searchType-1,start-{0}.dhtml";

    // One node per school entry on a list page.
    private const string SchoolRowsXPath = "//*[@id=\"app-yxk-sch-list\"]/div[1]/div[*]";

    // Link to the school page, relative to one school entry node.
    private const string SchoolLinkXPath = "./div[1]/div/a";

    // Link on the school page that leads to the introduction page.
    private const string IntroLinkXPath = "/html/body/div[1]/div[3]/div/a[2]";

    // Node on the introduction page that holds the description text.
    private const string IntroTextXPath = "/html/body/div[1]/div[4]/div[3]";

    /// <summary>
    /// Crawls the list pages, resolves the description of every school found and writes one
    /// JSON file per list page through <see cref="ITextGen.GenJson"/>.
    /// </summary>
    public void HtmlCreatePageData()
    {
        HtmlWeb webClient = new HtmlWeb();
        ITextGen textgen = new ITextGen();

        // NOTE(review): with an upper bound of 1 only offset 0 is crawled; the commented-out loop
        // in the original ran to 2800 - confirm which range is intended before a full crawl.
        for (int k = 0; k <= 1; k += 20)
        {
            var jsonobjs = new List<JsonObj>();

            HtmlDocument doc = webClient.Load(string.Format(ListPageUrlFormat, k));
            HtmlNodeCollection schoolRows = doc.DocumentNode.SelectNodes(SchoolRowsXPath);

            if (schoolRows == null)
            {
                // SelectNodes yields null when nothing matches; report it instead of crashing.
                Console.Error.WriteLine(string.Format("No school entries found on list page with start offset {0}.", k));
            }
            else
            {
                foreach (var row in schoolRows)
                {
                    // Pause between requests.
                    Thread.Sleep(500);

                    // Resolved relative to the row itself so the link always belongs to this row.
                    var link = row.SelectSingleNode(SchoolLinkXPath);
                    if (link == null || link.Attributes["href"] == null)
                    {
                        continue;
                    }

                    Console.WriteLine(string.Format("item: a:https://gaokao.chsi.com.cn/{0};name:{1}", link.Attributes["href"].Value, link.InnerText));

                    HtmlUniversityAgResolve(doc, webClient, link, link.InnerText.Trim(), jsonobjs);
                }
            }

            Thread.Sleep(1000);
            textgen.GenJson(Newtonsoft.Json.JsonConvert.SerializeObject(jsonobjs), k.ToString());
        }
    }

    /// <summary>
    /// One crawled school record as written to the JSON output.
    /// </summary>
    public class JsonObj
    {
        /// <summary>Timestamp-based identifier assigned when the record is created.</summary>
        public string id { get; set; }

        /// <summary>School name taken from the list page link text.</summary>
        public string name { get; set; }

        /// <summary>Introduction text taken from the school's introduction page.</summary>
        public string description { get; set; }
    }

    /// <summary>
    /// Loads a school's page and follows its introduction link(s), appending the resolved
    /// descriptions to <paramref name="jsons"/>.
    /// </summary>
    /// <param name="htmldoc">Not read; kept for signature compatibility.</param>
    /// <param name="htmlWeb">Web client used to load the pages.</param>
    /// <param name="htmlNode">Anchor node whose <c>href</c> points to the school page.</param>
    /// <param name="name">School name stored on every record that is produced.</param>
    /// <param name="jsons">List that receives the produced records.</param>
    public void HtmlUniversityAgResolve(HtmlDocument htmldoc, HtmlWeb htmlWeb, HtmlNode htmlNode, string name, List<JsonObj> jsons)
    {
        HtmlDocument schoolPage = htmlWeb.Load(string.Format(SiteUrlFormat, htmlNode.Attributes["href"].Value));

        HtmlNodeCollection introLinks = schoolPage.DocumentNode.SelectNodes(IntroLinkXPath);
        if (introLinks == null)
        {
            return;
        }

        foreach (var introLink in introLinks)
        {
            Thread.Sleep(500);
            try
            {
                HtmlUniversityDescriptionResolve(schoolPage, htmlWeb, introLink.Attributes["href"].Value, name, jsons);
            }
            catch (Exception e)
            {
                // Best effort: one failing school must not stop the crawl, but the failure is reported.
                Console.Error.WriteLine(string.Format("Failed to resolve description for {0}: {1}", name, e.Message));
            }
        }
    }

    /// <summary>
    /// Loads the introduction page at <paramref name="url"/> and appends one record per
    /// matching description node to <paramref name="jsons"/>.
    /// </summary>
    /// <param name="htmldoc">Not read; kept for signature compatibility.</param>
    /// <param name="htmlWeb">Web client used to load the page.</param>
    /// <param name="url">Site-relative address of the introduction page.</param>
    /// <param name="name">School name stored on every record that is produced.</param>
    /// <param name="jsons">List that receives the produced records.</param>
    public void HtmlUniversityDescriptionResolve(HtmlDocument htmldoc, HtmlWeb htmlWeb, string url, string name, List<JsonObj> jsons)
    {
        HtmlDocument introPage = htmlWeb.Load(string.Format(SiteUrlFormat, url));

        HtmlNodeCollection introNodes = introPage.DocumentNode.SelectNodes(IntroTextXPath);
        if (introNodes == null)
        {
            return;
        }

        foreach (var introNode in introNodes)
        {
            jsons.Add(new JsonObj()
            {
                description = introNode.InnerText,
                name = name,
                // Year, month, day, hour, minute, second, millisecond - the previous pattern
                // put the hour between month and day.
                id = DateTime.Now.ToString("yyyyMMddHHmmssfff"),
            });
        }
    }

    /// <summary>
    /// Walks the first list page, each school page and each introduction page, printing the
    /// school links to the console. The introduction text is read but not stored.
    /// </summary>
    public void HtmlAg()
    {
        HtmlWeb webClient = new HtmlWeb();
        HtmlDocument listPage = webClient.Load(string.Format(ListPageUrlFormat, 0));

        HtmlNodeCollection schoolRows = listPage.DocumentNode.SelectNodes(SchoolRowsXPath);
        if (schoolRows == null)
        {
            return;
        }

        foreach (var row in schoolRows)
        {
            var link = row.SelectSingleNode(SchoolLinkXPath);
            if (link == null || link.Attributes["href"] == null)
            {
                continue;
            }

            Console.WriteLine(string.Format("item: a:https://gaokao.chsi.com.cn/{0};name:{1}", link.Attributes["href"].Value, link.InnerText));

            HtmlDocument schoolPage = webClient.Load(string.Format(SiteUrlFormat, link.Attributes["href"].Value));
            HtmlNodeCollection introLinks = schoolPage.DocumentNode.SelectNodes(IntroLinkXPath);
            if (introLinks == null)
            {
                continue;
            }

            foreach (var introLink in introLinks)
            {
                HtmlDocument introPage = webClient.Load(string.Format(SiteUrlFormat, introLink.Attributes["href"].Value));
                HtmlNodeCollection introNodes = introPage.DocumentNode.SelectNodes(IntroTextXPath);
                if (introNodes == null)
                {
                    continue;
                }

                foreach (var introNode in introNodes)
                {
                    // Introduction text of the school; read only, nothing consumes it here.
                    _ = introNode.InnerText;
                }
            }
        }
    }
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,28 @@
|
||||||
|
using Newtonsoft.Json;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Security.AccessControl;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
using System.Xml;
|
||||||
|
|
||||||
|
namespace New_Spider
|
||||||
|
{
|
||||||
|
/// <summary>
/// Writes crawl output to JSON files in a fixed output directory.
/// </summary>
public class ITextGen
{
    /// <summary>
    /// Serializes <paramref name="jsons"/> with indented formatting and writes the result to
    /// the file named after <paramref name="Ids"/> in the fixed output directory, creating the
    /// directory first when it does not exist.
    /// </summary>
    /// <param name="jsons">Text to serialize and store.</param>
    /// <param name="Ids">File name (without extension) of the output file.</param>
    public void GenJson(string jsons, string Ids)
    {
        string targetPath = string.Format(@"D:\\jsondoc\\2023-09-14\\{0}.json", Ids);

        // File.WriteAllText does not create missing directories, so make sure the folder exists.
        var targetFolder = Path.GetDirectoryName(targetPath);
        if (!string.IsNullOrEmpty(targetFolder))
        {
            Directory.CreateDirectory(targetFolder);
        }

        // The argument is a string, so the output is a JSON string literal (quoted and escaped).
        // NOTE(review): a caller that passes already-serialized JSON gets it encoded a second time;
        // readers of these files appear to depend on that shape, so it is intentionally kept.
        string jsonString = JsonConvert.SerializeObject(jsons, Newtonsoft.Json.Formatting.Indented);
        File.WriteAllText(targetPath, jsonString);
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,15 @@
|
||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<OutputType>Exe</OutputType>
|
||||||
|
<TargetFramework>net6.0</TargetFramework>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<PackageReference Include="HtmlAgilityPack" Version="1.11.53" />
|
||||||
|
<PackageReference Include="Newtonsoft.Json" Version="12.0.3" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
|
|
@ -0,0 +1,13 @@
|
||||||
|
|
||||||
|
using HtmlAgilityPack;
|
||||||
|
using New_Spider;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
|
|
||||||
|
// Console entry point: run the list-page crawl once.
var crawler = new HtmlAgHelper();
crawler.HtmlCreatePageData();

// Wait for console input before continuing.
Console.Read();

// See https://aka.ms/new-console-template for more information
Console.WriteLine("Hello, World!");
|
||||||
Loading…
Reference in New Issue