diff --git a/New_College.Api/Controllers/HealthCheckController.cs b/New_College.Api/Controllers/HealthCheckController.cs index 5a05c1d..0365c56 100644 --- a/New_College.Api/Controllers/HealthCheckController.cs +++ b/New_College.Api/Controllers/HealthCheckController.cs @@ -1,4 +1,5 @@ using Microsoft.AspNetCore.Mvc; +using New_College.Common.Helper; using New_College.IServices; using System.Threading.Tasks; @@ -23,7 +24,8 @@ namespace New_College.Controllers /// [HttpGet] public IActionResult Get() - { + { + return Ok(); } @@ -35,7 +37,9 @@ namespace New_College.Controllers public Task Post() { // return t_EnrollmentPlane.Import(); - return t_EnrollmentPlane.Importuniverbaseinfo(); + // return t_EnrollmentPlane.Importuniverbaseinfo(); + + return t_EnrollmentPlane.universitydetailupdate(); } } diff --git a/New_College.Api/New_College.Model.xml b/New_College.Api/New_College.Model.xml index 65addc1..b348c65 100644 --- a/New_College.Api/New_College.Model.xml +++ b/New_College.Api/New_College.Model.xml @@ -527,7 +527,7 @@ - 办学性质 1、公办,2、民办 + 办学性质 1、公办,2、民办,3中外合作,4 港澳台 @@ -4799,6 +4799,11 @@ 是否双一流 + + + 强基 + + 学校logo diff --git a/New_College.Common/Helper/UniversityTypeRelsove.cs b/New_College.Common/Helper/UniversityTypeRelsove.cs new file mode 100644 index 0000000..f13858d --- /dev/null +++ b/New_College.Common/Helper/UniversityTypeRelsove.cs @@ -0,0 +1,88 @@ +using Newtonsoft.Json; +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; + +namespace New_College.Common.Helper +{ + public static class UniversityTypeRelsove + { + + /// + /// 院校类型转换 + /// 0综合,1理工类,2医学类|3军事类|4语言类|5师范类|6财经类|7政法类|8民族类|9农林类|10艺术类|11体育类|12其他 + /// + /// + public static string GetType(int type) + { + string typename = string.Empty; + switch (type) + { + case 0: + typename = "综合类"; + break; + case 1: + typename = "理工类"; + break; + case 2: + typename = "医学类"; + break; + case 3: + typename = "军事类"; + break; + case 4: + typename = "语言类"; + break; + case 5: + typename = "师范类"; + break; + case 6: + typename = "财经类"; + break; + case 7: + typename = "政法类"; + break; + case 8: + typename = "民族类"; + break; + case 9: + typename = "农林类"; + break; + case 10: + typename = "艺术类"; + break; + case 11: + typename = "体育类"; + break; + case 12: + typename = "其他"; + break; + + + } + + + + return typename; + } + + + public static string GetFileJson(string filepath) + { + string json = string.Empty; + using (FileStream fs = new FileStream(filepath, FileMode.Open, System.IO.FileAccess.Read, FileShare.ReadWrite)) + { + using (StreamReader sr = new StreamReader(fs, Encoding.GetEncoding("utf-8"))) + { + json = sr.ReadToEnd().ToString(); + + // var obj = JsonConvert.DeserializeObject(json); + } + } + return json; + } + + + } +} diff --git a/New_College.IServices/IT_EnrollmentPlaneServices.cs b/New_College.IServices/IT_EnrollmentPlaneServices.cs index 97c4f79..f8280a3 100644 --- a/New_College.IServices/IT_EnrollmentPlaneServices.cs +++ b/New_College.IServices/IT_EnrollmentPlaneServices.cs @@ -14,7 +14,7 @@ namespace New_College.IServices public interface IT_EnrollmentPlaneServices : IBaseServices { Task Import(); - + Task universitydetailupdate(); Task Importuniverbaseinfo(); /// /// 根据大学或者专业获取招生计划 diff --git a/New_College.Model/Models/D_University.cs b/New_College.Model/Models/D_University.cs index aca39cf..7df8d22 100644 --- a/New_College.Model/Models/D_University.cs +++ b/New_College.Model/Models/D_University.cs @@ -35,7 +35,7 @@ namespace New_College.Model.Models public string Build_Date { get; set; } /// - /// 办学性质 1、公办,2、民办 + /// 办学性质 1、公办,2、民办,3中外合作,4 港澳台 /// [SugarColumn(IsNullable = true)] public int Nature { get; set; } diff --git a/New_College.Model/ViewModels/Result/UniversityResult.cs b/New_College.Model/ViewModels/Result/UniversityResult.cs index b7b6c7b..6657336 100644 --- a/New_College.Model/ViewModels/Result/UniversityResult.cs +++ b/New_College.Model/ViewModels/Result/UniversityResult.cs @@ -30,6 +30,11 @@ namespace New_College.Model.ViewModels /// 是否双一流 /// public bool? Syl { get; set; } + + /// + /// 强基 + /// + public bool? QJJH { get; set; } /// /// 学校logo /// diff --git a/New_College.Services/D_LongIdMapServices.cs b/New_College.Services/D_LongIdMapServices.cs index ee338e6..ad327ef 100644 --- a/New_College.Services/D_LongIdMapServices.cs +++ b/New_College.Services/D_LongIdMapServices.cs @@ -304,6 +304,7 @@ namespace New_College.Services Syl = university.Syl == 1 ? true : false, Nhef = university.Nhef == 1 ? true : false, Sff = university.Sff == 1 ? true : false, + QJJH = university.QJJH == 1 ? true : false, UniversityType = university.Type, SubjectLevel = university.Subject_Level, Imglist = imgs, diff --git a/New_College.Services/D_UniversityCollectionServices.cs b/New_College.Services/D_UniversityCollectionServices.cs index 646498e..17ba07b 100644 --- a/New_College.Services/D_UniversityCollectionServices.cs +++ b/New_College.Services/D_UniversityCollectionServices.cs @@ -9,6 +9,7 @@ using System.Threading.Tasks; using System.Collections.Generic; using New_College.IRepository; using System.Linq; +using New_College.Common.Helper; namespace New_College.Services { @@ -161,14 +162,14 @@ namespace New_College.Services { Id = c.Id, Name = c.Name, - Nature = c.Nature == 1 ? "公立" : "私立", + Nature = c.Nature == 0 ? "公办" : c.Nature == 1 ? "民办" : c.Nature == 2 ? "中外合作" : "港澳台合作", AscriptionName = string.IsNullOrWhiteSpace(c.AscriptionName) ? "-" : c.AscriptionName, Rank = c.Rank, AreaName = c.Area_Name, Syl = c.Syl == 1 ? "是" : "否", Nhef = c.Nhef == 1 ? "是" : "否", Sff = c.Sff == 1 ? "是" : "否", - UniversityType = c.Type == 0 ? "综合" : c.Type == 1 ? "理工" : "医学类", + UniversityType = UniversityTypeRelsove.GetType(c.Type), BuildDate = c.Build_Date, SubjectLevel = c.Subject_Level == 1 ? "本科" : c.Subject_Level == 2 ? "专科" : "-", AcademicianCount = c.Academician_Count <= 0 ? "-" : c.Academician_Count.ToString(), diff --git a/New_College.Services/T_EnrollmentPlaneServices.cs b/New_College.Services/T_EnrollmentPlaneServices.cs index 4aa6874..f677dde 100644 --- a/New_College.Services/T_EnrollmentPlaneServices.cs +++ b/New_College.Services/T_EnrollmentPlaneServices.cs @@ -15,6 +15,10 @@ using New_College.Model.ViewModels.Result; using New_College.Model.ViewModels.Query; using New_College.Model; using System.Text.RegularExpressions; +using New_College.Common.Helper; +using Newtonsoft.Json; +using System.IO; +using System.Threading; namespace New_College.Services { @@ -51,6 +55,54 @@ namespace New_College.Services + public async Task universitydetailupdate() + { + var universitylist = await this.d_University.Query(x => x.IsDelete == false); + for (int k = 0; k <= 2800; k += 20) + { + var jsontext = UniversityTypeRelsove.GetFileJson(string.Format(@"D:\\jsondoc\\2023-09-14\\{0}.json", k)); + var jsontext2 = Regex.Unescape(jsontext.Remove(jsontext.Length - 1, 1).Remove(0, 1)); + var result = JsonConvert.DeserializeObject>(jsontext2); + + + result.ForEach(async c => + { + Thread.Sleep(100); + if (!string.IsNullOrWhiteSpace(c.description)) + { + if (universitylist.Any(k => k.Name == c.name.Trim())&& string.IsNullOrWhiteSpace(universitylist.FirstOrDefault(k => k.Name == c.name.Trim()).Description)) + { + var defaultinfo = universitylist.FirstOrDefault(k => k.Name == c.name.Trim()); + + defaultinfo.Description = c.description; + defaultinfo.ModifyTime = DateTime.Now; + await this.d_University.Update(defaultinfo); + } + + } + + + + }); + + + } + return true; + } + + + + + + + public class Class1 + { + public string id { get; set; } + public string name { get; set; } + public string description { get; set; } + } + + /// /// /// @@ -97,7 +149,7 @@ namespace New_College.Services single.Syl = issyl == "双一流" ? 1 : 0; single.Subject_Level = schooltype == "本科" ? 0 : 1; single.ModifyTime = DateTime.Now; - await this.d_University.Update(single); + await this.d_University.Update(single); } else diff --git a/New_College.sln b/New_College.sln index 7211122..7b3e931 100644 --- a/New_College.sln +++ b/New_College.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 16 -VisualStudioVersion = 16.0.30114.105 +# Visual Studio Version 17 +VisualStudioVersion = 17.5.33627.172 MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "New_College.Api", "New_College.Api\New_College.Api.csproj", "{6F47A41A-085E-4422-BB73-5A2CBAA07D9F}" EndProject @@ -27,6 +27,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "New_College.ConsoleApp", "N EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "New_College.AdminMvc", "New_College.AdminMvc\New_College.AdminMvc.csproj", "{06D885F3-6352-4BF6-B826-DEA742DFFBD7}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "New_Spider", "New_Spider\New_Spider.csproj", "{E23857BF-DFBB-49DD-A86A-1B2932F6D33B}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -81,6 +83,10 @@ Global {06D885F3-6352-4BF6-B826-DEA742DFFBD7}.Debug|Any CPU.Build.0 = Debug|Any CPU {06D885F3-6352-4BF6-B826-DEA742DFFBD7}.Release|Any CPU.ActiveCfg = Release|Any CPU {06D885F3-6352-4BF6-B826-DEA742DFFBD7}.Release|Any CPU.Build.0 = Release|Any CPU + {E23857BF-DFBB-49DD-A86A-1B2932F6D33B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {E23857BF-DFBB-49DD-A86A-1B2932F6D33B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {E23857BF-DFBB-49DD-A86A-1B2932F6D33B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {E23857BF-DFBB-49DD-A86A-1B2932F6D33B}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/New_Spider/HtmlAgHelper.cs b/New_Spider/HtmlAgHelper.cs new file mode 100644 index 0000000..1eb2c00 --- /dev/null +++ b/New_Spider/HtmlAgHelper.cs @@ -0,0 +1,172 @@ +using HtmlAgilityPack; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Net; +using System.Text; +using System.Text.Json; +using System.Threading.Tasks; + +namespace New_Spider +{ + public class HtmlAgHelper + { + + /// + /// 阳光高考完数据解析 + /// + public void HtmlCreatePageData() + { + HtmlWeb webClient = new HtmlWeb(); + ITextGen textgen = new ITextGen(); + //for (int k = 0; k <= 2800; k += 20) + for (int k = 0; k <= 1; k += 20) + { + var jsonobjs = new List(); + + HtmlDocument doc = webClient.Load(string.Format("https://gaokao.chsi.com.cn/sch/search--ss-on,option-qg,searchType-1,start-{0}.dhtml", k)); + var rootNode = doc.DocumentNode; + HtmlNodeCollection categoryNodeList = rootNode.SelectNodes("//*[@id=\"app-yxk-sch-list\"]/div[1]/div[*]"); + int categoryIndex = 0; + foreach (var items in categoryNodeList)// + { + Thread.Sleep(500); + categoryIndex++; + + var item = items.SelectSingleNode("//*[@id=\"app-yxk-sch-list\"]/div[1]/div[" + categoryIndex + "]/div[1]/div/a"); + + Console.WriteLine(string.Format("item: a:https://gaokao.chsi.com.cn/{0};name:{1}", item.Attributes["href"].Value, item.InnerText)); + + HtmlUniversityAgResolve(doc, webClient, item, item.InnerText.Trim(),jsonobjs); + } + // Console.WriteLine(k); + Thread.Sleep(1000); + textgen.GenJson(Newtonsoft.Json.JsonConvert.SerializeObject(jsonobjs), k.ToString()); + } + } + + + public class JsonObj + { + public string id { get; set; } + public string name { get; set; } + public string description { get; set; } + + } + + + /// + /// 解析院校信息页面 + /// + /// + /// + /// + /// + public void HtmlUniversityAgResolve(HtmlDocument htmldoc, HtmlWeb htmlWeb, HtmlNode htmlNode,string name, List jsons) + { + htmldoc = htmlWeb.Load(string.Format("https://gaokao.chsi.com.cn/{0}", htmlNode.Attributes["href"].Value)); + + var rootNode = htmldoc.DocumentNode; + HtmlNodeCollection items = rootNode.SelectNodes("/html/body/div[1]/div[3]/div/a[2]"); + + if (items != null) + { + foreach (var item in items)// + { + Thread.Sleep(500); + try + { + HtmlUniversityDescriptionResolve(htmldoc, htmlWeb, item.Attributes["href"].Value, name, jsons); + } + catch (Exception e) + { + + } + + } + } + + + } + + + /// + /// 解析院校描述模块 + /// + /// + /// + /// + /// + public void HtmlUniversityDescriptionResolve(HtmlDocument htmldoc, HtmlWeb htmlWeb, string url, string name, List jsons) + { + var href = url; + htmldoc = htmlWeb.Load(string.Format("https://gaokao.chsi.com.cn/{0}", href));//院校页面 + var rootNode = htmldoc.DocumentNode; + HtmlNodeCollection items = rootNode.SelectNodes("/html/body/div[1]/div[4]/div[3]"); + if (items != null) + { + foreach (var item in items)// + { + var description = item.InnerText; //院校简介 + + jsons.Add(new JsonObj() + { + description = description, + name = name, + id = DateTime.Now.ToString("yyyyMMHHddmmssfff"), + }); + } + } + } + + + + + public void HtmlAg() + { + HtmlWeb webClient = new HtmlWeb(); + HtmlDocument doc = webClient.Load("https://gaokao.chsi.com.cn/sch/search--ss-on,option-qg,searchType-1,start-0.dhtml"); + var rootNode = doc.DocumentNode; + HtmlNodeCollection categoryNodeList = rootNode.SelectNodes("//*[@id=\"app-yxk-sch-list\"]/div[1]/div[*]"); + int categoryIndex = 0; + foreach (var items in categoryNodeList)// + { + + categoryIndex++; + + var item = items.SelectSingleNode("//*[@id=\"app-yxk-sch-list\"]/div[1]/div[" + categoryIndex + "]/div[1]/div/a"); + Console.WriteLine(string.Format("item: a:https://gaokao.chsi.com.cn/{0};name:{1}", item.Attributes["href"].Value, item.InnerText)); + + + doc = webClient.Load(string.Format("https://gaokao.chsi.com.cn/{0}", item.Attributes["href"].Value)); + + var rootNode2 = doc.DocumentNode; + HtmlNodeCollection categoryNodeList2 = rootNode2.SelectNodes("/html/body/div[1]/div[3]/div/a[2]"); + + foreach (var itemsd in categoryNodeList2)// + { + + var href = itemsd.Attributes["href"].Value; + + doc = webClient.Load(string.Format("https://gaokao.chsi.com.cn/{0}", href));//院校页面 + + var rootNode3 = doc.DocumentNode; + + HtmlNodeCollection categoryNodeList3 = rootNode3.SelectNodes("/html/body/div[1]/div[4]/div[3]"); + + foreach (var itemsd3 in categoryNodeList3)// + { + var contexts = itemsd3.InnerText; //院校简介 + + } + } + } + + + + } + + + + } +} diff --git a/New_Spider/ITextGen.cs b/New_Spider/ITextGen.cs new file mode 100644 index 0000000..31bd8c3 --- /dev/null +++ b/New_Spider/ITextGen.cs @@ -0,0 +1,28 @@ +using Newtonsoft.Json; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Security.AccessControl; +using System.Text; +using System.Threading.Tasks; +using System.Xml; + +namespace New_Spider +{ + public class ITextGen + { + + /// + /// 生成json文件 + /// + /// + public void GenJson(string jsons, string Ids) + { + string jsonString = JsonConvert.SerializeObject(jsons, Newtonsoft.Json.Formatting.Indented); + File.WriteAllText(string.Format(@"D:\\jsondoc\\2023-09-14\\{0}.json",Ids), jsonString); + } + } + + + +} \ No newline at end of file diff --git a/New_Spider/New_Spider.csproj b/New_Spider/New_Spider.csproj new file mode 100644 index 0000000..da66fe9 --- /dev/null +++ b/New_Spider/New_Spider.csproj @@ -0,0 +1,15 @@ + + + + Exe + net6.0 + enable + enable + + + + + + + + diff --git a/New_Spider/Program.cs b/New_Spider/Program.cs new file mode 100644 index 0000000..e38bbc5 --- /dev/null +++ b/New_Spider/Program.cs @@ -0,0 +1,13 @@ + +using HtmlAgilityPack; +using New_Spider; +using System.Text.RegularExpressions; + +HtmlAgHelper htmlAgHelper = new HtmlAgHelper(); + +htmlAgHelper.HtmlCreatePageData(); + +Console.Read(); + +// See https://aka.ms/new-console-template for more information +Console.WriteLine("Hello, World!");