From 19b0f1727cfc964a97549d16682521e0dc1f3373 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?old=E6=98=93?= <156663459@qq.com>
Date: Thu, 26 Dec 2024 17:03:47 +0800
Subject: [PATCH] 111

---
 New_Spider/New_Spider.csproj                  |   4 +-
 New_Spider/Program.cs                         |   4 +-
 .../qingtingzhiyuan/QingTingSpiderHelper.cs   | 219 ++++++++++++++++++
 3 files changed, 224 insertions(+), 3 deletions(-)
 create mode 100644 New_Spider/qingtingzhiyuan/QingTingSpiderHelper.cs

diff --git a/New_Spider/New_Spider.csproj b/New_Spider/New_Spider.csproj
index da74ab8..d2e374e 100644
--- a/New_Spider/New_Spider.csproj
+++ b/New_Spider/New_Spider.csproj
@@ -9,8 +9,8 @@
-
-
+
+
diff --git a/New_Spider/Program.cs b/New_Spider/Program.cs
index b8f548b..ff2ff03 100644
--- a/New_Spider/Program.cs
+++ b/New_Spider/Program.cs
@@ -28,7 +28,9 @@ using System.Text.RegularExpressions;
 // See https://aka.ms/new-console-template for more information
-XueHtmlAgHelper xueHtmlAg = new XueHtmlAgHelper();
+//XueHtmlAgHelper xueHtmlAg = new XueHtmlAgHelper();
+QingTingSpiderHelper tingSpiderHelper=new QingTingSpiderHelper();
+ tingSpiderHelper.HtmltwlItemsData();
 //xueHtmlAg.GetDataRecruitDetail();
 Console.WriteLine("success!");
 Console.Read();
diff --git a/New_Spider/qingtingzhiyuan/QingTingSpiderHelper.cs b/New_Spider/qingtingzhiyuan/QingTingSpiderHelper.cs
new file mode 100644
index 0000000..46c2e73
--- /dev/null
+++ b/New_Spider/qingtingzhiyuan/QingTingSpiderHelper.cs
@@ -0,0 +1,219 @@
+using Aliyun.OSS;
+using HtmlAgilityPack;
+using ICSharpCode.SharpZipLib.Core;
+using Microsoft.AspNetCore.Mvc.RazorPages;
+using New_College.Common.Helper;
+using OpenQA.Selenium.Chrome;
+using OSS.Tools.Http;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using System.Linq;
+using System.Net;
+using System.Net.Http;
+using System.Security.AccessControl;
+using System.Text;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace New_Spider
+{
+    /// <summary>
+    /// Collects career data from www.qingtingai.net and stores it as JSON objects in an Aliyun OSS bucket.
+    /// </summary>
+    public class QingTingSpiderHelper
+    {
+        private const string OssEndpoint = "https://oss-cn-shanghai.aliyuncs.com";
+        private const string CategoryPageUrl = "https://www.qingtingai.net/inquiryProfession";
+        private const string CategoryListUrl = "https://static-data-ycymedu.oss-cn-shanghai.aliyuncs.com/qingting-data/list.json";
+
+        private string bucketName = "static-data-ycymedu";
+        private string filePrefix = "qingting-data/";
+
+        // OSS client. Credentials come from the environment so that no secret is committed to source control.
+        private OssClient ossClient = CreateOssClient();
+
+        /// <summary>
+        /// Builds the OSS client from the OSS_ACCESS_KEY_ID / OSS_ACCESS_KEY_SECRET environment variables.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">Either environment variable is missing or empty.</exception>
+        private static OssClient CreateOssClient()
+        {
+            string accessKeyId = System.Environment.GetEnvironmentVariable("OSS_ACCESS_KEY_ID");
+            string accessKeySecret = System.Environment.GetEnvironmentVariable("OSS_ACCESS_KEY_SECRET");
+            if (string.IsNullOrWhiteSpace(accessKeyId) || string.IsNullOrWhiteSpace(accessKeySecret))
+            {
+                throw new InvalidOperationException(
+                    "Set the OSS_ACCESS_KEY_ID and OSS_ACCESS_KEY_SECRET environment variables before running the spider.");
+            }
+
+            return new OssClient(OssEndpoint, accessKeyId, accessKeySecret);
+        }
+
+        /// <summary>
+        /// Renders the career category page in headless Chrome, extracts every category with its
+        /// careers, and uploads the result to OSS as "{filePrefix}list.json".
+        /// </summary>
+        /// <remarks>
+        /// Nothing is uploaded when no category is found, so a page that failed to render cannot
+        /// overwrite a previously uploaded list with an empty one.
+        /// </remarks>
+        public async Task HtmlCreatePageData()
+        {
+            var options = new ChromeOptions();
+            options.AddArgument("--headless"); // run without a visible browser window
+            options.AddArgument("--disable-gpu");
+            options.AddArgument("--no-sandbox");
+
+            string pageSource;
+            using (var driver = new ChromeDriver(options))
+            {
+                // Navigate to the target page and capture the rendered HTML.
+                driver.Navigate().GoToUrl(CategoryPageUrl);
+                pageSource = driver.PageSource;
+            }
+
+            HtmlDocument doc = new HtmlDocument();
+            doc.LoadHtml(pageSource);
+
+            var topCategoryNodes = doc.DocumentNode.SelectNodes("//div[contains(@class, 'Top-Category cateMargin')]");
+            if (topCategoryNodes == null)
+            {
+                Console.WriteLine("未找到匹配的分类!");
+                return;
+            }
+
+            var list = new List<BaseZProfessionDto>();
+            foreach (var topCategory in topCategoryNodes)
+            {
+                // Category title; the XPath matches the class value loosely.
+                var titleNode = topCategory.SelectSingleNode(".//span[contains(@class, 'category-defult-title')]");
+                string title = titleNode?.InnerText.Trim() ?? "无标题";
+                Console.WriteLine($"分类: {title}");
+
+                var itemlist = new List<ZProfessionItemDto>();
+                var jobNodes = topCategory.SelectNodes(".//ul/li");
+                if (jobNodes != null)
+                {
+                    foreach (var job in jobNodes)
+                    {
+                        var jobName = job.InnerText.Trim();
+                        // jobLink is null when the list item contains no anchor element.
+                        var jobLink = job.SelectSingleNode(".//a")?.GetAttributeValue("href", "无链接");
+                        Console.WriteLine($" 职业名称: {jobName}, 链接: {jobLink}");
+                        if (TryParseCareerId(jobLink, out long idValue))
+                        {
+                            itemlist.Add(new ZProfessionItemDto() { id = idValue, name = jobName });
+                        }
+                    }
+                }
+
+                list.Add(new BaseZProfessionDto() { RootName = title, itemDtos = itemlist });
+            }
+
+            // Build the object name and upload synchronously.
+            string objectName = $"{filePrefix}list.json";
+            using var stream = new MemoryStream(Encoding.UTF8.GetBytes(list.ToJson()));
+            ossClient.PutObject(bucketName, objectName, stream);
+            Console.WriteLine($"Uploaded page to OSS as {objectName}");
+        }
+
+        /// <summary>
+        /// Downloads the published category list and, for every career in it, fetches the career
+        /// detail JSON from the qingtingai.net API and uploads it to OSS as "{filePrefix}{id}.json".
+        /// </summary>
+        public void HtmltwlItemsData()
+        {
+            using var httpClient = new HttpClient();
+
+            // This method is synchronous, so the HTTP calls are blocked on; GetAwaiter().GetResult() surfaces the original exception instead of an AggregateException.
+            using var listResponse = httpClient.GetAsync(CategoryListUrl).GetAwaiter().GetResult();
+            if (!listResponse.IsSuccessStatusCode)
+            {
+                Console.WriteLine($"Failed to download {CategoryListUrl}: {(int)listResponse.StatusCode}");
+                return;
+            }
+
+            string listJson = listResponse.Content.ReadAsStringAsync().GetAwaiter().GetResult();
+            var list = System.Text.Json.JsonSerializer.Deserialize<List<BaseZProfessionDto>>(listJson);
+            if (list == null)
+            {
+                return;
+            }
+
+            // Plain loops rather than List.ForEach with an async lambda: such a lambda is
+            // async void, so exceptions thrown inside it cannot be observed by the caller.
+            foreach (var item in list)
+            {
+                if (item?.itemDtos == null)
+                {
+                    continue;
+                }
+
+                foreach (var a in item.itemDtos)
+                {
+                    var gourl = $"https://www.qingtingai.net/api/career/get_career_info?id={a.id}&agt_host=www.qingtingai.net&current_host=www.qingtingai.net";
+                    using var response = httpClient.GetAsync(gourl).GetAwaiter().GetResult();
+                    if (!response.IsSuccessStatusCode)
+                    {
+                        continue;
+                    }
+
+                    var jsonData = response.Content.ReadAsStringAsync().GetAwaiter().GetResult();
+                    using var stream = new MemoryStream(Encoding.UTF8.GetBytes(jsonData));
+                    string objectName = $"{filePrefix}{a.id}.json";
+                    ossClient.PutObject(bucketName, objectName, stream);
+                    Console.WriteLine($"Uploaded to OSS as {objectName}");
+                    Thread.Sleep(300); // wait 300 ms before the next request
+                }
+            }
+        }
+
+        /// <summary>
+        /// Extracts the numeric value that follows the first "id=" in <paramref name="link"/>.
+        /// </summary>
+        /// <returns>true when an id was found and parsed; otherwise false.</returns>
+        private static bool TryParseCareerId(string link, out long id)
+        {
+            id = 0;
+            if (string.IsNullOrEmpty(link))
+            {
+                return false;
+            }
+
+            const string marker = "id=";
+            int start = link.IndexOf(marker, StringComparison.Ordinal);
+            if (start < 0)
+            {
+                return false;
+            }
+
+            start += marker.Length;
+            int end = start;
+            while (end < link.Length && link[end] >= '0' && link[end] <= '9')
+            {
+                end++;
+            }
+
+            // Only the leading digit run is parsed, so trailing query parameters do not break it.
+            return end > start
+                && long.TryParse(link.Substring(start, end - start), NumberStyles.None, CultureInfo.InvariantCulture, out id);
+        }
+    }
+
+    /// <summary>A top-level career category together with the careers listed under it.</summary>
+    public class BaseZProfessionDto
+    {
+        public string RootName { get; set; }
+        public List<ZProfessionItemDto> itemDtos { get; set; }
+    }
+
+    /// <summary>A single career entry: its numeric id and display name.</summary>
+    public class ZProfessionItemDto
+    {
+        public long id { get; set; }
+
+        public string name { get; set; }
+    }
+}