From 19b0f1727cfc964a97549d16682521e0dc1f3373 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?old=E6=98=93?= <156663459@qq.com>
Date: Thu, 26 Dec 2024 17:03:47 +0800
Subject: [PATCH] Add QingTing profession spider that mirrors scraped data to Aliyun OSS
---
New_Spider/New_Spider.csproj | 4 +-
New_Spider/Program.cs | 4 +-
.../qingtingzhiyuan/QingTingSpiderHelper.cs | 149 ++++++++++++++++++
3 files changed, 154 insertions(+), 3 deletions(-)
create mode 100644 New_Spider/qingtingzhiyuan/QingTingSpiderHelper.cs
diff --git a/New_Spider/New_Spider.csproj b/New_Spider/New_Spider.csproj
index da74ab8..d2e374e 100644
--- a/New_Spider/New_Spider.csproj
+++ b/New_Spider/New_Spider.csproj
@@ -9,8 +9,8 @@
-
-
+
+
diff --git a/New_Spider/Program.cs b/New_Spider/Program.cs
index b8f548b..ff2ff03 100644
--- a/New_Spider/Program.cs
+++ b/New_Spider/Program.cs
@@ -28,7 +28,9 @@ using System.Text.RegularExpressions;
// See https://aka.ms/new-console-template for more information
-XueHtmlAgHelper xueHtmlAg = new XueHtmlAgHelper();
+//XueHtmlAgHelper xueHtmlAg = new XueHtmlAgHelper();
+QingTingSpiderHelper tingSpiderHelper=new QingTingSpiderHelper();
+ tingSpiderHelper.HtmltwlItemsData();
//xueHtmlAg.GetDataRecruitDetail();
Console.WriteLine("success!");
Console.Read();
diff --git a/New_Spider/qingtingzhiyuan/QingTingSpiderHelper.cs b/New_Spider/qingtingzhiyuan/QingTingSpiderHelper.cs
new file mode 100644
index 0000000..46c2e73
--- /dev/null
+++ b/New_Spider/qingtingzhiyuan/QingTingSpiderHelper.cs
@@ -0,0 +1,149 @@
+using Aliyun.OSS;
+using HtmlAgilityPack;
+using ICSharpCode.SharpZipLib.Core;
+using Microsoft.AspNetCore.Mvc.RazorPages;
+using New_College.Common.Helper;
+using OpenQA.Selenium.Chrome;
+using OSS.Tools.Http;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Net;
+using System.Net.Http;
+using System.Security.AccessControl;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace New_Spider
+{
+    /// <summary>
+    /// Scrapes the profession catalogue of www.qingtingai.net and mirrors it into an Aliyun OSS bucket.
+    /// OSS credentials are read from the OSS_ACCESS_KEY_ID / OSS_ACCESS_KEY_SECRET environment variables;
+    /// never hard-code the key pair here, and rotate the pair that was previously committed in this file.
+    /// </summary>
+    public class QingTingSpiderHelper
+    {
+        private const string IndexUrl = "https://static-data-ycymedu.oss-cn-shanghai.aliyuncs.com/qingting-data/list.json";
+        private readonly string bucketName = "static-data-ycymedu";
+        private readonly string filePrefix = "qingting-data/";
+        // Created on first upload so that missing credentials fail with a clear message at the point of use.
+        private readonly Lazy<OssClient> ossClient = new Lazy<OssClient>(CreateOssClient);
+
+        /// <summary>
+        /// Loads the profession overview page in headless Chrome, collects every category with its professions
+        /// and uploads the result to OSS as "{filePrefix}list.json". Runs synchronously despite the async signature.
+        /// </summary>
+        public async Task HtmlCreatePageData()
+        {
+            var options = new ChromeOptions();
+            options.AddArgument("--headless"); // run without a visible browser window
+            options.AddArgument("--disable-gpu");
+            options.AddArgument("--no-sandbox");
+            var list = new List<BaseZProfessionDto>();
+            using (var driver = new ChromeDriver(options))
+            {
+                driver.Navigate().GoToUrl("https://www.qingtingai.net/inquiryProfession");
+                HtmlDocument doc = new HtmlDocument();
+                doc.LoadHtml(driver.PageSource);
+                var topCategoryNodes = doc.DocumentNode.SelectNodes("//div[contains(@class, 'Top-Category cateMargin')]");
+                if (topCategoryNodes == null)
+                {
+                    // Return without uploading so that an empty result never overwrites an existing index.
+                    Console.WriteLine("未找到匹配的分类!");
+                    return;
+                }
+                foreach (var topCategory in topCategoryNodes)
+                {
+                    var titleNode = topCategory.SelectSingleNode(".//span[contains(@class, 'category-defult-title')]");
+                    string title = titleNode?.InnerText.Trim() ?? "无标题";
+                    Console.WriteLine($"分类: {title}");
+                    var itemlist = new List<ZProfessionItemDto>();
+                    foreach (var job in topCategory.SelectNodes(".//ul/li") ?? Enumerable.Empty<HtmlNode>())
+                    {
+                        var jobName = job.InnerText.Trim();
+                        // "?." yields null when the <li> has no <a>; fall back to the placeholder instead of crashing.
+                        var jobLink = job.SelectSingleNode(".//a")?.GetAttributeValue("href", "无链接") ?? "无链接";
+                        Console.WriteLine($" 职业名称: {jobName}, 链接: {jobLink}");
+                        // Cut at the next query separator or fragment so that "id=12&x=1" still yields 12.
+                        var idText = jobLink.Contains("id=") ? jobLink.Split("id=")[1].Split('&', '#')[0] : string.Empty;
+                        if (long.TryParse(idText, out long idValue))
+                        {
+                            itemlist.Add(new ZProfessionItemDto() { id = idValue, name = jobName });
+                        }
+                    }
+                    list.Add(new BaseZProfessionDto() { RootName = title, itemDtos = itemlist });
+                }
+            }
+
+            string objectName = $"{filePrefix}list.json";
+            using var stream = new MemoryStream(Encoding.UTF8.GetBytes(list.ToJson()));
+            ossClient.Value.PutObject(bucketName, objectName, stream);
+            Console.WriteLine($"Uploaded page to OSS as {objectName}");
+        }
+
+        /// <summary>
+        /// Downloads the index written by <see cref="HtmlCreatePageData"/>, fetches the detail JSON of
+        /// every listed profession and stores each one in OSS as "{filePrefix}{id}.json".
+        /// </summary>
+        public void HtmltwlItemsData()
+        {
+            using var httpClient = new HttpClient();
+            // This method is synchronous by contract, so the HTTP calls are blocked on deliberately.
+            using var indexResponse = httpClient.GetAsync(IndexUrl).Result;
+            if (!indexResponse.IsSuccessStatusCode)
+            {
+                Console.WriteLine($"Failed to download {IndexUrl}: HTTP {(int)indexResponse.StatusCode}");
+                return;
+            }
+            var list = System.Text.Json.JsonSerializer.Deserialize<List<BaseZProfessionDto>>(indexResponse.Content.ReadAsStringAsync().Result);
+            // Plain loops, not List.ForEach(async ...): such a lambda is async void and its exceptions cannot be observed here.
+            foreach (var item in list ?? new List<BaseZProfessionDto>())
+            {
+                foreach (var a in item?.itemDtos ?? new List<ZProfessionItemDto>())
+                {
+                    var gourl = $"https://www.qingtingai.net/api/career/get_career_info?id={a.id}&agt_host=www.qingtingai.net&current_host=www.qingtingai.net";
+                    using var response = httpClient.GetAsync(gourl).Result;
+                    if (!response.IsSuccessStatusCode)
+                    {
+                        Console.WriteLine($"Skipped id {a.id}: HTTP {(int)response.StatusCode}");
+                        continue;
+                    }
+                    var jsonData = response.Content.ReadAsStringAsync().Result;
+                    using var stream = new MemoryStream(Encoding.UTF8.GetBytes(jsonData));
+                    string objectName = $"{filePrefix}{a.id}.json";
+                    ossClient.Value.PutObject(bucketName, objectName, stream);
+                    Console.WriteLine($"Uploaded to OSS as {objectName}");
+                    Thread.Sleep(300); // pause between successful downloads to throttle the remote API
+                }
+            }
+        }
+
+        /// <summary>Builds the OSS client; throws InvalidOperationException when a credential variable is unset.</summary>
+        private static OssClient CreateOssClient()
+        {
+            var keyId = Environment.GetEnvironmentVariable("OSS_ACCESS_KEY_ID");
+            var keySecret = Environment.GetEnvironmentVariable("OSS_ACCESS_KEY_SECRET");
+            if (string.IsNullOrWhiteSpace(keyId) || string.IsNullOrWhiteSpace(keySecret))
+            {
+                throw new InvalidOperationException("Set OSS_ACCESS_KEY_ID and OSS_ACCESS_KEY_SECRET before uploading to OSS.");
+            }
+            return new OssClient("https://oss-cn-shanghai.aliyuncs.com", keyId, keySecret);
+        }
+    }
+
+    public class BaseZProfessionDto // one top-level category together with the professions listed under it
+    {
+        public string RootName { get; set; } // category title
+        public List<ZProfessionItemDto> itemDtos { get; set; } = new List<ZProfessionItemDto>(); // professions of the category; starts empty, not null
+    }
+
+    public class ZProfessionItemDto // one profession entry inside a category
+    {
+        // Numeric identifier of the profession.
+        public long id { get; set; }
+        // Name of the profession.
+        public string name { get; set; }
+    }
+
+
+}