NewGaoKaoApi/New_Spider/qingtingzhiyuan/QingTingSpiderHelper.cs

150 lines
5.6 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

using Aliyun.OSS;
using HtmlAgilityPack;
using ICSharpCode.SharpZipLib.Core;
using Microsoft.AspNetCore.Mvc.RazorPages;
using New_College.Common.Helper;
using OpenQA.Selenium.Chrome;
using OSS.Tools.Http;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Security.AccessControl;
using System.Text;
using System.Threading.Tasks;
namespace New_Spider
{
/// <summary>
/// Scrapes the profession catalogue published on www.qingtingai.net and mirrors it, together with
/// the per-profession detail JSON, into an Aliyun OSS bucket.
/// </summary>
public class QingTingSpiderHelper
{
    // Target bucket and the key prefix under which every uploaded object is stored.
    private readonly string bucketName = "static-data-ycymedu";
    private readonly string filePrefix = "qingting-data/";

    // OSS client used for all uploads.
    // NOTE(review): the access key id and secret are hard-coded in source. They should be rotated and
    // loaded from configuration or a secret store; the values are kept here only so that existing
    // behaviour is unchanged by this edit.
    private readonly OssClient ossClient = new OssClient("https://oss-cn-shanghai.aliyuncs.com", "LTAI5tKs3TXSbt7E4WMDcxwR", "EvC8MjRaQC1kHubgU4MtecZnofOb0v");

    /// <summary>
    /// Loads the profession index page in headless Chrome, extracts every category together with its
    /// profession entries, and uploads the result to OSS as <c>{filePrefix}list.json</c>.
    /// </summary>
    /// <remarks>
    /// The body contains no awaits, so all work runs synchronously on the calling thread and the
    /// returned task is already completed (or faulted) when the method returns.
    /// </remarks>
    /// <returns>A task representing the scrape-and-upload operation.</returns>
    public async Task HtmlCreatePageData()
    {
        var options = new ChromeOptions();
        options.AddArgument("--headless"); // run Chrome without a visible window
        options.AddArgument("--disable-gpu");
        options.AddArgument("--no-sandbox");

        var list = new List<BaseZProfessionDto>();
        using (var driver = new ChromeDriver(options))
        {
            // Navigate to the index page and parse whatever markup the browser currently holds.
            driver.Navigate().GoToUrl("https://www.qingtingai.net/inquiryProfession");
            var doc = new HtmlDocument();
            doc.LoadHtml(driver.PageSource);

            var topCategoryNodes = doc.DocumentNode.SelectNodes("//div[contains(@class, 'Top-Category cateMargin')]");
            if (topCategoryNodes != null)
            {
                foreach (var topCategory in topCategoryNodes)
                {
                    // contains() keeps the class match loose so additional CSS classes do not break it.
                    var titleNode = topCategory.SelectSingleNode(".//span[contains(@class, 'category-defult-title')]");
                    string title = titleNode?.InnerText.Trim() ?? "无标题";
                    Console.WriteLine($"分类: {title}");

                    var itemlist = new List<ZProfessionItemDto>();
                    var jobNodes = topCategory.SelectNodes(".//ul/li");
                    if (jobNodes != null)
                    {
                        foreach (var job in jobNodes)
                        {
                            var jobName = job.InnerText.Trim();
                            // jobLink is null when the <li> has no <a>, and "无链接" when the <a> has no href.
                            var jobLink = job.SelectSingleNode(".//a")?.GetAttributeValue("href", "无链接");
                            Console.WriteLine($" 职业名称: {jobName}, 链接: {jobLink}");

                            // Entries without a parseable id are skipped instead of aborting the whole scrape.
                            if (TryParseProfessionId(jobLink, out long idValue))
                            {
                                itemlist.Add(new ZProfessionItemDto() { id = idValue, name = jobName });
                            }
                        }
                    }

                    list.Add(new BaseZProfessionDto() { RootName = title, itemDtos = itemlist });
                }
            }
            else
            {
                Console.WriteLine("未找到匹配的分类!");
            }

            // Build the object key and upload synchronously; an empty list is still uploaded.
            string objectName = $"{filePrefix}list.json";
            using var stream = new System.IO.MemoryStream(Encoding.UTF8.GetBytes(list.ToJson()));
            ossClient.PutObject(bucketName, objectName, stream);
            Console.WriteLine($"Uploaded page to OSS as {objectName}");
        }
    }

    /// <summary>
    /// Downloads the previously uploaded <c>list.json</c>, then fetches the detail JSON of every listed
    /// profession and uploads each one to OSS as <c>{filePrefix}{id}.json</c>.
    /// </summary>
    /// <remarks>
    /// Requests are issued sequentially with a 300 ms pause after each successful upload. Failures of
    /// individual HTTP requests are logged and skipped; exceptions propagate to the caller.
    /// </remarks>
    public void HtmltwlItemsData()
    {
        using var httpClient = new HttpClient();

        // The method is synchronous by contract, so the async HTTP calls are blocked on with .Result.
        using var listResponse = httpClient.GetAsync("https://static-data-ycymedu.oss-cn-shanghai.aliyuncs.com/qingting-data/list.json").Result;
        if (!listResponse.IsSuccessStatusCode)
        {
            Console.WriteLine($"Failed to download list.json: HTTP {(int)listResponse.StatusCode}");
            return;
        }

        // Case-insensitive matching tolerates a producer that camel-cases property names.
        var jsonOptions = new System.Text.Json.JsonSerializerOptions { PropertyNameCaseInsensitive = true };
        var listJson = listResponse.Content.ReadAsStringAsync().Result;
        var list = System.Text.Json.JsonSerializer.Deserialize<List<BaseZProfessionDto>>(listJson, jsonOptions);
        if (list == null)
        {
            Console.WriteLine("list.json did not contain a profession list");
            return;
        }

        // Plain loops instead of ForEach(async ...): an async lambda passed to ForEach is async void,
        // which would let exceptions escape the caller entirely.
        foreach (var category in list)
        {
            if (category?.itemDtos == null)
            {
                continue;
            }

            foreach (var profession in category.itemDtos)
            {
                if (profession == null)
                {
                    continue;
                }

                var gourl = $"https://www.qingtingai.net/api/career/get_career_info?id={profession.id}&agt_host=www.qingtingai.net&current_host=www.qingtingai.net";
                using var response = httpClient.GetAsync(gourl).Result;
                if (!response.IsSuccessStatusCode)
                {
                    Console.WriteLine($"Failed to fetch career info {profession.id}: HTTP {(int)response.StatusCode}");
                    continue;
                }

                var jsonData = response.Content.ReadAsStringAsync().Result;
                using var stream = new System.IO.MemoryStream(Encoding.UTF8.GetBytes(jsonData));

                // Build the object key and upload the raw response body unchanged.
                string objectName = $"{filePrefix}{profession.id}.json";
                ossClient.PutObject(bucketName, objectName, stream);
                Console.WriteLine($"Uploaded to OSS as {objectName}");

                // Pause between items to throttle requests against the remote site.
                System.Threading.Thread.Sleep(300);
            }
        }
    }

    /// <summary>
    /// Extracts the integer that follows the first <c>id=</c> marker in <paramref name="link"/>.
    /// </summary>
    /// <param name="link">The href value to inspect; may be null.</param>
    /// <param name="id">Receives the parsed id, or 0 when parsing fails.</param>
    /// <returns>True when an id was found and parsed; otherwise false.</returns>
    private static bool TryParseProfessionId(string link, out long id)
    {
        const string marker = "id=";
        id = 0;
        if (string.IsNullOrEmpty(link))
        {
            return false;
        }

        int markerIndex = link.IndexOf(marker, StringComparison.Ordinal);
        if (markerIndex < 0)
        {
            return false;
        }

        // Keep only the value itself: stop at the next query parameter or fragment, if any.
        string value = link.Substring(markerIndex + marker.Length);
        int terminator = value.IndexOfAny(new[] { '&', '#' });
        if (terminator >= 0)
        {
            value = value.Substring(0, terminator);
        }

        return long.TryParse(
            value,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out id);
    }
}
/// <summary>
/// One top-level category of the scraped profession catalogue together with its entries.
/// </summary>
public class BaseZProfessionDto
{
    /// <summary>Display title of the category.</summary>
    public string RootName { get; set; }

    /// <summary>
    /// Entries belonging to this category. Starts as an empty list so that a freshly constructed
    /// instance can be enumerated without a null check.
    /// </summary>
    public List<ZProfessionItemDto> itemDtos { get; set; } = new List<ZProfessionItemDto>();
}
/// <summary>
/// A single entry of the profession catalogue: a numeric identifier plus its display name.
/// </summary>
public class ZProfessionItemDto
{
    /// <summary>Numeric identifier of the entry.</summary>
    public long id { get; set; }

    /// <summary>Display name of the entry.</summary>
    public string name { get; set; }
}
}