111
parent
99116a3ca8
commit
19b0f1727c
|
|
@ -9,8 +9,8 @@
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<PackageReference Include="HtmlAgilityPack" Version="1.11.53" />
|
<PackageReference Include="HtmlAgilityPack" Version="1.11.53" />
|
||||||
<PackageReference Include="Selenium.WebDriver" Version="4.18.1" />
|
<PackageReference Include="Selenium.WebDriver" Version="4.27.0" />
|
||||||
<PackageReference Include="Selenium.WebDriver.ChromeDriver" Version="122.0.6261.9400" />
|
<PackageReference Include="Selenium.WebDriver.ChromeDriver" Version="131.0.6778.20400" />
|
||||||
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,9 @@ using System.Text.RegularExpressions;
|
||||||
// See https://aka.ms/new-console-template for more information
|
// See https://aka.ms/new-console-template for more information
|
||||||
|
|
||||||
|
|
||||||
XueHtmlAgHelper xueHtmlAg = new XueHtmlAgHelper();
|
//XueHtmlAgHelper xueHtmlAg = new XueHtmlAgHelper();
|
||||||
|
QingTingSpiderHelper tingSpiderHelper=new QingTingSpiderHelper();
|
||||||
|
tingSpiderHelper.HtmltwlItemsData();
|
||||||
//xueHtmlAg.GetDataRecruitDetail();
|
//xueHtmlAg.GetDataRecruitDetail();
|
||||||
Console.WriteLine("success!");
|
Console.WriteLine("success!");
|
||||||
Console.Read();
|
Console.Read();
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,149 @@
|
||||||
|
using Aliyun.OSS;
|
||||||
|
using HtmlAgilityPack;
|
||||||
|
using ICSharpCode.SharpZipLib.Core;
|
||||||
|
using Microsoft.AspNetCore.Mvc.RazorPages;
|
||||||
|
using New_College.Common.Helper;
|
||||||
|
using OpenQA.Selenium.Chrome;
|
||||||
|
using OSS.Tools.Http;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Net;
|
||||||
|
using System.Net.Http;
|
||||||
|
using System.Security.AccessControl;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace New_Spider
|
||||||
|
{
|
||||||
|
/// <summary>
/// Scrapes the profession catalogue from www.qingtingai.net and stores the results as
/// JSON objects in an Aliyun OSS bucket.
/// </summary>
/// <remarks>
/// OSS credentials are read from the <c>OSS_ACCESS_KEY_ID</c> and <c>OSS_ACCESS_KEY_SECRET</c>
/// environment variables instead of being embedded in source. Any key pair that was
/// previously committed with this class should be treated as compromised and rotated.
/// </remarks>
public class QingTingSpiderHelper
{
    private const string BucketName = "static-data-ycymedu";
    private const string FilePrefix = "qingting-data/";
    private const string OssEndpoint = "https://oss-cn-shanghai.aliyuncs.com";
    private const string AccessKeyIdVariable = "OSS_ACCESS_KEY_ID";
    private const string AccessKeySecretVariable = "OSS_ACCESS_KEY_SECRET";
    private const string ProfessionPageUrl = "https://www.qingtingai.net/inquiryProfession";
    private const string CatalogueUrl = "https://static-data-ycymedu.oss-cn-shanghai.aliyuncs.com/qingting-data/list.json";

    // Created on first use so that constructing the helper never throws; a missing
    // credential surfaces as soon as a public method asks for the client.
    private readonly Lazy<OssClient> _ossClient = new Lazy<OssClient>(CreateOssClient);

    /// <summary>
    /// Loads the profession overview page in headless Chrome, extracts every category with
    /// its professions, and uploads the result to OSS as <c>qingting-data/list.json</c>.
    /// </summary>
    /// <remarks>
    /// All work is performed synchronously; the <see cref="Task"/> return type is kept for
    /// existing callers. When no category is found nothing is uploaded, so an existing
    /// catalogue is not overwritten with an empty list.
    /// </remarks>
    /// <exception cref="InvalidOperationException">An OSS credential environment variable is missing.</exception>
    public async Task HtmlCreatePageData()
    {
        // Resolve credentials first so a configuration error fails before Chrome is launched.
        var oss = _ossClient.Value;

        var options = new ChromeOptions();
        options.AddArgument("--headless"); // run without a visible browser window
        options.AddArgument("--disable-gpu");
        options.AddArgument("--no-sandbox");

        string pageSource;
        using (var driver = new ChromeDriver(options))
        {
            // Navigate to the target page and capture the rendered markup.
            driver.Navigate().GoToUrl(ProfessionPageUrl);
            pageSource = driver.PageSource;
        }

        var doc = new HtmlDocument();
        doc.LoadHtml(pageSource);

        var categories = ParseCategories(doc);
        if (categories.Count == 0)
        {
            Console.WriteLine("未找到匹配的分类!");
            return;
        }

        // Build the object key and upload synchronously.
        string objectName = $"{FilePrefix}list.json";
        using var stream = new MemoryStream(Encoding.UTF8.GetBytes(categories.ToJson()));
        oss.PutObject(BucketName, objectName, stream);

        Console.WriteLine($"Uploaded page to OSS as {objectName}");
    }

    /// <summary>
    /// Downloads the catalogue previously written by <see cref="HtmlCreatePageData"/>, then
    /// fetches the detail JSON of every profession in it and uploads each response to OSS as
    /// <c>qingting-data/{id}.json</c>.
    /// </summary>
    /// <remarks>
    /// The method blocks until all professions have been processed. Failures of the
    /// underlying requests propagate to the caller as their original exception type.
    /// </remarks>
    /// <exception cref="InvalidOperationException">An OSS credential environment variable is missing.</exception>
    public void HtmltwlItemsData()
    {
        // The synchronous signature is kept for existing callers; block exactly once here
        // instead of calling .Result on every request.
        MirrorProfessionDetailsAsync().GetAwaiter().GetResult();
    }

    /// <summary>Asynchronous implementation behind <see cref="HtmltwlItemsData"/>.</summary>
    private async Task MirrorProfessionDetailsAsync()
    {
        var oss = _ossClient.Value;

        using var httpClient = new HttpClient();
        using var catalogueResponse = await httpClient.GetAsync(CatalogueUrl).ConfigureAwait(false);
        if (!catalogueResponse.IsSuccessStatusCode)
        {
            Console.WriteLine($"Failed to download {CatalogueUrl}: HTTP {(int)catalogueResponse.StatusCode}");
            return;
        }

        var catalogueJson = await catalogueResponse.Content.ReadAsStringAsync().ConfigureAwait(false);
        var categories = System.Text.Json.JsonSerializer.Deserialize<List<BaseZProfessionDto>>(catalogueJson);
        if (categories == null)
        {
            return;
        }

        foreach (var category in categories)
        {
            if (category?.itemDtos == null)
            {
                continue;
            }

            foreach (var profession in category.itemDtos)
            {
                if (profession == null)
                {
                    continue;
                }

                var detailUrl = $"https://www.qingtingai.net/api/career/get_career_info?id={profession.id}&agt_host=www.qingtingai.net&current_host=www.qingtingai.net";
                using (var response = await httpClient.GetAsync(detailUrl).ConfigureAwait(false))
                {
                    if (response.IsSuccessStatusCode)
                    {
                        var jsonData = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
                        using var stream = new MemoryStream(Encoding.UTF8.GetBytes(jsonData));

                        // Build the object key for this profession.
                        string objectName = $"{FilePrefix}{profession.id}.json";
                        oss.PutObject(BucketName, objectName, stream);
                        Console.WriteLine($"Uploaded to OSS as {objectName}");
                    }
                    else
                    {
                        Console.WriteLine($"Skipped id {profession.id}: HTTP {(int)response.StatusCode}");
                    }
                }

                // Pause between requests so the remote API is not hit in a tight loop.
                await Task.Delay(300).ConfigureAwait(false);
            }
        }
    }

    /// <summary>
    /// Extracts every category block and the professions listed under it from the parsed page.
    /// </summary>
    /// <returns>The categories found; empty when the page contains no matching block.</returns>
    private static List<BaseZProfessionDto> ParseCategories(HtmlDocument doc)
    {
        var categories = new List<BaseZProfessionDto>();

        var topCategoryNodes = doc.DocumentNode.SelectNodes("//div[contains(@class, 'Top-Category cateMargin')]");
        if (topCategoryNodes == null)
        {
            return categories;
        }

        foreach (var topCategory in topCategoryNodes)
        {
            // Category title; the XPath matches the class value loosely.
            var titleNode = topCategory.SelectSingleNode(".//span[contains(@class, 'category-defult-title')]");
            string title = titleNode?.InnerText.Trim() ?? "无标题";
            Console.WriteLine($"分类: {title}");

            var items = new List<ZProfessionItemDto>();
            var jobNodes = topCategory.SelectNodes(".//ul/li");
            if (jobNodes != null)
            {
                foreach (var job in jobNodes)
                {
                    var jobName = job.InnerText.Trim();
                    // An <li> without an <a> yields no node at all, so fall back explicitly.
                    var jobLink = job.SelectSingleNode(".//a")?.GetAttributeValue("href", "无链接") ?? "无链接";
                    Console.WriteLine($" 职业名称: {jobName}, 链接: {jobLink}");

                    if (TryExtractId(jobLink, out long idValue))
                    {
                        items.Add(new ZProfessionItemDto() { id = idValue, name = jobName });
                    }
                }
            }

            categories.Add(new BaseZProfessionDto() { RootName = title, itemDtos = items });
        }

        return categories;
    }

    /// <summary>
    /// Reads the numeric value that follows the first <c>id=</c> in <paramref name="link"/>,
    /// stopping at the next <c>&amp;</c> or <c>#</c>.
    /// </summary>
    /// <returns><c>true</c> when an id was found and parsed; otherwise <c>false</c>.</returns>
    private static bool TryExtractId(string link, out long id)
    {
        const string marker = "id=";
        id = 0;

        int markerIndex = link.IndexOf(marker, StringComparison.Ordinal);
        if (markerIndex < 0)
        {
            return false;
        }

        string candidate = link.Substring(markerIndex + marker.Length);
        int terminator = candidate.IndexOfAny(new[] { '&', '#' });
        if (terminator >= 0)
        {
            candidate = candidate.Substring(0, terminator);
        }

        return long.TryParse(
            candidate,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out id);
    }

    /// <summary>Creates the OSS client from credentials supplied through the environment.</summary>
    private static OssClient CreateOssClient()
    {
        var accessKeyId = ReadRequiredVariable(AccessKeyIdVariable);
        var accessKeySecret = ReadRequiredVariable(AccessKeySecretVariable);
        return new OssClient(OssEndpoint, accessKeyId, accessKeySecret);
    }

    /// <summary>Returns the value of an environment variable, or throws when it is unset or blank.</summary>
    private static string ReadRequiredVariable(string variableName)
    {
        var value = Environment.GetEnvironmentVariable(variableName);
        if (string.IsNullOrWhiteSpace(value))
        {
            throw new InvalidOperationException(
                $"Environment variable '{variableName}' must be set to access the OSS bucket.");
        }

        return value;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// One top-level profession category together with the professions listed under it.
/// Property names are the JSON keys of the stored catalogue and must not be renamed.
/// </summary>
public class BaseZProfessionDto
{
    /// <summary>Display title of the category.</summary>
    public string RootName { get; set; } = string.Empty;

    /// <summary>
    /// Professions belonging to this category. Defaults to an empty list so that a
    /// payload omitting the key does not leave consumers with a null collection.
    /// </summary>
    public List<ZProfessionItemDto> itemDtos { get; set; } = new List<ZProfessionItemDto>();
}
|
||||||
|
|
||||||
|
/// <summary>
/// A single profession entry of the catalogue.
/// Property names are the JSON keys of the stored catalogue and must not be renamed.
/// </summary>
public class ZProfessionItemDto
{
    /// <summary>Numeric identifier taken from the <c>id=</c> query parameter of the profession link.</summary>
    public long id { get; set; }

    /// <summary>Profession name as shown on the overview page.</summary>
    public string name { get; set; } = string.Empty;
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue