Ajout de HelloFresh
This commit is contained in:
138
Controllers/HelloFresh/HelloFreshScraperService.cs
Normal file
138
Controllers/HelloFresh/HelloFreshScraperService.cs
Normal file
@@ -0,0 +1,138 @@
|
||||
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using AngleSharp;
using AngleSharp.Dom;
using Newtonsoft.Json.Linq;
|
||||
|
||||
namespace HelloFreshScraper.Services
|
||||
{
|
||||
/// <summary>
/// Plain data holder for one recipe collected by <c>HelloFreshScraperService</c>.
/// All members are mutable strings and stay <c>null</c> until a value is assigned.
/// </summary>
public class Recipe
{
    /// <summary>Recipe identifier, read from the <c>id</c> field of the listing data.</summary>
    public string Id { get; set; }

    /// <summary>Recipe title, read from the <c>name</c> field of the listing data.</summary>
    public string Name { get; set; }

    /// <summary>Value of the <c>image</c> field of the listing data (presumably an image URL).</summary>
    public string Image { get; set; }

    /// <summary>Value of the <c>pdf</c> field of the listing data (presumably a link to the recipe card).</summary>
    public string Pdf { get; set; }

    /// <summary>
    /// Preparation time. The scraper stores only the digits captured in front of the
    /// first "minutes" occurrence of the detail page, e.g. <c>"35"</c>.
    /// </summary>
    public string PrepTime { get; set; }

    /// <summary>Difficulty text taken from the detail page, e.g. <c>"Intermédiaire"</c>.</summary>
    public string Difficulty { get; set; }

    /// <summary>Short description, read from the <c>headline</c> field, e.g. <c>"Accompagné de..."</c>.</summary>
    public string Description { get; set; }

    /// <summary>Value of the <c>label</c> field of the listing data.</summary>
    public string Label { get; set; }
}
|
||||
|
||||
/// <summary>
/// Collects recipes from the hfresh.info listing pages and enriches each one with
/// the preparation time and difficulty found on its hellofresh.fr detail page.
/// </summary>
public class HelloFreshScraperService
{
    // Characters that are dropped from a slug without leaving a separator behind.
    private const string RemovedSlugCharacters = "\",:!?()";

    // Built once and shared: Regex instances are immutable and safe to reuse across
    // the concurrent detail-page tasks.
    private static readonly Regex PrepTimeRegex = new Regex(
        @"(\d{1,3})\s*minutes?",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    private static readonly Regex DifficultyRegex = new Regex(
        @"<span[^>]*data-translation-id=[""']recipe-detail\.level-number[^>]*>([^<]+)</span>",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    private readonly HttpClient _httpClient;

    /// <summary>Creates the service around an externally managed <see cref="HttpClient"/>.</summary>
    /// <param name="httpClient">Client used for every listing and detail request.</param>
    /// <exception cref="ArgumentNullException"><paramref name="httpClient"/> is <c>null</c>.</exception>
    public HelloFreshScraperService(HttpClient httpClient)
    {
        if (httpClient == null)
        {
            throw new ArgumentNullException(nameof(httpClient));
        }

        _httpClient = httpClient;
    }

    /// <summary>
    /// Downloads <paramref name="pagesToLoad"/> consecutive listing pages starting at
    /// <paramref name="startPage"/>, de-duplicates the recipes by id and by name, then
    /// fetches every detail page concurrently to fill in
    /// <see cref="Recipe.PrepTime"/> and <see cref="Recipe.Difficulty"/>.
    /// </summary>
    /// <param name="locale">Locale segment of the listing URL, e.g. <c>fr-fr</c>.</param>
    /// <param name="startPage">First listing page to download.</param>
    /// <param name="pagesToLoad">Number of listing pages to download; zero or less yields an empty list.</param>
    /// <returns>The recipes in the order they were first encountered.</returns>
    /// <remarks>
    /// A failing listing request or malformed listing JSON propagates to the caller.
    /// Detail-page failures are logged and leave the two detail fields unset.
    /// </remarks>
    public async Task<List<Recipe>> GetRecipesAsync(string locale = "fr-fr", int startPage = 1, int pagesToLoad = 2)
    {
        // A list keeps first-seen order explicitly; the sets give O(1) duplicate checks.
        var recipes = new List<Recipe>();
        var seenIds = new HashSet<string>(StringComparer.Ordinal);
        var seenNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

        // One browsing context per call, so concurrent calls never share parser state.
        var context = BrowsingContext.New(Configuration.Default.WithDefaultLoader());
        var localeSegment = Uri.EscapeDataString(locale ?? string.Empty);

        for (int page = startPage; page < startPage + pagesToLoad; page++)
        {
            var url = $"https://hfresh.info/{localeSegment}?page={page}";
            var html = await _httpClient.GetStringAsync(url);

            string rawData;
            using (var document = await context.OpenAsync(req => req.Content(html)))
            {
                // The listing page embeds its data as JSON in the data-page attribute of #app.
                rawData = document.QuerySelector("#app")?.GetAttribute("data-page");
            }

            if (string.IsNullOrWhiteSpace(rawData))
            {
                continue;
            }

            var recipeArray = JObject.Parse(rawData).SelectToken("props.recipes.data") as JArray;
            if (recipeArray == null)
            {
                continue;
            }

            // OfType skips entries that are not JSON objects instead of throwing on the indexer.
            foreach (var item in recipeArray.OfType<JObject>())
            {
                var id = item["id"]?.ToString();
                var name = item["name"]?.ToString();
                var pdf = item["pdf"]?.ToString();

                if (string.IsNullOrEmpty(id) || string.IsNullOrEmpty(name) || string.IsNullOrEmpty(pdf))
                {
                    continue;
                }

                // Skip duplicates: same id, or same name ignoring case and surrounding spaces.
                var nameKey = name.Trim();
                if (seenIds.Contains(id) || seenNames.Contains(nameKey))
                {
                    continue;
                }

                seenIds.Add(id);
                seenNames.Add(nameKey);

                recipes.Add(new Recipe
                {
                    Id = id,
                    Name = name,
                    Image = item["image"]?.ToString(),
                    Pdf = pdf,
                    Description = item["headline"]?.ToString(),
                    Label = item["label"]?.ToString()
                });
            }
        }

        // Scrape all detail pages concurrently and wait until every one has finished.
        await Task.WhenAll(recipes.Select(recipe => EnrichWithDetailsAsync(recipe)));

        return recipes;
    }

    /// <summary>
    /// Best-effort download of one detail page. On any failure the error is written to
    /// the console and the recipe keeps whatever values it already had.
    /// </summary>
    private async Task EnrichWithDetailsAsync(Recipe recipe)
    {
        var slug = GenerateSlug(recipe.Name);

        // NOTE(review): the detail host is always hellofresh.fr, whatever locale was
        // requested for the listing - confirm this is intended.
        var url = $"https://www.hellofresh.fr/recipes/{slug}-{recipe.Id}";

        try
        {
            var html = await _httpClient.GetStringAsync(url);

            // Preparation time: digits in front of the first "minute(s)" in the page.
            var prepMatch = PrepTimeRegex.Match(html);
            if (prepMatch.Success)
            {
                recipe.PrepTime = prepMatch.Groups[1].Value;
            }

            // Difficulty: text of the span tagged recipe-detail.level-number.
            var difficultyMatch = DifficultyRegex.Match(html);
            if (difficultyMatch.Success)
            {
                recipe.Difficulty = difficultyMatch.Groups[1].Value.Trim();
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine($"❌ Erreur scraping {url} : {ex.Message}");
        }
    }

    /// <summary>
    /// Turns a recipe name into the lower-case, hyphen-separated form used in the
    /// detail URL: diacritics are stripped, "œ" becomes "oe", "&amp;" becomes "et",
    /// apostrophes and whitespace become hyphens, and a fixed set of punctuation is removed.
    /// </summary>
    /// <param name="name">Recipe name; callers pass a non-empty string.</param>
    /// <returns>The slug, without leading, trailing or repeated hyphens.</returns>
    private static string GenerateSlug(string name)
    {
        // Invariant casing keeps the result independent of the server culture.
        var expanded = name.ToLowerInvariant()
            .Replace("œ", "oe")
            .Replace("&", "et");

        var slug = new StringBuilder(expanded.Length);

        // FormD splits accented letters into base letter + combining mark, so dropping
        // the marks removes every diacritic rather than a hand-picked subset.
        foreach (var ch in expanded.Normalize(NormalizationForm.FormD))
        {
            if (CharUnicodeInfo.GetUnicodeCategory(ch) == UnicodeCategory.NonSpacingMark)
            {
                continue;
            }

            if (RemovedSlugCharacters.IndexOf(ch) >= 0)
            {
                continue;
            }

            var isSeparator = ch == '-' || ch == '\'' || ch == '’' || char.IsWhiteSpace(ch);
            if (!isSeparator)
            {
                slug.Append(ch);
            }
            else if (slug.Length > 0 && slug[slug.Length - 1] != '-')
            {
                // Collapse runs of separators into a single hyphen and never lead with one.
                slug.Append('-');
            }
        }

        return slug.ToString().Normalize(NormalizationForm.FormC).Trim('-');
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user