Ajout de HelloFresh

This commit is contained in:
2025-09-03 20:17:50 +02:00
parent bcef0a472b
commit d287112b7d
429 changed files with 82881 additions and 22074 deletions

View File

@@ -0,0 +1,138 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using AngleSharp;
using AngleSharp.Dom;
using Newtonsoft.Json.Linq;
namespace HelloFreshScraper.Services
{
/// <summary>
/// Plain data holder for one scraped recipe. Every property is a free-form
/// string and stays <c>null</c> until something assigns it.
/// </summary>
public class Recipe
{
    /// <summary>Recipe identifier.</summary>
    public string Id { get; set; }

    /// <summary>Recipe display name.</summary>
    public string Name { get; set; }

    /// <summary>Recipe image reference.</summary>
    public string Image { get; set; }

    /// <summary>Reference to the recipe PDF.</summary>
    public string Pdf { get; set; }

    /// <summary>Preparation time, e.g. "30-35 min".</summary>
    public string PrepTime { get; set; }

    /// <summary>Difficulty level, e.g. "Intermédiaire".</summary>
    public string Difficulty { get; set; }

    /// <summary>Short description, e.g. "Accompagné de...".</summary>
    public string Description { get; set; }

    /// <summary>Label attached to the recipe.</summary>
    public string Label { get; set; }
}
/// <summary>
/// Scrapes recipe listings from hfresh.info and completes each recipe with the
/// preparation time and difficulty read from its hellofresh.fr detail page.
/// </summary>
public class HelloFreshScraperService
{
    // Patterns applied to the raw HTML of a recipe detail page. Built once
    // instead of being re-created for every recipe.
    private static readonly Regex PrepTimeRegex = new Regex(
        @"(\d{1,3})\s*minutes?",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    private static readonly Regex DifficultyRegex = new Regex(
        @"<span[^>]*data-translation-id=[""']recipe-detail\.level-number[^>]*>([^<]+)</span>",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    // Punctuation that is removed from a recipe name when building its slug.
    private const string SlugDroppedChars = "\",:!?()";

    private readonly HttpClient _httpClient;
    private readonly IBrowsingContext _context;

    /// <summary>Creates the service on top of a caller-owned HTTP client.</summary>
    /// <param name="httpClient">Client used for every listing and detail request.</param>
    /// <exception cref="ArgumentNullException"><paramref name="httpClient"/> is null.</exception>
    public HelloFreshScraperService(HttpClient httpClient)
    {
        _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
        var config = Configuration.Default.WithDefaultLoader();
        _context = BrowsingContext.New(config);
    }

    /// <summary>
    /// Reads <paramref name="pagesToLoad"/> listing pages starting at
    /// <paramref name="startPage"/>, keeps one recipe per id and per name, then
    /// fetches all detail pages concurrently to fill in
    /// <see cref="Recipe.PrepTime"/> and <see cref="Recipe.Difficulty"/>.
    /// </summary>
    /// <param name="locale">Locale path segment of the listing URL.</param>
    /// <param name="startPage">First listing page to request.</param>
    /// <param name="pagesToLoad">Number of consecutive listing pages to request.</param>
    /// <returns>The de-duplicated recipes, in the order they were first seen.</returns>
    /// <remarks>
    /// A failing listing request propagates to the caller; a failing detail
    /// request is only logged and leaves that recipe's details unset.
    /// NOTE(review): the browsing context is shared by all calls on this
    /// instance and has not been verified for concurrent use.
    /// </remarks>
    public async Task<List<Recipe>> GetRecipesAsync(string locale = "fr-fr", int startPage = 1, int pagesToLoad = 2)
    {
        var recipes = new List<Recipe>();
        var seenIds = new HashSet<string>(StringComparer.Ordinal);
        var seenNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

        for (int page = startPage; page < startPage + pagesToLoad; page++)
        {
            foreach (var item in await LoadListingAsync(locale, page))
            {
                var recipe = ToRecipe(item);
                if (recipe == null)
                {
                    continue;
                }

                // Skip duplicates by id, and by name ignoring case and outer whitespace.
                var nameKey = recipe.Name.Trim();
                if (seenIds.Contains(recipe.Id) || seenNames.Contains(nameKey))
                {
                    continue;
                }

                seenIds.Add(recipe.Id);
                seenNames.Add(nameKey);
                recipes.Add(recipe);
            }
        }

        // Detail pages are independent of each other, so fetch them in parallel.
        await Task.WhenAll(recipes.Select(LoadDetailsAsync));
        return recipes;
    }

    /// <summary>
    /// Downloads one listing page and returns the recipe array embedded in the
    /// <c>data-page</c> attribute of its <c>#app</c> element, or an empty array
    /// when the attribute or the <c>props.recipes.data</c> token is missing.
    /// </summary>
    private async Task<JArray> LoadListingAsync(string locale, int page)
    {
        var url = $"https://hfresh.info/{locale}?page={page}";
        var html = await _httpClient.GetStringAsync(url);

        using (var document = await _context.OpenAsync(req => req.Content(html)))
        {
            var rawData = document.QuerySelector("#app")?.GetAttribute("data-page");
            if (string.IsNullOrWhiteSpace(rawData))
            {
                return new JArray();
            }

            return JObject.Parse(rawData).SelectToken("props.recipes.data") as JArray ?? new JArray();
        }
    }

    /// <summary>
    /// Maps one listing entry to a <see cref="Recipe"/>; returns null when the
    /// entry has no id, name or pdf.
    /// </summary>
    private static Recipe ToRecipe(JToken item)
    {
        var id = item["id"]?.ToString();
        var name = item["name"]?.ToString();
        var pdf = item["pdf"]?.ToString();

        if (string.IsNullOrEmpty(id) || string.IsNullOrEmpty(name) || string.IsNullOrEmpty(pdf))
        {
            return null;
        }

        return new Recipe
        {
            Id = id,
            Name = name,
            Image = item["image"]?.ToString(),
            Pdf = pdf,
            Description = item["headline"]?.ToString(),
            Label = item["label"]?.ToString()
        };
    }

    /// <summary>
    /// Fetches the recipe's detail page and fills in PrepTime (digits of the
    /// first "N minutes" occurrence) and Difficulty. Best effort: any failure
    /// is written to the console and the recipe is left as it was.
    /// </summary>
    private async Task LoadDetailsAsync(Recipe recipe)
    {
        var slug = GenerateSlug(recipe.Name);
        var url = $"https://www.hellofresh.fr/recipes/{slug}-{recipe.Id}";

        try
        {
            var html = await _httpClient.GetStringAsync(url);

            var prepMatch = PrepTimeRegex.Match(html);
            if (prepMatch.Success)
            {
                recipe.PrepTime = prepMatch.Groups[1].Value;
            }

            var difficultyMatch = DifficultyRegex.Match(html);
            if (difficultyMatch.Success)
            {
                recipe.Difficulty = difficultyMatch.Groups[1].Value.Trim();
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine($"❌ Erreur scraping {url} : {ex.Message}");
        }
    }

    /// <summary>
    /// Builds the URL slug for a recipe name: lower-cased, accents stripped,
    /// "œ" expanded to "oe", "&amp;" replaced by "et", apostrophes and
    /// whitespace turned into single hyphens, and the punctuation in
    /// <see cref="SlugDroppedChars"/> removed. Returns an empty string for a
    /// null or blank name.
    /// </summary>
    private static string GenerateSlug(string name)
    {
        if (string.IsNullOrWhiteSpace(name))
        {
            return string.Empty;
        }

        // Form D splits accented letters into base letter + combining mark,
        // so dropping the marks removes every accent, not just a fixed list.
        var decomposed = name.ToLowerInvariant()
            .Replace("œ", "oe")
            .Replace("&", "et")
            .Normalize(NormalizationForm.FormD);

        var slug = new StringBuilder(decomposed.Length);
        foreach (var ch in decomposed)
        {
            if (CharUnicodeInfo.GetUnicodeCategory(ch) == UnicodeCategory.NonSpacingMark)
            {
                continue;
            }

            if (SlugDroppedChars.IndexOf(ch) >= 0)
            {
                continue;
            }

            // Straight and typographic apostrophes both act as word separators.
            var isSeparator = ch == '-' || ch == '\'' || ch == '\u2019' || char.IsWhiteSpace(ch);
            if (!isSeparator)
            {
                slug.Append(ch);
                continue;
            }

            // Never start with a hyphen and never emit two in a row.
            if (slug.Length > 0 && slug[slug.Length - 1] != '-')
            {
                slug.Append('-');
            }
        }

        return slug.ToString().TrimEnd('-');
    }
}
}