using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using AngleSharp;
using AngleSharp.Dom;
using Newtonsoft.Json.Linq;

namespace HelloFreshScraper.Services
{
    /// <summary>
    /// A recipe assembled from one listing entry plus values scraped from its detail page.
    /// </summary>
    public class Recipe
    {
        /// <summary>Value of the listing entry's "id" field.</summary>
        public string Id { get; set; }

        /// <summary>Value of the listing entry's "name" field.</summary>
        public string Name { get; set; }

        /// <summary>Value of the listing entry's "image" field (may be null).</summary>
        public string Image { get; set; }

        /// <summary>Value of the listing entry's "pdf" field.</summary>
        public string Pdf { get; set; }

        /// <summary>
        /// Digits captured from the first "N minute(s)" occurrence in the detail page
        /// (e.g. "35"); null when the page could not be fetched or nothing matched.
        /// </summary>
        public string PrepTime { get; set; }

        /// <summary>
        /// Difficulty text scraped from the detail page (e.g. "Intermédiaire");
        /// null when the page could not be fetched or nothing matched.
        /// </summary>
        public string Difficulty { get; set; }

        /// <summary>Value of the listing entry's "headline" field (e.g. "Accompagné de...").</summary>
        public string Description { get; set; }

        /// <summary>Value of the listing entry's "label" field (may be null).</summary>
        public string Label { get; set; }
    }

    /// <summary>
    /// Collects recipes from the hfresh.info listing pages and enriches each one with
    /// the preparation time and difficulty found on its hellofresh.fr detail page.
    /// </summary>
    public class HelloFreshScraperService
    {
        // Cached once instead of being rebuilt for every detail page.
        private static readonly Regex PrepTimeRegex = new Regex(
            @"(\d{1,3})\s*minutes?",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        // Anchored on the translation-id attribute; group 1 is the text node that
        // follows the opening tag. (The previous pattern started with a stray "]*",
        // which only matched optional literal ']' characters and never affected
        // the captured group.)
        private static readonly Regex DifficultyRegex = new Regex(
            @"data-translation-id=[""']recipe-detail\.level-number[^>]*>([^<]+)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        private readonly HttpClient _httpClient;

        /// <summary>Creates the service around the HTTP client used for every request.</summary>
        /// <param name="httpClient">Client used to download listing and detail pages.</param>
        /// <exception cref="ArgumentNullException"><paramref name="httpClient"/> is null.</exception>
        public HelloFreshScraperService(HttpClient httpClient)
        {
            _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
        }

        /// <summary>
        /// Downloads <paramref name="pagesToLoad"/> listing pages starting at
        /// <paramref name="startPage"/>, de-duplicates the recipes by id and by name,
        /// then fetches every detail page concurrently to fill in
        /// <see cref="Recipe.PrepTime"/> and <see cref="Recipe.Difficulty"/>.
        /// </summary>
        /// <param name="locale">Locale segment of the listing URL, e.g. "fr-fr".</param>
        /// <param name="startPage">First listing page to request.</param>
        /// <param name="pagesToLoad">Number of consecutive listing pages to request.</param>
        /// <returns>The distinct recipes that were found.</returns>
        /// <remarks>
        /// A failure while downloading or parsing a listing page propagates to the caller;
        /// a failure on a detail page is logged to the console and leaves that recipe's
        /// detail fields unset.
        /// </remarks>
        public async Task<List<Recipe>> GetRecipesAsync(string locale = "fr-fr", int startPage = 1, int pagesToLoad = 2)
        {
            var recipesDict = new Dictionary<string, Recipe>();

            // Lower-cased, trimmed names already accepted: constant-time duplicate
            // detection instead of rescanning every stored recipe for each new entry.
            var seenNames = new HashSet<string>(StringComparer.Ordinal);

            // One browsing context per call, so concurrent calls never share parser state.
            var context = BrowsingContext.New(Configuration.Default.WithDefaultLoader());

            for (int page = startPage; page < startPage + pagesToLoad; page++)
            {
                var url = $"https://hfresh.info/{locale}?page={page}";
                var html = await _httpClient.GetStringAsync(url);

                // The listing payload is JSON stored in the "data-page" attribute of #app.
                string rawData;
                using (var document = await context.OpenAsync(req => req.Content(html)))
                {
                    rawData = document.QuerySelector("#app")?.GetAttribute("data-page");
                }

                if (string.IsNullOrWhiteSpace(rawData))
                {
                    continue;
                }

                var parsed = JObject.Parse(rawData);
                var recipeArray = parsed.SelectToken("props.recipes.data") as JArray;
                if (recipeArray == null)
                {
                    continue;
                }

                foreach (var item in recipeArray)
                {
                    var id = item["id"]?.ToString();
                    var name = item["name"]?.ToString();
                    var pdf = item["pdf"]?.ToString();

                    // Entries without an id, a name or a PDF link are ignored.
                    if (string.IsNullOrEmpty(id) || string.IsNullOrEmpty(name) || string.IsNullOrEmpty(pdf))
                    {
                        continue;
                    }

                    // Skip duplicates by id.
                    if (recipesDict.ContainsKey(id))
                    {
                        continue;
                    }

                    // Skip duplicates by name (case-insensitive, surrounding whitespace ignored).
                    if (!seenNames.Add(name.Trim().ToLowerInvariant()))
                    {
                        continue;
                    }

                    recipesDict[id] = new Recipe
                    {
                        Id = id,
                        Name = name,
                        Image = item["image"]?.ToString(),
                        Pdf = pdf,
                        Description = item["headline"]?.ToString(),
                        Label = item["label"]?.ToString()
                    };
                }
            }

            // Scrape all detail pages concurrently and wait for every one to finish.
            await Task.WhenAll(recipesDict.Values.Select(PopulateDetailsAsync));

            return recipesDict.Values.ToList();
        }

        /// <summary>
        /// Downloads the detail page of <paramref name="recipe"/> and copies the first
        /// preparation-time and difficulty matches onto it. Best effort: any exception
        /// is written to the console and the recipe is left as it was.
        /// </summary>
        private async Task PopulateDetailsAsync(Recipe recipe)
        {
            var slug = GenerateSlug(recipe.Name);
            var url = $"https://www.hellofresh.fr/recipes/{slug}-{recipe.Id}";

            try
            {
                var html = await _httpClient.GetStringAsync(url);

                // Only the digits are kept, e.g. "35" for "35 minutes".
                var prepMatch = PrepTimeRegex.Match(html);
                if (prepMatch.Success)
                {
                    recipe.PrepTime = prepMatch.Groups[1].Value;
                }

                var difficultyMatch = DifficultyRegex.Match(html);
                if (difficultyMatch.Success)
                {
                    recipe.Difficulty = difficultyMatch.Groups[1].Value.Trim();
                }
            }
            catch (Exception ex)
            {
                // Deliberately broad: one unreachable page must not fail the whole batch.
                Console.WriteLine($"❌ Erreur scraping {url} : {ex.Message}");
            }
        }

        /// <summary>
        /// Builds the URL slug for a recipe name: lower-cases it, replaces the listed
        /// French accented letters with their ASCII form, turns "&amp;" into "et",
        /// apostrophes and spaces into hyphens, removes the listed punctuation, and
        /// collapses repeated hyphens.
        /// </summary>
        /// <param name="name">Recipe name; must not be null.</param>
        /// <returns>The slug with no leading or trailing hyphen.</returns>
        private static string GenerateSlug(string name)
        {
            // Invariant casing keeps the result independent of the current culture
            // (a Turkish culture, for example, would not map 'I' to 'i').
            var slug = name.ToLowerInvariant()
                .Replace("é", "e").Replace("è", "e").Replace("ê", "e")
                .Replace("à", "a").Replace("â", "a").Replace("ù", "u")
                .Replace("î", "i").Replace("ô", "o").Replace("ç", "c")
                .Replace("œ", "oe").Replace("&", "et")
                .Replace("’", "-").Replace("'", "-")
                .Replace("\"", "").Replace(",", "").Replace(":", "")
                .Replace("!", "").Replace("?", "").Replace("(", "").Replace(")", "")
                // NOTE(review): the previous replacement here was written as space -> space,
                // a no-op; it presumably targeted a non-breaking space — confirm.
                .Replace("\u00A0", " ")
                .Replace(" ", "-");

            // Consecutive spaces or adjacent replacements can leave hyphen runs.
            while (slug.Contains("--"))
            {
                slug = slug.Replace("--", "-");
            }

            return slug.Trim('-');
        }
    }
}