From cda2d2146e0c86bfe52f0b5ac870ece107af0372 Mon Sep 17 00:00:00 2001 From: AdrianBalunan Date: Mon, 13 Apr 2026 16:23:21 +0200 Subject: [PATCH] Fix: Added try/catch blocks for better runtime exception handling --- .../team6/scraper/FullCharityScrape.java | 81 +++++++++++++++---- 1 file changed, 67 insertions(+), 14 deletions(-) diff --git a/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/FullCharityScrape.java b/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/FullCharityScrape.java index 67fbda6..76a4f4e 100644 --- a/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/FullCharityScrape.java +++ b/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/FullCharityScrape.java @@ -3,28 +3,78 @@ import java.io.IOException; import java.net.URISyntaxException; import java.net.http.HttpClient; +import java.util.function.Function; + import ntnu.systemutvikling.team6.models.Charity; import ntnu.systemutvikling.team6.models.registry.CharityRegistry; import ntnu.systemutvikling.team6.scraper.scraperComponents.APICharityScraper; import ntnu.systemutvikling.team6.scraper.scraperComponents.URLCharityScraper; +/** + * Orchestrates a full charity data scrape by combining two data sources: + * + *
    + *
  1. The external charity API (via {@link APICharityScraper}), which provides structured data + * such as organisation numbers, approval status, and charity URLs. + *
  2. Individual charity web pages (via {@link URLCharityScraper}), which provide richer + * presentation data such as descriptions, logos, categories, and key values. + *
+ * + *

This class acts as a facade — callers only need to invoke {@link #getAPIAndURLCharityData()} + * to receive a fully populated {@link CharityRegistry}. + */ public class FullCharityScrape { - private APICharityScraper apiScraper; + private final APICharityScraper apiScraper; + private final LogoDownloader logoDownloader; + /** + * Constructs a {@code FullCharityScrape} instance and initialises the {@link APICharityScraper} + * with a new {@link HttpClient}. + * + * @throws URISyntaxException if the API endpoint URI used by {@link APICharityScraper} is + * malformed + */ public FullCharityScrape() throws URISyntaxException { HttpClient https = HttpClient.newHttpClient(); this.apiScraper = new APICharityScraper(https); + this.logoDownloader = new LogoDownloader(); } + /** + * Performs a full two-phase scrape and returns a {@link CharityRegistry} populated with all + * available charity data. + * + *

Phase 1 — API scrape: Calls {@link APICharityScraper#checkConnection()} to verify + * availability, then fetches and parses the JSON payload into a {@link CharityRegistry}. + * + *

Phase 2 — URL scrape: Iterates over every {@link Charity} in the registry and uses + * a {@link URLCharityScraper} to enrich each entry with its description, logo URL, logo blob, + * categories, and key values scraped from the charity's own web page. + * + *

If {@link APICharityScraper#checkConnection()} throws an exception or returns {@code false}, + * the failure is caught and its stack trace is printed; the method does not return early and + * still attempts to fetch and parse the API data. + * + * @return the {@link CharityRegistry} parsed from the API data and enriched by the URL scrape + * @throws IOException if an I/O error occurs during the API request or URL scraping + * @throws InterruptedException if the HTTP request thread is interrupted + */ public CharityRegistry getAPIAndURLCharityData() throws IOException, InterruptedException { - CharityRegistry charityRegistry = null; - if (apiScraper.checkConnection()) { - charityRegistry = apiScraper.parseJSON(apiScraper.getJSONData()); + try { + if (!apiScraper.checkConnection()){ + throw new RuntimeException("Connection check returned false"); + } + } catch (Exception e) { + e.printStackTrace(); + } + + CharityRegistry charityRegistry = apiScraper.parseJSON(apiScraper.getJSONData()); + int charityCounter = 0; + for (Charity charity : charityRegistry.getAllCharities()) { System.out.println(charity.getName()); } // Scrapes description, logo, categories, and key values from IK - int charityCounter = 0; for (Charity charity : charityRegistry.getAllCharities()) { charityCounter++; @@ -33,15 +83,18 @@ public CharityRegistry getAPIAndURLCharityData() throws IOException, Interrupted + charityCounter + " of " + charityRegistry.getAllCharities().size()); - URLCharityScraper urlScraper = new URLCharityScraper(charity.getURL()); - urlScraper.scrapeCharityPage(); - - charity.setDescription(urlScraper.getDescription()); - charity.setCategory(urlScraper.getCategories()); - charity.setLogoURL(urlScraper.getLogoURL()); - charity.setKeyValues(urlScraper.getKeyValues()); - byte[] logoBlob = LogoDownloader.downloadImageAsBlob(charity.getLogoURL()); - charity.setLogoBlob(logoBlob); + try { + URLCharityScraper urlScraper = new URLCharityScraper(charity.getURL()); + urlScraper.scrapeCharityPage(); + + 
charity.setDescription(urlScraper.getDescription()); + charity.setCategory(urlScraper.getCategories()); + charity.setLogoURL(urlScraper.getLogoURL()); + charity.setKeyValues(urlScraper.getKeyValues()); + byte[] logoBlob = LogoDownloader.downloadImageAsBlob(charity.getLogoURL()); + charity.setLogoBlob(logoBlob); + } catch (Exception e){ + throw new RuntimeException("Failed to Scrape for: [" + charityCounter +"]: "+ charity.getName() + ": " + e.getMessage()); } } return charityRegistry;