-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat&update[OrganizationPage]: Update description to be description fetched from the API
- Loading branch information
Fredrik Marjoni
committed
Apr 8, 2026
1 parent
c5c3ce9
commit a2a087b
Showing
6 changed files
with
192 additions
and
73 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
107 changes: 107 additions & 0 deletions
107
src/main/java/edu/group5/app/model/organization/OrganizationScraper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,107 @@ | ||
| package edu.group5.app.model.organization; | ||
|
|
||
| import org.jsoup.Jsoup; | ||
| import org.jsoup.nodes.Document; | ||
| import org.jsoup.nodes.Element; | ||
| import java.util.stream.Collectors; | ||
|
|
||
| import java.util.HashMap; | ||
| import java.util.Map; | ||
|
|
||
| /** | ||
| * Handles web scraping of organization information from Innsamlingskontrollen. | ||
| * Responsible for fetching logos and descriptions from organization pages. | ||
| * All results are cached to avoid redundant network requests. | ||
| */ | ||
| public class OrganizationScraper { | ||
| private final Map<String, String> logoCache = new HashMap<>(); | ||
| private final Map<String, String> descriptionCache = new HashMap<>(); | ||
|
|
||
| /** | ||
| * Fetches the description for the given URL by scraping all text content | ||
| * inside {@code <section class="information">}. Results are cached. | ||
| * | ||
| * <p>Strategy:</p> | ||
| * <ol> | ||
| * <li>Tries to get all <p> tags (skipping the first one) and concatenates them</li> | ||
| * <li>If no paragraphs found, gets all text content from the section</li> | ||
| * <li>Returns null if section not found or is empty</li> | ||
| * </ol> | ||
| * | ||
| * @param pageUrl the URL for the organization's page; may be null or blank | ||
| * @return the description text, or null if not found or pageUrl is invalid | ||
| */ | ||
| public String fetchDescription(String pageUrl) { | ||
| if (pageUrl == null || pageUrl.isBlank()) { | ||
| return null; | ||
| } | ||
|
|
||
| if (descriptionCache.containsKey(pageUrl)) { | ||
| return descriptionCache.get(pageUrl); | ||
| } | ||
|
|
||
| try { | ||
| Document doc = Jsoup.connect(pageUrl) | ||
| .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36") | ||
| .timeout(5000).get(); | ||
|
|
||
| Element section = doc.selectFirst("section.information"); | ||
| if (section != null) { | ||
| // Try to get all <p> tags (skip first one if multiple exist) | ||
| String description = section.select("p").stream() | ||
| .skip(1) // Skip first paragraph (usually a heading) | ||
| .map(Element::text) | ||
| .filter(text -> !text.isBlank()) | ||
| .map(String::trim) | ||
| .collect(Collectors.joining("\n\n")); | ||
|
|
||
| // Fallback: if no paragraphs after first, get all text from section | ||
| if (description.isBlank()) { | ||
| description = section.text().trim(); | ||
| } | ||
|
|
||
| // Only cache and return if we found something meaningful | ||
| if (!description.isBlank()) { | ||
| descriptionCache.put(pageUrl, description); | ||
| return description; | ||
| } | ||
| } | ||
| } catch (Exception e) { | ||
| System.out.println("Could not get description for: " + pageUrl); | ||
| } | ||
| return null; | ||
| } | ||
|
|
||
| /** | ||
| * Fetches the logo URL for the given page by scraping the {@code div.logo img} | ||
| * element. Results are cached so each URL is only fetched once. | ||
| * | ||
| * @param pageUrl the URL for the organization's page; may be null or blank | ||
| * @return the absolute logo URL, or null if not found or pageUrl is invalid | ||
| */ | ||
| public String fetchLogoUrl(String pageUrl) { | ||
| if (pageUrl == null || pageUrl.isBlank()) { | ||
| return null; | ||
| } | ||
|
|
||
| if (logoCache.containsKey(pageUrl)) { | ||
| return logoCache.get(pageUrl); | ||
| } | ||
|
|
||
| try { | ||
| Document doc = Jsoup.connect(pageUrl) | ||
| .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36") | ||
| .timeout(5000).get(); | ||
| Element img = doc.selectFirst("div.logo img"); | ||
|
|
||
| if (img != null) { | ||
| String logoUrl = img.absUrl("src"); | ||
| logoCache.put(pageUrl, logoUrl); | ||
| return logoUrl; | ||
| } | ||
| } catch (Exception e) { | ||
| System.out.println("Could not get logo for: " + pageUrl); | ||
| } | ||
| return null; | ||
| } | ||
| } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,31 +1,45 @@ | ||
| #main-container { | ||
| -fx-padding: 50px | ||
| -fx-padding: 50px | ||
| } | ||
|
|
||
| #logo { | ||
| -fx-min-height: 50%; | ||
| -fx-min-height: 80%; | ||
| } | ||
|
|
||
| #orgName { | ||
| -fx-font-weight: bold; | ||
| -fx-font-size: 20pt; | ||
| -fx-font-weight: bold; | ||
| -fx-font-size: 28pt; | ||
| -fx-padding: 0 0 30 0; | ||
| } | ||
|
|
||
| #description { | ||
| -fx-font-size: 10pt; | ||
| #description-container { | ||
| -fx-padding: 30 0 30 0; | ||
| -fx-spacing: 25; | ||
| } | ||
|
|
||
| #description-paragraph { | ||
| -fx-font-size: 16; | ||
| -fx-text-fill: #222; | ||
| -fx-font-family: "Segoe UI", Arial, sans-serif; | ||
| -fx-padding: 15 50 15 50; | ||
| -fx-line-spacing: 6; | ||
| -fx-text-alignment: Left; | ||
| -fx-wrap-text: true; | ||
|
|
||
| } | ||
|
|
||
| #donate-button { | ||
| -fx-pref-height: 55px; | ||
| -fx-background-color: #e03030; | ||
| -fx-text-fill: white; | ||
| -fx-font-size: 22px; | ||
| -fx-font-weight: bold; | ||
| -fx-background-radius: 8; | ||
| -fx-cursor: hand; | ||
| -fx-padding: 0 40 0 40; | ||
| -fx-pref-height: 55px; | ||
| -fx-background-color: #e03030; | ||
| -fx-text-fill: white; | ||
| -fx-font-size: 22px; | ||
| -fx-font-weight: bold; | ||
| -fx-background-radius: 8; | ||
| -fx-cursor: hand; | ||
| -fx-padding: 0 40 0 40; | ||
| -fx-margin-top: 30; | ||
| } | ||
|
|
||
| #donate-button:hover { | ||
| -fx-background-color: #c02020; | ||
| -fx-background-color: #c02020; | ||
| } |