From cd3c2500039c482677709664f691eb0c4d97fa66 Mon Sep 17 00:00:00 2001 From: Roar Date: Tue, 24 Mar 2026 14:43:49 +0100 Subject: [PATCH 1/6] Added URLCharityScraper Added a web-scraper that will scrape relevant information for the charity about-us page. Currently supports scraping description String. --- .../team6/scraper/URLCharityScraper.java | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java diff --git a/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java b/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java new file mode 100644 index 0000000..4120dcc --- /dev/null +++ b/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java @@ -0,0 +1,65 @@ +package ntnu.systemutvikling.team6.scraper; + +import java.time.Duration; +import java.util.List; +import org.openqa.selenium.By; +import org.openqa.selenium.WebDriver; +import org.openqa.selenium.WebElement; +import org.openqa.selenium.chrome.ChromeDriver; +import org.openqa.selenium.chrome.ChromeOptions; +import org.openqa.selenium.support.ui.ExpectedConditions; +import org.openqa.selenium.support.ui.WebDriverWait; + +public class URLCharityScraper { + ChromeOptions options; + WebDriver driver; + + public URLCharityScraper() { + this.options = new ChromeOptions(); + options.addArguments("--headless=new"); + options.addArguments("--window-size=1920,1080"); + options.addArguments("--disable-gpu"); + options.addArguments("--no-sandbox"); + options.addArguments("--disable-dev-shm-usage"); + + this.driver = new ChromeDriver(options); + } + + public boolean updateDescription() { + StringBuilder descriptionString = new StringBuilder(); + + try { + // URL for scraping approved organizations + driver.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/"); + + 
WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(10)); + wait.until( + ExpectedConditions.numberOfElementsToBeMoreThan(By.cssSelector(".information div"), 0)); + + List description = driver.findElements(By.cssSelector(".information div")); + + for (WebElement element : description) { + descriptionString.append(element.getText()); + } + + // Check for if description is long and contains a "read more" link + List doesReadMoreExist = driver.findElements(By.cssSelector("a.read-more")); + + if (!doesReadMoreExist.isEmpty()) { + WebElement descReadMore = driver.findElement(By.cssSelector("a.read-more")); + descReadMore.click(); + wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".extra-info"))); + + List extraDescription = driver.findElements(By.cssSelector(".extra-info")); + + for (WebElement webElement : extraDescription) { + descriptionString.append(webElement.getText()); + } + } + + } finally { + driver.quit(); + } + return true; + } +} From 30a5f0e61b0f07d2898996442076a91611ad52c1 Mon Sep 17 00:00:00 2001 From: Roar Date: Tue, 24 Mar 2026 15:25:26 +0100 Subject: [PATCH 2/6] Updated URLCharityScraper Added method to get the URL of the logo. Converted the WebDriverWait object to an instance field that is initialized in the constructor. 
--- .../team6/scraper/URLCharityScraper.java | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java b/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java index 4120dcc..ba1dda9 100644 --- a/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java +++ b/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java @@ -13,6 +13,7 @@ public class URLCharityScraper { ChromeOptions options; WebDriver driver; + WebDriverWait wait; public URLCharityScraper() { this.options = new ChromeOptions(); @@ -23,6 +24,8 @@ public URLCharityScraper() { options.addArguments("--disable-dev-shm-usage"); this.driver = new ChromeDriver(options); + + this.wait = new WebDriverWait(driver, Duration.ofSeconds(30)); } public boolean updateDescription() { @@ -32,8 +35,7 @@ public boolean updateDescription() { // URL for scraping approved organizations driver.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/"); - WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(10)); - wait.until( + this.wait.until( ExpectedConditions.numberOfElementsToBeMoreThan(By.cssSelector(".information div"), 0)); List description = driver.findElements(By.cssSelector(".information div")); @@ -48,7 +50,7 @@ public boolean updateDescription() { if (!doesReadMoreExist.isEmpty()) { WebElement descReadMore = driver.findElement(By.cssSelector("a.read-more")); descReadMore.click(); - wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".extra-info"))); + this.wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".extra-info"))); List extraDescription = driver.findElements(By.cssSelector(".extra-info")); @@ -62,4 +64,21 @@ public boolean updateDescription() { } return true; } + + public boolean updateLogo() { + String 
logoURL = ""; + + try { + driver.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/"); + + this.wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".logo > img"))); + + WebElement logo = driver.findElement(By.cssSelector(".logo > img")); + + logoURL = logo.getAttribute("src"); + } finally { + driver.close(); + } + return true; + } } From bc0cc72aac08448c0a8a1e1f9cc00cdf16cac586 Mon Sep 17 00:00:00 2001 From: Roar Date: Tue, 24 Mar 2026 15:55:30 +0100 Subject: [PATCH 3/6] Updated URLCharityScraper Changed driver and wait initialization to be for each method instead of constructor. Added a method for getting categories from charity URL. --- .../team6/scraper/URLCharityScraper.java | 44 ++++++++++++++----- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java b/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java index ba1dda9..173785d 100644 --- a/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java +++ b/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java @@ -1,6 +1,7 @@ package ntnu.systemutvikling.team6.scraper; import java.time.Duration; +import java.util.ArrayList; import java.util.List; import org.openqa.selenium.By; import org.openqa.selenium.WebDriver; @@ -12,8 +13,6 @@ public class URLCharityScraper { ChromeOptions options; - WebDriver driver; - WebDriverWait wait; public URLCharityScraper() { this.options = new ChromeOptions(); @@ -22,20 +21,18 @@ public URLCharityScraper() { options.addArguments("--disable-gpu"); options.addArguments("--no-sandbox"); options.addArguments("--disable-dev-shm-usage"); - - this.driver = new ChromeDriver(options); - - this.wait = new WebDriverWait(driver, Duration.ofSeconds(30)); } public boolean updateDescription() { + WebDriver driver = new 
ChromeDriver(options); + WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30)); StringBuilder descriptionString = new StringBuilder(); try { // URL for scraping approved organizations driver.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/"); - this.wait.until( + wait.until( ExpectedConditions.numberOfElementsToBeMoreThan(By.cssSelector(".information div"), 0)); List description = driver.findElements(By.cssSelector(".information div")); @@ -50,7 +47,7 @@ public boolean updateDescription() { if (!doesReadMoreExist.isEmpty()) { WebElement descReadMore = driver.findElement(By.cssSelector("a.read-more")); descReadMore.click(); - this.wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".extra-info"))); + wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".extra-info"))); List extraDescription = driver.findElements(By.cssSelector(".extra-info")); @@ -66,19 +63,46 @@ public boolean updateDescription() { } public boolean updateLogo() { + WebDriver driver = new ChromeDriver(options); + WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30)); String logoURL = ""; try { driver.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/"); - this.wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".logo > img"))); + wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".logo > img"))); WebElement logo = driver.findElement(By.cssSelector(".logo > img")); logoURL = logo.getAttribute("src"); } finally { - driver.close(); + driver.quit(); } return true; } + + public boolean updateCategories() { + WebDriver driver = new ChromeDriver(options); + WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30)); + List categoriesList = new ArrayList<>(); + + try { + driver.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/"); + + 
wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".tag-label"))); + + List categories = driver.findElements(By.cssSelector(".tag-label")); + + for (WebElement element : categories) { + categoriesList.add(element.getText()); + } + + } finally { + driver.quit(); + } + + System.out.println(categoriesList); + + return true; + } } From 3232a981e1825ca2e3dafe4bf7c6150dd4654ae8 Mon Sep 17 00:00:00 2001 From: Roar Date: Wed, 1 Apr 2026 13:53:57 +0200 Subject: [PATCH 4/6] Updated URLCharityScraper Added method updateKeyNumbers() that gets the value of the 3 key number values by IK for the given charity. --- .../team6/scraper/URLCharityScraper.java | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java b/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java index 173785d..9b64342 100644 --- a/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java +++ b/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java @@ -105,4 +105,50 @@ public boolean updateCategories() { return true; } + + public boolean updateKeyNumbers() { + WebDriver driver = new ChromeDriver(options); + WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30)); + String percentage; + WebElement element; + + try { + driver.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/"); + + wait.until(ExpectedConditions.visibilityOfElementLocated(By.xpath( + "//li[.//h2[normalize-space()='Innsamlingsprosent']]//div[@class='graph']"))); + + element = driver.findElement( + By.xpath("//li[.//h2[normalize-space()='Innsamlingsprosent']]//div[@class='graph']") + ); + + percentage = element.getAttribute("data-percentage"); + System.out.println(percentage); + + wait.until(ExpectedConditions.visibilityOfElementLocated(By.xpath( + 
"//li[.//h2[normalize-space()='Administrasjonsprosent']]//div[@class='graph']"))); + + element = driver.findElement( + By.xpath("//li[.//h2[normalize-space()='Administrasjonsprosent']]//div[@class='graph']") + ); + + percentage = element.getAttribute("data-percentage"); + System.out.println(percentage); + + wait.until(ExpectedConditions.visibilityOfElementLocated(By.xpath( + "//li[.//h2[normalize-space()='Formålsprosent']]//div[@class='graph']"))); + + element = driver.findElement( + By.xpath("//li[.//h2[normalize-space()='Formålsprosent']]//div[@class='graph']") + ); + + percentage = element.getAttribute("data-percentage"); + System.out.println(percentage); + + } finally { + driver.quit(); + } + + return true; + } } From bfc92ab93cb6d52d8d31b658b4ad2ba04f59ba38 Mon Sep 17 00:00:00 2001 From: Roar Date: Wed, 1 Apr 2026 14:21:33 +0200 Subject: [PATCH 5/6] Updated URLCharityScraper Fixed updateDescription so it doesn't output some of the text twice, and it now returns the String. --- .../team6/scraper/URLCharityScraper.java | 34 ++++++++----------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java b/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java index 9b64342..75709ec 100644 --- a/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java +++ b/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java @@ -23,43 +23,37 @@ public URLCharityScraper() { options.addArguments("--disable-dev-shm-usage"); } - public boolean updateDescription() { + public String updateDescription(String url) { WebDriver driver = new ChromeDriver(options); WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30)); - StringBuilder descriptionString = new StringBuilder(); + StringBuilder descriptionString = new StringBuilder(); try { - // URL for scraping 
approved organizations - driver.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/"); + driver.get(url); wait.until( ExpectedConditions.numberOfElementsToBeMoreThan(By.cssSelector(".information div"), 0)); + // Check for if description is long and contains a "read more" link + List doesReadMoreExist = driver.findElements(By.cssSelector("a.read-more")); + + if (!doesReadMoreExist.isEmpty()) { + WebElement descReadMore = driver.findElement(By.cssSelector("a.read-more")); + descReadMore.click(); + wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".extra-info"))); + } + List description = driver.findElements(By.cssSelector(".information div")); for (WebElement element : description) { descriptionString.append(element.getText()); } - // Check for if description is long and contains a "read more" link - List doesReadMoreExist = driver.findElements(By.cssSelector("a.read-more")); - - if (!doesReadMoreExist.isEmpty()) { - WebElement descReadMore = driver.findElement(By.cssSelector("a.read-more")); - descReadMore.click(); - wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".extra-info"))); - - List extraDescription = driver.findElements(By.cssSelector(".extra-info")); - - for (WebElement webElement : extraDescription) { - descriptionString.append(webElement.getText()); - } - } - } finally { driver.quit(); } - return true; + + return descriptionString.toString(); } public boolean updateLogo() { From ee2aee62f879825d03c4082a4b2ddc2f574a10a9 Mon Sep 17 00:00:00 2001 From: Roar Date: Wed, 1 Apr 2026 14:40:14 +0200 Subject: [PATCH 6/6] Updated URLCharityScraper Changed methods to return their values, and made the url an instance field. 
--- .../team6/scraper/URLCharityScraper.java | 40 ++++++++++--------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java b/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java index 75709ec..53aa2dc 100644 --- a/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java +++ b/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java @@ -13,23 +13,25 @@ public class URLCharityScraper { ChromeOptions options; + String url; - public URLCharityScraper() { + public URLCharityScraper(String url) { this.options = new ChromeOptions(); options.addArguments("--headless=new"); options.addArguments("--window-size=1920,1080"); options.addArguments("--disable-gpu"); options.addArguments("--no-sandbox"); options.addArguments("--disable-dev-shm-usage"); + this.url = url; } - public String updateDescription(String url) { + public String updateDescription() { WebDriver driver = new ChromeDriver(options); WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30)); StringBuilder descriptionString = new StringBuilder(); try { - driver.get(url); + driver.get(this.url); wait.until( ExpectedConditions.numberOfElementsToBeMoreThan(By.cssSelector(".information div"), 0)); @@ -56,13 +58,13 @@ public String updateDescription(String url) { return descriptionString.toString(); } - public boolean updateLogo() { + public String updateLogo() { WebDriver driver = new ChromeDriver(options); WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30)); - String logoURL = ""; + String logoURL; try { - driver.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/"); + driver.get(this.url); wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".logo > img"))); @@ -72,16 +74,16 @@ public boolean updateLogo() { } finally { 
driver.quit(); } - return true; + return logoURL; } - public boolean updateCategories() { + public List updateCategories() { WebDriver driver = new ChromeDriver(options); WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30)); List categoriesList = new ArrayList<>(); try { - driver.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/"); + driver.get(this.url); wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".tag-label"))); @@ -95,19 +97,18 @@ public boolean updateCategories() { driver.quit(); } - System.out.println(categoriesList); - - return true; + return categoriesList; } - public boolean updateKeyNumbers() { + public List updateKeyNumbers() { WebDriver driver = new ChromeDriver(options); WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30)); String percentage; WebElement element; + List keyNumbersList = new ArrayList<>(); try { - driver.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/"); + driver.get(this.url); wait.until(ExpectedConditions.visibilityOfElementLocated(By.xpath( "//li[.//h2[normalize-space()='Innsamlingsprosent']]//div[@class='graph']"))); @@ -117,7 +118,8 @@ public boolean updateKeyNumbers() { ); percentage = element.getAttribute("data-percentage"); - System.out.println(percentage); + + keyNumbersList.add(percentage); wait.until(ExpectedConditions.visibilityOfElementLocated(By.xpath( "//li[.//h2[normalize-space()='Administrasjonsprosent']]//div[@class='graph']"))); @@ -127,7 +129,8 @@ public boolean updateKeyNumbers() { ); percentage = element.getAttribute("data-percentage"); - System.out.println(percentage); + + keyNumbersList.add(percentage); wait.until(ExpectedConditions.visibilityOfElementLocated(By.xpath( "//li[.//h2[normalize-space()='Formålsprosent']]//div[@class='graph']"))); @@ -137,12 +140,13 @@ public boolean updateKeyNumbers() { ); percentage = element.getAttribute("data-percentage"); - System.out.println(percentage); + + 
keyNumbersList.add(percentage); } finally { driver.quit(); } - return true; + return keyNumbersList; } }