Skip to content

Commit

Permalink
Updated URLCharityScraper
Browse files Browse the repository at this point in the history
Moved the WebDriver and WebDriverWait initialization out of the constructor and into each scraping method.
Added a method for scraping a charity's categories from its URL.
  • Loading branch information
roaraf committed Apr 7, 2026
1 parent d2f0a54 commit 1671e1e
Showing 1 changed file with 34 additions and 10 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package ntnu.systemutvikling.team6.scraper;

import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
Expand All @@ -12,8 +13,6 @@

public class URLCharityScraper {
ChromeOptions options;
WebDriver driver;
WebDriverWait wait;

public URLCharityScraper() {
this.options = new ChromeOptions();
Expand All @@ -22,20 +21,18 @@ public URLCharityScraper() {
options.addArguments("--disable-gpu");
options.addArguments("--no-sandbox");
options.addArguments("--disable-dev-shm-usage");

this.driver = new ChromeDriver(options);

this.wait = new WebDriverWait(driver, Duration.ofSeconds(30));
}

public boolean updateDescription() {
WebDriver driver = new ChromeDriver(options);
WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30));
StringBuilder descriptionString = new StringBuilder();

try {
// URL for scraping approved organizations
driver.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/");

this.wait.until(
wait.until(
ExpectedConditions.numberOfElementsToBeMoreThan(By.cssSelector(".information div"), 0));

List<WebElement> description = driver.findElements(By.cssSelector(".information div"));
Expand All @@ -50,7 +47,7 @@ public boolean updateDescription() {
if (!doesReadMoreExist.isEmpty()) {
WebElement descReadMore = driver.findElement(By.cssSelector("a.read-more"));
descReadMore.click();
this.wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".extra-info")));
wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".extra-info")));

List<WebElement> extraDescription = driver.findElements(By.cssSelector(".extra-info"));

Expand All @@ -66,19 +63,46 @@ public boolean updateDescription() {
}

/**
 * Opens the hard-coded charity page in a fresh Chrome session, waits up to 30 seconds for the
 * logo image ({@code .logo > img}) to become visible, and reads its {@code src} attribute.
 *
 * <p>The browser session is created per call and always shut down before returning, including
 * when the wait times out or the element lookup fails.
 *
 * @return {@code true} whenever the method completes without throwing
 */
public boolean updateLogo() {
  WebDriver driver = new ChromeDriver(options);

  try {
    // Build the wait inside the try so the browser is still quit if construction fails.
    WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30));

    driver.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/");

    // Wait on the wait bound to THIS driver. The previous extra call through this.wait polled
    // a different driver instance that never loaded the page, so it could only time out.
    wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".logo > img")));

    WebElement logo = driver.findElement(By.cssSelector(".logo > img"));

    // NOTE(review): the scraped URL is not stored or returned yet; kept so the lookup still runs.
    String logoURL = logo.getAttribute("src");
  } finally {
    // quit() closes every window and ends the session, so a separate close() is unnecessary.
    driver.quit();
  }
  return true;
}

/**
 * Opens the hard-coded charity page in a fresh Chrome session, waits up to 30 seconds for a
 * {@code .tag-label} element to become visible, and collects the text of every such element.
 *
 * <p>The browser session is quit before returning, even if the wait or lookup throws. The
 * collected texts are printed to standard output.
 *
 * @return {@code true} whenever the method completes without throwing
 */
public boolean updateCategories() {
  final WebDriver browser = new ChromeDriver(options);
  final WebDriverWait pageWait = new WebDriverWait(browser, Duration.ofSeconds(30));
  final By tagSelector = By.cssSelector(".tag-label");
  final List<String> tagTexts = new ArrayList<>();

  try {
    browser.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/");
    pageWait.until(ExpectedConditions.visibilityOfElementLocated(tagSelector));

    // One entry per matching element, in document order.
    final List<WebElement> tags = browser.findElements(tagSelector);
    tags.forEach(tag -> tagTexts.add(tag.getText()));
  } finally {
    browser.quit();
  }

  System.out.println(tagTexts);
  return true;
}
}

0 comments on commit 1671e1e

Please sign in to comment.