Skip to content

Commit

Permalink
Updated URLCharityScraper
Browse files Browse the repository at this point in the history
Changed driver and wait initialization to happen in each method instead of in the constructor.
Added a method for getting categories from a charity URL.
  • Loading branch information
roaraf committed Mar 24, 2026
1 parent 30a5f0e commit bc0cc72
Showing 1 changed file with 34 additions and 10 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package ntnu.systemutvikling.team6.scraper;

import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
Expand All @@ -12,8 +13,6 @@

public class URLCharityScraper {
ChromeOptions options;
WebDriver driver;
WebDriverWait wait;

public URLCharityScraper() {
this.options = new ChromeOptions();
Expand All @@ -22,20 +21,18 @@ public URLCharityScraper() {
options.addArguments("--disable-gpu");
options.addArguments("--no-sandbox");
options.addArguments("--disable-dev-shm-usage");

this.driver = new ChromeDriver(options);

this.wait = new WebDriverWait(driver, Duration.ofSeconds(30));
}

public boolean updateDescription() {
WebDriver driver = new ChromeDriver(options);
WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30));
StringBuilder descriptionString = new StringBuilder();

try {
// URL for scraping approved organizations
driver.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/");

this.wait.until(
wait.until(
ExpectedConditions.numberOfElementsToBeMoreThan(By.cssSelector(".information div"), 0));

List<WebElement> description = driver.findElements(By.cssSelector(".information div"));
Expand All @@ -50,7 +47,7 @@ public boolean updateDescription() {
if (!doesReadMoreExist.isEmpty()) {
WebElement descReadMore = driver.findElement(By.cssSelector("a.read-more"));
descReadMore.click();
this.wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".extra-info")));
wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".extra-info")));

List<WebElement> extraDescription = driver.findElements(By.cssSelector(".extra-info"));

Expand All @@ -66,19 +63,46 @@ public boolean updateDescription() {
}

/**
 * Reads the logo image URL from the charity's page on innsamlingskontrollen.no.
 *
 * <p>A dedicated Chrome session (configured by {@code options}) is started for this call and
 * is always shut down again in the {@code finally} block, whether or not the lookup succeeds.
 *
 * @return {@code true} once the logo element has become visible and its {@code src} was read
 * @throws org.openqa.selenium.TimeoutException if the logo does not become visible within 30
 *     seconds
 */
public boolean updateLogo() {
  WebDriver driver = new ChromeDriver(options);
  WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30));
  String logoURL = "";

  try {
    driver.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/");

    // Wait only on the wait bound to this method's own driver: a wait tied to any other
    // driver instance never sees this page and would simply run into its timeout.
    // until(...) hands back the element it found, so no second lookup is needed.
    WebElement logo =
        wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".logo > img")));

    // NOTE(review): the URL is read but not yet stored or returned anywhere - confirm intent.
    logoURL = logo.getAttribute("src");
  } finally {
    // quit() closes every window and ends the session, so a preceding close() is redundant.
    driver.quit();
  }
  return true;
}

/**
 * Collects the text of every category tag on the charity's page and prints the resulting list
 * to standard output.
 *
 * <p>The method opens its own Chrome session (configured by {@code options}), waits up to 30
 * seconds for a {@code .tag-label} element to become visible, gathers the text of all matching
 * elements, and always quits the session afterwards.
 *
 * @return {@code true} after the categories have been printed
 */
public boolean updateCategories() {
  final By tagLocator = By.cssSelector(".tag-label");

  WebDriver browser = new ChromeDriver(options);
  WebDriverWait pageWait = new WebDriverWait(browser, Duration.ofSeconds(30));
  List<String> categoryNames = new ArrayList<>();

  try {
    browser.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/");

    // Block until at least one tag is rendered before collecting all of them.
    pageWait.until(ExpectedConditions.visibilityOfElementLocated(tagLocator));

    browser.findElements(tagLocator).forEach(tag -> categoryNames.add(tag.getText()));
  } finally {
    // Always release the browser session, even when the wait times out.
    browser.quit();
  }

  System.out.println(categoryNames);

  return true;
}
}

0 comments on commit bc0cc72

Please sign in to comment.