Skip to content

Commit

Permalink
Added URLCharityScraper
Browse files Browse the repository at this point in the history
Added a web scraper that scrapes relevant information from a charity's about-us page. Currently it only supports scraping the description string.
  • Loading branch information
roaraf committed Apr 7, 2026
1 parent d94957f commit 3cf7984
Showing 1 changed file with 65 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package ntnu.systemutvikling.team6.scraper;

import java.time.Duration;
import java.util.List;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;

/**
 * Scrapes the description text of a charity page using a headless Chrome session driven by
 * Selenium.
 *
 * <p>Typical use: construct the scraper, call {@link #updateDescription()} (or {@link
 * #updateDescription(String)} for another page), then read the result with {@link
 * #getDescription()}.
 *
 * <p>The browser session is always shut down when an update call finishes, whether it succeeds or
 * throws. A later update call on the same instance starts a fresh session with the same options.
 * This class performs no synchronization of its own.
 */
public class URLCharityScraper {
  /** Page scraped by the no-argument {@link #updateDescription()}. */
  private static final String DEFAULT_URL =
      "https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/";

  /** Elements holding the always-visible part of the description. */
  private static final By DESCRIPTION_LOCATOR = By.cssSelector(".information div");

  /** Link that, when present, reveals the rest of a long description. */
  private static final By READ_MORE_LOCATOR = By.cssSelector("a.read-more");

  /** Elements holding the part of the description revealed by the "read more" link. */
  private static final By EXTRA_INFO_LOCATOR = By.cssSelector(".extra-info");

  /** Upper bound for each explicit wait on page content. */
  private static final Duration WAIT_TIMEOUT = Duration.ofSeconds(10);

  ChromeOptions options;
  WebDriver driver;

  /** Text collected by the most recent successful update; never {@code null}. */
  private String description = "";

  /** Creates the scraper and immediately starts a headless Chrome session. */
  public URLCharityScraper() {
    this.options = new ChromeOptions();
    options.addArguments("--headless=new");
    options.addArguments("--window-size=1920,1080");
    // NOTE(review): the three flags below are commonly used to run Chrome in containers/CI;
    // confirm they are still wanted for the deployment environment.
    options.addArguments("--disable-gpu");
    options.addArguments("--no-sandbox");
    options.addArguments("--disable-dev-shm-usage");

    this.driver = new ChromeDriver(options);
  }

  /**
   * Scrapes the description from the default charity page.
   *
   * @return {@code true} if non-empty description text was collected, {@code false} otherwise
   * @see #updateDescription(String)
   */
  public boolean updateDescription() {
    return updateDescription(DEFAULT_URL);
  }

  /**
   * Loads {@code url}, collects the text of the description elements and, when a "read more" link
   * is present, clicks it and appends the text it reveals. The result is available from {@link
   * #getDescription()}; it is only replaced when the scrape completes without throwing.
   *
   * <p>The browser session is quit before this method returns or throws.
   *
   * @param url address of the charity page to scrape; must not be {@code null} or blank
   * @return {@code true} if non-empty description text was collected, {@code false} otherwise
   * @throws IllegalArgumentException if {@code url} is {@code null} or blank
   * @throws org.openqa.selenium.TimeoutException if the expected page content does not appear
   *     within the wait timeout
   */
  public boolean updateDescription(String url) {
    StringBuilder collected = new StringBuilder();

    try {
      // Validated inside the try so the browser session is released even for bad input.
      if (url == null || url.trim().isEmpty()) {
        throw new IllegalArgumentException("url must not be null or blank");
      }

      // A previous call quit the session; start a new one so the instance stays usable.
      if (driver == null) {
        driver = new ChromeDriver(options);
      }

      driver.get(url);

      WebDriverWait wait = new WebDriverWait(driver, WAIT_TIMEOUT);
      wait.until(ExpectedConditions.numberOfElementsToBeMoreThan(DESCRIPTION_LOCATOR, 0));
      appendText(collected, driver.findElements(DESCRIPTION_LOCATOR));

      // Long descriptions are truncated behind a "read more" link; findElements returns an
      // empty list instead of throwing when the link is absent.
      List<WebElement> readMoreLinks = driver.findElements(READ_MORE_LOCATOR);
      if (!readMoreLinks.isEmpty()) {
        readMoreLinks.get(0).click();
        wait.until(ExpectedConditions.visibilityOfElementLocated(EXTRA_INFO_LOCATOR));
        appendText(collected, driver.findElements(EXTRA_INFO_LOCATOR));
      }

      description = collected.toString();
    } finally {
      if (driver != null) {
        driver.quit();
        driver = null;
      }
    }
    return !description.isEmpty();
  }

  /**
   * Returns the text collected by the most recent successful update.
   *
   * @return the description, with the text of each scraped element on its own line, or an empty
   *     string if nothing has been scraped yet
   */
  public String getDescription() {
    return description;
  }

  /** Appends the trimmed, non-empty text of each element to {@code target}, newline-separated. */
  private static void appendText(StringBuilder target, List<WebElement> elements) {
    for (WebElement element : elements) {
      String text = element.getText();
      if (text == null) {
        continue;
      }
      String trimmed = text.trim();
      if (trimmed.isEmpty()) {
        continue;
      }
      if (target.length() > 0) {
        target.append('\n');
      }
      target.append(trimmed);
    }
  }
}

0 comments on commit 3cf7984

Please sign in to comment.