diff --git a/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java b/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java index ba1dda9..173785d 100644 --- a/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java +++ b/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java @@ -1,6 +1,7 @@ package ntnu.systemutvikling.team6.scraper; import java.time.Duration; +import java.util.ArrayList; import java.util.List; import org.openqa.selenium.By; import org.openqa.selenium.WebDriver; @@ -12,8 +13,6 @@ public class URLCharityScraper { ChromeOptions options; - WebDriver driver; - WebDriverWait wait; public URLCharityScraper() { this.options = new ChromeOptions(); @@ -22,20 +21,18 @@ public URLCharityScraper() { options.addArguments("--disable-gpu"); options.addArguments("--no-sandbox"); options.addArguments("--disable-dev-shm-usage"); - - this.driver = new ChromeDriver(options); - - this.wait = new WebDriverWait(driver, Duration.ofSeconds(30)); } public boolean updateDescription() { + WebDriver driver = new ChromeDriver(options); + WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30)); StringBuilder descriptionString = new StringBuilder(); try { // URL for scraping approved organizations driver.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/"); - this.wait.until( + wait.until( ExpectedConditions.numberOfElementsToBeMoreThan(By.cssSelector(".information div"), 0)); List description = driver.findElements(By.cssSelector(".information div")); @@ -50,7 +47,7 @@ public boolean updateDescription() { if (!doesReadMoreExist.isEmpty()) { WebElement descReadMore = driver.findElement(By.cssSelector("a.read-more")); descReadMore.click(); - this.wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".extra-info"))); + wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".extra-info"))); List extraDescription = driver.findElements(By.cssSelector(".extra-info")); @@ -66,19 +63,46 @@ public boolean updateDescription() { } public boolean updateLogo() { + WebDriver driver = new ChromeDriver(options); + WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30)); String logoURL = ""; try { driver.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/"); - this.wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".logo > img"))); + wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".logo > img"))); WebElement logo = driver.findElement(By.cssSelector(".logo > img")); logoURL = logo.getAttribute("src"); } finally { - driver.close(); + driver.quit(); } return true; } + + public boolean updateCategories() { + WebDriver driver = new ChromeDriver(options); + WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30)); + List categoriesList = new ArrayList<>(); + + try { + driver.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/"); + + wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".tag-label"))); + + List categories = driver.findElements(By.cssSelector(".tag-label")); + + for (WebElement element : categories) { + categoriesList.add(element.getText()); + } + + } finally { + driver.quit(); + } + + System.out.println(categoriesList); + + return true; + } }