diff --git a/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java b/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java new file mode 100644 index 0000000..4120dcc --- /dev/null +++ b/helpmehelpapplication/src/main/java/ntnu/systemutvikling/team6/scraper/URLCharityScraper.java @@ -0,0 +1,65 @@ +package ntnu.systemutvikling.team6.scraper; + +import java.time.Duration; +import java.util.List; +import org.openqa.selenium.By; +import org.openqa.selenium.WebDriver; +import org.openqa.selenium.WebElement; +import org.openqa.selenium.chrome.ChromeDriver; +import org.openqa.selenium.chrome.ChromeOptions; +import org.openqa.selenium.support.ui.ExpectedConditions; +import org.openqa.selenium.support.ui.WebDriverWait; + +public class URLCharityScraper { + ChromeOptions options; + WebDriver driver; + + public URLCharityScraper() { + this.options = new ChromeOptions(); + options.addArguments("--headless=new"); + options.addArguments("--window-size=1920,1080"); + options.addArguments("--disable-gpu"); + options.addArguments("--no-sandbox"); + options.addArguments("--disable-dev-shm-usage"); + + this.driver = new ChromeDriver(options); + } + + public boolean updateDescription() { + StringBuilder descriptionString = new StringBuilder(); + + try { + // URL for scraping approved organizations + driver.get("https://www.innsamlingskontrollen.no/organisasjoner/anna-ministries/"); + + WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(10)); + wait.until( + ExpectedConditions.numberOfElementsToBeMoreThan(By.cssSelector(".information div"), 0)); + + List description = driver.findElements(By.cssSelector(".information div")); + + for (WebElement element : description) { + descriptionString.append(element.getText()); + } + + // Check for if description is long and contains a "read more" link + List doesReadMoreExist = driver.findElements(By.cssSelector("a.read-more")); + + if (!doesReadMoreExist.isEmpty()) { + WebElement descReadMore = driver.findElement(By.cssSelector("a.read-more")); + descReadMore.click(); + wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".extra-info"))); + + List extraDescription = driver.findElements(By.cssSelector(".extra-info")); + + for (WebElement webElement : extraDescription) { + descriptionString.append(webElement.getText()); + } + } + + } finally { + driver.quit(); + } + return true; + } +}