package ntnu.sytemutvikling.team6.models;

import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import com.opencsv.CSVWriter;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;

/**
 * Scrapes the organization table published at
 * {@code https://www.innsamlingskontrollen.no/organisasjoner/} with a headless Chrome
 * browser, keeps the result in memory and can export it to a CSV file.
 *
 * <p>The first element of the cached data is always a header entry
 * ({@code "Name", "Telephone", "Location", "Status"}) so that the list can be written
 * to CSV as-is.
 */
public class IKOrganizationScraper {

    /** Page that lists the organizations to scrape. */
    private static final String SOURCE_URL = "https://www.innsamlingskontrollen.no/organisasjoner/";

    /** File the scraped data is exported to by {@link #writeToCSV()}. */
    private static final String FILENAME = "charities.csv";

    /** Selector matching every body row of the organization table. */
    private static final By TABLE_ROWS = By.cssSelector("table tbody tr");

    /** Number of cells a row must have to be treated as an organization entry. */
    private static final int EXPECTED_COLUMNS = 4;

    private final List<Organization> organizationData;

    /** Creates a scraper with an empty data cache. */
    public IKOrganizationScraper() {
        this.organizationData = new ArrayList<>();
    }

    /**
     * Loads the organization page and replaces the cached data with the freshly scraped rows.
     *
     * <p>Rows are parsed into a temporary list first; the cache is only replaced once every
     * row has been read, so an exception thrown while scraping leaves the previous data
     * untouched. The browser is always shut down, even when scraping fails.
     *
     * @return {@code true} when the cache has been refreshed
     */
    public boolean updateData() {
        WebDriver driver = new ChromeDriver(headlessChromeOptions());

        try {
            driver.get(SOURCE_URL);

            // The table is populated after page load, so wait until at least one row exists.
            WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30));
            wait.until(ExpectedConditions.numberOfElementsToBeMoreThan(TABLE_ROWS, 0));
            List<WebElement> rows = driver.findElements(TABLE_ROWS);

            // Waiting for the last row to become visible guards against reading a
            // half-rendered table.
            if (!rows.isEmpty()) {
                wait.until(ExpectedConditions.visibilityOf(rows.getLast()));
            }

            List<Organization> scraped = new ArrayList<>(rows.size() + 1);

            // Header entry used as the first CSV line.
            scraped.add(new Organization("Name", "Telephone", "Location", "Status"));

            for (WebElement row : rows) {
                List<WebElement> columns = row.findElements(By.tagName("td"));
                if (columns.size() < EXPECTED_COLUMNS) {
                    // Not an organization row (e.g. a row without enough <td> cells).
                    continue;
                }

                String name = columns.get(0).getText();
                String telephone = columns.get(1).getText();
                String location = columns.get(2).getText();
                String status = readStatus(columns.get(3));

                scraped.add(new Organization(name, telephone, location, status));
            }

            // Swap in the new data only after the whole table was parsed successfully.
            this.organizationData.clear();
            this.organizationData.addAll(scraped);
        } finally {
            driver.quit();
        }
        return true;
    }

    /**
     * Writes the cached data, one organization per line, to {@code charities.csv} in UTF-8.
     *
     * @return {@code true} when the file has been written
     * @throws IOException if the file cannot be opened, written or closed
     */
    public boolean writeToCSV() throws IOException {
        // Explicit UTF-8: the platform default charset may not be able to represent
        // all characters in the scraped names.
        try (CSVWriter writer = new CSVWriter(new FileWriter(FILENAME, StandardCharsets.UTF_8))) {
            for (Organization organization : this.organizationData) {
                writer.writeNext(new String[] {
                    organization.getName(),
                    organization.getTelephone(),
                    organization.getLocation(),
                    organization.getStatus()
                });
            }
        }
        return true;
    }

    /**
     * Returns a read-only view of the cached data, header entry included.
     *
     * @return unmodifiable view of the scraped organizations
     */
    public List<Organization> getData() {
        return Collections.unmodifiableList(this.organizationData);
    }

    /** Builds the Chrome options used to run the browser headless. */
    private static ChromeOptions headlessChromeOptions() {
        ChromeOptions options = new ChromeOptions();
        options.addArguments("--headless=new");
        options.addArguments("--window-size=1920,1080");
        options.addArguments("--disable-gpu");
        options.addArguments("--no-sandbox");
        options.addArguments("--disable-dev-shm-usage");
        return options;
    }

    /** Maps the marker element found inside the status cell to its textual status. */
    private static String readStatus(WebElement statusColumn) {
        if (!statusColumn.findElements(By.cssSelector(".status-pre-approved")).isEmpty()) {
            return "Monitored";
        }
        if (!statusColumn.findElements(By.cssSelector(".status-approved")).isEmpty()) {
            return "Approved";
        }
        return "Unknown";
    }
}