Skip to content

Commit

Permalink
Updated IKOrganizationScraper
Browse files Browse the repository at this point in the history
Changed the logic for updateData to be more robust (the old method didn't reliably get all data). Wrapped the method body in a try-finally statement to ensure the headless Chrome browser reliably shuts down after completion or on error.

Added a method to write the gathered data to a CSV file.

Made the getter method return an unmodifiable view of the list.
  • Loading branch information
roaraf committed Feb 26, 2026
1 parent d6ed083 commit 07d0180
Showing 1 changed file with 78 additions and 76 deletions.
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
package ntnu.sytemutvikling.team6.models;

import java.io.FileWriter;
import java.io.IOException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import com.opencsv.CSVWriter;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
Expand All @@ -12,89 +16,87 @@
import org.openqa.selenium.support.ui.WebDriverWait;

public class IKOrganizationScraper {

private final List<Organization> organizationData;
private final String filename = "charities.csv";

/** Creates a scraper whose list of organization entries starts out empty. */
public IKOrganizationScraper() {
  organizationData = new ArrayList<>();
}

public boolean updateData() {
// Configure headless chrome browser
ChromeOptions options = new ChromeOptions();
options.addArguments("--headless=new");
options.addArguments("--window-size=1920,1080");
options.addArguments("--disable-gpu");
options.addArguments("--no-sandbox");
options.addArguments("--disable-dev-shm-usage");

WebDriver driver = new ChromeDriver(options);

//URL for godkjente organisasjoner
driver.get("https://www.innsamlingskontrollen.no/organisasjoner/");

// Wait to ensure tabular data is loaded first
WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(10));
wait.until(ExpectedConditions.presenceOfElementLocated(By.tagName("table")));

String name = null;
String telephone = null;
String location = null;
String status = null;

// Clear old data before updating
this.organizationData.clear();

// Loops through table rows and columns
List<WebElement> rows = driver.findElements(By.cssSelector("table tbody tr"));

for (int i = 0; i < rows.size(); i++) {
List<WebElement> columns = rows.get(i).findElements(By.tagName("td"));

// Create organization with category names for csv file
if (i == 0) {
var categories = new Organization("Name", "Telephone", "Location", "Status");
this.organizationData.add(categories);
continue;
}

for (int j = 0; j < columns.size(); j++) {

WebElement column = columns.get(j);

// Non-verification columns
if (j == 0) {
name = column.getText();
}

if (j == 1) {
telephone = columns.get(j).getText();
}

if (j == 2) {
location = columns.get(j).getText();
}

// Verification column
if (j == 3) {
if (!column.findElements(By.cssSelector(".status-pre-approved")).isEmpty()) {
status = "Monitored";
} else if (!column.findElements(By.cssSelector(".status-approved")).isEmpty()) {
status = "Approved";
} else {
status = "Unknown";
}
/**
 * Scrapes the organization table from innsamlingskontrollen.no and replaces the stored data.
 *
 * <p>A headless Chrome session loads the page, waits up to 30 seconds for at least one table
 * row to be present and for the last row found to become visible, then reads the first four
 * cells of every row. Rows with fewer than four cells are skipped. The first stored entry is a
 * header entry ("Name", "Telephone", "Location", "Status") that serves as the CSV header.
 *
 * <p>Results are collected in a temporary list and only copied into the stored data once every
 * row has been read, so a failure part-way through leaves the previously stored data untouched.
 * The browser is always shut down, whether or not scraping succeeded.
 *
 * @return {@code true} when scraping completed; failures are not reported through the return
 *     value but propagate as exceptions thrown by the Selenium calls
 */
public boolean updateData() {
  // Configure headless Chrome browser
  ChromeOptions options = new ChromeOptions();
  options.addArguments(
      "--headless=new",
      "--window-size=1920,1080",
      "--disable-gpu",
      "--no-sandbox",
      "--disable-dev-shm-usage");

  WebDriver driver = new ChromeDriver(options);

  try {
    // URL for scraping approved organizations
    driver.get("https://www.innsamlingskontrollen.no/organisasjoner/");

    // Wait until the table has rows, then until the last row is rendered
    WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30));
    wait.until(ExpectedConditions.numberOfElementsToBeMoreThan(
        By.cssSelector("table tbody tr"), 0));
    List<WebElement> rows = driver.findElements(By.cssSelector("table tbody tr"));

    if (!rows.isEmpty()) {
      wait.until(ExpectedConditions.visibilityOf(rows.getLast()));
    }

    // Collect into a temporary list so a mid-scrape failure cannot leave
    // the stored data cleared or half-filled.
    List<Organization> scraped = new ArrayList<>();

    // CSV header entry
    scraped.add(new Organization("Name", "Telephone", "Location", "Status"));

    for (WebElement row : rows) {
      List<WebElement> columns = row.findElements(By.tagName("td"));
      if (columns.size() < 4) {
        // Not a full data row (name, telephone, location, status)
        continue;
      }

      String name = columns.get(0).getText();
      String telephone = columns.get(1).getText();
      String location = columns.get(2).getText();
      String status = resolveStatus(columns.get(3));

      scraped.add(new Organization(name, telephone, location, status));
    }

    // Every row was read successfully: replace the old data in one step
    this.organizationData.clear();
    this.organizationData.addAll(scraped);
  } finally {
    // Always shut the browser down, even if loading or scraping failed
    driver.quit();
  }

  return true;
}

/** Maps the marker element found inside the status cell to a status label. */
private static String resolveStatus(WebElement statusColumn) {
  if (!statusColumn.findElements(By.cssSelector(".status-pre-approved")).isEmpty()) {
    return "Monitored";
  }
  if (!statusColumn.findElements(By.cssSelector(".status-approved")).isEmpty()) {
    return "Approved";
  }
  return "Unknown";
}

var organization = new Organization(name, telephone, location, status);
this.organizationData.add(organization);
return true;
}
driver.quit();

return true;
}
/**
 * Writes every stored entry to the CSV file named by {@code filename}, one row per entry with
 * the columns name, telephone, location and status.
 *
 * <p>The file is written as UTF-8 so that non-ASCII characters in the scraped text do not
 * depend on the JVM's default charset.
 *
 * @return {@code true} once all entries have been handed to the writer and the file is closed
 * @throws IOException if the file cannot be opened or closed
 */
public boolean writeToCSV() throws IOException {
  try (CSVWriter writer = new CSVWriter(
      new FileWriter(filename, java.nio.charset.StandardCharsets.UTF_8))) {
    for (Organization o : this.organizationData) {
      writer.writeNext(new String[] {
          o.getName(),
          o.getTelephone(),
          o.getLocation(),
          o.getStatus()
      });
    }
  }
  return true;
}

// Pre-commit version of the getter: returns the internal list itself rather than a
// wrapper, so callers can modify the scraper's stored data through the returned reference.
public List<Organization> getData() {
return this.organizationData;
}
}
/**
 * Gives read-only access to the stored entries.
 *
 * @return an unmodifiable view of the internal organization list
 */
public List<Organization> getData() {
  List<Organization> readOnlyView = Collections.unmodifiableList(this.organizationData);
  return readOnlyView;
}
}

0 comments on commit 07d0180

Please sign in to comment.