keyValues;
+
+ /**
+ * Constructor used for production code.
+ *
+ * Initializes the {@code categories} and {@code keyValues} lists, stores the URL, and creates a
+ * {@link ChromeDriver} configured with the Chrome arguments set below (headless, 1920x1080).
+ *
+ * @param url the URL for the charity's webpage on IK
+ */
+ public URLCharityScraper(String url) {
+ this.categories = new ArrayList<>();
+ this.keyValues = new ArrayList<>();
+
+ ChromeOptions options = new ChromeOptions();
+ options.addArguments("--headless=new");
+ options.addArguments("--window-size=1920,1080");
+ options.addArguments("--disable-gpu");
+ options.addArguments("--no-sandbox");
+ options.addArguments("--disable-dev-shm-usage");
+
+ this.url = url;
+ this.driver = new ChromeDriver(options);
+ }
+
+ /**
+ * Constructor used for testing.
+ *
+ * It accepts both a URL (ideally a dummy one) and a {@link WebDriver} as parameters. The
+ * WebDriver is injected instead of being created here, which makes testing easier.
+ *
+ * @param url the URL for the charity's webpage on IK (for this constructor it should not be a
+ * real URL)
+ * @param driver the {@code WebDriver} object used for scraping
+ */
+ public URLCharityScraper(String url, WebDriver driver) {
+ this.categories = new ArrayList<>();
+ this.keyValues = new ArrayList<>();
+ this.url = url;
+ this.driver = driver;
+ }
+
+ /**
+ * Creates a {@link WebDriverWait} bound to this scraper's driver with a 30-second timeout, used
+ * to halt scraping until the expected pre-conditions are met.
+ *
+ * @return a new {@code WebDriverWait} for the current driver
+ */
+ protected WebDriverWait createWait() {
+ return new WebDriverWait(driver, Duration.ofSeconds(30));
+ }
+
+ /**
+ * Calls the {@code findElements} method from the {@code WebDriver} object and returns a list of
+ * the returned {@link WebElement} objects.
+ *
+ * @param by a selector for {@code WebElement} objects
+ * @return a list of found {@code WebElement} objects matching the given selector
+ */
+ protected List<WebElement> findElements(By by) {
+ return driver.findElements(by);
+ }
+
+ /**
+ * Calls the {@code findElement} method from the {@code WebDriver} object and returns the single
+ * {@code WebElement} it yields.
+ *
+ * @param by a selector for {@code WebElement} objects
+ * @return the {@code WebElement} returned by the driver for the given selector
+ */
+ protected WebElement findElement(By by) {
+ return driver.findElement(by);
+ }
+
+ /** Quits the driver instance by calling {@code quit()}, making it unusable afterwards. */
+ protected void closeDriver() {
+ driver.quit();
+ }
+
+ /**
+ * Scrapes the page for the charity's description and stores it in {@code description}.
+ *
+ * If an {@code a.read-more} link is present it is clicked and the method waits for
+ * {@code .extra-info} to become visible. The non-blank text of every {@code .information}
+ * element is then appended, each followed by a blank line. Any exception is caught and reported
+ * on standard output, in which case {@code description} is left unchanged.
+ */
+ protected void updateDescription() {
+ try {
+ WebDriverWait wait = createWait();
+ StringBuilder descriptionString = new StringBuilder();
+
+ List<WebElement> readMoreLinks = findElements(By.cssSelector("a.read-more"));
+
+ if (!readMoreLinks.isEmpty()) {
+ WebElement readMore = findElement(By.cssSelector("a.read-more"));
+ readMore.click();
+
+ wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".extra-info")));
+ }
+
+ wait.until(
+ ExpectedConditions.numberOfElementsToBeMoreThan(By.cssSelector(".information"), 0));
+
+ Thread.sleep(5000);
+ List<WebElement> firstDescription = findElements(By.cssSelector(".information"));
+
+ for (WebElement element : firstDescription) {
+ if (!element.getText().isBlank()) {
+ descriptionString.append(element.getText()).append("\n\n");
}
+ }
- List extraDescription =
- findElements(By.cssSelector(".extra-info p"));
-
- for (WebElement element : extraDescription) {
- if (!element.getText().isBlank()) {
- descriptionString.append(element.getText()).append("\n\n");
- }
- }
+ this.description = descriptionString.toString();
- this.description = descriptionString.toString();
+ } catch (Exception e) {
+ System.out.println("No description found for " + driver.getCurrentUrl());
}
+ }
- /**
- * Scrapes the URL for the image URL of the logo for the charity.
- */
- void updateLogo() {
- WebDriverWait wait = createWait();
+ /**
+ * Scrapes the page for the charity's logo and stores the {@code src} attribute of the
+ * {@code .logo > img} element in {@code logoURL}.
+ *
+ * Waits for the image to be visible, then sleeps a further five seconds before reading it. Any
+ * exception is caught and reported on standard output as a missing logo.
+ * NOTE(review): the broad catch also swallows {@code InterruptedException} from the sleep
+ * without restoring the interrupt flag - confirm this is intended.
+ */
+ void updateLogo() {
+ try {
+ WebDriverWait wait = createWait();
+ wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".logo > img")));
+ Thread.sleep(5000);
- wait.until(ExpectedConditions.visibilityOfElementLocated(
- By.cssSelector(".logo > img")));
+ WebElement logo = findElement(By.cssSelector(".logo > img"));
+ this.logoURL = logo.getAttribute("src");
- WebElement logo = findElement(By.cssSelector(".logo > img"));
- this.logoURL = logo.getAttribute("src");
+ } catch (Exception e) {
+ System.out.println("No logo found for " + driver.getCurrentUrl());
}
+ }
- /**
- * Scrapes the URL for the category labels containing the categories for the charity.
- */
- void updateCategories() {
- WebDriverWait wait = createWait();
+ /**
+ * Scrapes the page for the category labels and appends the text of every {@code .tag-label}
+ * element to {@code categories}.
+ *
+ * Waits for at least one label to be present in the DOM, then sleeps a further five seconds
+ * before collecting them. Any exception is caught and reported on standard output.
+ */
+ void updateCategories() {
+ try {
+ WebDriverWait wait = createWait();
- wait.until(ExpectedConditions.visibilityOfElementLocated(
- By.cssSelector(".tag-label")));
+ wait.until(ExpectedConditions.presenceOfElementLocated(By.cssSelector(".tag-label")));
+ Thread.sleep(5000);
- List elements =
- findElements(By.cssSelector(".tag-label"));
+ List<WebElement> elements = findElements(By.cssSelector(".tag-label"));
- for (WebElement element : elements) {
- this.categories.add(element.getText());
- }
- }
-
- /**
- * Scrapes the URL for the statistics of the charity; the percentage collected, the percentage that goes to the
- * administration, and the percentage that is put towards the cause.
- */
- void updateKeyValues() {
- WebDriverWait wait = createWait();
-
- String percentage;
- WebElement element;
-
- wait.until(ExpectedConditions.visibilityOfElementLocated(
- By.xpath("//li[.//h2[normalize-space()='Innsamlingsprosent']]//div[@class='graph']")));
-
- element = findElement(By.xpath(
- "//li[.//h2[normalize-space()='Innsamlingsprosent']]//div[@class='graph']"));
- percentage = element.getAttribute("data-percentage");
- this.keyValues.add(percentage);
-
- wait.until(ExpectedConditions.visibilityOfElementLocated(
- By.xpath("//li[.//h2[normalize-space()='Administrasjonsprosent']]//div[@class='graph']")));
-
- element = findElement(By.xpath(
- "//li[.//h2[normalize-space()='Administrasjonsprosent']]//div[@class='graph']"));
- percentage = element.getAttribute("data-percentage");
- this.keyValues.add(percentage);
-
- wait.until(ExpectedConditions.visibilityOfElementLocated(
- By.xpath("//li[.//h2[normalize-space()='Formålsprosent']]//div[@class='graph']")));
-
- element = findElement(By.xpath(
- "//li[.//h2[normalize-space()='Formålsprosent']]//div[@class='graph']"));
- percentage = element.getAttribute("data-percentage");
- this.keyValues.add(percentage);
- }
-
- /**
- * Runs all the scraper methods at once, updating the object parameters.
- */
- public void scrapeCharityPage() {
- try {
- driver.get(this.url);
-
- updateDescription();
- updateLogo();
- updateCategories();
- updateKeyValues();
+ for (WebElement element : elements) {
+ this.categories.add(element.getText());
+ }
- } finally {
- closeDriver();
- }
+ } catch (Exception e) {
+ System.out.println("No categories found for " + driver.getCurrentUrl());
}
-
- /**
- * Returns the description of the charity.
- *
- * @return a String containing the description of the charity.
- */
- public String getDescription() {
- return description;
+ }
+
+ /**
+ * Scrapes the URL for the statistics of the charity; the percentage collected, the percentage
+ * that goes to the administration, and the percentage that is put towards the cause.
+ *
+ * The {@code data-percentage} attribute of the graph under the headings Innsamlingsprosent,
+ * Administrasjonsprosent and Formålsprosent is appended to {@code keyValues} in that order. Any
+ * exception is caught and reported on standard output; values appended before the failure stay
+ * in the list.
+ */
+ void updateKeyValues() {
+ try {
+ WebDriverWait wait = createWait();
+
+ String percentage;
+ WebElement element;
+
+ wait.until(
+ ExpectedConditions.visibilityOfElementLocated(
+ By.xpath(
+ "//li[.//h2[normalize-space()='Innsamlingsprosent']]//div[@class='graph']")));
+ Thread.sleep(5000);
+ element =
+ findElement(
+ By.xpath("//li[.//h2[normalize-space()='Innsamlingsprosent']]//div[@class='graph']"));
+ percentage = element.getAttribute("data-percentage");
+ this.keyValues.add(percentage);
+
+ wait.until(
+ ExpectedConditions.visibilityOfElementLocated(
+ By.xpath(
+ "//li[.//h2[normalize-space()='Administrasjonsprosent']]//div[@class='graph']")));
+
+ element =
+ findElement(
+ By.xpath(
+ "//li[.//h2[normalize-space()='Administrasjonsprosent']]//div[@class='graph']"));
+ percentage = element.getAttribute("data-percentage");
+ this.keyValues.add(percentage);
+
+ wait.until(
+ ExpectedConditions.visibilityOfElementLocated(
+ By.xpath("//li[.//h2[normalize-space()='Formålsprosent']]//div[@class='graph']")));
+
+ element =
+ findElement(
+ By.xpath("//li[.//h2[normalize-space()='Formålsprosent']]//div[@class='graph']"));
+ percentage = element.getAttribute("data-percentage");
+ this.keyValues.add(percentage);
+ } catch (Exception e) {
+ System.out.println("No key values found for " + driver.getCurrentUrl());
}
-
- /**
- * Returns the URL of the logo for the charity.
- *
- * @return a String containing the URL for the logo of the charity.
- */
- public String getLogoURL() {
- return logoURL;
+ }
+
+ /**
+ * Runs all the scraper methods at once, updating the object parameters.
+ *
+ * Loads {@code url} in the driver, runs the description, logo, category and key-value scrapers
+ * in that order, then sleeps for one second. The driver is always quit afterwards, so this
+ * method can only be used once per instance.
+ *
+ * @throws RuntimeException if the thread is interrupted during the final sleep; the interrupt
+ * flag is restored before the exception is thrown
+ */
+ public void scrapeCharityPage() {
+ try {
+ driver.get(this.url);
+ updateDescription();
+ updateLogo();
+ updateCategories();
+ updateKeyValues();
+ Thread.sleep(1000);
+
+ } catch (InterruptedException e) {
+ // Restore the interrupt flag so callers can still observe the interruption.
+ Thread.currentThread().interrupt();
+ throw new RuntimeException(e);
+ } finally {
+ closeDriver();
}
-
- /**
- * Returns a String of the categories for the charity with ',' as a delimiter.
- *
- * @return a String of strings containing the categories for the charity
- */
- public String getCategories() {
- StringBuilder categoriesString = new StringBuilder();
-
- for (int i = 0; i < this.categories.size(); i++) {
- categoriesString.append(this.categories.get(i));
- if (i < this.categories.size() - 1) {
- categoriesString.append(",");
- }
- }
- return categoriesString.toString();
+ }
+
+ /**
+ * Returns the description of the charity as stored by {@link #updateDescription()}.
+ *
+ * @return a String containing the description of the charity.
+ */
+ public String getDescription() {
+ return description;
+ }
+
+ /**
+ * Returns the logo URL as stored by {@link #updateLogo()} (the image's {@code src} attribute).
+ *
+ * @return a String containing the URL for the logo of the charity.
+ */
+ public String getLogoURL() {
+ return logoURL;
+ }
+
+ /**
+ * Returns the stored categories joined into one String with ',' as a delimiter (no spaces).
+ *
+ * @return the categories separated by ','; an empty String when no categories are stored
+ */
+ public String getCategories() {
+ StringBuilder categoriesString = new StringBuilder();
+
+ for (int i = 0; i < this.categories.size(); i++) {
+ categoriesString.append(this.categories.get(i));
+ if (i < this.categories.size() - 1) {
+ categoriesString.append(",");
+ }
}
-
- /**
- * Returns a String of the key value percentages for the charity with ':' as a delimiter, verified by IK.
- *
- * @return a String of the key values for the charity-
- */
- public String getKeyValues() {
- StringBuilder keyValuesString = new StringBuilder();
-
- for (int i = 0; i < this.keyValues.size(); i++) {
- keyValuesString.append(this.keyValues.get(i));
- if (i < this.keyValues.size() - 1) {
- keyValuesString.append(":");
- }
- }
- return keyValuesString.toString();
+ return categoriesString.toString();
+ }
+
+ /**
+ * Returns the key value percentages for the charity, as verified by IK, joined into one String
+ * with ':' as a delimiter.
+ *
+ * @return the key values separated by ':'; an empty String when no key values are stored
+ */
+ public String getKeyValues() {
+ StringBuilder keyValuesString = new StringBuilder();
+
+ for (int i = 0; i < this.keyValues.size(); i++) {
+ keyValuesString.append(this.keyValues.get(i));
+ if (i < this.keyValues.size() - 1) {
+ keyValuesString.append(":");
+ }
}
-}
\ No newline at end of file
+ return keyValuesString.toString();
+ }
+}
diff --git a/helpmehelpapplication/src/test/java/ntnu/systemutvikling/team6/database/DatabaseManagerTest.java b/helpmehelpapplication/src/test/java/ntnu/systemutvikling/team6/database/DatabaseManagerTest.java
index 78be78a..8b8ac6e 100644
--- a/helpmehelpapplication/src/test/java/ntnu/systemutvikling/team6/database/DatabaseManagerTest.java
+++ b/helpmehelpapplication/src/test/java/ntnu/systemutvikling/team6/database/DatabaseManagerTest.java
@@ -7,6 +7,7 @@
import java.util.List;
import ntnu.systemutvikling.team6.models.Charity;
import ntnu.systemutvikling.team6.models.CharityRegistry;
+import ntnu.systemutvikling.team6.scraper.LogoDownloader;
import org.junit.jupiter.api.*;
class DatabaseManagerTest {
@@ -19,7 +20,7 @@ public void setUp() throws SQLException {
}
@Test
- public void test() {
+ public void blobImageTest() {
dbManager.createTables();
String org_number = "12345";
@@ -31,8 +32,10 @@ public void test() {
Charity charity = new Charity(org_number, url, name, is_pre_approved, status);
dbManager.addAPIDataToTable(List.of(charity));
+ LogoDownloader.convertBlobToPNG(charity.getLogoBlob(), charity.getUUID().toString());
}
+
// Make sure you're connected to the NTNU network for this to work
@Test
public void testConnectionShouldReturnTrue() {
diff --git a/helpmehelpapplication/src/test/java/ntnu/systemutvikling/team6/scraper/LogoDownloaderTest.java b/helpmehelpapplication/src/test/java/ntnu/systemutvikling/team6/scraper/LogoDownloaderTest.java
new file mode 100644
index 0000000..c933025
--- /dev/null
+++ b/helpmehelpapplication/src/test/java/ntnu/systemutvikling/team6/scraper/LogoDownloaderTest.java
@@ -0,0 +1,21 @@
+package ntnu.systemutvikling.team6.scraper;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import org.junit.jupiter.api.Test;
+
+class LogoDownloaderTest {
+ @Test
+ void testConvertBlobToPNG() throws Exception {
+ byte[] fakeImage = new byte[] {1, 2, 3, 4, 5};
+ // Arbitrary bytes are enough: the assertions below only compare raw file contents.
+ LogoDownloader.convertBlobToPNG(fakeImage, "test-logo");
+ // Expected output location; presumably where convertBlobToPNG writes - confirm in LogoDownloader.
+ Path path = Path.of("target", "logo", "test-logo.png");
+ // The file must exist and contain exactly the bytes that were passed in.
+ assertTrue(Files.exists(path));
+ assertArrayEquals(fakeImage, Files.readAllBytes(path));
+ }
+}