Skip to content

Commit

Permalink
Updated URLCharityScraper
Browse files Browse the repository at this point in the history
Scraping time went down from approximately 2 hours to around 20-30 minutes, and the scraper is still reliable. Thread.sleep() does not seem to be required after all.
  • Loading branch information
roaraf committed Apr 18, 2026
1 parent 7ba0277 commit 6410ed8
Showing 1 changed file with 7 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public URLCharityScraper(String url, WebDriver driver) {
* @return the {@code WebDriverWait} object to be used in the methods
*/
protected WebDriverWait createWait() {
return new WebDriverWait(driver, Duration.ofSeconds(30));
return new WebDriverWait(driver, Duration.ofSeconds(10));
}

/**
Expand Down Expand Up @@ -118,7 +118,7 @@ protected void updateDescription() {
wait.until(
ExpectedConditions.numberOfElementsToBeMoreThan(By.cssSelector(".information"), 0));

Thread.sleep(5000);
// Thread.sleep(5000);
List<WebElement> firstDescription = findElements(By.cssSelector(".information"));

for (WebElement element : firstDescription) {
Expand All @@ -139,7 +139,7 @@ void updateLogo() {
try {
WebDriverWait wait = createWait();
wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".logo > img")));
Thread.sleep(5000);
// Thread.sleep(5000);

WebElement logo = findElement(By.cssSelector(".logo > img"));
this.logoURL = logo.getAttribute("src");
Expand All @@ -155,7 +155,7 @@ void updateCategories() {
WebDriverWait wait = createWait();

wait.until(ExpectedConditions.presenceOfElementLocated(By.cssSelector(".tag-label")));
Thread.sleep(5000);
// Thread.sleep(5000);

List<WebElement> elements = findElements(By.cssSelector(".tag-label"));

Expand Down Expand Up @@ -183,7 +183,7 @@ void updateKeyValues() {
ExpectedConditions.visibilityOfElementLocated(
By.xpath(
"//li[.//h2[normalize-space()='Innsamlingsprosent']]//div[@class='graph']")));
Thread.sleep(5000);
// Thread.sleep(5000);
element =
findElement(
By.xpath("//li[.//h2[normalize-space()='Innsamlingsprosent']]//div[@class='graph']"));
Expand Down Expand Up @@ -224,9 +224,9 @@ public void scrapeCharityPage() {
updateLogo();
updateCategories();
updateKeyValues();
Thread.sleep(1000);
// Thread.sleep(1000);

} catch (InterruptedException e) {
} catch (Exception e) {
throw new RuntimeException(e);
} finally {
closeDriver();
Expand Down

0 comments on commit 6410ed8

Please sign in to comment.