From 6fc378b342cddccb5def942e238a108bb82504f3 Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Wed, 4 Mar 2020 22:10:43 +0100 Subject: [PATCH] added writing of debug --- .../java/webcrawler/crawler/CrawlBranch.java | 9 ++++++--- .../java/webcrawler/crawler/CrawlThread.java | 14 ++++++-------- .../java/webcrawler/crawler/WebCrawler.java | 19 +++++++++++-------- .../webcrawler/visualiser/Visualiser.java | 17 +++++++++++++++-- 4 files changed, 38 insertions(+), 21 deletions(-) diff --git a/src/main/java/webcrawler/crawler/CrawlBranch.java b/src/main/java/webcrawler/crawler/CrawlBranch.java index 5e9911e..b7ea76d 100644 --- a/src/main/java/webcrawler/crawler/CrawlBranch.java +++ b/src/main/java/webcrawler/crawler/CrawlBranch.java @@ -1,5 +1,6 @@ package main.java.webcrawler.crawler; +import main.java.webcrawler.visualiser.Visualiser; import org.jsoup.Connection; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; @@ -11,17 +12,19 @@ import java.util.LinkedList; import java.util.List; public class CrawlBranch { + private final Visualiser logger; private List links = new LinkedList<>(); private Document htmlDocument; private boolean debug; private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.112 Safari/535.1"; public CrawlBranch() { - this(false); + this(false,null); } - public CrawlBranch(boolean debug) { + public CrawlBranch(boolean debug, Visualiser logger) { this.debug = debug; + this.logger = logger; } /** @@ -96,6 +99,6 @@ public class CrawlBranch { } private void print(String text) { - if (debug) System.out.println(text); + if (debug) logger.log(text); } } diff --git a/src/main/java/webcrawler/crawler/CrawlThread.java b/src/main/java/webcrawler/crawler/CrawlThread.java index 6622c90..e35e099 100644 --- a/src/main/java/webcrawler/crawler/CrawlThread.java +++ b/src/main/java/webcrawler/crawler/CrawlThread.java @@ -1,25 +1,27 @@ package main.java.webcrawler.crawler; +import main.java.webcrawler.visualiser.Visualiser; + public class CrawlThread extends Thread { private final int amount; - private final boolean save; private final boolean debug; private final String startUrl; private final String word; private WebCrawler crawler; + private Visualiser visualiser; - public CrawlThread(int amount, boolean save, boolean debug, String startUrl, String word) { + public CrawlThread(int amount, boolean debug, String startUrl, String word, Visualiser visualiser) { this.amount = amount; - this.save = save; this.debug = debug; this.startUrl = startUrl; this.word = word; + this.visualiser = visualiser; } public void run() { - this.crawler = new WebCrawler(amount, save, debug); + this.crawler = new WebCrawler(amount, true, debug,visualiser); this.crawler.search(startUrl, word); } @@ -31,10 +33,6 @@ public class CrawlThread extends Thread { return amount; } - public boolean isSave() { - return save; - } - public boolean isDebug() { return debug; } diff --git a/src/main/java/webcrawler/crawler/WebCrawler.java b/src/main/java/webcrawler/crawler/WebCrawler.java index 07c951c..6ba808c 100644 --- a/src/main/java/webcrawler/crawler/WebCrawler.java +++ b/src/main/java/webcrawler/crawler/WebCrawler.java @@ -1,6 +1,7 @@ package main.java.webcrawler.crawler; import main.java.webcrawler.crawler.CrawlBranch; +import main.java.webcrawler.visualiser.Visualiser; import java.util.*; @@ -10,6 +11,7 @@ public class WebCrawler { private List pagesPending; private ArrayList resultPages; private Map urlHits; + private Visualiser logger; private int amountFound = 0; private int successPages = 0; private boolean shouldSaveHitLinks; @@ -38,10 +40,10 @@ public class WebCrawler { * @param shouldSaveHitLinks if the crawler should save the links that have one or more hits */ public WebCrawler(int maxPages, boolean shouldSaveHitLinks) { - this(maxPages, shouldSaveHitLinks, false); + this(maxPages, shouldSaveHitLinks, false, null); } - public WebCrawler(int maxPages, boolean shouldSaveHitLinks, boolean debug) { + public WebCrawler(int maxPages, boolean shouldSaveHitLinks, boolean debug, Visualiser logger) { this.amountOfPages = maxPages; this.shouldSaveHitLinks = shouldSaveHitLinks; this.pagesVisited = new HashSet<>(); @@ -49,6 +51,7 @@ public class WebCrawler { this.resultPages = new ArrayList<>(); this.urlHits = new HashMap<>(); this.debug = debug; + this.logger = logger; } @@ -76,14 +79,14 @@ public class WebCrawler { int counter = 0; while (this.pagesVisited.size() < amountOfPages) { String curUrl; - CrawlBranch branch = new CrawlBranch(debug); + CrawlBranch branch = new CrawlBranch(debug,logger); if (this.pagesPending.isEmpty()) { curUrl = url; this.pagesVisited.add(url); } else { curUrl = this.nextUrl(); counter++; - System.out.println(String.format("visiting page %s / %s",counter,amountOfPages)); + print(String.format("visiting page %s / %s",counter,amountOfPages)); } branch.crawl(curUrl); @@ -98,10 +101,10 @@ public class WebCrawler { } this.pagesPending.addAll(branch.getLinks()); } - System.out.println("========================"); - System.out.printf("DONE -- Visited %s webpages\nHits: %s\nAmount of pages with hits: %s\n", this.pagesVisited.size(), amountFound, successPages); + print("========================"); + print(String.format("DONE -- Visited %s webpages\nHits: %s\nAmount of pages with hits: %s\n", this.pagesVisited.size(), amountFound, successPages)); if (shouldSaveHitLinks) { - System.out.printf("Successful pages: \n%s", showCombinations(urlHits)); + print(String.format("Successful pages: \n%s", showCombinations(urlHits))); } } @@ -168,6 +171,6 @@ public class WebCrawler { } private void print(String text) { - if (debug) System.out.println(text); + if (debug) logger.log(text); } } diff --git a/src/main/java/webcrawler/visualiser/Visualiser.java b/src/main/java/webcrawler/visualiser/Visualiser.java index 502e988..a3ef0c3 100644 --- a/src/main/java/webcrawler/visualiser/Visualiser.java +++ b/src/main/java/webcrawler/visualiser/Visualiser.java @@ -10,6 +10,7 @@ import javafx.geometry.HPos; import javafx.geometry.Insets; import javafx.geometry.Pos; import javafx.scene.Scene; +import javafx.scene.control.Button; import javafx.scene.control.Label; import javafx.scene.control.ListView; import javafx.scene.control.TextField; @@ -17,6 +18,7 @@ import javafx.scene.layout.BorderPane; import javafx.scene.layout.HBox; import javafx.scene.layout.VBox; import javafx.stage.Stage; +import main.java.webcrawler.crawler.CrawlThread; import org.jfree.fx.FXGraphics2D; import org.jfree.fx.ResizableCanvas; @@ -63,7 +65,7 @@ public class Visualiser extends Application { } private void initGUIElements() { - HBox top = new HBox(200); + HBox top = new HBox(100); top.getStyleClass().add("content"); top.setPadding(new Insets(10, 10, 10, 10)); top.setPrefWidth(canvas.getWidth()); @@ -91,7 +93,14 @@ public class Visualiser extends Application { new Label("Maximum amount of pages:"), amountField); top.getChildren().add(content); + Button button = new Button("Run"); + button.setOnAction(e -> { + log.getItems().clear(); + CrawlThread thread = new CrawlThread(Integer.parseInt(amountField.getText()), true, urlField.getText(), wordField.getText(), this); + thread.start(); + }); + top.getChildren().add(button); log = new ListView<>(); log.setMinWidth(1100); top.setAlignment(Pos.CENTER_LEFT); @@ -136,6 +145,10 @@ public class Visualiser extends Application { } public void log(String item) { - this.log.getItems().add(item); + try { + this.log.getItems().add(item); + } catch (Exception e) { + System.out.println("exception caught"); + } } }