diff --git a/src/main/java/webcrawler/crawler/CrawlBranch.java b/src/main/java/webcrawler/crawler/CrawlBranch.java index b7ea76d..b3f70ef 100644 --- a/src/main/java/webcrawler/crawler/CrawlBranch.java +++ b/src/main/java/webcrawler/crawler/CrawlBranch.java @@ -18,13 +18,16 @@ public class CrawlBranch { private boolean debug; private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.112 Safari/535.1"; + private WebCrawler parent; + public CrawlBranch() { - this(false,null); + this(false,null,null); } - public CrawlBranch(boolean debug, Visualiser logger) { + public CrawlBranch(boolean debug, WebCrawler parent,Visualiser logger) { this.debug = debug; this.logger = logger; + this.parent = parent; } /** @@ -39,14 +42,17 @@ public class CrawlBranch { this.htmlDocument = connection.get(); if (connection.response().statusCode() == 200) { - print("VISITING -- Recieved web page at " + url); +// print("VISITING -- Recieved web page at " + url); + sendMessage("VISITING -- Recieved web page at " + url); } else { - print("FAIL -- recieved something else than a web page"); +// print("FAIL -- recieved something else than a web page"); + sendMessage("FAIL -- recieved something else than a web page"); return false; } Elements linksOnPage = htmlDocument.select("a[href]"); - print("FOUND (" + linksOnPage.size() + ") links"); +// print("FOUND (" + linksOnPage.size() + ") links"); + sendMessage("FOUND (" + linksOnPage.size() + ") links"); for (Element link : linksOnPage) { this.links.add(link.absUrl("href")); } @@ -68,7 +74,8 @@ public class CrawlBranch { //System.out.println("ERROR -- call crawl before searhing"); return -1; } - print(String.format("Searching for %s...", word)); +// print(String.format("Searching for %s...", word)); + sendMessage(String.format("Searching for %s...", word)); String bodyText = this.htmlDocument.body().text(); return count(bodyText.toLowerCase(), word.toLowerCase()); } @@ -101,4 +108,8 @@ public class CrawlBranch { private void print(String text) { if (debug) logger.log(text); } + + private void sendMessage(String message) { + this.parent.addMessage(message); + } } diff --git a/src/main/java/webcrawler/crawler/CrawlThread.java b/src/main/java/webcrawler/crawler/CrawlThread.java index e35e099..13c49b1 100644 --- a/src/main/java/webcrawler/crawler/CrawlThread.java +++ b/src/main/java/webcrawler/crawler/CrawlThread.java @@ -2,6 +2,8 @@ package main.java.webcrawler.crawler; import main.java.webcrawler.visualiser.Visualiser; +import java.util.LinkedList; + public class CrawlThread extends Thread { private final int amount; @@ -44,4 +46,10 @@ public class CrawlThread extends Thread { public String getWord() { return word; } + + public LinkedList retrieveLog() { + return this.crawler.getMessages(); + } + + } diff --git a/src/main/java/webcrawler/crawler/WebCrawler.java b/src/main/java/webcrawler/crawler/WebCrawler.java index 6ba808c..319641e 100644 --- a/src/main/java/webcrawler/crawler/WebCrawler.java +++ b/src/main/java/webcrawler/crawler/WebCrawler.java @@ -17,6 +17,8 @@ public class WebCrawler { private boolean shouldSaveHitLinks; private boolean debug; + private LinkedList messages; + /** * creates a new WebCrawler object with standard values */ @@ -52,6 +54,7 @@ public class WebCrawler { this.urlHits = new HashMap<>(); this.debug = debug; this.logger = logger; + this.messages = new LinkedList<>(); } @@ -79,20 +82,22 @@ public class WebCrawler { int counter = 0; while (this.pagesVisited.size() < amountOfPages) { String curUrl; - CrawlBranch branch = new CrawlBranch(debug,logger); + CrawlBranch branch = new CrawlBranch(debug, this, logger); if (this.pagesPending.isEmpty()) { curUrl = url; this.pagesVisited.add(url); } else { curUrl = this.nextUrl(); counter++; - print(String.format("visiting page %s / %s",counter,amountOfPages)); +// print(String.format("visiting page %s / %s",counter,amountOfPages)); + addMessage(String.format("visiting page %s / %s", counter, amountOfPages)); } branch.crawl(curUrl); int amount = branch.searchForWord(searchWord); if (amount > 0) { - print(String.format("SUCCESS -- word %s found at %s %s times\n", searchWord, curUrl, amount)); +// print(String.format("SUCCESS -- word %s found at %s %s times\n", searchWord, curUrl, amount)); + addMessage(String.format("SUCCESS -- word %s found at %s %s times\n", searchWord, curUrl, amount)); successPages++; amountFound += amount; if (shouldSaveHitLinks) @@ -101,10 +106,13 @@ public class WebCrawler { } this.pagesPending.addAll(branch.getLinks()); } - print("========================"); - print(String.format("DONE -- Visited %s webpages\nHits: %s\nAmount of pages with hits: %s\n", this.pagesVisited.size(), amountFound, successPages)); +// print("========================"); + addMessage("========================"); +// print(String.format("DONE -- Visited %s webpages\nHits: %s\nAmount of pages with hits: %s\n", this.pagesVisited.size(), amountFound, successPages)); + addMessage(String.format("DONE -- Visited %s webpages\nHits: %s\nAmount of pages with hits: %s\n", this.pagesVisited.size(), amountFound, successPages)); if (shouldSaveHitLinks) { - print(String.format("Successful pages: \n%s", showCombinations(urlHits))); +// print(String.format("Successful pages: \n%s", showCombinations(urlHits))); + addMessage(String.format("Successful pages: \n%s", showCombinations(urlHits))); } } @@ -173,4 +181,17 @@ public class WebCrawler { private void print(String text) { if (debug) logger.log(text); } + + public void addMessage(String message) { + this.messages.add(message); +// System.out.println(message); + } + + public LinkedList getMessages() { + return this.messages; + } + + public void clearMessages() { + this.messages.clear(); + } } diff --git a/src/main/java/webcrawler/visualiser/Visualiser.java b/src/main/java/webcrawler/visualiser/Visualiser.java index a3ef0c3..e788062 100644 --- a/src/main/java/webcrawler/visualiser/Visualiser.java +++ b/src/main/java/webcrawler/visualiser/Visualiser.java @@ -19,6 +19,7 @@ import javafx.scene.layout.HBox; import javafx.scene.layout.VBox; import javafx.stage.Stage; import main.java.webcrawler.crawler.CrawlThread; +import main.java.webcrawler.crawler.WebCrawler; import org.jfree.fx.FXGraphics2D; import org.jfree.fx.ResizableCanvas; @@ -27,12 +28,14 @@ import java.awt.geom.Line2D; import java.awt.geom.Rectangle2D; import java.util.ArrayList; import java.util.LinkedList; +import java.util.List; public class Visualiser extends Application { private double frameTime = 0; private BorderPane pane; private ResizableCanvas canvas; private ListView log; + private CrawlThread thread; @Override public void start(Stage primaryStage) throws Exception { @@ -96,7 +99,7 @@ public class Visualiser extends Application { Button button = new Button("Run"); button.setOnAction(e -> { log.getItems().clear(); - CrawlThread thread = new CrawlThread(Integer.parseInt(amountField.getText()), true, urlField.getText(), wordField.getText(), this); + thread = new CrawlThread(Integer.parseInt(amountField.getText()), true, urlField.getText(), wordField.getText(), this); thread.start(); }); @@ -142,6 +145,19 @@ public class Visualiser extends Application { updateFrame(); this.frameTime = 0d; } + + if (thread != null && thread.isAlive()) { + WebCrawler crawler = thread.getCrawler(); + if (crawler != null) { + List msgs = crawler.getMessages(); + if (msgs != null) + if (!msgs.isEmpty()) { + log.getItems().addAll(msgs); + thread.getCrawler().clearMessages(); + } + } + + } } public void log(String item) {