From db1eaf4cc3cebf810cc6d518f4c001674ab0c319 Mon Sep 17 00:00:00 2001 From: Sem van der Hoeven Date: Sun, 19 Apr 2020 16:25:10 +0200 Subject: [PATCH] shit --- src/main/java/webcrawler/Main.java | 1 + .../java/webcrawler/crawler/CrawlThread.java | 6 +- .../java/webcrawler/crawler/WebCrawler.java | 20 ++++-- .../webcrawler/visualiser/Visualiser.java | 64 +++++++++++++------ 4 files changed, 66 insertions(+), 25 deletions(-) diff --git a/src/main/java/webcrawler/Main.java b/src/main/java/webcrawler/Main.java index 5806408..039150b 100644 --- a/src/main/java/webcrawler/Main.java +++ b/src/main/java/webcrawler/Main.java @@ -9,6 +9,7 @@ import java.util.Scanner; public class Main { public static void main(String[] args) throws InterruptedException { + //TODO add status text // Scanner scanner = new Scanner(System.in); // System.out.print("Enter a starting URL : "); // String startUrl = scanner.nextLine().trim(); diff --git a/src/main/java/webcrawler/crawler/CrawlThread.java b/src/main/java/webcrawler/crawler/CrawlThread.java index 13c49b1..4ba0423 100644 --- a/src/main/java/webcrawler/crawler/CrawlThread.java +++ b/src/main/java/webcrawler/crawler/CrawlThread.java @@ -7,7 +7,7 @@ import java.util.LinkedList; public class CrawlThread extends Thread { private final int amount; - private final boolean debug; + private boolean debug; private final String startUrl; private final String word; private WebCrawler crawler; @@ -19,11 +19,13 @@ public class CrawlThread extends Thread { this.startUrl = startUrl; this.word = word; this.visualiser = visualiser; + this.crawler = new WebCrawler(amount, true, debug, visualiser); } public void run() { - this.crawler = new WebCrawler(amount, true, debug,visualiser); +// this.debug = false; + System.out.println("starting thread"); this.crawler.search(startUrl, word); } diff --git a/src/main/java/webcrawler/crawler/WebCrawler.java b/src/main/java/webcrawler/crawler/WebCrawler.java index 319641e..c06c01f 100644 --- a/src/main/java/webcrawler/crawler/WebCrawler.java +++ b/src/main/java/webcrawler/crawler/WebCrawler.java @@ -16,8 +16,9 @@ public class WebCrawler { private int successPages = 0; private boolean shouldSaveHitLinks; private boolean debug; + private boolean done = false; - private LinkedList messages; + public LinkedList messages; /** * creates a new WebCrawler object with standard values @@ -79,6 +80,7 @@ public class WebCrawler { * @param searchWord the word to search for */ public void search(String url, String searchWord) { +// System.out.println("searching for " + searchWord + " in " + url); int counter = 0; while (this.pagesVisited.size() < amountOfPages) { String curUrl; @@ -90,6 +92,7 @@ public class WebCrawler { curUrl = this.nextUrl(); counter++; // print(String.format("visiting page %s / %s",counter,amountOfPages)); + System.out.println(String.format("visiting page %s / %s", counter, amountOfPages)); addMessage(String.format("visiting page %s / %s", counter, amountOfPages)); } branch.crawl(curUrl); @@ -106,6 +109,7 @@ public class WebCrawler { } this.pagesPending.addAll(branch.getLinks()); } + System.out.println("done searching"); // print("========================"); addMessage("========================"); // print(String.format("DONE -- Visited %s webpages\nHits: %s\nAmount of pages with hits: %s\n", this.pagesVisited.size(), amountFound, successPages)); @@ -114,6 +118,7 @@ public class WebCrawler { // print(String.format("Successful pages: \n%s", showCombinations(urlHits))); addMessage(String.format("Successful pages: \n%s", showCombinations(urlHits))); } + done = true; } private String display(List list) { @@ -176,15 +181,18 @@ public class WebCrawler { this.pagesVisited.clear(); this.successPages = 0; this.amountFound = 0; + this.done = false; } private void print(String text) { - if (debug) logger.log(text); + if (debug) System.out.println(text); } public void addMessage(String message) { - this.messages.add(message); -// System.out.println(message); + if (!this.messages.contains(message)) { + this.messages.add(message); + } +// System.out.println("ADDED MESSAGE " + message); } public LinkedList getMessages() { @@ -194,4 +202,8 @@ public class WebCrawler { public void clearMessages() { this.messages.clear(); } + + public boolean isDone() { + return done; + } } diff --git a/src/main/java/webcrawler/visualiser/Visualiser.java b/src/main/java/webcrawler/visualiser/Visualiser.java index 3ad2e11..394bc6c 100644 --- a/src/main/java/webcrawler/visualiser/Visualiser.java +++ b/src/main/java/webcrawler/visualiser/Visualiser.java @@ -13,6 +13,7 @@ import javafx.scene.Scene; import javafx.scene.control.Button; import javafx.scene.control.Label; import javafx.scene.control.ListView; +import javafx.scene.control.ScrollPane; import javafx.scene.control.TextField; import javafx.scene.layout.BorderPane; import javafx.scene.layout.HBox; @@ -36,6 +37,7 @@ public class Visualiser extends Application { private ResizableCanvas canvas; private ListView log; private CrawlThread thread; + private WebCrawler crawler; private int lastLogSize = 0; @@ -102,9 +104,13 @@ public class Visualiser extends Application { top.getChildren().add(content); Button button = new Button("Run"); button.setOnAction(e -> { - log.getItems().clear(); - thread = new CrawlThread(Integer.parseInt(amountField.getText()), true, urlField.getText(), wordField.getText(), this); +// log.getItems().clear(); + thread = new CrawlThread(Integer.parseInt(amountField.getText()), true, parseUrl(urlField.getText()), wordField.getText(), this); thread.start(); + this.crawler = thread.getCrawler(); + System.out.println(crawler); + ObservableList crawlerMessages = FXCollections.observableList(crawler.messages); + this.log.setItems(crawlerMessages); }); top.getChildren().add(button); @@ -116,6 +122,17 @@ public class Visualiser extends Application { } + private String parseUrl(String text) { + if (!text.startsWith("http://")) { + text = "http://" + text; + } + if (text.startsWith("https")) { + text = text.replace("https", "http"); + } + System.out.println("parsed to " + text); + return text; + } + private void makeNumeric(TextField textField) { // force the field to be numeric only textField.textProperty().addListener(new ChangeListener() { @@ -149,24 +166,33 @@ public class Visualiser extends Application { updateFrame(); this.frameTime = 0d; } - - if (thread != null && thread.isAlive()) { - WebCrawler crawler = thread.getCrawler(); - if (crawler != null) { - List msgs = crawler.getMessages(); - if (msgs != null) - if (!msgs.isEmpty()) { - log.getItems().addAll(msgs); - thread.getCrawler().clearMessages(); - if (log.getItems().size() > lastLogSize) { - if (!log.getItems().isEmpty()) - log.scrollTo(log.getItems().size() - 1); - lastLogSize = log.getItems().size(); - } - } - } - + if (this.log.getItems().isEmpty()) { + this.log.getItems().add("test"); } + this.log.refresh(); + +// if (thread != null && thread.isAlive()) { +// if (crawler == null) { +// crawler = thread.getCrawler(); +// } +// if (crawler != null) { +// if (!this.crawler.isDone()) { +// +// List msgs = new ArrayList<>(crawler.getMessages()); +// System.out.println(msgs); +//// if (!msgs.isEmpty()) { +//// System.out.println("adding messages:\n" + msgs); +// log.getItems().addAll(msgs); +// thread.getCrawler().clearMessages(); + if (!log.getItems().isEmpty()) + log.scrollTo(log.getItems().size() - 1); +//// lastLogSize = log.getItems().size(); +// +//// } +// } +// } +// +// } } public void log(String item) {