This commit is contained in:
Sem van der Hoeven
2020-04-19 16:25:10 +02:00
parent 4792062925
commit db1eaf4cc3
4 changed files with 66 additions and 25 deletions

View File

@@ -9,6 +9,7 @@ import java.util.Scanner;
public class Main { public class Main {
public static void main(String[] args) throws InterruptedException { public static void main(String[] args) throws InterruptedException {
//TODO add status text
// Scanner scanner = new Scanner(System.in); // Scanner scanner = new Scanner(System.in);
// System.out.print("Enter a starting URL : "); // System.out.print("Enter a starting URL : ");
// String startUrl = scanner.nextLine().trim(); // String startUrl = scanner.nextLine().trim();

View File

@@ -7,7 +7,7 @@ import java.util.LinkedList;
public class CrawlThread extends Thread { public class CrawlThread extends Thread {
private final int amount; private final int amount;
private final boolean debug; private boolean debug;
private final String startUrl; private final String startUrl;
private final String word; private final String word;
private WebCrawler crawler; private WebCrawler crawler;
@@ -19,11 +19,13 @@ public class CrawlThread extends Thread {
this.startUrl = startUrl; this.startUrl = startUrl;
this.word = word; this.word = word;
this.visualiser = visualiser; this.visualiser = visualiser;
this.crawler = new WebCrawler(amount, true, debug, visualiser);
} }
public void run() { public void run() {
this.crawler = new WebCrawler(amount, true, debug,visualiser); // this.debug = false;
System.out.println("starting thread");
this.crawler.search(startUrl, word); this.crawler.search(startUrl, word);
} }

View File

@@ -16,8 +16,9 @@ public class WebCrawler {
private int successPages = 0; private int successPages = 0;
private boolean shouldSaveHitLinks; private boolean shouldSaveHitLinks;
private boolean debug; private boolean debug;
private boolean done = false;
private LinkedList<String> messages; public LinkedList<String> messages;
/** /**
* creates a new WebCrawler object with standard values * creates a new WebCrawler object with standard values
@@ -79,6 +80,7 @@ public class WebCrawler {
* @param searchWord the word to search for * @param searchWord the word to search for
*/ */
public void search(String url, String searchWord) { public void search(String url, String searchWord) {
// System.out.println("searching for " + searchWord + " in " + url);
int counter = 0; int counter = 0;
while (this.pagesVisited.size() < amountOfPages) { while (this.pagesVisited.size() < amountOfPages) {
String curUrl; String curUrl;
@@ -90,6 +92,7 @@ public class WebCrawler {
curUrl = this.nextUrl(); curUrl = this.nextUrl();
counter++; counter++;
// print(String.format("visiting page %s / %s",counter,amountOfPages)); // print(String.format("visiting page %s / %s",counter,amountOfPages));
System.out.println(String.format("visiting page %s / %s", counter, amountOfPages));
addMessage(String.format("visiting page %s / %s", counter, amountOfPages)); addMessage(String.format("visiting page %s / %s", counter, amountOfPages));
} }
branch.crawl(curUrl); branch.crawl(curUrl);
@@ -106,6 +109,7 @@ public class WebCrawler {
} }
this.pagesPending.addAll(branch.getLinks()); this.pagesPending.addAll(branch.getLinks());
} }
System.out.println("done searching");
// print("========================"); // print("========================");
addMessage("========================"); addMessage("========================");
// print(String.format("DONE -- Visited %s webpages\nHits: %s\nAmount of pages with hits: %s\n", this.pagesVisited.size(), amountFound, successPages)); // print(String.format("DONE -- Visited %s webpages\nHits: %s\nAmount of pages with hits: %s\n", this.pagesVisited.size(), amountFound, successPages));
@@ -114,6 +118,7 @@ public class WebCrawler {
// print(String.format("Successful pages: \n%s", showCombinations(urlHits))); // print(String.format("Successful pages: \n%s", showCombinations(urlHits)));
addMessage(String.format("Successful pages: \n%s", showCombinations(urlHits))); addMessage(String.format("Successful pages: \n%s", showCombinations(urlHits)));
} }
done = true;
} }
private String display(List<String> list) { private String display(List<String> list) {
@@ -176,15 +181,18 @@ public class WebCrawler {
this.pagesVisited.clear(); this.pagesVisited.clear();
this.successPages = 0; this.successPages = 0;
this.amountFound = 0; this.amountFound = 0;
this.done = false;
} }
private void print(String text) { private void print(String text) {
if (debug) logger.log(text); if (debug) System.out.println(text);
} }
public void addMessage(String message) { public void addMessage(String message) {
this.messages.add(message); if (!this.messages.contains(message)) {
// System.out.println(message); this.messages.add(message);
}
// System.out.println("ADDED MESSAGE " + message);
} }
public LinkedList<String> getMessages() { public LinkedList<String> getMessages() {
@@ -194,4 +202,8 @@ public class WebCrawler {
public void clearMessages() { public void clearMessages() {
this.messages.clear(); this.messages.clear();
} }
/**
 * Reports the current value of the {@code done} flag.
 *
 * @return the value of {@code done}
 */
public boolean isDone() {
    return this.done;
}
} }

View File

@@ -13,6 +13,7 @@ import javafx.scene.Scene;
import javafx.scene.control.Button; import javafx.scene.control.Button;
import javafx.scene.control.Label; import javafx.scene.control.Label;
import javafx.scene.control.ListView; import javafx.scene.control.ListView;
import javafx.scene.control.ScrollPane;
import javafx.scene.control.TextField; import javafx.scene.control.TextField;
import javafx.scene.layout.BorderPane; import javafx.scene.layout.BorderPane;
import javafx.scene.layout.HBox; import javafx.scene.layout.HBox;
@@ -36,6 +37,7 @@ public class Visualiser extends Application {
private ResizableCanvas canvas; private ResizableCanvas canvas;
private ListView<String> log; private ListView<String> log;
private CrawlThread thread; private CrawlThread thread;
private WebCrawler crawler;
private int lastLogSize = 0; private int lastLogSize = 0;
@@ -102,9 +104,13 @@ public class Visualiser extends Application {
top.getChildren().add(content); top.getChildren().add(content);
Button button = new Button("Run"); Button button = new Button("Run");
button.setOnAction(e -> { button.setOnAction(e -> {
log.getItems().clear(); // log.getItems().clear();
thread = new CrawlThread(Integer.parseInt(amountField.getText()), true, urlField.getText(), wordField.getText(), this); thread = new CrawlThread(Integer.parseInt(amountField.getText()), true, parseUrl(urlField.getText()), wordField.getText(), this);
thread.start(); thread.start();
this.crawler = thread.getCrawler();
System.out.println(crawler);
ObservableList<String> crawlerMessages = FXCollections.observableList(crawler.messages);
this.log.setItems(crawlerMessages);
}); });
top.getChildren().add(button); top.getChildren().add(button);
@@ -116,6 +122,17 @@ public class Visualiser extends Application {
} }
/**
 * Normalises a user-entered address into a plain {@code http://} URL.
 *
 * <ul>
 *   <li>{@code https://host/...} has its scheme replaced by {@code http://}</li>
 *   <li>{@code http://host/...} is returned unchanged</li>
 *   <li>anything else gets {@code http://} prepended</li>
 * </ul>
 *
 * The result is also printed to standard output.
 *
 * @param text the raw text entered by the user, must not be {@code null}
 * @return the address starting with {@code http://}
 */
private String parseUrl(String text) {
    final String httpPrefix = "http://";
    final String httpsPrefix = "https://";

    if (text.startsWith(httpsPrefix)) {
        // Check https first: testing for "http://" first would turn
        // "https://host" into "http://https://host". Only the leading scheme
        // is swapped, so "https" occurring later in the path or query is kept.
        text = httpPrefix + text.substring(httpsPrefix.length());
    } else if (!text.startsWith(httpPrefix)) {
        text = httpPrefix + text;
    }

    System.out.println("parsed to " + text);
    return text;
}
private void makeNumeric(TextField textField) { private void makeNumeric(TextField textField) {
// force the field to be numeric only // force the field to be numeric only
textField.textProperty().addListener(new ChangeListener<String>() { textField.textProperty().addListener(new ChangeListener<String>() {
@@ -149,24 +166,33 @@ public class Visualiser extends Application {
updateFrame(); updateFrame();
this.frameTime = 0d; this.frameTime = 0d;
} }
if (this.log.getItems().isEmpty()) {
if (thread != null && thread.isAlive()) { this.log.getItems().add("test");
WebCrawler crawler = thread.getCrawler();
if (crawler != null) {
List<String> msgs = crawler.getMessages();
if (msgs != null)
if (!msgs.isEmpty()) {
log.getItems().addAll(msgs);
thread.getCrawler().clearMessages();
if (log.getItems().size() > lastLogSize) {
if (!log.getItems().isEmpty())
log.scrollTo(log.getItems().size() - 1);
lastLogSize = log.getItems().size();
}
}
}
} }
this.log.refresh();
// if (thread != null && thread.isAlive()) {
// if (crawler == null) {
// crawler = thread.getCrawler();
// }
// if (crawler != null) {
// if (!this.crawler.isDone()) {
//
// List<String> msgs = new ArrayList<>(crawler.getMessages());
// System.out.println(msgs);
//// if (!msgs.isEmpty()) {
//// System.out.println("adding messages:\n" + msgs);
// log.getItems().addAll(msgs);
// thread.getCrawler().clearMessages();
if (!log.getItems().isEmpty())
log.scrollTo(log.getItems().size() - 1);
//// lastLogSize = log.getItems().size();
//
//// }
// }
// }
//
// }
} }
public void log(String item) { public void log(String item) {