Added writing of debug output to the visualiser log

This commit is contained in:
Sem van der Hoeven
2020-03-04 22:10:43 +01:00
parent faa2ff2b67
commit 6fc378b342
4 changed files with 38 additions and 21 deletions

View File

@@ -1,5 +1,6 @@
package main.java.webcrawler.crawler; package main.java.webcrawler.crawler;
import main.java.webcrawler.visualiser.Visualiser;
import org.jsoup.Connection; import org.jsoup.Connection;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
@@ -11,17 +12,19 @@ import java.util.LinkedList;
import java.util.List; import java.util.List;
public class CrawlBranch { public class CrawlBranch {
private final Visualiser logger;
private List<String> links = new LinkedList<>(); private List<String> links = new LinkedList<>();
private Document htmlDocument; private Document htmlDocument;
private boolean debug; private boolean debug;
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.112 Safari/535.1"; private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.112 Safari/535.1";
/**
 * Creates a branch with debug output switched off and no visualiser attached.
 */
public CrawlBranch() {
    this(false, null);
}

/**
 * Creates a branch with an explicit debug setting and log target.
 *
 * @param debug  whether debug messages should be emitted
 * @param logger visualiser that receives debug messages; the no-argument
 *               constructor passes {@code null} here
 */
public CrawlBranch(boolean debug, Visualiser logger) {
    this.logger = logger;
    this.debug = debug;
}
/** /**
@@ -96,6 +99,6 @@ public class CrawlBranch {
} }
private void print(String text) { private void print(String text) {
if (debug) System.out.println(text); if (debug) logger.log(text);
} }
} }

View File

@@ -1,25 +1,27 @@
package main.java.webcrawler.crawler; package main.java.webcrawler.crawler;
import main.java.webcrawler.visualiser.Visualiser;
public class CrawlThread extends Thread { public class CrawlThread extends Thread {
private final int amount; private final int amount;
private final boolean save;
private final boolean debug; private final boolean debug;
private final String startUrl; private final String startUrl;
private final String word; private final String word;
private WebCrawler crawler; private WebCrawler crawler;
private Visualiser visualiser;
/**
 * Stores the settings for one crawl; the crawler itself is only created in {@code run()}.
 *
 * @param amount     passed to the crawler as its maximum number of pages
 * @param debug      passed to the crawler as its debug flag
 * @param startUrl   URL the search starts from
 * @param word       word handed to the crawler's search
 * @param visualiser visualiser handed to the crawler as its logger
 */
public CrawlThread(int amount, boolean debug, String startUrl, String word, Visualiser visualiser) {
    this.visualiser = visualiser;
    this.word = word;
    this.startUrl = startUrl;
    this.debug = debug;
    this.amount = amount;
}
public void run() { public void run() {
this.crawler = new WebCrawler(amount, save, debug); this.crawler = new WebCrawler(amount, true, debug,visualiser);
this.crawler.search(startUrl, word); this.crawler.search(startUrl, word);
} }
@@ -31,10 +33,6 @@ public class CrawlThread extends Thread {
return amount; return amount;
} }
public boolean isSave() {
return save;
}
public boolean isDebug() { public boolean isDebug() {
return debug; return debug;
} }

View File

@@ -1,6 +1,7 @@
package main.java.webcrawler.crawler; package main.java.webcrawler.crawler;
import main.java.webcrawler.crawler.CrawlBranch; import main.java.webcrawler.crawler.CrawlBranch;
import main.java.webcrawler.visualiser.Visualiser;
import java.util.*; import java.util.*;
@@ -10,6 +11,7 @@ public class WebCrawler {
private List<String> pagesPending; private List<String> pagesPending;
private ArrayList<String> resultPages; private ArrayList<String> resultPages;
private Map<String, Integer> urlHits; private Map<String, Integer> urlHits;
private Visualiser logger;
private int amountFound = 0; private int amountFound = 0;
private int successPages = 0; private int successPages = 0;
private boolean shouldSaveHitLinks; private boolean shouldSaveHitLinks;
@@ -38,10 +40,10 @@ public class WebCrawler {
* @param shouldSaveHitLinks if the crawler should save the links that have one or more hits * @param shouldSaveHitLinks if the crawler should save the links that have one or more hits
*/ */
public WebCrawler(int maxPages, boolean shouldSaveHitLinks) {
    // Delegates to the full constructor with debug output off and no visualiser.
    this(maxPages, shouldSaveHitLinks, false, null);
}
public WebCrawler(int maxPages, boolean shouldSaveHitLinks, boolean debug) { public WebCrawler(int maxPages, boolean shouldSaveHitLinks, boolean debug, Visualiser logger) {
this.amountOfPages = maxPages; this.amountOfPages = maxPages;
this.shouldSaveHitLinks = shouldSaveHitLinks; this.shouldSaveHitLinks = shouldSaveHitLinks;
this.pagesVisited = new HashSet<>(); this.pagesVisited = new HashSet<>();
@@ -49,6 +51,7 @@ public class WebCrawler {
this.resultPages = new ArrayList<>(); this.resultPages = new ArrayList<>();
this.urlHits = new HashMap<>(); this.urlHits = new HashMap<>();
this.debug = debug; this.debug = debug;
this.logger = logger;
} }
@@ -76,14 +79,14 @@ public class WebCrawler {
int counter = 0; int counter = 0;
while (this.pagesVisited.size() < amountOfPages) { while (this.pagesVisited.size() < amountOfPages) {
String curUrl; String curUrl;
CrawlBranch branch = new CrawlBranch(debug); CrawlBranch branch = new CrawlBranch(debug,logger);
if (this.pagesPending.isEmpty()) { if (this.pagesPending.isEmpty()) {
curUrl = url; curUrl = url;
this.pagesVisited.add(url); this.pagesVisited.add(url);
} else { } else {
curUrl = this.nextUrl(); curUrl = this.nextUrl();
counter++; counter++;
System.out.println(String.format("visiting page %s / %s",counter,amountOfPages)); print(String.format("visiting page %s / %s",counter,amountOfPages));
} }
branch.crawl(curUrl); branch.crawl(curUrl);
@@ -98,10 +101,10 @@ public class WebCrawler {
} }
this.pagesPending.addAll(branch.getLinks()); this.pagesPending.addAll(branch.getLinks());
} }
System.out.println("========================"); print("========================");
System.out.printf("DONE -- Visited %s webpages\nHits: %s\nAmount of pages with hits: %s\n", this.pagesVisited.size(), amountFound, successPages); print(String.format("DONE -- Visited %s webpages\nHits: %s\nAmount of pages with hits: %s\n", this.pagesVisited.size(), amountFound, successPages));
if (shouldSaveHitLinks) { if (shouldSaveHitLinks) {
System.out.printf("Successful pages: \n%s", showCombinations(urlHits)); print(String.format("Successful pages: \n%s", showCombinations(urlHits)));
} }
} }
@@ -168,6 +171,6 @@ public class WebCrawler {
} }
private void print(String text) { private void print(String text) {
if (debug) System.out.println(text); if (debug) logger.log(text);
} }
} }

View File

@@ -10,6 +10,7 @@ import javafx.geometry.HPos;
import javafx.geometry.Insets; import javafx.geometry.Insets;
import javafx.geometry.Pos; import javafx.geometry.Pos;
import javafx.scene.Scene; import javafx.scene.Scene;
import javafx.scene.control.Button;
import javafx.scene.control.Label; import javafx.scene.control.Label;
import javafx.scene.control.ListView; import javafx.scene.control.ListView;
import javafx.scene.control.TextField; import javafx.scene.control.TextField;
@@ -17,6 +18,7 @@ import javafx.scene.layout.BorderPane;
import javafx.scene.layout.HBox; import javafx.scene.layout.HBox;
import javafx.scene.layout.VBox; import javafx.scene.layout.VBox;
import javafx.stage.Stage; import javafx.stage.Stage;
import main.java.webcrawler.crawler.CrawlThread;
import org.jfree.fx.FXGraphics2D; import org.jfree.fx.FXGraphics2D;
import org.jfree.fx.ResizableCanvas; import org.jfree.fx.ResizableCanvas;
@@ -63,7 +65,7 @@ public class Visualiser extends Application {
} }
private void initGUIElements() { private void initGUIElements() {
HBox top = new HBox(200); HBox top = new HBox(100);
top.getStyleClass().add("content"); top.getStyleClass().add("content");
top.setPadding(new Insets(10, 10, 10, 10)); top.setPadding(new Insets(10, 10, 10, 10));
top.setPrefWidth(canvas.getWidth()); top.setPrefWidth(canvas.getWidth());
@@ -91,7 +93,14 @@ public class Visualiser extends Application {
new Label("Maximum amount of pages:"), new Label("Maximum amount of pages:"),
amountField); amountField);
top.getChildren().add(content); top.getChildren().add(content);
Button button = new Button("Run");
button.setOnAction(e -> {
log.getItems().clear();
CrawlThread thread = new CrawlThread(Integer.parseInt(amountField.getText()), true, urlField.getText(), wordField.getText(), this);
thread.start();
});
top.getChildren().add(button);
log = new ListView<>(); log = new ListView<>();
log.setMinWidth(1100); log.setMinWidth(1100);
top.setAlignment(Pos.CENTER_LEFT); top.setAlignment(Pos.CENTER_LEFT);
@@ -136,6 +145,10 @@ public class Visualiser extends Application {
} }
public void log(String item) { public void log(String item) {
this.log.getItems().add(item); try {
this.log.getItems().add(item);
} catch (Exception e) {
System.out.println("exception caught");
}
} }
} }