Added writing of debug output to the visualiser log

This commit is contained in:
Sem van der Hoeven
2020-03-04 22:10:43 +01:00
parent faa2ff2b67
commit 6fc378b342
4 changed files with 38 additions and 21 deletions

View File

@@ -1,5 +1,6 @@
package main.java.webcrawler.crawler;
import main.java.webcrawler.visualiser.Visualiser;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
@@ -11,17 +12,19 @@ import java.util.LinkedList;
import java.util.List;
public class CrawlBranch {
private final Visualiser logger;
private List<String> links = new LinkedList<>();
private Document htmlDocument;
private boolean debug;
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.112 Safari/535.1";
/**
 * Creates a branch with debug output disabled and no visualiser attached.
 */
public CrawlBranch() {
    // Only one explicit this(...) call is allowed, and it must be the first statement.
    this(false, null);
}
public CrawlBranch(boolean debug) {
public CrawlBranch(boolean debug, Visualiser logger) {
this.debug = debug;
this.logger = logger;
}
/**
@@ -96,6 +99,6 @@ public class CrawlBranch {
}
/**
 * Emits a debug message; does nothing unless debug mode is enabled.
 *
 * @param text the message to emit
 */
private void print(String text) {
    if (!debug) {
        return;
    }
    if (logger != null) {
        logger.log(text);
    } else {
        // No visualiser was supplied: fall back to stdout instead of
        // failing with a NullPointerException.
        System.out.println(text);
    }
}
}

View File

@@ -1,25 +1,27 @@
package main.java.webcrawler.crawler;
import main.java.webcrawler.visualiser.Visualiser;
public class CrawlThread extends Thread {
private final int amount;
private final boolean save;
private final boolean debug;
private final String startUrl;
private final String word;
private WebCrawler crawler;
private Visualiser visualiser;
public CrawlThread(int amount, boolean save, boolean debug, String startUrl, String word) {
public CrawlThread(int amount, boolean debug, String startUrl, String word, Visualiser visualiser) {
this.amount = amount;
this.save = save;
this.debug = debug;
this.startUrl = startUrl;
this.word = word;
this.visualiser = visualiser;
}
/**
 * Builds the crawler for this thread and starts the search from the start
 * URL for the configured word.
 */
@Override
public void run() {
    // Construct the crawler once; the second argument (shouldSaveHitLinks)
    // is fixed to true and debug output is routed to the visualiser.
    this.crawler = new WebCrawler(amount, true, debug, visualiser);
    this.crawler.search(startUrl, word);
}
@@ -31,10 +33,6 @@ public class CrawlThread extends Thread {
return amount;
}
/**
 * @return the {@code save} flag this thread was constructed with
 */
public boolean isSave() {
return save;
}
/**
 * @return the {@code debug} flag this thread was constructed with
 */
public boolean isDebug() {
return debug;
}

View File

@@ -1,6 +1,7 @@
package main.java.webcrawler.crawler;
import main.java.webcrawler.crawler.CrawlBranch;
import main.java.webcrawler.visualiser.Visualiser;
import java.util.*;
@@ -10,6 +11,7 @@ public class WebCrawler {
private List<String> pagesPending;
private ArrayList<String> resultPages;
private Map<String, Integer> urlHits;
private Visualiser logger;
private int amountFound = 0;
private int successPages = 0;
private boolean shouldSaveHitLinks;
@@ -38,10 +40,10 @@ public class WebCrawler {
* @param shouldSaveHitLinks if the crawler should save the links that have one or more hits
*/
public WebCrawler(int maxPages, boolean shouldSaveHitLinks) {
    // Delegate exactly once (a constructor may hold a single this(...) call):
    // debug off, no visualiser attached.
    this(maxPages, shouldSaveHitLinks, false, null);
}
public WebCrawler(int maxPages, boolean shouldSaveHitLinks, boolean debug) {
public WebCrawler(int maxPages, boolean shouldSaveHitLinks, boolean debug, Visualiser logger) {
this.amountOfPages = maxPages;
this.shouldSaveHitLinks = shouldSaveHitLinks;
this.pagesVisited = new HashSet<>();
@@ -49,6 +51,7 @@ public class WebCrawler {
this.resultPages = new ArrayList<>();
this.urlHits = new HashMap<>();
this.debug = debug;
this.logger = logger;
}
@@ -76,14 +79,14 @@ public class WebCrawler {
int counter = 0;
while (this.pagesVisited.size() < amountOfPages) {
String curUrl;
CrawlBranch branch = new CrawlBranch(debug);
CrawlBranch branch = new CrawlBranch(debug,logger);
if (this.pagesPending.isEmpty()) {
curUrl = url;
this.pagesVisited.add(url);
} else {
curUrl = this.nextUrl();
counter++;
System.out.println(String.format("visiting page %s / %s",counter,amountOfPages));
print(String.format("visiting page %s / %s",counter,amountOfPages));
}
branch.crawl(curUrl);
@@ -98,10 +101,10 @@ public class WebCrawler {
}
this.pagesPending.addAll(branch.getLinks());
}
System.out.println("========================");
System.out.printf("DONE -- Visited %s webpages\nHits: %s\nAmount of pages with hits: %s\n", this.pagesVisited.size(), amountFound, successPages);
print("========================");
print(String.format("DONE -- Visited %s webpages\nHits: %s\nAmount of pages with hits: %s\n", this.pagesVisited.size(), amountFound, successPages));
if (shouldSaveHitLinks) {
System.out.printf("Successful pages: \n%s", showCombinations(urlHits));
print(String.format("Successful pages: \n%s", showCombinations(urlHits)));
}
}
@@ -168,6 +171,6 @@ public class WebCrawler {
}
/**
 * Emits a debug message; does nothing unless debug mode is enabled.
 *
 * @param text the message to emit
 */
private void print(String text) {
    if (!debug) {
        return;
    }
    if (logger != null) {
        logger.log(text);
    } else {
        // No visualiser was supplied: fall back to stdout instead of
        // failing with a NullPointerException.
        System.out.println(text);
    }
}
}

View File

@@ -10,6 +10,7 @@ import javafx.geometry.HPos;
import javafx.geometry.Insets;
import javafx.geometry.Pos;
import javafx.scene.Scene;
import javafx.scene.control.Button;
import javafx.scene.control.Label;
import javafx.scene.control.ListView;
import javafx.scene.control.TextField;
@@ -17,6 +18,7 @@ import javafx.scene.layout.BorderPane;
import javafx.scene.layout.HBox;
import javafx.scene.layout.VBox;
import javafx.stage.Stage;
import main.java.webcrawler.crawler.CrawlThread;
import org.jfree.fx.FXGraphics2D;
import org.jfree.fx.ResizableCanvas;
@@ -63,7 +65,7 @@ public class Visualiser extends Application {
}
private void initGUIElements() {
HBox top = new HBox(200);
HBox top = new HBox(100);
top.getStyleClass().add("content");
top.setPadding(new Insets(10, 10, 10, 10));
top.setPrefWidth(canvas.getWidth());
@@ -91,7 +93,14 @@ public class Visualiser extends Application {
new Label("Maximum amount of pages:"),
amountField);
top.getChildren().add(content);
Button button = new Button("Run");
button.setOnAction(e -> {
log.getItems().clear();
CrawlThread thread = new CrawlThread(Integer.parseInt(amountField.getText()), true, urlField.getText(), wordField.getText(), this);
thread.start();
});
top.getChildren().add(button);
log = new ListView<>();
log.setMinWidth(1100);
top.setAlignment(Pos.CENTER_LEFT);
@@ -136,6 +145,10 @@ public class Visualiser extends Application {
}
/**
 * Appends a line to the on-screen log list.
 * <p>
 * This is called from the crawler thread as well as from UI code, so the
 * actual list mutation is always performed on the JavaFX application thread.
 *
 * @param item the text to append
 */
public void log(String item) {
    if (javafx.application.Platform.isFxApplicationThread()) {
        this.log.getItems().add(item);
    } else {
        // Scene-graph state must only be touched on the FX thread; hand the
        // update over rather than mutating the list from a worker thread.
        javafx.application.Platform.runLater(() -> this.log.getItems().add(item));
    }
}
}