added writing of debug output to the Visualiser log
CrawlBranch.java
@@ -1,5 +1,6 @@
 package main.java.webcrawler.crawler;
 
+import main.java.webcrawler.visualiser.Visualiser;
 import org.jsoup.Connection;
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
@@ -11,17 +12,19 @@ import java.util.LinkedList;
 import java.util.List;
 
 public class CrawlBranch {
+    private final Visualiser logger;
     private List<String> links = new LinkedList<>();
     private Document htmlDocument;
     private boolean debug;
     private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.112 Safari/535.1";
 
     public CrawlBranch() {
-        this(false);
+        this(false,null);
     }
 
-    public CrawlBranch(boolean debug) {
+    public CrawlBranch(boolean debug, Visualiser logger) {
         this.debug = debug;
+        this.logger = logger;
     }
 
     /**
@@ -96,6 +99,6 @@ public class CrawlBranch {
     }
 
     private void print(String text) {
-        if (debug) System.out.println(text);
+        if (debug) logger.log(text);
     }
 }
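With this change, CrawlBranch only dereferences logger when debug is true, and the no-arg constructor chains to this(false,null), so the default path never touches the null logger. A slightly more defensive variant of print (a sketch only, not part of this commit) would make that assumption explicit:

    private void print(String text) {
        // hypothetical guard: only forward output when a Visualiser was actually supplied
        if (debug && logger != null) logger.log(text);
    }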
CrawlThread.java
@@ -1,25 +1,27 @@
 package main.java.webcrawler.crawler;
 
+import main.java.webcrawler.visualiser.Visualiser;
+
 public class CrawlThread extends Thread {
 
     private final int amount;
-    private final boolean save;
     private final boolean debug;
     private final String startUrl;
     private final String word;
     private WebCrawler crawler;
+    private Visualiser visualiser;
 
-    public CrawlThread(int amount, boolean save, boolean debug, String startUrl, String word) {
+    public CrawlThread(int amount, boolean debug, String startUrl, String word, Visualiser visualiser) {
         this.amount = amount;
-        this.save = save;
         this.debug = debug;
         this.startUrl = startUrl;
         this.word = word;
+        this.visualiser = visualiser;
 
     }
 
     public void run() {
-        this.crawler = new WebCrawler(amount, save, debug);
+        this.crawler = new WebCrawler(amount, true, debug,visualiser);
         this.crawler.search(startUrl, word);
     }
 
@@ -31,10 +33,6 @@ public class CrawlThread extends Thread {
         return amount;
     }
 
-    public boolean isSave() {
-        return save;
-    }
-
     public boolean isDebug() {
         return debug;
    }
WebCrawler.java
@@ -1,6 +1,7 @@
 package main.java.webcrawler.crawler;
 
 import main.java.webcrawler.crawler.CrawlBranch;
+import main.java.webcrawler.visualiser.Visualiser;
 
 import java.util.*;
 
@@ -10,6 +11,7 @@ public class WebCrawler {
     private List<String> pagesPending;
     private ArrayList<String> resultPages;
     private Map<String, Integer> urlHits;
+    private Visualiser logger;
     private int amountFound = 0;
     private int successPages = 0;
     private boolean shouldSaveHitLinks;
@@ -38,10 +40,10 @@ public class WebCrawler {
      * @param shouldSaveHitLinks if the crawler should save the links that have one or more hits
      */
     public WebCrawler(int maxPages, boolean shouldSaveHitLinks) {
-        this(maxPages, shouldSaveHitLinks, false);
+        this(maxPages, shouldSaveHitLinks, false, null);
     }
 
-    public WebCrawler(int maxPages, boolean shouldSaveHitLinks, boolean debug) {
+    public WebCrawler(int maxPages, boolean shouldSaveHitLinks, boolean debug, Visualiser logger) {
         this.amountOfPages = maxPages;
         this.shouldSaveHitLinks = shouldSaveHitLinks;
         this.pagesVisited = new HashSet<>();
@@ -49,6 +51,7 @@ public class WebCrawler {
         this.resultPages = new ArrayList<>();
         this.urlHits = new HashMap<>();
         this.debug = debug;
+        this.logger = logger;
     }
 
 
@@ -76,14 +79,14 @@ public class WebCrawler {
         int counter = 0;
         while (this.pagesVisited.size() < amountOfPages) {
             String curUrl;
-            CrawlBranch branch = new CrawlBranch(debug);
+            CrawlBranch branch = new CrawlBranch(debug,logger);
             if (this.pagesPending.isEmpty()) {
                 curUrl = url;
                 this.pagesVisited.add(url);
             } else {
                 curUrl = this.nextUrl();
                 counter++;
-                System.out.println(String.format("visiting page %s / %s",counter,amountOfPages));
+                print(String.format("visiting page %s / %s",counter,amountOfPages));
             }
             branch.crawl(curUrl);
 
@@ -98,10 +101,10 @@ public class WebCrawler {
             }
             this.pagesPending.addAll(branch.getLinks());
         }
-        System.out.println("========================");
-        System.out.printf("DONE -- Visited %s webpages\nHits: %s\nAmount of pages with hits: %s\n", this.pagesVisited.size(), amountFound, successPages);
+        print("========================");
+        print(String.format("DONE -- Visited %s webpages\nHits: %s\nAmount of pages with hits: %s\n", this.pagesVisited.size(), amountFound, successPages));
         if (shouldSaveHitLinks) {
-            System.out.printf("Successful pages: \n%s", showCombinations(urlHits));
+            print(String.format("Successful pages: \n%s", showCombinations(urlHits)));
         }
     }
 
@@ -168,6 +171,6 @@ public class WebCrawler {
     }
 
     private void print(String text) {
-        if (debug) System.out.println(text);
+        if (debug) logger.log(text);
     }
 }
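The net effect is that a Visualiser can now be threaded through the whole chain: the UI hands itself to CrawlThread, which passes it to WebCrawler, which passes it on to every CrawlBranch, and both print helpers forward to Visualiser.log. A minimal wiring sketch, assuming a live Visualiser instance vis and placeholder values for the page limit, start URL and search word:

    // mirrors what the Run button in Visualiser does; the literals are illustrative only
    CrawlThread thread = new CrawlThread(50, true, "https://example.com", "java", vis);
    thread.start(); // run() builds new WebCrawler(amount, true, debug, visualiser) and calls search(startUrl, word)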
Visualiser.java
@@ -10,6 +10,7 @@ import javafx.geometry.HPos;
 import javafx.geometry.Insets;
 import javafx.geometry.Pos;
 import javafx.scene.Scene;
 import javafx.scene.control.Button;
 import javafx.scene.control.Label;
+import javafx.scene.control.ListView;
 import javafx.scene.control.TextField;
@@ -17,6 +18,7 @@ import javafx.scene.layout.BorderPane;
 import javafx.scene.layout.HBox;
 import javafx.scene.layout.VBox;
 import javafx.stage.Stage;
+import main.java.webcrawler.crawler.CrawlThread;
 import org.jfree.fx.FXGraphics2D;
 import org.jfree.fx.ResizableCanvas;
 
@@ -63,7 +65,7 @@ public class Visualiser extends Application {
     }
 
     private void initGUIElements() {
-        HBox top = new HBox(200);
+        HBox top = new HBox(100);
         top.getStyleClass().add("content");
         top.setPadding(new Insets(10, 10, 10, 10));
         top.setPrefWidth(canvas.getWidth());
@@ -91,7 +93,14 @@ public class Visualiser extends Application {
                 new Label("Maximum amount of pages:"),
                 amountField);
         top.getChildren().add(content);
+        Button button = new Button("Run");
+        button.setOnAction(e -> {
+            log.getItems().clear();
+            CrawlThread thread = new CrawlThread(Integer.parseInt(amountField.getText()), true, urlField.getText(), wordField.getText(), this);
+            thread.start();
+        });
 
+        top.getChildren().add(button);
         log = new ListView<>();
         log.setMinWidth(1100);
         top.setAlignment(Pos.CENTER_LEFT);
@@ -136,6 +145,10 @@ public class Visualiser extends Application {
     }
 
     public void log(String item) {
-        this.log.getItems().add(item);
+        try {
+            this.log.getItems().add(item);
+        } catch (Exception e) {
+            System.out.println("exception caught");
+        }
     }
 }
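One thing worth noting: log(String) is now called from the crawl thread via the print helpers, while JavaFX expects scene-graph mutations on the FX Application Thread, which is presumably what the new try/catch is absorbing. A common alternative (a sketch under that assumption, not part of this commit) is to marshal the update onto the FX thread:

    public void log(String item) {
        // hand the ListView update to the JavaFX Application Thread
        javafx.application.Platform.runLater(() -> this.log.getItems().add(item));
    }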