Made debug window functional
This commit is contained in:
@@ -18,13 +18,16 @@ public class CrawlBranch {
|
||||
private boolean debug;
|
||||
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.112 Safari/535.1";
|
||||
|
||||
private WebCrawler parent;
|
||||
|
||||
public CrawlBranch() {
|
||||
this(false,null);
|
||||
this(false,null,null);
|
||||
}
|
||||
|
||||
public CrawlBranch(boolean debug, Visualiser logger) {
|
||||
public CrawlBranch(boolean debug, WebCrawler parent,Visualiser logger) {
|
||||
this.debug = debug;
|
||||
this.logger = logger;
|
||||
this.parent = parent;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -39,14 +42,17 @@ public class CrawlBranch {
|
||||
this.htmlDocument = connection.get();
|
||||
|
||||
if (connection.response().statusCode() == 200) {
|
||||
print("VISITING -- Recieved web page at " + url);
|
||||
// print("VISITING -- Recieved web page at " + url);
|
||||
sendMessage("VISITING -- Recieved web page at " + url);
|
||||
} else {
|
||||
print("FAIL -- recieved something else than a web page");
|
||||
// print("FAIL -- recieved something else than a web page");
|
||||
sendMessage("FAIL -- recieved something else than a web page");
|
||||
return false;
|
||||
}
|
||||
|
||||
Elements linksOnPage = htmlDocument.select("a[href]");
|
||||
print("FOUND (" + linksOnPage.size() + ") links");
|
||||
// print("FOUND (" + linksOnPage.size() + ") links");
|
||||
sendMessage("FOUND (" + linksOnPage.size() + ") links");
|
||||
for (Element link : linksOnPage) {
|
||||
this.links.add(link.absUrl("href"));
|
||||
}
|
||||
@@ -68,7 +74,8 @@ public class CrawlBranch {
|
||||
//System.out.println("ERROR -- call crawl before searching");
|
||||
return -1;
|
||||
}
|
||||
print(String.format("Searching for %s...", word));
|
||||
// print(String.format("Searching for %s...", word));
|
||||
sendMessage(String.format("Searching for %s...", word));
|
||||
String bodyText = this.htmlDocument.body().text();
|
||||
return count(bodyText.toLowerCase(), word.toLowerCase());
|
||||
}
|
||||
@@ -101,4 +108,8 @@ public class CrawlBranch {
|
||||
private void print(String text) {
|
||||
if (debug) logger.log(text);
|
||||
}
|
||||
|
||||
private void sendMessage(String message) {
|
||||
this.parent.addMessage(message);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,8 @@ package main.java.webcrawler.crawler;
|
||||
|
||||
import main.java.webcrawler.visualiser.Visualiser;
|
||||
|
||||
import java.util.LinkedList;
|
||||
|
||||
public class CrawlThread extends Thread {
|
||||
|
||||
private final int amount;
|
||||
@@ -44,4 +46,10 @@ public class CrawlThread extends Thread {
|
||||
public String getWord() {
|
||||
return word;
|
||||
}
|
||||
|
||||
public LinkedList<String> retrieveLog() {
|
||||
return this.crawler.getMessages();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -17,6 +17,8 @@ public class WebCrawler {
|
||||
private boolean shouldSaveHitLinks;
|
||||
private boolean debug;
|
||||
|
||||
private LinkedList<String> messages;
|
||||
|
||||
/**
|
||||
* creates a new WebCrawler object with standard values
|
||||
*/
|
||||
@@ -52,6 +54,7 @@ public class WebCrawler {
|
||||
this.urlHits = new HashMap<>();
|
||||
this.debug = debug;
|
||||
this.logger = logger;
|
||||
this.messages = new LinkedList<>();
|
||||
}
|
||||
|
||||
|
||||
@@ -79,20 +82,22 @@ public class WebCrawler {
|
||||
int counter = 0;
|
||||
while (this.pagesVisited.size() < amountOfPages) {
|
||||
String curUrl;
|
||||
CrawlBranch branch = new CrawlBranch(debug,logger);
|
||||
CrawlBranch branch = new CrawlBranch(debug, this, logger);
|
||||
if (this.pagesPending.isEmpty()) {
|
||||
curUrl = url;
|
||||
this.pagesVisited.add(url);
|
||||
} else {
|
||||
curUrl = this.nextUrl();
|
||||
counter++;
|
||||
print(String.format("visiting page %s / %s",counter,amountOfPages));
|
||||
// print(String.format("visiting page %s / %s",counter,amountOfPages));
|
||||
addMessage(String.format("visiting page %s / %s", counter, amountOfPages));
|
||||
}
|
||||
branch.crawl(curUrl);
|
||||
|
||||
int amount = branch.searchForWord(searchWord);
|
||||
if (amount > 0) {
|
||||
print(String.format("SUCCESS -- word %s found at %s %s times\n", searchWord, curUrl, amount));
|
||||
// print(String.format("SUCCESS -- word %s found at %s %s times\n", searchWord, curUrl, amount));
|
||||
addMessage(String.format("SUCCESS -- word %s found at %s %s times\n", searchWord, curUrl, amount));
|
||||
successPages++;
|
||||
amountFound += amount;
|
||||
if (shouldSaveHitLinks)
|
||||
@@ -101,10 +106,13 @@ public class WebCrawler {
|
||||
}
|
||||
this.pagesPending.addAll(branch.getLinks());
|
||||
}
|
||||
print("========================");
|
||||
print(String.format("DONE -- Visited %s webpages\nHits: %s\nAmount of pages with hits: %s\n", this.pagesVisited.size(), amountFound, successPages));
|
||||
// print("========================");
|
||||
addMessage("========================");
|
||||
// print(String.format("DONE -- Visited %s webpages\nHits: %s\nAmount of pages with hits: %s\n", this.pagesVisited.size(), amountFound, successPages));
|
||||
addMessage(String.format("DONE -- Visited %s webpages\nHits: %s\nAmount of pages with hits: %s\n", this.pagesVisited.size(), amountFound, successPages));
|
||||
if (shouldSaveHitLinks) {
|
||||
print(String.format("Successful pages: \n%s", showCombinations(urlHits)));
|
||||
// print(String.format("Successful pages: \n%s", showCombinations(urlHits)));
|
||||
addMessage(String.format("Successful pages: \n%s", showCombinations(urlHits)));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -173,4 +181,17 @@ public class WebCrawler {
|
||||
private void print(String text) {
|
||||
if (debug) logger.log(text);
|
||||
}
|
||||
|
||||
public void addMessage(String message) {
|
||||
this.messages.add(message);
|
||||
// System.out.println(message);
|
||||
}
|
||||
|
||||
public LinkedList<String> getMessages() {
|
||||
return this.messages;
|
||||
}
|
||||
|
||||
public void clearMessages() {
|
||||
this.messages.clear();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@ import javafx.scene.layout.HBox;
|
||||
import javafx.scene.layout.VBox;
|
||||
import javafx.stage.Stage;
|
||||
import main.java.webcrawler.crawler.CrawlThread;
|
||||
import main.java.webcrawler.crawler.WebCrawler;
|
||||
import org.jfree.fx.FXGraphics2D;
|
||||
import org.jfree.fx.ResizableCanvas;
|
||||
|
||||
@@ -27,12 +28,14 @@ import java.awt.geom.Line2D;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
public class Visualiser extends Application {
|
||||
private double frameTime = 0;
|
||||
private BorderPane pane;
|
||||
private ResizableCanvas canvas;
|
||||
private ListView<String> log;
|
||||
private CrawlThread thread;
|
||||
|
||||
@Override
|
||||
public void start(Stage primaryStage) throws Exception {
|
||||
@@ -96,7 +99,7 @@ public class Visualiser extends Application {
|
||||
Button button = new Button("Run");
|
||||
button.setOnAction(e -> {
|
||||
log.getItems().clear();
|
||||
CrawlThread thread = new CrawlThread(Integer.parseInt(amountField.getText()), true, urlField.getText(), wordField.getText(), this);
|
||||
thread = new CrawlThread(Integer.parseInt(amountField.getText()), true, urlField.getText(), wordField.getText(), this);
|
||||
thread.start();
|
||||
});
|
||||
|
||||
@@ -142,6 +145,19 @@ public class Visualiser extends Application {
|
||||
updateFrame();
|
||||
this.frameTime = 0d;
|
||||
}
|
||||
|
||||
if (thread != null && thread.isAlive()) {
|
||||
WebCrawler crawler = thread.getCrawler();
|
||||
if (crawler != null) {
|
||||
List<String> msgs = crawler.getMessages();
|
||||
if (msgs != null)
|
||||
if (!msgs.isEmpty()) {
|
||||
log.getItems().addAll(msgs);
|
||||
thread.getCrawler().clearMessages();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public void log(String item) {
|
||||
|
||||
Reference in New Issue
Block a user