made debug window functional
This commit is contained in:
@@ -18,13 +18,16 @@ public class CrawlBranch {
|
|||||||
private boolean debug;
|
private boolean debug;
|
||||||
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.112 Safari/535.1";
|
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.112 Safari/535.1";
|
||||||
|
|
||||||
|
private WebCrawler parent;
|
||||||
|
|
||||||
public CrawlBranch() {
|
public CrawlBranch() {
|
||||||
this(false,null);
|
this(false,null,null);
|
||||||
}
|
}
|
||||||
|
|
||||||
public CrawlBranch(boolean debug, Visualiser logger) {
|
public CrawlBranch(boolean debug, WebCrawler parent,Visualiser logger) {
|
||||||
this.debug = debug;
|
this.debug = debug;
|
||||||
this.logger = logger;
|
this.logger = logger;
|
||||||
|
this.parent = parent;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -39,14 +42,17 @@ public class CrawlBranch {
|
|||||||
this.htmlDocument = connection.get();
|
this.htmlDocument = connection.get();
|
||||||
|
|
||||||
if (connection.response().statusCode() == 200) {
|
if (connection.response().statusCode() == 200) {
|
||||||
print("VISITING -- Recieved web page at " + url);
|
// print("VISITING -- Recieved web page at " + url);
|
||||||
|
sendMessage("VISITING -- Recieved web page at " + url);
|
||||||
} else {
|
} else {
|
||||||
print("FAIL -- recieved something else than a web page");
|
// print("FAIL -- recieved something else than a web page");
|
||||||
|
sendMessage("FAIL -- recieved something else than a web page");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
Elements linksOnPage = htmlDocument.select("a[href]");
|
Elements linksOnPage = htmlDocument.select("a[href]");
|
||||||
print("FOUND (" + linksOnPage.size() + ") links");
|
// print("FOUND (" + linksOnPage.size() + ") links");
|
||||||
|
sendMessage("FOUND (" + linksOnPage.size() + ") links");
|
||||||
for (Element link : linksOnPage) {
|
for (Element link : linksOnPage) {
|
||||||
this.links.add(link.absUrl("href"));
|
this.links.add(link.absUrl("href"));
|
||||||
}
|
}
|
||||||
@@ -68,7 +74,8 @@ public class CrawlBranch {
|
|||||||
//System.out.println("ERROR -- call crawl before searhing");
|
//System.out.println("ERROR -- call crawl before searhing");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
print(String.format("Searching for %s...", word));
|
// print(String.format("Searching for %s...", word));
|
||||||
|
sendMessage(String.format("Searching for %s...", word));
|
||||||
String bodyText = this.htmlDocument.body().text();
|
String bodyText = this.htmlDocument.body().text();
|
||||||
return count(bodyText.toLowerCase(), word.toLowerCase());
|
return count(bodyText.toLowerCase(), word.toLowerCase());
|
||||||
}
|
}
|
||||||
@@ -101,4 +108,8 @@ public class CrawlBranch {
|
|||||||
private void print(String text) {
|
private void print(String text) {
|
||||||
if (debug) logger.log(text);
|
if (debug) logger.log(text);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void sendMessage(String message) {
|
||||||
|
this.parent.addMessage(message);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,6 +2,8 @@ package main.java.webcrawler.crawler;
|
|||||||
|
|
||||||
import main.java.webcrawler.visualiser.Visualiser;
|
import main.java.webcrawler.visualiser.Visualiser;
|
||||||
|
|
||||||
|
import java.util.LinkedList;
|
||||||
|
|
||||||
public class CrawlThread extends Thread {
|
public class CrawlThread extends Thread {
|
||||||
|
|
||||||
private final int amount;
|
private final int amount;
|
||||||
@@ -44,4 +46,10 @@ public class CrawlThread extends Thread {
|
|||||||
public String getWord() {
|
public String getWord() {
|
||||||
return word;
|
return word;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public LinkedList<String> retrieveLog() {
|
||||||
|
return this.crawler.getMessages();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,6 +17,8 @@ public class WebCrawler {
|
|||||||
private boolean shouldSaveHitLinks;
|
private boolean shouldSaveHitLinks;
|
||||||
private boolean debug;
|
private boolean debug;
|
||||||
|
|
||||||
|
private LinkedList<String> messages;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* creates a new WebCrawler object with standard values
|
* creates a new WebCrawler object with standard values
|
||||||
*/
|
*/
|
||||||
@@ -52,6 +54,7 @@ public class WebCrawler {
|
|||||||
this.urlHits = new HashMap<>();
|
this.urlHits = new HashMap<>();
|
||||||
this.debug = debug;
|
this.debug = debug;
|
||||||
this.logger = logger;
|
this.logger = logger;
|
||||||
|
this.messages = new LinkedList<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -79,20 +82,22 @@ public class WebCrawler {
|
|||||||
int counter = 0;
|
int counter = 0;
|
||||||
while (this.pagesVisited.size() < amountOfPages) {
|
while (this.pagesVisited.size() < amountOfPages) {
|
||||||
String curUrl;
|
String curUrl;
|
||||||
CrawlBranch branch = new CrawlBranch(debug,logger);
|
CrawlBranch branch = new CrawlBranch(debug, this, logger);
|
||||||
if (this.pagesPending.isEmpty()) {
|
if (this.pagesPending.isEmpty()) {
|
||||||
curUrl = url;
|
curUrl = url;
|
||||||
this.pagesVisited.add(url);
|
this.pagesVisited.add(url);
|
||||||
} else {
|
} else {
|
||||||
curUrl = this.nextUrl();
|
curUrl = this.nextUrl();
|
||||||
counter++;
|
counter++;
|
||||||
print(String.format("visiting page %s / %s",counter,amountOfPages));
|
// print(String.format("visiting page %s / %s",counter,amountOfPages));
|
||||||
|
addMessage(String.format("visiting page %s / %s", counter, amountOfPages));
|
||||||
}
|
}
|
||||||
branch.crawl(curUrl);
|
branch.crawl(curUrl);
|
||||||
|
|
||||||
int amount = branch.searchForWord(searchWord);
|
int amount = branch.searchForWord(searchWord);
|
||||||
if (amount > 0) {
|
if (amount > 0) {
|
||||||
print(String.format("SUCCESS -- word %s found at %s %s times\n", searchWord, curUrl, amount));
|
// print(String.format("SUCCESS -- word %s found at %s %s times\n", searchWord, curUrl, amount));
|
||||||
|
addMessage(String.format("SUCCESS -- word %s found at %s %s times\n", searchWord, curUrl, amount));
|
||||||
successPages++;
|
successPages++;
|
||||||
amountFound += amount;
|
amountFound += amount;
|
||||||
if (shouldSaveHitLinks)
|
if (shouldSaveHitLinks)
|
||||||
@@ -101,10 +106,13 @@ public class WebCrawler {
|
|||||||
}
|
}
|
||||||
this.pagesPending.addAll(branch.getLinks());
|
this.pagesPending.addAll(branch.getLinks());
|
||||||
}
|
}
|
||||||
print("========================");
|
// print("========================");
|
||||||
print(String.format("DONE -- Visited %s webpages\nHits: %s\nAmount of pages with hits: %s\n", this.pagesVisited.size(), amountFound, successPages));
|
addMessage("========================");
|
||||||
|
// print(String.format("DONE -- Visited %s webpages\nHits: %s\nAmount of pages with hits: %s\n", this.pagesVisited.size(), amountFound, successPages));
|
||||||
|
addMessage(String.format("DONE -- Visited %s webpages\nHits: %s\nAmount of pages with hits: %s\n", this.pagesVisited.size(), amountFound, successPages));
|
||||||
if (shouldSaveHitLinks) {
|
if (shouldSaveHitLinks) {
|
||||||
print(String.format("Successful pages: \n%s", showCombinations(urlHits)));
|
// print(String.format("Successful pages: \n%s", showCombinations(urlHits)));
|
||||||
|
addMessage(String.format("Successful pages: \n%s", showCombinations(urlHits)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -173,4 +181,17 @@ public class WebCrawler {
|
|||||||
private void print(String text) {
|
private void print(String text) {
|
||||||
if (debug) logger.log(text);
|
if (debug) logger.log(text);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void addMessage(String message) {
|
||||||
|
this.messages.add(message);
|
||||||
|
// System.out.println(message);
|
||||||
|
}
|
||||||
|
|
||||||
|
public LinkedList<String> getMessages() {
|
||||||
|
return this.messages;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void clearMessages() {
|
||||||
|
this.messages.clear();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ import javafx.scene.layout.HBox;
|
|||||||
import javafx.scene.layout.VBox;
|
import javafx.scene.layout.VBox;
|
||||||
import javafx.stage.Stage;
|
import javafx.stage.Stage;
|
||||||
import main.java.webcrawler.crawler.CrawlThread;
|
import main.java.webcrawler.crawler.CrawlThread;
|
||||||
|
import main.java.webcrawler.crawler.WebCrawler;
|
||||||
import org.jfree.fx.FXGraphics2D;
|
import org.jfree.fx.FXGraphics2D;
|
||||||
import org.jfree.fx.ResizableCanvas;
|
import org.jfree.fx.ResizableCanvas;
|
||||||
|
|
||||||
@@ -27,12 +28,14 @@ import java.awt.geom.Line2D;
|
|||||||
import java.awt.geom.Rectangle2D;
|
import java.awt.geom.Rectangle2D;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public class Visualiser extends Application {
|
public class Visualiser extends Application {
|
||||||
private double frameTime = 0;
|
private double frameTime = 0;
|
||||||
private BorderPane pane;
|
private BorderPane pane;
|
||||||
private ResizableCanvas canvas;
|
private ResizableCanvas canvas;
|
||||||
private ListView<String> log;
|
private ListView<String> log;
|
||||||
|
private CrawlThread thread;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void start(Stage primaryStage) throws Exception {
|
public void start(Stage primaryStage) throws Exception {
|
||||||
@@ -96,7 +99,7 @@ public class Visualiser extends Application {
|
|||||||
Button button = new Button("Run");
|
Button button = new Button("Run");
|
||||||
button.setOnAction(e -> {
|
button.setOnAction(e -> {
|
||||||
log.getItems().clear();
|
log.getItems().clear();
|
||||||
CrawlThread thread = new CrawlThread(Integer.parseInt(amountField.getText()), true, urlField.getText(), wordField.getText(), this);
|
thread = new CrawlThread(Integer.parseInt(amountField.getText()), true, urlField.getText(), wordField.getText(), this);
|
||||||
thread.start();
|
thread.start();
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -142,6 +145,19 @@ public class Visualiser extends Application {
|
|||||||
updateFrame();
|
updateFrame();
|
||||||
this.frameTime = 0d;
|
this.frameTime = 0d;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (thread != null && thread.isAlive()) {
|
||||||
|
WebCrawler crawler = thread.getCrawler();
|
||||||
|
if (crawler != null) {
|
||||||
|
List<String> msgs = crawler.getMessages();
|
||||||
|
if (msgs != null)
|
||||||
|
if (!msgs.isEmpty()) {
|
||||||
|
log.getItems().addAll(msgs);
|
||||||
|
thread.getCrawler().clearMessages();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void log(String item) {
|
public void log(String item) {
|
||||||
|
|||||||
Reference in New Issue
Block a user