From ef3c08bde765fc93e570e3941e3c958b7f3b0def Mon Sep 17 00:00:00 2001 From: Luis Fernando Kimura Date: Mon, 28 Jun 2021 21:30:56 -0300 Subject: [PATCH] UTF-8 Support --- src/main/kotlin/io/thelandscape/krawler/http/KrawlDocument.kt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/kotlin/io/thelandscape/krawler/http/KrawlDocument.kt b/src/main/kotlin/io/thelandscape/krawler/http/KrawlDocument.kt index 0d98776..9838fc5 100644 --- a/src/main/kotlin/io/thelandscape/krawler/http/KrawlDocument.kt +++ b/src/main/kotlin/io/thelandscape/krawler/http/KrawlDocument.kt @@ -28,6 +28,7 @@ import org.jsoup.nodes.Element import org.jsoup.select.Elements import java.io.ByteArrayInputStream import javax.xml.parsers.DocumentBuilderFactory +import java.nio.charset.StandardCharsets interface RequestResponse @@ -53,7 +54,7 @@ class KrawlDocument(val url: KrawlUrl, response: HttpResponse, context: HttpClie /** * Raw HTML */ - val rawHtml: String = try { EntityUtils.toString(response.entity) ?: "" } catch (e: Throwable) { "" } + val rawHtml: String = try { EntityUtils.toString(response.entity, StandardCharsets.UTF_8) ?: "" } catch (e: Throwable) { "" } /** * Status code