From a542446739875281b6d434d4aa6559da9940c8d1 Mon Sep 17 00:00:00 2001 From: Paul-Henri Froidmont Date: Mon, 8 Dec 2025 05:19:21 +0100 Subject: [PATCH] Escape strings --- build.mill | 3 +- scalive/core/src/scalive/Escaping.scala | 91 +++++ scalive/core/src/scalive/HtmlBuilder.scala | 5 +- scalive/core/src/scalive/HtmlElement.scala | 14 +- scalive/core/src/scalive/Scalive.scala | 3 +- scalive/core/src/scalive/StaticBuilder.scala | 4 +- .../test/src/scalive/HtmlBuilderSpec.scala | 319 ++++++++++++++++++ 7 files changed, 426 insertions(+), 13 deletions(-) create mode 100644 scalive/core/src/scalive/Escaping.scala create mode 100644 scalive/core/test/src/scalive/HtmlBuilderSpec.scala diff --git a/build.mill b/build.mill index 8e0b46d..0c4c86d 100644 --- a/build.mill +++ b/build.mill @@ -21,7 +21,8 @@ trait ScalaCommon extends ScalaModule with ScalafmtModule with ScalafixModule: "-feature", "-language:implicitConversions", "-Wvalue-discard", - "-Wnonunit-statement" + "-Wnonunit-statement", + "-deprecation" ) trait PublishCommon extends PublishModule: diff --git a/scalive/core/src/scalive/Escaping.scala b/scalive/core/src/scalive/Escaping.scala new file mode 100644 index 0000000..a3b29e7 --- /dev/null +++ b/scalive/core/src/scalive/Escaping.scala @@ -0,0 +1,91 @@ +package scalive + +import java.io.StringWriter + +/** Taken from Scalatags + * https://github.com/com-lihaoyi/scalatags/blob/0024ce995f301b10a435c672ff643f2a432a7f3b/scalatags/src/scalatags/Escaping.scala + */ +object Escaping: + + private val tagRegex = "^[a-z][:\\w0-9-]*$".r + + /** Uses a regex to check if something is a valid tag name. + */ + def validTag(s: String) = tagRegex.unapplySeq(s).isDefined + + /** Check if 's' is a valid attribute name. + */ + def validAttrName(s: String): Boolean = + // this is equivalent of the regex but without a huge amount of object creation. + // original regex - ^[a-zA-Z_:][-a-zA-Z0-9_:.]*$ + // n.b. 
I know it's ugly, but it's fast + val len = s.length + if len == 0 then return false + + val sc = s.charAt(0) + val startCharValid = (sc >= 'a' && sc <= 'z') || (sc >= 'A' && sc <= 'Z') || sc == ':' + if !startCharValid then return false + + var pos = 1 + while pos < len do + val c = s.charAt(pos) + val valid = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || + c == '-' || c == ':' || c == '.' || c == '_' + if !valid then return false + pos += 1 + + true + + /** Code to escape text HTML nodes. Based on code from scala.xml + */ + def escape(text: String, s: java.io.Writer) = + // Implemented per XML spec: + // http://www.w3.org/International/questions/qa-controls + // Highly imperative code, ~2-3x faster than the previous implementation (2020-06-11) + val charsArray = text.toCharArray + val len = charsArray.size + var pos = 0 + var i = 0 + while i < len do + val c = charsArray(i) + c match + case '<' => + s.write(charsArray, pos, i - pos) + s.write("&lt;") + pos = i + 1 + case '>' => + s.write(charsArray, pos, i - pos) + s.write("&gt;") + pos = i + 1 + case '&' => + s.write(charsArray, pos, i - pos) + s.write("&amp;") + pos = i + 1 + case '"' => + s.write(charsArray, pos, i - pos) + s.write("&quot;") + pos = i + 1 + case '\n' => + case '\r' => + case '\t' => + case c if c < ' ' => + s.write(charsArray, pos, i - pos) + pos = i + 1 + case _ => + i += 1 + // Apparently this isn't technically necessary if (len - pos) == 0 as + // it doesn't cause any exception to occur in the JVM. + // The problem is that it isn't documented anywhere so I left this if here + // to make the error clear. 
+ if pos < len then s.write(charsArray, pos, len - pos) + end escape + + def escape(text: String): String = + val s = new StringWriter + escape(text, s) + s.toString +end Escaping + +extension (s: StringWriter) + def writeEscaped(text: String) = + Escaping.escape(text, s) diff --git a/scalive/core/src/scalive/HtmlBuilder.scala b/scalive/core/src/scalive/HtmlBuilder.scala index e9bf0c0..cf4bdef 100644 --- a/scalive/core/src/scalive/HtmlBuilder.scala +++ b/scalive/core/src/scalive/HtmlBuilder.scala @@ -24,13 +24,14 @@ object HtmlBuilder: case Attr.Binding(_, id, _) => strw.write(id.render(false).getOrElse("")) case Attr.JsBinding(_, jsonValue, _) => strw.write(jsonValue.render(false).getOrElse("")) case Attr.Dyn(name, value, isJson) => - strw.write(value.render(false).getOrElse("")) + if isJson then strw.write(value.render(false).getOrElse("")) + else strw.writeEscaped(value.render(false).getOrElse("")) case Attr.DynValueAsPresence(name, value) => strw.write( value.render(false).map(if _ then s" $name" else "").getOrElse("") ) case Content.Tag(el) => build(el.static, el.dynamicMods, strw) - case Content.DynText(dyn) => strw.write(dyn.render(false).getOrElse("")) + case Content.DynText(dyn) => strw.writeEscaped(dyn.render(false).getOrElse("")) case Content.DynElement(dyn) => ??? 
case Content.DynOptionElement(dyn) => dyn.render(false).foreach(_.foreach(el => build(el.static, el.dynamicMods, strw))) diff --git a/scalive/core/src/scalive/HtmlElement.scala b/scalive/core/src/scalive/HtmlElement.scala index 77c877c..cb419d4 100644 --- a/scalive/core/src/scalive/HtmlElement.scala +++ b/scalive/core/src/scalive/HtmlElement.scala @@ -121,10 +121,10 @@ object Mod: case DynValueAsPresence(name: String, value: scalive.Dyn[Boolean]) extends Attr with DynamicMod enum Content extends Mod: - case Text(text: String) extends Content with StaticMod - case Tag(el: HtmlElement) extends Content with StaticMod with DynamicMod - case DynText(dyn: Dyn[String]) extends Content with DynamicMod - case DynElement(dyn: Dyn[HtmlElement]) extends Content with DynamicMod + case Text(text: String, raw: Boolean = false) extends Content with StaticMod + case Tag(el: HtmlElement) extends Content with StaticMod with DynamicMod + case DynText(dyn: Dyn[String]) extends Content with DynamicMod + case DynElement(dyn: Dyn[HtmlElement]) extends Content with DynamicMod // TODO support arbitrary collection case DynOptionElement(dyn: Dyn[Option[HtmlElement]]) extends Content with DynamicMod case DynElementColl(dyn: Dyn[IterableOnce[HtmlElement]]) extends Content with DynamicMod @@ -139,7 +139,7 @@ extension (mod: Mod) case Attr.StaticValueAsPresence(_, _) => () case Attr.Dyn(_, value, _) => value.setUnchanged() case Attr.DynValueAsPresence(_, value) => value.setUnchanged() - case Content.Text(text) => () + case Content.Text(text, _) => () case Content.Tag(el) => el.setAllUnchanged() case Content.DynText(dyn) => dyn.setUnchanged() case Content.DynElement(dyn) => @@ -163,7 +163,7 @@ extension (mod: Mod) case Attr.JsBinding(_, json, _) => json.sync() case Attr.Dyn(_, value, _) => value.sync() case Attr.DynValueAsPresence(_, value) => value.sync() - case Content.Text(text) => () + case Content.Text(text, _) => () case Content.Tag(el) => el.syncAll() case Content.DynText(dyn) => dyn.sync() 
case Content.DynElement(dyn) => @@ -190,7 +190,7 @@ extension (mod: Mod) bindings.get(id).map(msg => _ => msg.asInstanceOf[Msg]) case Attr.Dyn(_, value, _) => None case Attr.DynValueAsPresence(_, value) => None - case Content.Text(text) => None + case Content.Text(text, _) => None case Content.Tag(el) => el.findBinding(id) case Content.DynText(dyn) => None case Content.DynElement(dyn) => dyn.currentValue.findBinding(id) diff --git a/scalive/core/src/scalive/Scalive.scala b/scalive/core/src/scalive/Scalive.scala index 6a5ee12..bbea5a2 100644 --- a/scalive/core/src/scalive/Scalive.scala +++ b/scalive/core/src/scalive/Scalive.scala @@ -9,7 +9,8 @@ import scalive.defs.tags.HtmlTags package object scalive extends HtmlTags with HtmlAttrs with ComplexHtmlKeys: - lazy val defer = htmlAttr("defer", codecs.BooleanAsAttrPresenceEncoder) + lazy val defer = htmlAttr("defer", codecs.BooleanAsAttrPresenceEncoder) + def rawHtml(html: String) = Mod.Content.Text(html, raw = true) object link: def navigate(path: String, mods: Mod*): HtmlElement = diff --git a/scalive/core/src/scalive/StaticBuilder.scala b/scalive/core/src/scalive/StaticBuilder.scala index 35f4291..7732e38 100644 --- a/scalive/core/src/scalive/StaticBuilder.scala +++ b/scalive/core/src/scalive/StaticBuilder.scala @@ -13,7 +13,7 @@ object StaticBuilder: private def buildStaticFragments(el: HtmlElement): Seq[Option[String]] = val attrs = el.attrMods.flatMap { - case Attr.Static(name, value) => List(Some(s" $name='$value'")) + case Attr.Static(name, value) => List(Some(s""" $name="${Escaping.escape(value)}"""")) case Attr.StaticValueAsPresence(name, value) => List(Some(s" $name")) case Attr.Binding(name, _, _) => List(Some(s""" $name=""""), None, Some('"'.toString)) case Attr.JsBinding(name, _, _) => List(Some(s" $name='"), None, Some("'")) @@ -23,7 +23,7 @@ object StaticBuilder: case Attr.DynValueAsPresence(_, value) => List(Some(""), None, Some("")) } val children = el.contentMods.flatMap { - case Content.Text(text) => 
List(Some(text)) + case Content.Text(text, raw) => List(Some(if raw then text else Escaping.escape(text))) case Content.Tag(el) => buildStaticFragments(el) case Content.DynText(_) => List(None) case Content.DynElement(_) => List(None) diff --git a/scalive/core/test/src/scalive/HtmlBuilderSpec.scala b/scalive/core/test/src/scalive/HtmlBuilderSpec.scala new file mode 100644 index 0000000..d3f97b9 --- /dev/null +++ b/scalive/core/test/src/scalive/HtmlBuilderSpec.scala @@ -0,0 +1,319 @@ +package scalive + +import utest.* + +object HtmlBuilderSpec extends TestSuite: + + final case class TestModel( + title: String = "title value", + otherString: String = "other string value", + bool: Boolean = false, + nestedTitle: String = "nested title value", + cls: String = "text-sm", + items: List[NestedModel] = List.empty) + final case class NestedModel(name: String, age: Int) + + val tests = Tests { + + test("Static HTML rendering") { + test("Simple div") { + val el = div("Hello World") + val result = HtmlBuilder.build(el) + assert(result == "<div>Hello World</div>") + } + + test("Nested elements") { + val el = div( + h1("Title"), + p("Content") + ) + val result = HtmlBuilder.build(el) + assert(result == "<div><h1>Title</h1><p>Content</p></div>") + } + + test("With attributes") { + val el = div(cls := "container", "Content") + val result = HtmlBuilder.build(el) + assert(result == "
Content
") + } + + test("Multiple attributes") { + val el = div( + cls := "container", + idAttr := "main", + "Content" + ) + val result = HtmlBuilder.build(el) + assert(result == "
Content
") + } + } + + test("Dynamic HTML rendering") { + test("Dynamic text") { + val model = Var(TestModel(title = "dynamic title")) + val el = h1(model(_.title)) + el.syncAll() + + val result = HtmlBuilder.build(el) + assert(result == "<h1>dynamic title</h1>") + } + + test("Dynamic attribute") { + val model = Var(TestModel(cls = "dynamic-class")) + val el = div(cls := model(_.cls), "Content") + el.syncAll() + + val result = HtmlBuilder.build(el) + assert(result == "
Content
") + } + + test("Dynamic boolean attribute") { + val model = Var(TestModel(bool = true)) + val el = div( + cls := model(_.cls), + disabled := model(_.bool), + "Content" + ) + el.syncAll() + + val result = HtmlBuilder.build(el) + assert(result == "
Content
") + } + + test("Dynamic text with update") { + val model = Var(TestModel(title = "initial")) + val el = h1(model(_.title)) + el.syncAll() + + // Initial render + var result = HtmlBuilder.build(el) + assert(result == "<h1>initial</h1>") + + // Update model + model.update(_.copy(title = "updated")) + el.syncAll() + + result = HtmlBuilder.build(el) + assert(result == "<h1>updated</h1>") + } + } + + test("Complex HTML rendering") { + test("Form with dynamic fields") { + val model = Var( + TestModel( + title = "Form Title", + cls = "form-container" + ) + ) + + val el = form( + cls := model(_.cls), + div( + label("Title:"), + input(value := model(_.title)) + ), + button("Submit") + ) + el.syncAll() + + val result = HtmlBuilder.build(el) + val expected = + "
" + assert(result == expected) + } + + test("List with dynamic content") { + val model = Var( + TestModel( + items = List( + NestedModel("Item 1", 10), + NestedModel("Item 2", 20) + ) + ) + ) + + val el = ul( + model(_.items).splitByIndex((_, elem) => + li( + elem(_.name), + " (", + elem(_.age.toString), + ")" + ) + ) + ) + el.syncAll() + + val result = HtmlBuilder.build(el) + val expected = "" + assert(result == expected) + } + } + + test("Root HTML rendering") { + test("With doctype") { + val el = div("Content") + val result = HtmlBuilder.build(el, isRoot = true) + val expected = "
Content
" + assert(result == expected) + } + + test("Without doctype") { + val el = div("Content") + val result = HtmlBuilder.build(el, isRoot = false) + val expected = "<div>Content</div>" + assert(result == expected) + } + } + + test("Edge cases") { + test("Empty content") { + val el = div("") + val result = HtmlBuilder.build(el) + assert(result == "<div></div>") + } + + test("Whitespace handling") { + val el = div(" Hello World ") + val result = HtmlBuilder.build(el) + assert(result == "
Hello World
") + } + + test("Special characters") { + val el = div("Hello & World ") + val result = HtmlBuilder.build(el) + assert(result == "
<script>alert('xss')</script>
") + assert(!result.contains("" + val model = Var(TestModel(title = maliciousInput)) + val el = h1(model(_.title)) + el.syncAll() + + val result = HtmlBuilder.build(el) + assert(result.contains("<script>")) + assert(!result.contains("" + val el = div(title := maliciousInput) + val result = HtmlBuilder.build(el) + assert(result.contains("<script>")) + assert(!result.contains("", + "More safe text" + ) + val result = HtmlBuilder.build(el) + assert(result.contains("<script>")) + assert(!result.contains("") + val result = HtmlBuilder.build(el) + assert(result.contains("<script>")) + assert(!result.contains("")) + val result = HtmlBuilder.build(el) + assert(result == "
") + assert(result.contains("" + val el = div(rawHtml(maliciousInput)) + val result = HtmlBuilder.build(el) + // This demonstrates that rawHtml bypasses security - use only with trusted content + assert(result.contains("")) + assert(!result.contains("<script>")) + } + + test("Raw HTML vs escaped HTML comparison") { + val dangerousContent = "" + + // Escaped version (safe) + val escapedEl = div(dangerousContent) + val escapedResult = HtmlBuilder.build(escapedEl) + assert(escapedResult.contains("<script>")) + assert(!escapedResult.contains("