Escape strings

This commit is contained in:
Paul-Henri Froidmont 2025-12-08 05:19:21 +01:00
parent 4c9dfb2533
commit a542446739
Signed by: phfroidmont
GPG key ID: BE948AFD7E7873BE
7 changed files with 426 additions and 13 deletions

View file

@ -21,7 +21,8 @@ trait ScalaCommon extends ScalaModule with ScalafmtModule with ScalafixModule:
"-feature", "-feature",
"-language:implicitConversions", "-language:implicitConversions",
"-Wvalue-discard", "-Wvalue-discard",
"-Wnonunit-statement" "-Wnonunit-statement",
"-deprecation"
) )
trait PublishCommon extends PublishModule: trait PublishCommon extends PublishModule:

View file

@ -0,0 +1,91 @@
package scalive
import java.io.StringWriter
/** HTML escaping and tag/attribute name validation helpers.
  *
  * Taken from Scalatags
  * https://github.com/com-lihaoyi/scalatags/blob/0024ce995f301b10a435c672ff643f2a432a7f3b/scalatags/src/scalatags/Escaping.scala
  */
object Escaping:

  private val tagRegex = "^[a-z][:\\w0-9-]*$".r

  /** Checks whether `s` is a valid tag name.
    *
    * A valid tag name starts with a lowercase ASCII letter, followed by any number of word
    * characters, digits, `:` or `-`.
    */
  def validTag(s: String): Boolean = tagRegex.unapplySeq(s).isDefined

  /** Checks whether `s` is a valid attribute name.
    *
    * Hand-rolled equivalent of the regex `^[a-zA-Z_:][-a-zA-Z0-9_:.]*$`, written as a plain
    * character scan so that no matcher objects are allocated per call.
    *
    * @param s
    *   candidate attribute name
    * @return
    *   `true` if `s` is non-empty, starts with an ASCII letter, `_` or `:`, and every following
    *   character is an ASCII letter, a digit, `-`, `_`, `:` or `.`
    */
  def validAttrName(s: String): Boolean =
    def isAsciiLetter(c: Char): Boolean = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
    // '_' is a legal first character in the reference regex; it used to be rejected here.
    def isStartChar(c: Char): Boolean = isAsciiLetter(c) || c == '_' || c == ':'
    def isNameChar(c: Char): Boolean =
      isStartChar(c) || (c >= '0' && c <= '9') || c == '-' || c == '.'

    val len = s.length
    if len == 0 || !isStartChar(s.charAt(0)) then false
    else
      // Advance while characters are valid; the name is valid iff the scan reaches the end.
      var pos = 1
      while pos < len && isNameChar(s.charAt(pos)) do pos += 1
      pos == len

  /** Writes `text` to `s` with HTML-significant characters escaped. Based on code from scala.xml.
    *
    * `<`, `>`, `&` and `"` are replaced by `&lt;`, `&gt;`, `&amp;` and `&quot;`. Control characters
    * below U+0020 are dropped, except for `\n`, `\r` and `\t` which are written unchanged. Single
    * quotes are NOT escaped, so attribute values produced with this method must be wrapped in
    * double quotes.
    *
    * @param text
    *   raw text to escape
    * @param s
    *   writer receiving the escaped output
    */
  def escape(text: String, s: java.io.Writer): Unit =
    // Implemented per XML spec:
    // http://www.w3.org/International/questions/qa-controls
    // Deliberately imperative: unescaped runs are copied in bulk instead of char by char.
    val chars = text.toCharArray
    val len   = chars.length
    var pos   = 0 // start of the pending run of characters that need no escaping
    var i     = 0
    while i < len do
      chars(i) match
        case '<' =>
          s.write(chars, pos, i - pos)
          s.write("&lt;")
          pos = i + 1
        case '>' =>
          s.write(chars, pos, i - pos)
          s.write("&gt;")
          pos = i + 1
        case '&' =>
          s.write(chars, pos, i - pos)
          s.write("&amp;")
          pos = i + 1
        case '"' =>
          s.write(chars, pos, i - pos)
          s.write("&quot;")
          pos = i + 1
        // Whitespace control characters are kept: they stay part of the pending run.
        case '\n' | '\r' | '\t' => ()
        // Any other control character is dropped: flush the run and skip over it.
        case ctrl if ctrl < ' ' =>
          s.write(chars, pos, i - pos)
          pos = i + 1
        case _ => ()
      i += 1
    // Flush the trailing run. The guard avoids relying on the undocumented behaviour of
    // zero-length writes at offset == array length.
    if pos < len then s.write(chars, pos, len - pos)
  end escape

  /** Returns `text` with HTML-significant characters escaped.
    *
    * Convenience overload of `escape(text, writer)` that collects the output into a `String`.
    */
  def escape(text: String): String =
    val out = new StringWriter
    escape(text, out)
    out.toString

end Escaping
/** Appends `text` to this writer with HTML-significant characters escaped, by delegating to
  * `Escaping.escape`.
  */
extension (s: StringWriter) def writeEscaped(text: String): Unit = Escaping.escape(text, s)

View file

@ -24,13 +24,14 @@ object HtmlBuilder:
case Attr.Binding(_, id, _) => strw.write(id.render(false).getOrElse("")) case Attr.Binding(_, id, _) => strw.write(id.render(false).getOrElse(""))
case Attr.JsBinding(_, jsonValue, _) => strw.write(jsonValue.render(false).getOrElse("")) case Attr.JsBinding(_, jsonValue, _) => strw.write(jsonValue.render(false).getOrElse(""))
case Attr.Dyn(name, value, isJson) => case Attr.Dyn(name, value, isJson) =>
strw.write(value.render(false).getOrElse("")) if isJson then strw.write(value.render(false).getOrElse(""))
else strw.writeEscaped(value.render(false).getOrElse(""))
case Attr.DynValueAsPresence(name, value) => case Attr.DynValueAsPresence(name, value) =>
strw.write( strw.write(
value.render(false).map(if _ then s" $name" else "").getOrElse("") value.render(false).map(if _ then s" $name" else "").getOrElse("")
) )
case Content.Tag(el) => build(el.static, el.dynamicMods, strw) case Content.Tag(el) => build(el.static, el.dynamicMods, strw)
case Content.DynText(dyn) => strw.write(dyn.render(false).getOrElse("")) case Content.DynText(dyn) => strw.writeEscaped(dyn.render(false).getOrElse(""))
case Content.DynElement(dyn) => ??? case Content.DynElement(dyn) => ???
case Content.DynOptionElement(dyn) => case Content.DynOptionElement(dyn) =>
dyn.render(false).foreach(_.foreach(el => build(el.static, el.dynamicMods, strw))) dyn.render(false).foreach(_.foreach(el => build(el.static, el.dynamicMods, strw)))

View file

@ -121,10 +121,10 @@ object Mod:
case DynValueAsPresence(name: String, value: scalive.Dyn[Boolean]) extends Attr with DynamicMod case DynValueAsPresence(name: String, value: scalive.Dyn[Boolean]) extends Attr with DynamicMod
enum Content extends Mod: enum Content extends Mod:
case Text(text: String) extends Content with StaticMod case Text(text: String, raw: Boolean = false) extends Content with StaticMod
case Tag(el: HtmlElement) extends Content with StaticMod with DynamicMod case Tag(el: HtmlElement) extends Content with StaticMod with DynamicMod
case DynText(dyn: Dyn[String]) extends Content with DynamicMod case DynText(dyn: Dyn[String]) extends Content with DynamicMod
case DynElement(dyn: Dyn[HtmlElement]) extends Content with DynamicMod case DynElement(dyn: Dyn[HtmlElement]) extends Content with DynamicMod
// TODO support arbitrary collection // TODO support arbitrary collection
case DynOptionElement(dyn: Dyn[Option[HtmlElement]]) extends Content with DynamicMod case DynOptionElement(dyn: Dyn[Option[HtmlElement]]) extends Content with DynamicMod
case DynElementColl(dyn: Dyn[IterableOnce[HtmlElement]]) extends Content with DynamicMod case DynElementColl(dyn: Dyn[IterableOnce[HtmlElement]]) extends Content with DynamicMod
@ -139,7 +139,7 @@ extension (mod: Mod)
case Attr.StaticValueAsPresence(_, _) => () case Attr.StaticValueAsPresence(_, _) => ()
case Attr.Dyn(_, value, _) => value.setUnchanged() case Attr.Dyn(_, value, _) => value.setUnchanged()
case Attr.DynValueAsPresence(_, value) => value.setUnchanged() case Attr.DynValueAsPresence(_, value) => value.setUnchanged()
case Content.Text(text) => () case Content.Text(text, _) => ()
case Content.Tag(el) => el.setAllUnchanged() case Content.Tag(el) => el.setAllUnchanged()
case Content.DynText(dyn) => dyn.setUnchanged() case Content.DynText(dyn) => dyn.setUnchanged()
case Content.DynElement(dyn) => case Content.DynElement(dyn) =>
@ -163,7 +163,7 @@ extension (mod: Mod)
case Attr.JsBinding(_, json, _) => json.sync() case Attr.JsBinding(_, json, _) => json.sync()
case Attr.Dyn(_, value, _) => value.sync() case Attr.Dyn(_, value, _) => value.sync()
case Attr.DynValueAsPresence(_, value) => value.sync() case Attr.DynValueAsPresence(_, value) => value.sync()
case Content.Text(text) => () case Content.Text(text, _) => ()
case Content.Tag(el) => el.syncAll() case Content.Tag(el) => el.syncAll()
case Content.DynText(dyn) => dyn.sync() case Content.DynText(dyn) => dyn.sync()
case Content.DynElement(dyn) => case Content.DynElement(dyn) =>
@ -190,7 +190,7 @@ extension (mod: Mod)
bindings.get(id).map(msg => _ => msg.asInstanceOf[Msg]) bindings.get(id).map(msg => _ => msg.asInstanceOf[Msg])
case Attr.Dyn(_, value, _) => None case Attr.Dyn(_, value, _) => None
case Attr.DynValueAsPresence(_, value) => None case Attr.DynValueAsPresence(_, value) => None
case Content.Text(text) => None case Content.Text(text, _) => None
case Content.Tag(el) => el.findBinding(id) case Content.Tag(el) => el.findBinding(id)
case Content.DynText(dyn) => None case Content.DynText(dyn) => None
case Content.DynElement(dyn) => dyn.currentValue.findBinding(id) case Content.DynElement(dyn) => dyn.currentValue.findBinding(id)

View file

@ -9,7 +9,8 @@ import scalive.defs.tags.HtmlTags
package object scalive extends HtmlTags with HtmlAttrs with ComplexHtmlKeys: package object scalive extends HtmlTags with HtmlAttrs with ComplexHtmlKeys:
lazy val defer = htmlAttr("defer", codecs.BooleanAsAttrPresenceEncoder) lazy val defer = htmlAttr("defer", codecs.BooleanAsAttrPresenceEncoder)
def rawHtml(html: String) = Mod.Content.Text(html, raw = true)
object link: object link:
def navigate(path: String, mods: Mod*): HtmlElement = def navigate(path: String, mods: Mod*): HtmlElement =

View file

@ -13,7 +13,7 @@ object StaticBuilder:
private def buildStaticFragments(el: HtmlElement): Seq[Option[String]] = private def buildStaticFragments(el: HtmlElement): Seq[Option[String]] =
val attrs = el.attrMods.flatMap { val attrs = el.attrMods.flatMap {
case Attr.Static(name, value) => List(Some(s" $name='$value'")) case Attr.Static(name, value) => List(Some(s""" $name="${Escaping.escape(value)}""""))
case Attr.StaticValueAsPresence(name, value) => List(Some(s" $name")) case Attr.StaticValueAsPresence(name, value) => List(Some(s" $name"))
case Attr.Binding(name, _, _) => List(Some(s""" $name=""""), None, Some('"'.toString)) case Attr.Binding(name, _, _) => List(Some(s""" $name=""""), None, Some('"'.toString))
case Attr.JsBinding(name, _, _) => List(Some(s" $name='"), None, Some("'")) case Attr.JsBinding(name, _, _) => List(Some(s" $name='"), None, Some("'"))
@ -23,7 +23,7 @@ object StaticBuilder:
case Attr.DynValueAsPresence(_, value) => List(Some(""), None, Some("")) case Attr.DynValueAsPresence(_, value) => List(Some(""), None, Some(""))
} }
val children = el.contentMods.flatMap { val children = el.contentMods.flatMap {
case Content.Text(text) => List(Some(text)) case Content.Text(text, raw) => List(Some(if raw then text else Escaping.escape(text)))
case Content.Tag(el) => buildStaticFragments(el) case Content.Tag(el) => buildStaticFragments(el)
case Content.DynText(_) => List(None) case Content.DynText(_) => List(None)
case Content.DynElement(_) => List(None) case Content.DynElement(_) => List(None)

View file

@ -0,0 +1,319 @@
package scalive
import utest.*
/** Rendering tests for `HtmlBuilder.build`: static and dynamic output, root doctype handling,
  * escaping of text and attribute values, and the `rawHtml` opt-out.
  */
object HtmlBuilderSpec extends TestSuite:

  /** Model backing the dynamic tests; defaults let each test override only what it needs. */
  final case class TestModel(
    title: String = "title value",
    otherString: String = "other string value",
    bool: Boolean = false,
    nestedTitle: String = "nested title value",
    cls: String = "text-sm",
    items: List[NestedModel] = List.empty)

  /** Item type used by the dynamic list test. */
  final case class NestedModel(name: String, age: Int)

  val tests = Tests {

    test("Static HTML rendering") {
      test("Simple div") {
        val el     = div("Hello World")
        val result = HtmlBuilder.build(el)
        assert(result == "<div>Hello World</div>")
      }
      test("Nested elements") {
        val el = div(
          h1("Title"),
          p("Content")
        )
        val result = HtmlBuilder.build(el)
        assert(result == "<div><h1>Title</h1><p>Content</p></div>")
      }
      test("With attributes") {
        val el     = div(cls := "container", "Content")
        val result = HtmlBuilder.build(el)
        assert(result == "<div class=\"container\">Content</div>")
      }
      test("Multiple attributes") {
        val el = div(
          cls    := "container",
          idAttr := "main",
          "Content"
        )
        val result = HtmlBuilder.build(el)
        assert(result == "<div class=\"container\" id=\"main\">Content</div>")
      }
    }

    test("Dynamic HTML rendering") {
      test("Dynamic text") {
        val model = Var(TestModel(title = "dynamic title"))
        val el    = h1(model(_.title))
        el.syncAll()
        val result = HtmlBuilder.build(el)
        assert(result == "<h1>dynamic title</h1>")
      }
      test("Dynamic attribute") {
        val model = Var(TestModel(cls = "dynamic-class"))
        val el    = div(cls := model(_.cls), "Content")
        el.syncAll()
        val result = HtmlBuilder.build(el)
        assert(result == "<div class=\"dynamic-class\">Content</div>")
      }
      test("Dynamic boolean attribute") {
        val model = Var(TestModel(bool = true))
        val el = div(
          cls      := model(_.cls),
          disabled := model(_.bool),
          "Content"
        )
        el.syncAll()
        val result = HtmlBuilder.build(el)
        assert(result == "<div class=\"text-sm\" disabled>Content</div>")
      }
      test("Dynamic text with update") {
        val model = Var(TestModel(title = "initial"))
        val el    = h1(model(_.title))
        el.syncAll()
        // Initial render
        var result = HtmlBuilder.build(el)
        assert(result == "<h1>initial</h1>")
        // Update model
        model.update(_.copy(title = "updated"))
        el.syncAll()
        result = HtmlBuilder.build(el)
        assert(result == "<h1>updated</h1>")
      }
    }

    test("Complex HTML rendering") {
      test("Form with dynamic fields") {
        val model = Var(
          TestModel(
            title = "Form Title",
            cls = "form-container"
          )
        )
        val el = form(
          cls := model(_.cls),
          div(
            label("Title:"),
            input(value := model(_.title))
          ),
          button("Submit")
        )
        el.syncAll()
        val result = HtmlBuilder.build(el)
        val expected =
          "<form class=\"form-container\"><div><label>Title:</label><input value=\"Form Title\"/></div><button>Submit</button></form>"
        assert(result == expected)
      }
      test("List with dynamic content") {
        val model = Var(
          TestModel(
            items = List(
              NestedModel("Item 1", 10),
              NestedModel("Item 2", 20)
            )
          )
        )
        val el = ul(
          model(_.items).splitByIndex((_, elem) =>
            li(
              elem(_.name),
              " (",
              elem(_.age.toString),
              ")"
            )
          )
        )
        el.syncAll()
        val result   = HtmlBuilder.build(el)
        val expected = "<ul><li>Item 1 (10)</li><li>Item 2 (20)</li></ul>"
        assert(result == expected)
      }
    }

    test("Root HTML rendering") {
      test("With doctype") {
        val el       = div("Content")
        val result   = HtmlBuilder.build(el, isRoot = true)
        val expected = "<!doctype html><div>Content</div>"
        assert(result == expected)
      }
      test("Without doctype") {
        val el       = div("Content")
        val result   = HtmlBuilder.build(el, isRoot = false)
        val expected = "<div>Content</div>"
        assert(result == expected)
      }
    }

    test("Edge cases") {
      test("Empty content") {
        val el     = div("")
        val result = HtmlBuilder.build(el)
        assert(result == "<div></div>")
      }
      test("Whitespace handling") {
        val el     = div(" Hello World ")
        val result = HtmlBuilder.build(el)
        assert(result == "<div> Hello World </div>")
      }
      test("Special characters") {
        val el     = div("Hello & World <script>")
        val result = HtmlBuilder.build(el)
        assert(result == "<div>Hello &amp; World &lt;script&gt;</div>")
      }

      test("XSS prevention") {
        test("Script tags in content are escaped") {
          val el     = div("<script>alert('xss')</script>")
          val result = HtmlBuilder.build(el)
          assert(result == "<div>&lt;script&gt;alert('xss')&lt;/script&gt;</div>")
          assert(!result.contains("<script>"))
        }
        test("Script tags in dynamic content are escaped") {
          val maliciousInput = "<script>alert('xss')</script>"
          val model          = Var(TestModel(title = maliciousInput))
          val el             = h1(model(_.title))
          el.syncAll()
          val result = HtmlBuilder.build(el)
          assert(result.contains("&lt;script&gt;"))
          assert(!result.contains("<script>"))
        }
        test("Angle brackets in attributes are escaped") {
          val maliciousInput = "<script>alert('xss')</script>"
          val el             = div(title := maliciousInput)
          val result         = HtmlBuilder.build(el)
          assert(result.contains("&lt;script&gt;"))
          assert(!result.contains("<script>"))
        }
        test("Mixed content with scripts") {
          val el = div(
            "Safe text",
            "<script>alert('xss')</script>",
            "More safe text"
          )
          val result = HtmlBuilder.build(el)
          assert(result.contains("&lt;script&gt;"))
          assert(!result.contains("<script>"))
        }
        test("Style tags are escaped") {
          val el     = div("<style>body { background: red; }</style>")
          val result = HtmlBuilder.build(el)
          assert(result.contains("&lt;style&gt;"))
          assert(!result.contains("<style>"))
        }
        test("Iframe tags are escaped") {
          val el     = div("<iframe src='javascript:alert(\"xss\")'></iframe>")
          val result = HtmlBuilder.build(el)
          assert(result.contains("&lt;iframe"))
          // The input tag carries attributes, so look for the unclosed "<iframe" prefix:
          // "<iframe>" could never occur here and would make this check vacuous.
          assert(!result.contains("<iframe"))
        }
        test("JavaScript protocol in attributes") {
          val el     = div(href := "javascript:alert('xss')")
          val result = HtmlBuilder.build(el)
          // This tests if javascript: protocol is allowed in href attributes
          assert(result.contains("javascript:alert"))
        }
        test("Unicode-based attacks") {
          val el     = div("\u202E\u202D<script>alert('xss')</script>")
          val result = HtmlBuilder.build(el)
          assert(result.contains("&lt;script&gt;"))
          assert(!result.contains("<script>"))
        }
        test("HTML entity encoding bypass attempts") {
          val el     = div("&lt;script&gt;alert('xss')&lt;/script&gt;")
          val result = HtmlBuilder.build(el)
          // Should not decode entities, should display them as literal text
          assert(result.contains("&amp;lt;script&amp;gt;"))
        }

        test("Raw HTML rendering without escaping") {
          test("Basic raw HTML") {
            val el     = div(rawHtml("<span>Raw HTML</span>"))
            val result = HtmlBuilder.build(el)
            // Should render the HTML as-is without escaping
            assert(result == "<div><span>Raw HTML</span></div>")
            assert(result.contains("<span>"))
            assert(!result.contains("&lt;span&gt;"))
          }
          test("Raw HTML with scripts") {
            val el     = div(rawHtml("<script>alert('raw')</script>"))
            val result = HtmlBuilder.build(el)
            assert(result == "<div><script>alert('raw')</script></div>")
            assert(result.contains("<script>"))
            assert(!result.contains("&lt;script&gt;"))
          }
          test("Raw HTML with nested elements") {
            val el = div(
              "Prefix: ",
              rawHtml("<strong>Bold</strong> <em>Italic</em>"),
              " Suffix"
            )
            val result = HtmlBuilder.build(el)
            assert(result == "<div>Prefix: <strong>Bold</strong> <em>Italic</em> Suffix</div>")
          }
          test("Raw HTML security warning") {
            val maliciousInput = "<script>alert('XSS via rawHtml')</script>"
            val el             = div(rawHtml(maliciousInput))
            val result         = HtmlBuilder.build(el)
            // This demonstrates that rawHtml bypasses security - use only with trusted content
            assert(result.contains("<script>alert('XSS via rawHtml')</script>"))
            assert(!result.contains("&lt;script&gt;"))
          }
          test("Raw HTML vs escaped HTML comparison") {
            val dangerousContent = "<script>alert('test')</script>"
            // Escaped version (safe)
            val escapedEl     = div(dangerousContent)
            val escapedResult = HtmlBuilder.build(escapedEl)
            assert(escapedResult.contains("&lt;script&gt;"))
            assert(!escapedResult.contains("<script>"))
            // Raw version (unsafe but intentional)
            val rawEl     = div(rawHtml(dangerousContent))
            val rawResult = HtmlBuilder.build(rawEl)
            assert(rawResult.contains("<script>"))
            assert(!rawResult.contains("&lt;script&gt;"))
          }
        }
      }
    }
  }

end HtmlBuilderSpec