choonkeat / html-data-extra / HtmlData.Extra

Functions to

Convert HtmlData.Html values into String (toTextHtml, toTextPlain) or into a regular elm/html Html msg (toElmHtml).

toTextHtml : HtmlData.Html msg -> String

Returns String in text/html format, suitable for use in email or static browser rendering.

import HtmlData exposing (..)
import HtmlData.Attributes exposing (..)

div [ classList
        [ ("hello", True)
        , ("world", True )
        , ("there", False )
        ]
    ]
    [ button [ id "Decrement", name "buttonDecrement" ] [ text "-" ]
    , div [] [ text ("Hello " ++ String.fromInt 1999) ]
    , button [ id "Increment", name "buttonIncrement" ] [ text "+" ]
    ]
    |> toTextHtml
--> "<div class=\"hello&#32;world\"><button id=\"Decrement\" name=\"buttonDecrement\">-</button><div>Hello&#32;1999</div><button id=\"Increment\" name=\"buttonIncrement\">&#43;</button></div>"

text "<bad> content"
|> toTextHtml
--> "&lt;bad&gt;&#32;content"

toTextPlain : TextPlainConfig msg -> HtmlData.Html msg -> String

Returns String in text/plain format, suitable for use in email or console output.

import HtmlData exposing (..)
import HtmlData.Attributes exposing (..)


div [ class "hello" ]
    [ p [] [ text "Hi Bob," ]
    , ol []
        [ li [] [ text "Do this" ]
        , li [] [ a [ href "https://example.com" ] [ text "Go here" ] ]
        ]
    ]
    |> toTextPlain defaultTextPlainConfig
--> String.join "\n"
-->     [ "Hi Bob,"
-->     , ""
-->     , "    1. Do this"
-->     , ""
-->     , "    2. Go here https://example.com "
-->     ]

toElmHtml : HtmlData.Html msg -> Html msg

Converts into a regular elm/html Html msg

Functions from

fromHtmlParserNodes : List Html.Parser.Node -> List (HtmlData.Html msg)

Converts from Html.Parser.Node into HtmlData.Html

We could achieve String -> List (HtmlData.Html msg) by

  1. combining with Html.Parser.run

  2. adding a fallback value for the error case

Like this

import Html.Parser
import HtmlData exposing (..)
import HtmlData.Attributes exposing (..)

fromString : String -> List (Html msg)
fromString str =
    case Html.Parser.run str of
        Err err ->
            [ text (Debug.toString err) ]
        --
        Ok nodes ->
            fromHtmlParserNodes nodes

fromString "<p class=\"hello world\"><b>young</b> and <em>dangerous</em></p>"
--> [Element "p" [Attribute "class" "hello world"] [Element "b" [] [Text "young"],Text " and ",Element "em" [] [Text "dangerous"]]]

Configs

Default configurations for how content is sanitized for toTextHtml, and how layout is done for toTextPlain.


type alias SanitizeConfig =
{ urlAttributes : List String
, removedAttributes : List String
, isAllowedUrl : String -> Basics.Bool 
}

Config for sanitization of content (elements and attributes)

defaultSanitizeConfig : SanitizeConfig

Default SanitizeConfig provided out of the box


type alias TextPlainConfig msg =
{ textlinkFromHtml : List (HtmlData.Attributes.Attribute msg) -> List (HtmlData.Html msg) -> String }

Config for converting html to text

defaultTextPlainConfig : TextPlainConfig msg

Default TextPlainConfig provided out of the box

Note: a trailing space is appended to the string so that any punctuation that follows is not mistaken for part of the URL.

import HtmlData exposing (..)
import HtmlData.Attributes exposing (..)

defaultTextPlainConfig.textlinkFromHtml
    [ id "some-id"
    , href "https://example.com/url"
    , src "https://example.com/src"
    ]
    [ text "Hello World "
    , b [] [ text "Everyone!" ]
    , text " How are you?"
    ]
--> "Hello World Everyone! How are you? https://example.com/url "

defaultTextPlainConfig.textlinkFromHtml
    [ href "https://example.com/url" ]
    [ text "Click here"
    ]
--> "Click here https://example.com/url "

defaultTextPlainConfig.textlinkFromHtml
    [ href "https://example.com/url" ]
    [ text "See https://example.com/url"
    ]
--> "See https://example.com/url "

defaultTextPlainConfig.textlinkFromHtml
    [ href "https://example.com/url" ]
    [ text "https://example.com/url"
    ]
--> "https://example.com/url "

escapeHtml : String -> String

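Escapes a String for safe inclusion in text/html by replacing characters such as <, >, ", ', =, (, ) and spaces with HTML entities. See http://wonko.com/post/html-escaping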

"<a href=\"/user/foo\" onmouseover=\"alert(1)\">foo\" onmouseover=\"alert(1)</a>"
|> escapeHtml
--> "&lt;a&#32;href&#61;&quot;/user/foo&quot;&#32;onmouseover&#61;&quot;alert&#40;1&#41;&quot;&gt;foo&quot;&#32;onmouseover&#61;&quot;alert&#40;1&#41;&lt;/a&gt;"

"<a href='/user/foo' onmouseover='alert(1)'>foo' onmouseover='alert(1)</a>"
|> escapeHtml
--> "&lt;a&#32;href&#61;&#39;/user/foo&#39;&#32;onmouseover&#61;&#39;alert&#40;1&#41;&#39;&gt;foo&#39;&#32;onmouseover&#61;&#39;alert&#40;1&#41;&lt;/a&gt;"

sanitize : SanitizeConfig -> String -> Maybe String

Given some String, run it through a sanitizer and get back a safe String that we can use as text/html.

sanitize defaultSanitizeConfig """<h1 class="javascript:yo"> hello </h1>"""
--> Just "<h1 class=\"javascript:yo\">&#32;hello&#32;</h1>"

sanitize defaultSanitizeConfig """<a onclick='yo' data-other='yo' href="javascript :alert('Hi')">Cli>ckMe</a><script>alert("hello");</script>"""
--> Just "<a data-other=\"yo\">Cli&gt;ckMe</a>"

sanitize defaultSanitizeConfig """<b onmouseover=alert('Wufff!')>click me!</b>"""
--> Nothing

sanitize defaultSanitizeConfig """blah"/><script>alert("hello");</script>"""
--> Just "blah&quot;/&gt;"

sanitize defaultSanitizeConfig """<b onmouseover=alert(‘XSS!‘)></b>"""
--> Just "<b></b>"

sanitize defaultSanitizeConfig """<body style="javascript:yo" onload=alert(‘something’)></body>"""
--> Just "<body></body>"

sanitize defaultSanitizeConfig """<script>alert("hello");</script>"""
--> Nothing

sanitize defaultSanitizeConfig """<scr<script>ipt>alert(‘XSS’)</script>"""
--> Nothing

sanitize defaultSanitizeConfig """<SCRIPT>yo</SCRIPT>"""
--> Nothing

sanitize defaultSanitizeConfig """<IMG SRC=j&#X41vascript:alert('test2')>"""
--> Nothing

sanitize defaultSanitizeConfig """<IMG SRC="j&#X41vascript:alert('test2')">"""
--> Just "<img>"

sanitize defaultSanitizeConfig """<a onclick='yo' href="javascript :alert('Hi')">ClickMe</a><scr<script>ipt>alert("hello");</script>"""
--> Nothing

sanitize defaultSanitizeConfig """<img src="data:text/html;base64,PHNjcmlwdD5hbGVydCgndGVzdDMnKTwvc2NyaXB0Pg">"""
--> Just "<img>"

sanitize defaultSanitizeConfig """< h1>strict</h1>"""
--> Nothing

sanitize defaultSanitizeConfig """<h1>strict</ h1>"""
--> Nothing

sanitize defaultSanitizeConfig ""
--> Nothing

More ~tests~ examples

import HtmlData exposing (..)
import HtmlData.Attributes exposing (..)

div []
    [ h1 [] [ text "Block-level elements" ]
    , p []
        [ text "In this article, we'll examine HTML block-level elements and how they differ from "
        , a [ href "https://developer.mozilla.org/en-US/docs/Web/HTML/Inline_elements" ] [ text "inline-level elements" ]
        , text "."
        ]
    , p []
        [ text "HTML ("
        , b [] [ text "Hypertext Markup Language" ]
        , text ") elements ... by CSS in the "
        , a [ href "https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Flow_Layout" ] [ text "Flow Layout" ]
        , text ". A Block-level element occupies ... contents, thereby creating a \"block\"."
        , aside []
            [ strong [] [ text "Note:" ]
            , text " A block-level element always starts on a new line and ... as it can)."
            ]
        , h3 [] [ text "See also" ]
        , ol []
            [ li [] [ a [ href "" ] [ text "Inline elements" ] ]
            , li [] [ a [ href "" ] [ text "display" ] ]
            , li [] [ a [ href "" ] [ text "Block and Inline Layout in Normal Flow" ] ]
            ]
        ]
    ]
    |> toTextHtml
--> "<div><h1>Block-level&#32;elements</h1><p>In&#32;this&#32;article,&#32;we&#39;ll&#32;examine&#32;HTML&#32;block-level&#32;elements&#32;and&#32;how&#32;they&#32;differ&#32;from&#32;<a href=\"https://developer.mozilla.org/en-US/docs/Web/HTML/Inline_elements\">inline-level&#32;elements</a>.</p><p>HTML&#32;&#40;<b>Hypertext&#32;Markup&#32;Language</b>&#41;&#32;elements&#32;...&#32;by&#32;CSS&#32;in&#32;the&#32;<a href=\"https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Flow_Layout\">Flow&#32;Layout</a>.&#32;A&#32;Block-level&#32;element&#32;occupies&#32;...&#32;contents,&#32;thereby&#32;creating&#32;a&#32;&quot;block&quot;.<aside><strong>Note:</strong>&#32;A&#32;block-level&#32;element&#32;always&#32;starts&#32;on&#32;a&#32;new&#32;line&#32;and&#32;...&#32;as&#32;it&#32;can&#41;.</aside><h3>See&#32;also</h3><ol><li><a href=\"\">Inline&#32;elements</a></li><li><a href=\"\">display</a></li><li><a href=\"\">Block&#32;and&#32;Inline&#32;Layout&#32;in&#32;Normal&#32;Flow</a></li></ol></p></div>"


-- https://developer.mozilla.org/en-US/docs/Web/HTML/Element/dd
div []
    [ p []
        [ text "Cryptids of Cornwall:" ]
    , dl []
        [ dt []
            [ text "Beast of Bodmin" ]
        , dd []
            [ text "A large feline inhabiting Bodmin Moor." ]
        , dt []
            [ text "Morgawr" ]
        , dd []
            [ text "A sea serpent." ]
        , dt []
            [ text "Owlman" ]
        , dd []
            [ text "A giant owl-like creature." ]
        ]
    ]
    |> toTextHtml
--> "<div><p>Cryptids&#32;of&#32;Cornwall:</p><dl><dt>Beast&#32;of&#32;Bodmin</dt><dd>A&#32;large&#32;feline&#32;inhabiting&#32;Bodmin&#32;Moor.</dd><dt>Morgawr</dt><dd>A&#32;sea&#32;serpent.</dd><dt>Owlman</dt><dd>A&#32;giant&#32;owl-like&#32;creature.</dd></dl></div>"



div []
    [ h1 [] [ text "Block-level elements" ]
    , p []
        [ text "In this article, we'll examine HTML block-level elements and how they differ from "
        , a [ href "https://developer.mozilla.org/en-US/docs/Web/HTML/Inline_elements" ] [ text "inline-level elements" ]
        , text "."
        ]
    , p []
        [ text "HTML ("
        , b [] [ text "Hypertext Markup Language" ]
        , text ") elements ... by CSS in the "
        , a [ href "https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Flow_Layout" ] [ text "Flow Layout" ]
        , text ". A Block-level element occupies ... contents, thereby creating a \"block\"."
        , aside []
            [ strong [] [ text "Note:" ]
            , text " A block-level element always starts on a new line and ... as it can)."
            ]
        , h3 [] [ text "See also" ]
        , ol []
            [ li [] [ a [ href "" ] [ text "Inline elements" ] ]
            , li [] [ a [ href "" ] [ text "display" ] ]
            , li [] [ a [ href "" ] [ text "Block and Inline Layout in Normal Flow" ] ]
            ]
        ]
    ]
    |> toTextPlain defaultTextPlainConfig
--> String.join "\n"
-->     [ "Block-level elements"
-->     , ""
-->     , "In this article, we'll examine HTML block-level elements and how they differ from inline-level elements https://developer.mozilla.org/en-US/docs/Web/HTML/Inline_elements ."
-->     , ""
-->     , "HTML (Hypertext Markup Language) elements ... by CSS in the Flow Layout https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Flow_Layout . A Block-level element occupies ... contents, thereby creating a \"block\"."
-->     , ""
-->     , "Note: A block-level element always starts on a new line and ... as it can)."
-->     , ""
-->     , "See also"
-->     , ""
-->     , "    1. Inline elements"
-->     , ""
-->     , "    2. display"
-->     , ""
-->     , "    3. Block and Inline Layout in Normal Flow"
-->     ]


-- https://developer.mozilla.org/en-US/docs/Web/HTML/Element/dd
div []
    [ p []
        [ text "Cryptids of Cornwall:" ]
    , dl []
        [ dt []
            [ text "Beast of Bodmin" ]
        , dd []
            [ text "A large feline inhabiting Bodmin Moor." ]
        , dt []
            [ text "Morgawr" ]
        , dd []
            [ text "A sea serpent." ]
        , dt []
            [ text "Owlman" ]
        , dd []
            [ text "A giant owl-like creature." ]
        ]
    ]
    |> toTextPlain defaultTextPlainConfig
--> String.join "\n"
-->     [ "Cryptids of Cornwall:"
-->     , ""
-->     , "Beast of Bodmin"
-->     , ""
-->     , "    A large feline inhabiting Bodmin Moor."
-->     , ""
-->     , "Morgawr"
-->     , ""
-->     , "    A sea serpent."
-->     , ""
-->     , "Owlman"
-->     , ""
-->     , "    A giant owl-like creature."
-->     ]

div []
    [ text "hi"
    , p [] [ text "hello" ]
    , text "world"
    ]
    |> toTextPlain defaultTextPlainConfig
--> String.join "\n"
-->     [ "hi"
-->     , ""
-->     , "hello"
-->     , ""
-->     , "world"
-->     ]