DetachedNode.purs 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. module LinkedIn.DetachedNode where
  2. import Prelude
  3. import Control.Comonad.Cofree (head, tail)
  4. import Data.Array as A
  5. import Data.Generic.Rep (class Generic)
  6. import Data.List (List(..), (:))
  7. import Data.List as L
  8. import Data.Maybe (Maybe(..), fromJust)
  9. import Data.Show.Generic (genericShow)
  10. import Data.String (Pattern(..))
  11. import Data.String as S
  12. import Data.String.CodeUnits (fromCharArray, toCharArray)
  13. import Data.Traversable (sequence)
  14. import Effect (Effect)
  15. import Partial.Unsafe (unsafePartial)
  16. import Web.DOM (Node)
  17. import Web.DOM.Element (getAttribute)
  18. import Web.DOM.Element as E
  19. import Web.DOM.Node (nodeType, textContent)
  20. import Web.DOM.Node as N
  21. import Web.DOM.NodeList as NL
  22. import Web.DOM.NodeType (NodeType(..))
  23. import Yoga.Tree (Tree, leaf, mkTree)
  24. data DetachedNode =
  25. DetachedElement {tag :: String, content :: String, id :: Maybe String, classes :: List String}
  26. | DetachedA {content :: String, href :: String}
  27. | DetachedButton {content :: String, role :: Maybe String, classes :: List String}
  28. | DetachedComment String
  29. | DetachedText String
  30. derive instance Generic DetachedNode _
  31. derive instance Eq DetachedNode
  32. instance Show DetachedNode where
  33. show = genericShow
  34. toDetached :: Node -> Effect DetachedNode
  35. toDetached node = unsafePartial $ toDetached' (nodeType node) node where
  36. toDetached' :: Partial => NodeType -> Node -> Effect DetachedNode
  37. toDetached' CommentNode n = do
  38. txt <- textContent n
  39. pure $ DetachedComment $ normalize txt
  40. toDetached' TextNode n = do
  41. txt <- textContent n
  42. pure $ DetachedText $ normalize txt
  43. toDetached' ElementNode n = do
  44. txt <- textContent n
  45. let
  46. el = unsafePartial $ fromJust $ E.fromNode n
  47. tag = E.tagName el
  48. elementToDetached el tag txt
  49. elementToDetached el "A" text = do
  50. href <- getAttribute "href" el
  51. pure $ DetachedA {
  52. content: normalize text,
  53. href: unsafePartial $ fromJust href
  54. }
  55. elementToDetached el "BUTTON" text = do
  56. role <- getAttribute "role" el
  57. classes <- getClassList el
  58. pure $ DetachedButton {
  59. content: normalize text,
  60. role,
  61. classes
  62. }
  63. elementToDetached el tag text = do
  64. id <- E.id el
  65. -- On SVG elements "className" returns a weird "SVGString" type that cannot be trimmed
  66. classes <- if tag /= "svg" && tag /= "use" && tag /= "path" then getClassList el else pure $ Nil
  67. pure $ DetachedElement {
  68. tag: E.tagName el,
  69. content: normalize text,
  70. id: if S.null id then Nothing else Just id,
  71. classes
  72. }
  73. getClassList el = do
  74. classStr <- E.className el
  75. pure $ A.toUnfoldable $ S.split (Pattern " ") (normalize classStr)
  76. normalize :: String -> String
  77. normalize = normaliseSpace >>> S.trim
  78. normaliseSpace :: String -> String
  79. normaliseSpace s = fromCharArray $ A.fromFoldable $ normaliseSpace' $ A.toUnfoldable $ toCharArray s
  80. where
  81. badSequence ' ' ' ' = true
  82. badSequence ' ' '\n' = true
  83. badSequence '\n' '\n' = true
  84. badSequence '\n' ' ' = true
  85. badSequence _ _ = false
  86. normaliseSpace':: List Char -> List Char
  87. normaliseSpace' Nil = Nil
  88. normaliseSpace' (c1 : xs@(c2 : _)) | badSequence c1 c2 = normaliseSpace' xs
  89. normaliseSpace' (x:xs) = x : normaliseSpace' xs
  90. cutBranches :: forall a. (Tree a -> Boolean) -> Tree a -> Tree a
  91. cutBranches filterIn tree = case head tree, tail tree of
  92. h, [] -> mkTree h []
  93. h, t ->
  94. mkTree h (A.filter filterIn tail) where
  95. tail = map (cutBranches filterIn) t :: Array (Tree a)
  96. filterEmpty ∷ Tree DetachedNode → Boolean
  97. filterEmpty t = case head t of
  98. DetachedComment _ -> false
  99. DetachedText "" -> false
  100. DetachedElement {tag: "SPAN", classes}
  101. | L.any (_ == "white-space-pre") classes -> false
  102. DetachedElement {classes} -> L.all (\c -> c /= "visually-hidden" && c /= "a11y-text") classes
  103. _ -> true
  104. asTree' :: Node -> Effect (Tree DetachedNode)
  105. asTree' n = do
  106. children <- N.childNodes n
  107. childArray <- NL.toArray children :: Effect (Array Node)
  108. detached <- toDetached n
  109. case childArray of
  110. [] -> pure $ leaf detached
  111. arr -> do
  112. a <- sequence (map asTree' arr) :: Effect (Array (Tree DetachedNode))
  113. pure $ mkTree detached a