LinkedIn.purs 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. module LinkedIn (APIError(..), encodeToJson, getContext, getContextJson, extractFromDocument, extractFromDocumentJson, forceExtract, loopUntilElementAppears) where
  2. import Prelude
  3. import Control.Monad.Except (ExceptT, lift, runExceptT, throwError, withExceptT)
  4. import Data.Argonaut.Core (Json)
  5. import Data.Argonaut.Encode (class EncodeJson, encodeJson)
  6. import Data.Argonaut.Encode.Generic (genericEncodeJson)
  7. import Data.Either (Either(..))
  8. import Data.Generic.Rep (class Generic)
  9. import Data.Maybe (Maybe(..))
  10. import Data.Show.Generic (genericShow)
  11. import Data.Traversable (class Traversable)
  12. import Effect (Effect)
  13. import LinkedIn.CanBeQueried (class CanBeQueried)
  14. import LinkedIn.Extractible (class Extractible)
  15. import LinkedIn.Loadable (waitFor)
  16. import LinkedIn.Output (OutputError, run, toOutput)
  17. import LinkedIn.Output.Types (Output)
  18. import LinkedIn.PageUrl (PageUrl, pageUrlP)
  19. import Parsing (runParser)
  20. import Promise.Aff (Promise, fromAff)
  21. import Type.Proxy (Proxy)
  22. import Web.DOM (Document)
  23. import Web.DOM.Document (url)
  24. import Web.URL as U
  25. data APIError =
  26. ErrorExtraction OutputError
  27. | ErrorInvalidUrl
  28. | ErrorUnexpectedUrl
  29. derive instance Generic APIError _
  30. instance Show APIError where show = genericShow
  31. instance EncodeJson APIError where encodeJson a = genericEncodeJson a
  32. getContext ∷ Document → Effect (Either APIError PageUrl)
  33. getContext = runExceptT <<< getContext'
  34. getContextJson ∷ Document → Effect Json
  35. getContextJson d = getContext d >>= (pure <<< encodeJson)
  36. getContext' ∷ Document → ExceptT APIError Effect PageUrl
  37. getContext' dom = do
  38. u <- lift $ url dom
  39. case U.fromAbsolute u of
  40. Nothing -> throwError ErrorInvalidUrl
  41. Just u' -> case runParser (U.pathname u') pageUrlP of
  42. Left _ -> throwError ErrorUnexpectedUrl
  43. Right page -> pure page
  44. extractFromDocument :: Document -> Effect (Either APIError Output)
  45. extractFromDocument = runExceptT <<< extractFromDocument'
  46. extractFromDocumentJson ∷ Document → Effect Json
  47. extractFromDocumentJson d = extractFromDocument d >>= (pure <<< encodeJson)
  48. extractFromDocument' ∷ Document → ExceptT APIError Effect Output
  49. extractFromDocument' dom = do
  50. ctx <- getContext' dom
  51. toOutput' ctx dom
  52. toOutput' ∷ PageUrl → Document → ExceptT APIError Effect Output
  53. toOutput' ctx dom = withExceptT (\err -> ErrorExtraction err) $ toOutput ctx dom
  54. encodeToJson :: Either String Output -> Json
  55. encodeToJson = encodeJson
  56. loopUntilElementAppears ∷ String → Document → Effect (Promise Boolean)
  57. loopUntilElementAppears selector q = fromAff $ waitFor 200 50 selector q
  58. -- | Force extraction of data from a page, when the context given by the URL is imprecise
  59. -- | or plain wrong (e.g. for local files).
  60. -- | Can be call e.g. `forceExtract (Proxy :: Proxy JobOfferPage) dom`
  61. forceExtract ∷ ∀ root t.
  62. Traversable t
  63. ⇒ CanBeQueried root t
  64. ⇒ Extractible t
  65. ⇒ Proxy t
  66. → root
  67. → Effect (Either OutputError Output)
  68. forceExtract p = runExceptT <<< run p