English (UK)
Français (France)
Brezhoneg (Breizh)

Auto-search on leboncoin.fr

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import java.util.Date

import akka.actor.{ActorSystem, Cancellable}
import net.ruippeixotog.scalascraper.browser.JsoupBrowser
import net.ruippeixotog.scalascraper.dsl.DSL.Extract._
import net.ruippeixotog.scalascraper.dsl.DSL._

import scala.collection.mutable
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._
import scala.io.Codec

/** Polls a leboncoin.fr search page and prints every new offer accepted by [[MyApp.myFilter]].
  *
  * A run (see [[MyApp.scrap]]) is triggered immediately at start-up and then every 10 minutes.
  */
object MyApp {

  import scala.util.control.NonFatal
  import scala.util.{Failure, Success, Try}

  /** Simple model for an offer.
    *
    * @param description text extracted from the offer page
    * @param vendor      vendor name extracted from the offer page
    * @param url         absolute URL of the offer page
    */
  case class Offer(description: String, vendor: String, url: String)

  /** Search results page that is scraped on every run. */
  val searchUrl = """https://www.leboncoin.fr/_loisirs_/offres/bretagne/?th=1&f=p&q=manga&location=Lannion%2022300"""

  /** Offer URL -> last time that offer was seen on the search page (or first loaded). */
  val alreadyChecked = new mutable.HashMap[String, Date]()

  /** Entry point: starts the periodic scraping. */
  def main(args: Array[String]): Unit = startScheduler()

  /** Schedules [[scrap]] to run now and then every 10 minutes.
    *
    * The run is guarded: an exception escaping the scheduled task would stop Akka from
    * rescheduling it, so a single network error would silently end the polling for good.
    *
    * @return a handle that cancels the periodic task
    */
  def startScheduler(): Cancellable =
    ActorSystem("MySystem").scheduler.schedule(0.seconds, 10.minutes) {
      try scrap()
      catch {
        case NonFatal(e) => Console.err.println(s"Scrap run failed, will retry on next run: $e")
      }
    }

  /** Performs one run: reads the search page, loads every offer not seen yet, notifies the
    * ones accepted by [[myFilter]] and finally drops stale entries from [[alreadyChecked]].
    *
    * A failure to read the search page propagates to the caller; a failure to load a single
    * offer is reported on stderr and that offer is retried on the next run.
    */
  def scrap(): Unit = {
    println("Scrapping...")

    val browser = JsoupBrowser() // starts a browser

    // Read the whole search page, always releasing the underlying connection.
    val source = scala.io.Source.fromURL(searchUrl, Codec.ISO8859.name)
    val html = try source.mkString finally source.close()
    val page = browser.parseString(html)

    // Extract the links href, then add the missing protocol to each of them.
    val offerHrefs: Seq[String] = page >> elementList("a.list_item") flatMap (_ >?> element("a") map (_ attr "href"))
    val withProtocolHrefs: Seq[String] = offerHrefs map ("https:" + _)

    // `distinct` prevents loading (and notifying) twice an offer that is listed twice.
    val (knownHrefs, newHrefs) = withProtocolHrefs.distinct.partition(alreadyChecked.contains)

    // Offers still listed stay "fresh"; otherwise the clean-up below would evict them after
    // one day and they would be loaded and notified again although nothing changed.
    knownHrefs foreach { href => alreadyChecked(href) = new Date() }

    // Loads one offer page and optionally extracts its description and vendor name.
    def loadOffer(offerHref: String) = {
      val offerSection = browser.get(offerHref) >?> element("section #adview") // section containing offer info

      for {
        propertiesDescription <- offerSection >?> element("div.properties_description")
        description <- propertiesDescription flatMap (_ >?> text("p.value"))
        linePro <- offerSection >?> element("div.line_pro")
        vendor <- linePro flatMap (_ >?> text("p a"))
      } yield Offer(description, vendor, offerHref)
    }

    // Each offer is loaded, recorded and notified before moving on to the next one, so a
    // failure never leaves an offer recorded as checked without having been notified.
    newHrefs foreach { offerHref =>
      Try(loadOffer(offerHref)) match {
        case Success(offer) =>
          alreadyChecked(offerHref) = new Date() // record only once the page was really loaded
          offer filter myFilter foreach myNotifier
        case Failure(e) =>
          Console.err.println(s"Could not load $offerHref, will retry on next run: $e")
      }
    }

    // Clean old hashmap entries: drop URLs not seen for more than one day.
    val oldestKept = new Date().getTime - 1.day.toMillis
    alreadyChecked retain { case (_, date) => date.getTime > oldestKept }
  }

  /** Custom filter: keeps offers whose description mentions "dragon" (case-insensitive)
    * or whose vendor is exactly "MyFavoriteVendor".
    */
  def myFilter(offer: Offer): Boolean = (offer.description.toLowerCase contains "dragon") || (offer.vendor == "MyFavoriteVendor")

  /** Custom notifier: prints the offer on standard output. */
  def myNotifier(offer: Offer): Unit = println(offer)
}

// build.sbt
//scalaVersion := "2.11.11"
//libraryDependencies += "net.ruippeixotog" %% "scala-scraper" % "2.0.0"
//libraryDependencies += "com.typesafe.akka" %% "akka-actor" % "2.5.4"

Get notified when an item you are looking for becomes available on leboncoin.fr.

Back to the projects page