# Extraction rules: one "directive: value" pair per line.
# Repeated directives (author, date, strip, ...) are intentional; each line
# supplies an additional XPath / pattern for the same field.

# Headline
title: //h1

# We can have multiple authors
author: //span[@id='publisher']
author: //a[@class='auteur']

# Last edition date (if any)
date: //time[@itemprop='dateModified']/@datetime
# Publication date
date: //time[@itemprop='datePublished']/@datetime

# Article text: either the articleBody div or the "texte" div inside the
# "contenu" section
body: //div[@id='articleBody']|//section[@class='contenu']//div[@class='texte']

# Remove highlighted quotes
strip_id_or_class: accroche
strip_id_or_class: encart

# Remove "Lire aussi" blocks
strip: //p[@class='lire']

# Remove footers for Le Monde Festival
strip: //aside[@class='fenetre']

# Remove the insane "conjugaison.lemonde.fr" links:
find_string: