diff --git a/docs/conf.py b/docs/conf.py
index feed1420..f29ddbd9 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -15,7 +15,7 @@
extensions = [
"autoapi.extension",
- "myst_parser",
+ "myst_nb",
"numpydoc",
"sphinxcontrib.mermaid",
"sphinx_design",
diff --git a/docs/tutorial/context_detection.md b/docs/tutorial/context_detection.md
index fe0372f1..91d427c1 100644
--- a/docs/tutorial/context_detection.md
+++ b/docs/tutorial/context_detection.md
@@ -1,21 +1,37 @@
+---
+jupytext:
+ formats: md:myst
+ text_representation:
+ extension: .md
+ format_name: myst
+kernelspec:
+ display_name: Python 3
+ language: python
+ name: python3
+---
+
# Context Detection
In this tutorial, we will use rule-based operations to attach additional contextual information to entities,
-such has:
+such as:
- the section in which the entity is located;
- whether the entity is negated;
- whether it appears as part of a hypothesis;
- whether it is related to the patient or part of their family's medical history.
-Let's start by loading a document:
+Let's start by loading a medical report to work on:
-:::{code}
+```{code-cell} ipython3
from pathlib import Path
from medkit.core.text import TextDocument
-doc = TextDocument.from_file(Path("../data/mtsamplesfr/1.txt"))
+# In case this notebook is executed outside the medkit repository, download the example data with:
+# !wget https://raw.githubusercontent.com/medkit-lib/medkit/main/docs/data/mtsamplesfr/1.txt
+# and adjust the path below.
+doc_file = Path("../data/mtsamplesfr/1.txt")
+doc = TextDocument.from_file(doc_file)
print(doc.text)
-:::
+```
## Section detection
@@ -31,7 +47,7 @@ so we will manually define our own section rules:
[default list of sections]: https://github.com/medkit-lib/medkit/blob/main/medkit/text/segmentation/default_section_definition.yml
-:::{code}
+```{code-cell} ipython3
from medkit.text.segmentation import SectionTokenizer
# Give a definition of the sections we may encounter
@@ -54,7 +70,7 @@ for section_seg in section_segs:
section_attr = section_seg.attrs.get(label="section")[0]
print("section", section_attr.value)
print(section_seg.text, end="\n\n\n")
-:::
+```
## Sentence splitting
@@ -69,7 +85,7 @@ to the new sentences segments created by the operation.
Here, we will use it to copy the "section" attribute of the section segments
(which has the section name as value):
-:::{code}
+```{code-cell} ipython3
from medkit.text.segmentation import SentenceTokenizer
sentence_tokenizer = SentenceTokenizer(
@@ -89,7 +105,7 @@ for sentence_seg in sentence_segs:
section_attr = sentence_seg.attrs.get(label="section")[0]
print("section:", section_attr.value)
print(sentence_seg.text, end="\n\n")
-:::
+```
## Family history detection
@@ -108,7 +124,7 @@ For the sake of learning, we will manually create a few rules:
[predefined rules]: https://github.com/medkit-lib/medkit/blob/main/medkit/text/context/family_detector_default_rules.yml
-:::{code}
+```{code-cell} ipython3
from medkit.text.context import FamilyDetector, FamilyDetectorRule
family_rule_1 = FamilyDetectorRule(
@@ -145,7 +161,7 @@ for sentence_seg in sentence_segs:
# Only print sentences about family history
if family_attr.value:
print(sentence_seg.text)
-:::
+```
As with all rule-based operations, `FamilyDetector` provides
the {func}`~medkit.text.context.FamilyDetector.load_rules`
@@ -159,7 +175,7 @@ However, for negation and hypothesis, it is better to split sentences into small
as the scope of negation and hypothesis can be very limited.
For this purpose, `medkit` provides a {class}`~medkit.text.segmentation.SyntagmaTokenizer` operation.
-:::{code}
+```{code-cell} ipython3
from medkit.text.segmentation import SyntagmaTokenizer
# Here we will use the default settings of SyntagmaTokenizer,
@@ -175,13 +191,13 @@ syntagma_segs = syntagma_tokenizer.run(sentence_segs)
for syntagma_seg in syntagma_segs:
print(syntagma_seg.text)
-:::
+```
As you can see, a few sentences were split into smaller parts.
We can now run a {class}`~medkit.text.context.NegationDetector` instance on the syntagmas
(using the default rules).
-:::{code}
+```{code-cell} ipython3
from medkit.text.context import NegationDetector, NegationDetectorRule
# NegationDetectorRule objects have the same structure as FamilyDetectorRule
@@ -194,7 +210,7 @@ for syntagma_seg in syntagma_segs:
negation_attr = syntagma_seg.attrs.get(label="negation")[0]
if negation_attr.value:
print(syntagma_seg.text)
-:::
+```
## Hypothesis detection
@@ -204,7 +220,7 @@ except it also uses a list of conjugated verb forms in addition to the list of r
By default, verbs in the conditional and future tenses indicate the presence of a hypothesis.
This can be configured alongside the list of verbs.
-:::{code}
+```{code-cell} ipython3
from medkit.text.context import HypothesisDetector
hypothesis_detector = HypothesisDetector(output_label="hypothesis")
@@ -215,7 +231,7 @@ for syntagma_seg in syntagma_segs:
hypothesis_attr = syntagma_seg.attrs.get(label="hypothesis")[0]
if hypothesis_attr.value:
print(syntagma_seg.text)
-:::
+```
As you can see, no hypothesis was detected in this document.
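Should you need to tune this behavior, custom verbs and tenses can be supplied to the detector. The snippet below is only a sketch: the `verbs` and `modes_and_tenses` parameter names and the nested verb-form structure are assumptions to verify against the {class}`~medkit.text.context.HypothesisDetector` API.

```python
from medkit.text.context import HypothesisDetector

# Hypothetical configuration: only conditional-mode forms of "pouvoir"
# and "devoir" trigger a hypothesis (parameter names are assumptions).
verbs = {
    "pouvoir": {"conditionnel": {"présent": ["pourrait", "pourraient"]}},
    "devoir": {"conditionnel": {"présent": ["devrait", "devraient"]}},
}
hypothesis_detector = HypothesisDetector(
    output_label="hypothesis",
    verbs=verbs,
    modes_and_tenses=[("conditionnel", "présent")],
)
```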
@@ -233,7 +249,7 @@ we want to propagate it to the entities that we will find in the document.
This can be done using the `attrs_to_copy` mechanism that we have already seen,
which is available to all NER operations:
-:::{code}
+```{code-cell} ipython3
from medkit.text.ner.hf_entity_matcher import HFEntityMatcher
# Create a matcher using a pretrained HuggingFace model
@@ -259,211 +275,11 @@ for entity in doc.anns.entities:
hypothesis_attr = entity.attrs.get(label="hypothesis")[0]
print("hypothesis:", hypothesis_attr.value)
print()
-:::
-
-```text
-problem : Thrombocytose essentielle
-section: head
-family: False
-negation: False
-hypothesis: False
-
-problem : thrombocytose essentielle
-section: antecedents
-family: False
-negation: False
-hypothesis: False
-
-test : nombre
-section: antecedents
-family: False
-negation: False
-hypothesis: False
-
-test : plaquettes
-section: antecedents
-family: False
-negation: False
-hypothesis: False
-
-treatment : Hydrea
-section: antecedents
-family: False
-negation: False
-hypothesis: False
-
-test : biopsie de moelle osseuse
-section: antecedents
-family: False
-negation: False
-hypothesis: False
-
-problem : thrombocytose essentielle
-section: antecedents
-family: False
-negation: False
-hypothesis: False
-
-problem : mutation JAK-2
-section: antecedents
-family: False
-negation: False
-hypothesis: False
-
-treatment : Hydrea
-section: antecedents
-family: False
-negation: False
-hypothesis: False
-
-problem : polyarthrite rhumatoïde
-section: antecedents
-family: False
-negation: False
-hypothesis: False
-
-test : d
-section: antecedents
-family: False
-negation: False
-hypothesis: False
-
-test : énergie
-section: antecedents
-family: False
-negation: False
-hypothesis: False
-
-test : statut de performance ECOG
-section: antecedents
-family: False
-negation: False
-hypothesis: False
-
-problem : fièvre
-section: antecedents
-family: False
-negation: True
-hypothesis: False
-
-problem : frissons
-section: antecedents
-family: False
-negation: True
-hypothesis: False
-
-problem : sueurs nocturnes
-section: antecedents
-family: False
-negation: True
-hypothesis: False
-
-problem : adénopathie
-section: antecedents
-family: False
-negation: True
-hypothesis: False
-
-problem : nausées
-section: antecedents
-family: False
-negation: True
-hypothesis: False
-
-problem : vomissements
-section: antecedents
-family: False
-negation: True
-hypothesis: False
-
-treatment : vitamine D
-section: current_drugs
-family: False
-negation: False
-hypothesis: False
-
-treatment : aspirine
-section: current_drugs
-family: False
-negation: False
-hypothesis: False
-
-treatment : vitamine C
-section: current_drugs
-family: False
-negation: False
-hypothesis: False
-
-problem : allergie médicamenteuse
-section: allergies
-family: False
-negation: True
-hypothesis: False
-
-test : EXAMEN DES SYSTÈMES
-section: clinical_exam
-family: False
-negation: False
-hypothesis: False
-
-treatment : Appendicectomie
-section: antecedents
-family: False
-negation: False
-hypothesis: False
-
-treatment : Amygdalectomie
-section: antecedents
-family: False
-negation: False
-hypothesis: False
-
-treatment : adénoïdectomie
-section: antecedents
-family: False
-negation: False
-hypothesis: False
-
-treatment : Chirurgie bilatérale de la cataracte
-section: antecedents
-family: False
-negation: False
-hypothesis: False
-
-problem : tabagisme
-section: life_style
-family: False
-negation: False
-hypothesis: False
-
-problem : tumeur solide
-section: family_history
-family: True
-negation: False
-hypothesis: False
-
-problem : hémopathies malignes
-section: family_history
-family: True
-negation: True
-hypothesis: False
-
-test : EXAMEN PHYSIQUE
-section: clinical_exam
-family: False
-negation: False
-hypothesis: False
-
-problem : pèse
-section: clinical_exam
-family: False
-negation: False
-hypothesis: False
```
Let's visualize this in context with `displacy`:
-:::{code}
+```{code-cell} ipython3
from spacy import displacy
from medkit.text.spacy.displacy_utils import medkit_doc_to_displacy
@@ -492,90 +308,4 @@ def _custom_formatter(entity):
# Pass the formatter to medkit_doc_to_displacy()
displacy_data = medkit_doc_to_displacy(doc, entity_formatter=_custom_formatter)
displacy.render(docs=displacy_data, manual=True, style="ent")
-:::
-
-(rendered displacy output removed: the report text with `problem`/`test`/`treatment` entities highlighted, flagged with `[n]` for negation and `[f]` for family context)
-
-## Adding context attributes retrospectively
-
-What if we already have some entities that we imported from another source,
-and we want to attach the resulting contextual information obtained with `medkit`?
-In that case, one can copy attributes retrospectively using the
-{class}`~medkit.text.postprocessing.AttributeDuplicator` operation.
-
-## Wrapping it up
-
-In this tutorial, we have seen how `medkit` can facilitate detection of contextual information
-with built-in and customizable rule-based detectors.
-
-These detectors can be run on segments of different granularity,
-including as sentences or syntagmas, with their results stored as attributes.
-
-In order to propagate these contextual attributes from the outermost segments down to the entities matched,
-we use the `attrs_to_copy` operation init parameter.
+```
diff --git a/docs/tutorial/entity_matching.md b/docs/tutorial/entity_matching.md
index 002f6f06..ef184f17 100644
--- a/docs/tutorial/entity_matching.md
+++ b/docs/tutorial/entity_matching.md
@@ -15,13 +15,17 @@ or because we will afterward perform some context detection operation at the sen
Let's start by loading a medical report to work on:
-:::{code}
+```{code} python
from pathlib import Path
from medkit.core.text import TextDocument
-doc = TextDocument.from_file(Path("../data/mtsamplesfr/1.txt"))
+# In case this notebook is executed outside the medkit repository, download the example data with:
+# !wget https://raw.githubusercontent.com/medkit-lib/medkit/main/docs/data/mtsamplesfr/1.txt
+# and adjust the path below.
+doc_file = Path("../data/mtsamplesfr/1.txt")
+doc = TextDocument.from_file(doc_file)
print(doc.text)
-:::
+```
We will now use a sentence tokenizing operation to create and display sentence segments.
As seen [before](../user_guide/first_steps.md), the sentence tokenizer expects
@@ -30,7 +34,7 @@ Since we don't have any segments yet on our document,
we use {class}`medkit.core.text.document.TextDocument`.raw_segment,
which is a special segment that contains the full unprocessed text.
-:::{code}
+```{code} python
from medkit.text.segmentation import SentenceTokenizer
# By default, SentenceTokenizer will use a list of punctuation chars to detect sentences.
@@ -49,7 +53,7 @@ sentence_segs = sentence_tokenizer.run([doc.raw_segment])
# Print all returned sentence segments
for sentence_seg in sentence_segs:
print(sentence_seg.text, end="\n\n")
-:::
+```
## Regular expression matching
@@ -60,7 +64,7 @@ We are going to use regular expressions to match entities
that cannot be detected by a dictionary-based approach,
such as age and weight indications:
-:::{code}
+```{code} python
from medkit.text.ner import RegexpMatcher, RegexpMatcherRule
# Rule with simple regexps to match age and weights
@@ -94,29 +98,29 @@ regexp_matcher = RegexpMatcher(rules=[regexp_rule_1, regexp_rule_2])
entities = regexp_matcher.run(sentence_segs)
for entity in entities:
print(entity.text, entity.label)
-:::
+```
Let's visualize them with `displacy`, using {func}`~medkit.text.spacy.displacy_utils.entities_to_displacy`
(similar to {func}`~medkit.text.spacy.displacy_utils.medkit_doc_to_displacy`, but we can pass it
a list of entities rather than a `TextDocument`):
-:::{code}
+```{code} python
from spacy import displacy
from medkit.text.spacy.displacy_utils import entities_to_displacy
displacy_data = entities_to_displacy(entities, doc.text)
displacy.render(displacy_data, manual=True, style="ent")
-:::
+```
Note that you can save a particular list of regexp rules into a yaml file
using {func}`~medkit.text.ner.RegexpMatcher.save_rules`,
and reload them with {func}`~medkit.text.ner.RegexpMatcher.load_rules`.
This makes rules easier to share and reuse:
-:::{code}
+```{code} python
RegexpMatcher.save_rules([regexp_rule_1, regexp_rule_2], "weight_and_age_rules.yml")
rules = RegexpMatcher.load_rules("weight_and_age_rules.yml")
-:::
+```
`medkit` comes with a list of predefined regexp rules,
available at https://github.com/medkit-lib/medkit/blob/main/medkit/text/ner/regexp_matcher_default_rules.yml,
@@ -136,12 +140,12 @@ Let's take a look at it:
[^atc_footnote]: This file was created by Bastien Rance, reusing scripts originally from
Sébastien Cossin.
-:::{code}
+```{code} python
import pandas as pd
drugs = pd.read_csv("../data/bdpm.csv")
drugs.head(n=10)
-:::
+```
Rather than regular expressions, we will use similarity-based matching
using the {class}`~medkit.text.ner.SimstringMatcher` operation.
@@ -151,7 +155,7 @@ will be more tolerant to small spelling errors than the exact matching of a regu
We are going to create a rule for each commercial name, attaching to each rule
the ATC identifier of the corresponding molecule when we know it:
-:::{code}
+```{code} python
from medkit.text.ner import SimstringMatcher, SimstringMatcherRule, SimstringMatcherNormalization
simstring_rules = []
@@ -199,7 +203,7 @@ for entity in entities:
for norm_attr in entity.attrs.norms:
print(norm_attr.kb_name, norm_attr.kb_id)
print()
-:::
+```
## Advanced entity matching with IAMSystem
@@ -212,7 +216,7 @@ even when the dictionary of terms to match is very large.
Let's see how to use it to match a couple of manually-defined terms:
-:::{code}
+```{code} python
from iamsystem import Matcher, ESpellWiseAlgo
from medkit.text.ner.iamsystem_matcher import IAMSystemMatcher
@@ -234,7 +238,7 @@ entities = iam_system_matcher.run(sentence_segs)
for entity in entities:
print(entity.label, ":", entity.text)
-:::
+```
To learn more about the possibilities of `IAMSystem`,
please refer to its [documentation](https://iamsystem-python.readthedocs.io/en/).
@@ -268,7 +272,7 @@ To download them, you must request a license on the [UMLS website].
[UMLS website]: https://www.nlm.nih.gov/research/umls/index.html
-:::{code}
+```{code} python
from medkit.text.ner import UMLSMatcher
# Codes of UMLS semantic groups to take into account
@@ -312,81 +316,7 @@ def custom_formatter(entity):
displacy_data = entities_to_displacy(entities, doc.text, entity_formatter=custom_formatter)
displacy.render(displacy_data, manual=True, style="ent")
-:::
-
-
-(rendered displacy output removed: the report text with UMLS semantic-group entities highlighted together with their CUIs, e.g. "Thrombocytose essentielle" → disorder C0040028)
-
+```
## Finding entities with BERT models
@@ -410,7 +340,7 @@ to perform [entity matching](https://huggingface.co/medkit/DrBERT-CASM2).
Let's use this model with {class}`~medkit.text.ner.hf_entity_matcher.HFEntityMatcher`
to look for entities in our document:
-:::{code}
+```{code} python
from medkit.text.ner.hf_entity_matcher import HFEntityMatcher
# HFEntityMatcher just needs the name of a model on the HuggingFace hub or a path to a local checkpoint
@@ -425,76 +355,7 @@ entities = bert_matcher.run(sentence_segs)
displacy_data = entities_to_displacy(entities, doc.text)
displacy.render(docs=displacy_data, manual=True, style="ent")
-:::
-
-(rendered displacy output removed: the report text with the `problem`/`test`/`treatment` entities found by the DrBERT-CASM2 matcher highlighted)
-
+```
Note that the entities obtained with `HFEntityMatcher` don't have any normalization attributes attached to them.
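If needed, such attributes can still be attached manually afterwards. Here is a minimal sketch, assuming the `EntityNormAttribute` class from {mod}`medkit.core.text` and using the ATC code of Hydrea (hydroxycarbamide) as an example:

```{code} python
from medkit.core.text import EntityNormAttribute

# Hypothetical post-processing: tag every "Hydrea" entity with its ATC code
# (EntityNormAttribute and its kb_name/kb_id parameters are assumptions).
for entity in entities:
    if entity.text.lower() == "hydrea":
        entity.attrs.add(EntityNormAttribute(kb_name="ATC", kb_id="L01XX05"))
```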
@@ -503,35 +364,35 @@ Note that the entities obtained with `HFEntityMatcher` don't have any normalizat
Let's consider a more realistic case in which we are dealing with a collection of documents
rather than a unique document:
-:::{code}
+```{code} python
from glob import glob
# Let's load all of our sample documents
docs = TextDocument.from_dir(Path("../data/mtsamplesfr/"))
print(len(docs))
-:::
+```
It is possible to run the sentence splitting and entity matching operations on all documents at once:
-:::{code}
+```{code} python
sentence_segs = sentence_tokenizer.run([d.raw_segment for d in docs])
entities = regexp_matcher.run(sentence_segs)
for entity in entities:
print(entity.label, entity.text)
-:::
+```
Here, `entities` contains a list of entities found by the regexp matcher across all of our documents.
But if we want to attach the entities back to the documents they belong to,
then we need to process each document independently:
-:::{code}
+```{code} python
for doc in docs:
sentence_segs = sentence_tokenizer.run([doc.raw_segment])
entities = regexp_matcher.run(sentence_segs)
for entity in entities:
doc.anns.add(entity)
-:::
+```
When using [pipelines](../user_guide/pipeline.md),
this last use case is covered by {class}`~medkit.core.DocPipeline`.
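As a preview, here is a sketch of what this could look like, assuming a `Pipeline` object named `pipeline` built from the operations above:

```{code} python
from medkit.core import DocPipeline

# Sketch: run the wrapped pipeline once per document and attach
# the entities it outputs back to each document.
doc_pipeline = DocPipeline(pipeline=pipeline)
doc_pipeline.run(docs)
```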
@@ -549,8 +410,8 @@ so you can use them anywhere else within `medkit`. See the [module](../user_guid
Contributions to `medkit` are welcome; feel free to submit your operations.
-:::{code}
+```{code} python
import os
os.unlink("weight_and_age_rules.yml")
-:::
+```
diff --git a/docs/user_guide/first_steps.md b/docs/user_guide/first_steps.md
index e9332aa2..181879a3 100644
--- a/docs/user_guide/first_steps.md
+++ b/docs/user_guide/first_steps.md
@@ -8,7 +8,7 @@ and context detection operations.
For starters, let's load a text file using the {class}`~medkit.core.text.TextDocument` class:
-:::{code}
+```{code} python
# You can download the file available in source code
# !wget https://raw.githubusercontent.com/medkit-lib/medkit/main/docs/data/text/1.txt
@@ -16,13 +16,13 @@ from pathlib import Path
from medkit.core.text import TextDocument
doc = TextDocument.from_file(Path("../data/text/1.txt"))
-:::
+```
The full raw text can be accessed through the `text` attribute:
-:::{code}
+```{code} python
print(doc.text)
-:::
+```
A `TextDocument` can store {class}`~medkit.core.text.TextAnnotation` objects.
For now, our document is free of annotations.
@@ -36,14 +36,14 @@ documents in sentences.
including a rule-based {class}`~medkit.text.segmentation.SentenceTokenizer` class
that relies on a list of punctuation characters.
-:::{code}
+```{code} python
from medkit.text.segmentation import SentenceTokenizer
sent_tokenizer = SentenceTokenizer(
output_label="sentence",
punct_chars=[".", "?", "!"],
)
-:::
+```
Like all operations, `SentenceTokenizer` defines a `run()` method.
@@ -54,14 +54,14 @@ and returns a list of `Segment` objects.
Here, we can pass a special `Segment` containing the full text of the document,
which can be retrieved through the `raw_segment` attribute of `TextDocument`:
-:::{code}
+```{code} python
sentences = sent_tokenizer.run([doc.raw_segment])
for sentence in sentences:
print(f"uid={sentence.uid}")
print(f"text={sentence.text!r}")
print(f"spans={sentence.spans}, label={sentence.label}\n")
-:::
+```
Each segment features:
- an `uid` attribute, whose unique value is automatically generated;
@@ -76,10 +76,10 @@ Each segment features:
If you take a look at the 13th and 14th detected sentences,
you will notice something strange:
-:::{code}
+```{code} python
print(repr(sentences[12].text))
print(repr(sentences[13].text))
-:::
+```
This is actually one sentence that was split into two segments,
because the sentence tokenizer incorrectly considers the dot in the decimal weight value
@@ -92,12 +92,12 @@ For this, we can use the {class}`~medkit.text.preprocessing.RegexpReplacer` clas
a regexp-based "search-and-replace" operation.
As other `medkit` operations, it can be configured with a set of user-determined rules:
-:::{code}
+```{code} python
from medkit.text.preprocessing import RegexpReplacer
rule = (r"(?<=\d)\.(?=\d)", ",") # => (pattern to replace, new text)
regexp_replacer = RegexpReplacer(output_label="clean_text", rules=[rule])
-:::
+```
The `run()` method of the normalizer takes a list of `Segment` objects
and returns a list of new `Segment` objects, one for each input `Segment`.
@@ -105,18 +105,18 @@ In our case we only want to preprocess the full raw text segment,
and we will only receive one preprocessed segment,
so we can call it with:
-:::{code}
+```{code} python
clean_segment = regexp_replacer.run([doc.raw_segment])[0]
print(clean_segment.text)
-:::
+```
We may now run our previously-defined sentence tokenizer again,
but this time on the preprocessed text:
-:::{code}
+```{code} python
sentences = sent_tokenizer.run([clean_segment])
print(sentences[12].text)
-:::
+```
Problem fixed!
@@ -126,7 +126,7 @@ The `medkit` library also comes with operations to perform NER (named entity rec
for instance with {class}`~medkit.text.ner.regexp_matcher.RegexpMatcher`.
Let's instantiate one with a few simple rules:
-:::{code}
+```{code} python
from medkit.text.ner import RegexpMatcher, RegexpMatcherRule
regexp_rules = [
@@ -138,7 +138,7 @@ regexp_rules = [
RegexpMatcherRule(regexp=r"\bnasonex?\b", label="treatment", case_sensitive=False),
]
regexp_matcher = RegexpMatcher(rules=regexp_rules)
-:::
+```
As you can see, you can also define rules that ignore case distinctions
by setting the `case_sensitive` parameter to `False`.
@@ -162,13 +162,13 @@ representing the entities that were matched (`Entity` is a subclass of `Segment`
As input, it expects a list of `Segment` objects.
Let's give it the sentences returned by the sentence tokenizer:
-:::{code}
+```{code} python
entities = regexp_matcher.run(sentences)
for entity in entities:
print(f"uid={entity.uid}")
print(f"text={entity.text!r}, spans={entity.spans}, label={entity.label}\n")
-:::
+```
Just like sentences, each entity features `uid`, `text`, `spans` and `label` attributes
(in this case, determined by the rule that was used to match it).
@@ -192,7 +192,7 @@ accessible through their {class}`~medkit.core.AttributeContainer`).
Let's instantiate a `NegationDetector` with a couple of simplistic handcrafted rules
and run it on our sentences:
-:::{code}
+```{code} python
from medkit.text.context import NegationDetector, NegationDetectorRule
neg_rules = [
@@ -202,7 +202,7 @@ neg_rules = [
]
neg_detector = NegationDetector(output_label="is_negated", rules=neg_rules)
neg_detector.run(sentences)
-:::
+```
:::{note}
Similarly to `RegexpMatcher`, `NegationDetector` also comes with a set of default rules
@@ -213,12 +213,12 @@ located in the `medkit.text.context` module.
And now, let's look at which sentences have been detected as negated:
-:::{code}
+```{code} python
for sentence in sentences:
neg_attr = sentence.attrs.get(label="is_negated")[0]
if neg_attr.value:
print(sentence.text)
-:::
+```
Our simple negation detector does not work too badly,
but sometimes part of a sentence is tagged as negated whilst the rest is not,
@@ -235,7 +235,7 @@ which are stored in file `default_syntagma_definition.yml`
located in the `medkit.text.segmentation` module.
:::
-:::{code}
+```{code} python
from medkit.text.segmentation import SyntagmaTokenizer
synt_tokenizer = SyntagmaTokenizer(
@@ -249,7 +249,7 @@ for syntagma in syntagmas:
neg_attr = syntagma.attrs.get(label="is_negated")[0]
if neg_attr.value:
print(syntagma.text)
-:::
+```
We now have some information about negation attached to syntagmas,
but the end goal is really to know, for each entity,
@@ -268,14 +268,14 @@ Let's again use a `RegexpMatcher` to find some entities,
but this time from syntagmas rather than from sentences,
and using `attrs_to_copy` to copy negation attributes:
-:::{code}
+```{code} python
regexp_matcher = RegexpMatcher(rules=regexp_rules, attrs_to_copy=["is_negated"])
entities = regexp_matcher.run(syntagmas)
for entity in entities:
neg_attr = entity.attrs.get(label="is_negated")[0]
print(f"text='{entity.text}', label={entity.label}, is_negated={neg_attr.value}")
-:::
+```
We now have a negation `Attribute` for each entity!
@@ -293,21 +293,21 @@ an instance of {class}`~medkit.core.text.TextAnnotationContainer`)
that behaves roughly like a list but also offers additional filtering methods.
Annotations can be added by calling its `add()` method:
-:::{code}
+```{code} python
for entity in entities:
doc.anns.add(entity)
-:::
+```
The document and its corresponding entities can be exported to supported formats
such as brat (see {class}`~medkit.io.brat.BratOutputConverter`)
or Doccano (see {class}`~medkit.io.doccano.DoccanoOutputConverter`),
or serialized to JSON (see {mod}`~medkit.io.medkit_json`):
-:::{code}
+```{code} python
from medkit.io import medkit_json
medkit_json.save_text_document(doc, "doc_1.json")
-:::
+```
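Export to brat would follow the same pattern. Here is a minimal sketch, assuming {class}`~medkit.io.brat.BratOutputConverter` exposes a `save()` method taking a list of documents and a target directory:

```{code} python
from medkit.io.brat import BratOutputConverter

# Sketch: write the document and its annotations as a brat collection
# (the exact save() signature is an assumption to verify).
brat_converter = BratOutputConverter()
brat_converter.save([doc], dir_path="brat_output")
```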
## Visualizing entities with displacy
@@ -316,13 +316,13 @@ a visualization tool part of the [spaCy](https://spacy.io/) NLP library.
`medkit` provides helper functions to facilitate the use of `displacy`
in the {mod}`~medkit.text.spacy.displacy_utils` module:
-:::{code}
+```{code} python
from spacy import displacy
from medkit.text.spacy.displacy_utils import medkit_doc_to_displacy
displacy_data = medkit_doc_to_displacy(doc)
displacy.render(displacy_data, manual=True, style="ent")
-:::
+```
## Wrapping it up
diff --git a/docs/user_guide/module.md b/docs/user_guide/module.md
index 498bc235..ad46784a 100644
--- a/docs/user_guide/module.md
+++ b/docs/user_guide/module.md
@@ -57,14 +57,17 @@ segment.
```python
class MyTokenizer(SegmentationOperation):
- ...
+
+ def _tokenize(self, segment: Segment) -> List[Segment]:
+ """Custom method for segment tokenization."""
+ ...
+
def run(self, segments: List[Segment]) -> List[Segment]:
- # Here is your code for the tokenizer:
- # * process each input
return [
token
for segment in segments
- for token in self._mytokenmethod(segment)
+ for token in self._tokenize(segment)
+ ]
```
## 3. Make your operation non-destructive (for text)
@@ -85,7 +88,7 @@ segments.
```python
class MyTokenizer(SegmentationOperation):
...
- def _mytokenmethod(self, segment):
+ def _tokenize(self, segment):
# process the segment (e.g., cut the segment)
size = len(segment)
cut_index = size // 2
@@ -140,7 +143,7 @@ Here is our example which store information about:
```python
class MyTokenizer(SegmentationOperation):
...
- def _mytokenmethod(self, segment):
+ def _tokenize(self, segment):
...
# save the provenance data for this operation
@@ -166,7 +169,7 @@ To illustrate what we have seen in a more concrete manner, here is a fictional
"days of the week" matcher that takes text segments as input a return entities
for week days:
-:::{code}
+```python
import re
from medkit.core import Operation
from medkit.core.text import Entity, span_utils
@@ -222,7 +225,7 @@ class DayMatcher(Operation):
)
return entities
-:::
+```
Note that since this is an entity matcher, adding support for `attrs_to_copy`
would be nice (cf [Context detection](../tutorial/context_detection.md)).
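A minimal sketch of what that support could look like, assuming an `attrs_to_copy` init parameter and that `Attribute` objects expose a `copy()` method:

```python
class DayMatcher(Operation):
    ...

    def _copy_attrs(self, segment, entity):
        # Copy the attributes whose label is listed in self.attrs_to_copy
        # from the source segment to each newly created entity
        # (assumes Attribute.copy() exists).
        for label in self.attrs_to_copy:
            for attr in segment.attrs.get(label=label):
                entity.attrs.add(attr.copy())
```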
diff --git a/docs/user_guide/pipeline.md b/docs/user_guide/pipeline.md
index 06956575..2c7c8d08 100644
--- a/docs/user_guide/pipeline.md
+++ b/docs/user_guide/pipeline.md
@@ -8,7 +8,7 @@ and how to create pipelines to enrich documents.
Let's reuse the preprocessing, segmentation, context detection and entity recognition operations
from the [First steps](./first_steps.md) tutorial:
-:::{code}
+```{code} python
from medkit.text.preprocessing import RegexpReplacer
from medkit.text.segmentation import SentenceTokenizer, SyntagmaTokenizer
from medkit.text.context import NegationDetector, NegationDetectorRule
@@ -30,14 +30,14 @@ syntagma_tokenizer = SyntagmaTokenizer(
)
# context detection
-neg_rules = [
+negation_rules = [
NegationDetectorRule(regexp=r"\bpas\s*d[' e]\b"),
NegationDetectorRule(regexp=r"\bsans\b", exclusion_regexps=[r"\bsans\s*doute\b"]),
NegationDetectorRule(regexp=r"\bne\s*semble\s*pas"),
]
negation_detector = NegationDetector(
output_label="is_negated",
- rules=neg_rules,
+ rules=negation_rules,
)
# entity recognition
@@ -50,13 +50,13 @@ regexp_rules = [
RegexpMatcherRule(regexp=r"\bnasonex?\b", label="treatment", case_sensitive=False),
]
regexp_matcher = RegexpMatcher(rules=regexp_rules, attrs_to_copy=["is_negated"])
-:::
+```
Each of these operations features a `run()` method, which could be called sequentially.
Data need to be routed manually between inputs and outputs for each operation,
using a document's raw text segment as initial input:
-:::{code}
+```{code} python
from pathlib import Path
from medkit.core.text import TextDocument
@@ -74,7 +74,7 @@ syntagmas = syntagma_tokenizer.run(sentences)
# but rather appends attributes to the segments it received.
negation_detector.run(syntagmas)
entities = regexp_matcher.run(syntagmas)
-:::
+```
This way of coding is useful for interactive exploration of `medkit`.
In the next section, we will introduce a different way using `Pipeline` objects.
@@ -105,7 +105,7 @@ But we also need to "connect" the operations together,
i.e. to indicate which output of an operation should be fed as input to another operation.
This is the purpose of the {class}`~medkit.core.PipelineStep` objects:
-:::{code}
+```{code} python
from medkit.core import PipelineStep
steps = [
@@ -115,13 +115,13 @@ steps = [
PipelineStep(negation_detector, input_keys=["syntagmas"], output_keys=[]), # no output
PipelineStep(regexp_matcher, input_keys=["syntagmas"], output_keys=["entities"]),
]
-:::
+```
Each `PipelineStep` associates an operation with input and output _keys_.
Pipeline steps with matching input and output keys will be connected to each other.
The resulting pipeline can be represented like this:
-:::{mermaid}
+```{mermaid}
---
align: center
---
@@ -143,11 +143,11 @@ graph TD
F --> G
classDef io fill:#fff4dd,stroke:#edb
-:::
+```
Pipeline steps can then be used to instantiate a {class}`~medkit.core.Pipeline` object:
-:::{code}
+```{code} python
from medkit.core import Pipeline
pipeline = Pipeline(
@@ -162,7 +162,7 @@ pipeline = Pipeline(
# (and therefore that it should be the output of the regexp matcher)
output_keys=["entities"]
)
-:::
+```
The resulting pipeline is functionally equivalent to a single operation
processing full text segments as input and returning entities with negation attributes as output.
@@ -171,13 +171,13 @@ but more complex pipelines with multiple inputs and outputs are supported.
Like any other operation, the pipeline can be evaluated using its `run` method:
-:::{code}
+```{code} python
entities = pipeline.run([doc.raw_segment])
for entity in entities:
neg_attr = entity.attrs.get(label="is_negated")[0]
print(f"text='{entity.text}', label={entity.label}, is_negated={neg_attr.value}")
-:::
+```
## Nested pipelines
@@ -188,7 +188,7 @@ which can be used, tested and exercised in isolation.
In our example, we can use this feature to regroup together our regexp replacer,
sentence tokenizer and family detector into a context sub-pipeline:
-:::{code}
+```{code} python
# Context pipeline that receives full text segments
# and returns preprocessed syntagmas segments with negation attributes.
context_pipeline = Pipeline(
@@ -197,20 +197,20 @@ context_pipeline = Pipeline(
name="context",
steps=[
PipelineStep(regexp_replacer, input_keys=["full_text"], output_keys=["clean_text"]),
- PipelineStep(sent_tokenizer, input_keys=["clean_text"], output_keys=["sentences"]),
- PipelineStep(synt_tokenizer, input_keys=["sentences"], output_keys=["syntagmas"]),
- PipelineStep(neg_detector, input_keys=["syntagmas"], output_keys=[]),
+ PipelineStep(sentence_tokenizer, input_keys=["clean_text"], output_keys=["sentences"]),
+ PipelineStep(syntagma_tokenizer, input_keys=["sentences"], output_keys=["syntagmas"]),
+ PipelineStep(negation_detector, input_keys=["syntagmas"], output_keys=[]),
],
input_keys=["full_text"],
output_keys=["syntagmas"],
)
-:::
+```
Likewise, we can introduce a NER sub-pipeline
composed of a UMLS-based matching operation (see also [Entity Matching](../tutorial/entity_matching.md))
grouped with the previously defined regexp matcher:
-:::{code}
+```{code} python
from medkit.text.ner import UMLSMatcher
umls_matcher = UMLSMatcher(
@@ -231,7 +231,7 @@ ner_pipeline = Pipeline(
input_keys=["syntagmas"],
output_keys=["entities"],
)
-:::
+```
Since both pipeline steps feature the same output key (_entities_),
the pipeline will return a list containing the entities matched by
@@ -239,7 +239,7 @@ both the regexp matcher and the UMLS matcher.
The NER and context sub-pipelines can now be sequenced with:
-:::{code}
+```{code} python
pipeline = Pipeline(
steps=[
PipelineStep(context_pipeline, input_keys=["full_text"], output_keys=["syntagmas"]),
@@ -248,7 +248,7 @@ pipeline = Pipeline(
input_keys=["full_text"],
output_keys=["entities"],
)
-:::
+```
which can be represented like this:
@@ -287,14 +287,14 @@ graph TD
Let's run the pipeline and verify entities with negation attributes:
-:::{code}
+```{code} python
entities = pipeline.run([doc.raw_segment])
for entity in entities:
neg_attr = entity.attrs.get(label="is_negated")[0]
print(entity.label, ":", entity.text)
print("negation:", neg_attr.value, end="\n\n")
-:::
+```
```text
problem : allergies
@@ -393,28 +393,28 @@ To scale the processing of such pipeline to a collection of documents,
one needs to iterate over each document manually to obtain its entities
rather than processing all the documents at once:
-:::{code}
+```{code} python
docs = TextDocument.from_dir(Path("..data/text"))
for doc in docs:
entities = pipeline.run([doc.raw_segment])
for entity in entities:
doc.anns.add(entity)
-:::
+```
To handle this common use case, `medkit` provides a {class}`~medkit.core.DocPipeline` class,
which wraps a `Pipeline` instance and runs it on a list of documents.
Here is an example of its usage:
-:::{code}
+```{code} python
from medkit.core import DocPipeline
docs = TextDocument.from_dir(Path("..data/text"))
doc_pipeline = DocPipeline(pipeline=pipeline)
doc_pipeline.run(docs)
-:::
+```
## Summary
diff --git a/docs/user_guide/provenance.md b/docs/user_guide/provenance.md
index c510e576..72be5065 100644
--- a/docs/user_guide/provenance.md
+++ b/docs/user_guide/provenance.md
@@ -25,7 +25,7 @@ and take a look at provenance for a single annotation, generated by a single ope
We are going to create a very simple `TextDocument` containing just one sentence,
and run a `RegexpMatcher` to match a single `Entity`:
-:::{code}
+```{code} python
from medkit.core.text import TextDocument
from medkit.text.ner import RegexpMatcher, RegexpMatcherRule
@@ -34,32 +34,32 @@ doc = TextDocument(text=text)
regexp_rule = RegexpMatcherRule(regexp=r"\basthme\b", label="problem")
regexp_matcher = RegexpMatcher(rules=[regexp_rule])
-:::
+```
Before calling the `run()` method of our regexp matcher,
we will activate provenance tracing for the generated entities.
This is done by assigning it a {class}`~medkit.core.ProvTracer` object.
The `ProvTracer` is in charge of gathering provenance information across all operations.
-:::{code}
+```{code} python
from medkit.core import ProvTracer
prov_tracer = ProvTracer()
regexp_matcher.set_prov_tracer(prov_tracer)
-:::
+```
Now that provenance is enabled, the regexp matcher can be applied to the input document:
-:::{code}
+```{code} python
entities = regexp_matcher.run([doc.raw_segment])
for entity in entities:
print(f"text={entity.text!r}, label={entity.label}")
-:::
+```
Let's retrieve and inspect provenance information concerning the matched entity:
-:::{code}
+```{code} python
def print_prov(prov):
# data item
print(f"data_item={prov.data_item.text!r}")
@@ -74,7 +74,7 @@ def print_prov(prov):
entity = entities[0]
prov = prov_tracer.get_prov(entity.uid)
print_prov(prov)
-:::
+```
The `get_prov()` method of `ProvTracer` returns a simple {class}`~medkit.core.Prov` object
containing all the provenance information related to a specific object.
@@ -89,17 +89,17 @@ It features the following attributes:
Here there is only one source, the raw text segment,
because the entity was found in this particular segment by the regexp matcher.
But it is possible to have more than one data item in the sources;
- - `derived_data_items` contains the objects that were derived from the data item by further operations.
+- `derived_data_items` contains the objects that were derived from the data item by further operations.
In this simple example, there are none.
If we are interested in all the provenance information gathered by the `ProvTracer` instance,
rather than the provenance of a specific item,
then we can call the `get_provs()` method:
-:::{code}
+```{code} python
for prov in prov_tracer.get_provs():
print_prov(prov)
-:::
+```
Here, we have another `Prov` object with partial provenance information about the raw text segment:
we know how it was used (the entity was derived from it) but we don't know how it was created.
@@ -118,7 +118,7 @@ It also provides command-line executable named `dot` to generate images from suc
You will need to install `graphviz` on your system to be able to run the following code.
:::
-:::{code}
+```{code} python
from pathlib import Path
from IPython.display import Image
from medkit.tools import save_prov_to_dot
@@ -144,7 +144,7 @@ dot_file = output_dir / "prov.dot"
save_prov_to_dot(prov_tracer, dot_file)
display_dot(dot_file)
-:::
+```
## Provenance composition
@@ -152,7 +152,7 @@ Let's move on to a slightly more complex example.
Before using the `RegexpMatcher` matcher, we will split our document into sentences with a `SentenceTokenizer`.
We will also compose the `SentenceTokenizer` and our `RegexpMatcher` operations in a `Pipeline`.
-:::{code}
+```{code} python
from medkit.text.segmentation import SentenceTokenizer
from medkit.core.pipeline import PipelineStep, Pipeline
@@ -166,7 +166,7 @@ steps = [
PipelineStep(regexp_matcher, input_keys=["sentences"], output_keys=["entities"]),
]
pipeline = Pipeline(steps=steps, input_keys=["full_text"], output_keys=["entities"])
-:::
+```
Since a pipeline is itself an operation, it also features a `set_prov_tracer()` method,
and calling it will automatically enable provenance tracing for all the operations in the pipeline.
@@ -175,7 +175,7 @@ and calling it will automatically enable provenance tracing for all the operatio
Provenance tracers can only accumulate provenance information, not modify or delete it.
:::
-:::{code}
+```{code} python
prov_tracer = ProvTracer()
pipeline.set_prov_tracer(prov_tracer)
@@ -183,15 +183,15 @@ entities = pipeline.run([doc.raw_segment])
for entity in entities:
print(f"text={entity.text!r}, label={entity.label}")
-:::
+```
As expected, the result is identical to the first example: we have matched one entity.
However, its provenance is structured differently:
-:::{code}
+```{code} python
for prov in prov_tracer.get_provs():
print_prov(prov)
-:::
+```
Compared to the simpler case, the operation that created the entity is the `Pipeline`, instead of the `RegexpMatcher`.
It might sound a little surprising, but it does make sense: the pipeline is a processing operation itself,
@@ -202,12 +202,12 @@ If we are interested in the details about what happened inside the `Pipeline`,
the information is still available through a sub-provenance tracer
that can be retrieved with `get_sub_prov_tracer()`:
-:::{code}
+```{code} python
pipeline_prov_tracer = prov_tracer.get_sub_prov_tracer(pipeline.uid)
for prov in pipeline_prov_tracer.get_provs():
print_prov(prov)
-:::
+```
Although `get_provs()` does not return `Prov` objects in the order the annotations were created,
we can see the details of what happened in the pipeline.
@@ -220,17 +220,17 @@ The `save_prov_to_dot()` helper is able to leverage this structure.
By default, it will expand and display all sub-provenance info recursively,
but it has an optional `max_sub_prov_depth` parameter that limits the depth of sub-provenance shown:
-:::{code}
+```{code} python
# show only outer-most provenance
save_prov_to_dot(prov_tracer, dot_file, max_sub_prov_depth=0)
display_dot(dot_file)
-:::
+```
-:::{code}
+```{code} python
# expand next level of sub-provenance
save_prov_to_dot(prov_tracer, dot_file, max_sub_prov_depth=1)
display_dot(dot_file)
-:::
+```
Just as pipelines can contain sub-pipelines recursively,
the provenance tracer can contain sub-provenance tracers recursively for the corresponding sub-pipelines.
@@ -247,7 +247,7 @@ To demonstrate a bit more the potential of provenance tracing in `medkit`,
let's build a more complicated pipeline involving a sub-pipeline
and an operation that creates attributes:
-:::{code}
+```{code} python
from medkit.text.context import NegationDetector, NegationDetectorRule
# segmentation
@@ -284,14 +284,14 @@ pipeline = Pipeline(
input_keys=["full_text"],
output_keys=["entities"],
)
-:::
+```
Since there are 2 pipelines, we need to pass an optional `name` parameter to each of them;
it will be used in the operation descriptions and help us distinguish between them.
Running the main pipeline returns 2 entities with negation attributes:
-:::{code}
+```{code} python
prov_tracer = ProvTracer()
pipeline.set_prov_tracer(prov_tracer)
entities = pipeline.run([doc.raw_segment])
@@ -299,15 +299,15 @@ entities = pipeline.run([doc.raw_segment])
for entity in entities:
is_negated = entity.attrs.get(label="is_negated")[0].value
print(f"text={entity.text!r}, label={entity.label}, is_negated={is_negated}")
-:::
+```
At the outermost level, provenance tells us that the main pipeline created 2 entities and 2 attributes.
Intermediary data and operations (`SentenceTokenizer`, `NegationDetector`, `RegexpMatcher`) are hidden.
-:::{code}
+```{code} python
save_prov_to_dot(prov_tracer, dot_file, max_sub_prov_depth=0)
display_dot(dot_file)
-:::
+```
You can see dotted arrows showing which attribute relates to which annotation.
While this is not strictly speaking provenance information,
@@ -318,10 +318,10 @@ are copied to new annotations (cf `attrs_to_copy` as explained in the
Expanding one more level of provenance gives us the following graph:
-:::{code}
+```{code} python
save_prov_to_dot(prov_tracer, dot_file, max_sub_prov_depth=1)
display_dot(dot_file)
-:::
+```
Now, we can see the details of the operations and data items handled in our main pipeline.
A sub-pipeline created sentence segments and negation attributes,
@@ -330,10 +330,10 @@ The negation attributes were attached to both the sentences and the entities der
To have more details about the processing inside the context sub-pipeline, we have to go one level deeper:
-:::{code}
+```{code} python
save_prov_to_dot(prov_tracer, dot_file, max_sub_prov_depth=2)
display_dot(dot_file)
-:::
+```
## Wrapping it up
diff --git a/pyproject.toml b/pyproject.toml
index 2f34063f..90f4192b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -164,9 +164,10 @@ all = [
webrtc-voice-detector]""",
]
docs = [
- "myst-parser",
+ "myst-nb",
"numpydoc",
- "sphinx>=7,<8",
+ "pandas",
+ "sphinx",
"sphinx-autoapi",
"sphinx-autobuild",
"sphinx-book-theme",
@@ -207,15 +208,20 @@ cov = [
[tool.hatch.envs.docs]
dependencies = [
- "myst-parser",
+ "myst-nb",
"numpydoc",
- "sphinx>=7,<8",
+ "pandas",
+ "sphinx",
"sphinx-autoapi",
"sphinx-autobuild",
"sphinx-book-theme",
"sphinx-design",
"sphinxcontrib-mermaid",
]
+features = [
+ "spacy",
+]
+python = "3.12"
[tool.hatch.envs.docs.scripts]
clean = "rm -rf docs/_build"