-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathhooks.py
More file actions
147 lines (117 loc) · 4.09 KB
/
hooks.py
File metadata and controls
147 lines (117 loc) · 4.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import re
import requests
def extract_annotation_blocks(markdown):
    """Find ontology annotation blocks in a page's markdown.

    A block looks like::

        <!-- BEGIN-ANNOTATION: <ontology> -->content<!-- END-ANNOTATION: <ontology> -->

    where ``<ontology>`` must be one of the supported ontology ids and the
    END marker must name the *same* ontology as the BEGIN marker.

    Returns a list of ``(full_match, ontology_id, content)`` tuples:
    the entire block including markers, the ontology id, and the text
    between the markers. Returns an empty list when nothing matches.
    """
    supported_ontologies = [
        "oeo",
        "sms",
        "fmi",
        "dogont",
        "brick",
        "s4grid",
        "sargon",
        "s4ener",
        "bont",
        "openadr",
        "dices",
    ]
    ontology_pattern = "|".join(supported_ontologies)
    # NOTE: raw strings are essential here. In a plain (f-)string literal
    # "\1" is the octal escape for chr(1), so the compiled regex looked
    # for a literal control character instead of a backreference to
    # group 1 (the ontology id) and never matched any block.
    annotation_pattern = (
        rf"<!-- BEGIN-ANNOTATION: ({ontology_pattern}) -->"
        rf"(.*?)<!-- END-ANNOTATION: \1 -->"
    )
    # DOTALL lets the non-greedy content group span multiple lines.
    annotation_matches = re.finditer(annotation_pattern, markdown, re.DOTALL)
    annotation_blocks = []
    for match in annotation_matches:
        full_match = match.group(0)
        ontology_id = match.group(1)
        content = match.group(2)
        annotation_blocks.append((full_match, ontology_id, content))
    return annotation_blocks
def get_ontology_annotations(config, ontology_id, text):
    """Annotate *text* against one ontology via the remote annotator API.

    Posts the text to the annotation service (endpoint taken from
    ``config["api_url"]`` when present, otherwise the TIB sandbox
    service) and returns the annotated HTML. On any failure the original
    text is returned unchanged so page rendering never breaks.
    """
    default_endpoint = "https://service.tib.eu/sandbox/nfdi4energyannotator/annotate"
    api_endpoint = config.get("api_url", default_endpoint)
    try:
        payload = {"text": text, "ontology_ids": [ontology_id]}
        response = requests.post(
            api_endpoint,
            json=payload,
            headers={"Content-Type": "application/json"},
            timeout=60,
        )
        response.raise_for_status()
        response_data = response.json()
        print(f"Response from {ontology_id} ontology: {response_data}")
        matches = response_data.get("matches")
        if isinstance(matches, list):
            # Token-level matches: build the <a>-wrapped text ourselves.
            return wrap_terms_in_span(text, matches)
        if "annotated_text" in response_data:
            # The service already produced annotated markup.
            return response_data["annotated_text"]
        return text
    except Exception as e:
        # Best-effort by design: any network/parse error falls back to
        # the unannotated source text.
        print(f"Error annotating text with {ontology_id} ontology: {str(e)}")
        return text
def wrap_terms_in_span(text, matches):
    """Wrap matched terms in *text* with styled ``<a>`` links to their IRIs.

    Parameters
    ----------
    text : str
        The plain text that was sent to the annotator.
    matches : list[dict]
        Annotator matches. Each dict must carry ``"token"``, ``"start"``,
        ``"end"`` and ``"iri"``; ``"matched_term"`` (falling back to
        ``"label"``, then ``"token"``) and ``"ontology_id"`` are optional.

    Longer matches take priority: when two matches overlap, the shorter
    one is dropped. Returns the text with HTML anchors inserted.
    """
    cleaner_matches = [
        {
            "term": m["token"],
            # Fall back to the ontology label, then to the raw token,
            # when no matched_term is supplied.
            "matched_term": m.get("matched_term", m.get("label", m["token"])),
            "start": m["start"],
            "end": m["end"],
            "iri": m["iri"],
            # BUG FIX: propagate the ontology id. It was dropped here
            # before, so the per-ontology colour lookup below always hit
            # its default and every annotation got the same colour.
            "ontology_id": m.get("ontology_id", ""),
        }
        for m in matches
    ]
    # Longest matched term first; ties broken by later start position.
    sorted_matches = sorted(
        cleaner_matches, key=lambda m: (-len(m["matched_term"]), -m["start"])
    )
    # First pass: greedily keep matches, skipping any whose span overlaps
    # a position already claimed by an earlier (longer) match.
    modified_positions = set()
    modifications = []
    for match in sorted_matches:
        span = range(match["start"], match["end"] + 1)
        if any(pos in modified_positions for pos in span):
            continue
        modified_positions.update(span)
        modifications.append(match)
    # Re-emit in document order so the text can be rebuilt left to right.
    modifications.sort(key=lambda m: m["start"])
    ontology_colors = {
        "oeo": "#1F567D",
        "sms": "#FFD6E0",
        "fmi": "#FFEFCF",
        "dogont": "#DCEDC2",
        "brick": "#A8E6CE",
        "s4grid": "#FFD3B5",
        "sargon": "#D5E5F2",
        "s4ener": "#E0F7FA",
        "bont": "#FFF9C4",
        "openadr": "#F8BBD0",
        "dices": "#FFC0CB",
    }
    # Second pass: splice the anchors into the original text.
    result = ""
    last_end = 0
    for match in modifications:
        result += text[last_end : match["start"]]
        color = ontology_colors.get(match["ontology_id"], "#1F567D")  # Default
        result += (
            f'<a href="{match["iri"]}" style=\'background-color: {color}; '
            "color: #fff; text-decoration: none; padding: 0 3px; border-radius: 3px;'>"
            f"{text[match['start'] : match['end']]}</a>"
        )
        last_end = match["end"]
    result += text[last_end:]
    return result
def on_page_markdown(markdown, page, config, files):
    """MkDocs ``on_page_markdown`` hook: inject ontology annotations.

    Every supported annotation block found in the page markdown is sent
    to the annotation service; the whole block (markers included) is
    then replaced by the annotated result. Returns the updated markdown.
    """
    print("Hook executed for page:", page.file.name)
    for block, ontology_id, content in extract_annotation_blocks(markdown):
        annotated = get_ontology_annotations(config, ontology_id, content)
        markdown = markdown.replace(block, annotated)
    print("MkDocs hook loaded")
    return markdown