Skip to content

Commit eb2cd78

Browse files
Merge pull request #108 from amd/alex_journal
JournalPlugin update
2 parents 921f5b9 + 06ed398 commit eb2cd78

6 files changed

Lines changed: 615 additions & 7 deletions

File tree

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
###############################################################################
2+
#
3+
# MIT License
4+
#
5+
# Copyright (c) 2026 Advanced Micro Devices, Inc.
6+
#
7+
# Permission is hereby granted, free of charge, to any person obtaining a copy
8+
# of this software and associated documentation files (the "Software"), to deal
9+
# in the Software without restriction, including without limitation the rights
10+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
# copies of the Software, and to permit persons to whom the Software is
12+
# furnished to do so, subject to the following conditions:
13+
#
14+
# The above copyright notice and this permission notice shall be included in all
15+
# copies or substantial portions of the Software.
16+
#
17+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
# SOFTWARE.
24+
#
25+
###############################################################################
26+
from typing import Optional
27+
28+
from nodescraper.models import TimeRangeAnalysisArgs
29+
30+
31+
class JournalAnalyzerArgs(TimeRangeAnalysisArgs):
    """Arguments for journal analyzer"""

    check_priority: Optional[int] = None
    """Journal log priority threshold to check entries against.
    emergency(0), alert(1), critical(2), error(3), warning(4), notice(5), info(6), debug(7).
    If a journal log entry has a priority level less than or equal to check_priority,
    an event is logged for it (with an event priority derived from the entry's journal
    level) and the analysis result status is set to ERROR.
    If None, no priority check is performed."""

    group: bool = True
    """Groups entries if they have the same priority and the same message"""
Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
###############################################################################
2+
#
3+
# MIT License
4+
#
5+
# Copyright (c) 2026 Advanced Micro Devices, Inc.
6+
#
7+
# Permission is hereby granted, free of charge, to any person obtaining a copy
8+
# of this software and associated documentation files (the "Software"), to deal
9+
# in the Software without restriction, including without limitation the rights
10+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
# copies of the Software, and to permit persons to whom the Software is
12+
# furnished to do so, subject to the following conditions:
13+
#
14+
# The above copyright notice and this permission notice shall be included in all
15+
# copies or substantial portions of the Software.
16+
#
17+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
# SOFTWARE.
24+
#
25+
###############################################################################
26+
from datetime import datetime
27+
from typing import Optional, TypedDict
28+
29+
from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus
30+
from nodescraper.interfaces import DataAnalyzer
31+
from nodescraper.models import TaskResult
32+
33+
from .analyzer_args import JournalAnalyzerArgs
34+
from .journaldata import JournalData, JournalJsonEntry
35+
36+
37+
# Aggregated record for a group of identical journal entries:
#   count            - number of entries folded into the group
#   first_occurrence - timestamp recorded when the group was created
#   last_occurrence  - timestamp of the most recent entry added to the group
JournalEvent = TypedDict(
    "JournalEvent",
    {
        "count": int,
        "first_occurrence": datetime,
        "last_occurrence": datetime,
    },
)
41+
42+
43+
class JournalPriority:
    """Journal log priority levels; a lower number means a more severe entry."""

    # emergency(0), alert(1), critical(2), error(3),
    # warning(4), notice(5), info(6), debug(7)
    (
        EMERGENCY,
        ALERT,
        CRITICAL,
        ERROR,
        WARNING,
        NOTICE,
        INFO,
        DEBUG,
    ) = range(8)
52+
53+
54+
class JournalAnalyzer(DataAnalyzer[JournalData, JournalAnalyzerArgs]):
    """Check journalctl for errors"""

    DATA_MODEL = JournalData

    @classmethod
    def filter_journal(
        cls,
        journal_content_json: list[JournalJsonEntry],
        analysis_range_start: Optional[datetime] = None,
        analysis_range_end: Optional[datetime] = None,
    ) -> list[JournalJsonEntry]:
        """Filter a journal log by date

        The log is scanned in order: entries are dropped until the first one at or
        after analysis_range_start is seen, and the scan stops at the first entry at
        or after analysis_range_end. Entries without a timestamp are always dropped.

        Args:
            journal_content_json (list[JournalJsonEntry]): unfiltered journal log
            analysis_range_start (Optional[datetime], optional): start of analysis range (inclusive). Defaults to None.
            analysis_range_end (Optional[datetime], optional): end of analysis range (exclusive). Defaults to None.

        Returns:
            list[JournalJsonEntry]: filtered journal log
        """
        filtered_journal = []

        # Without a start bound, collection begins with the first entry
        found_start = analysis_range_start is None

        for entry in journal_content_json:
            date = entry.REALTIME_TIMESTAMP

            # Skip entries without valid timestamp
            if date is None:
                continue

            if not found_start:
                if date < analysis_range_start:
                    continue
                found_start = True

            # The end bound is checked independently of the start detection so that
            # the entry which opens the range is still rejected when it already
            # lies past the end of the range.
            if analysis_range_end is not None and date >= analysis_range_end:
                break

            filtered_journal.append(entry)

        return filtered_journal

    def _priority_to_entry_priority(self, priority: int) -> EventPriority:
        """Map a journal priority level to the priority of the event logged for it.

        Args:
            priority (int): journal priority level of an entry

        Returns:
            EventPriority: ERROR for emergency through error, WARNING for warning,
                INFO for notice through debug, and ERROR for any value outside the
                known 0-7 range.
        """
        if priority < JournalPriority.EMERGENCY or priority > JournalPriority.DEBUG:
            # Unknown level: report it at the highest severity rather than hide it
            return EventPriority.ERROR
        if priority <= JournalPriority.ERROR:
            return EventPriority.ERROR
        if priority == JournalPriority.WARNING:
            return EventPriority.WARNING
        return EventPriority.INFO

    def _analyze_journal_entries_by_priority(
        self, journal_content_json: list[JournalJsonEntry], priority: int, group: bool
    ) -> None:
        """Analyze a list of Journal Entries against a priority threshold.
        Every entry whose priority level is less than or equal to the threshold sets
        the result status to ERROR and is logged as an event, either individually or
        grouped by (message, priority).

        Parameters
        ----------
        journal_content_json : list[JournalJsonEntry]
            List of JournalJsonEntry to analyze
        priority : int
            Priority threshold to check against
        group : bool
            Whether to group similar entries
        """
        # Use a tuple of (message, priority) as the key instead of the JournalJsonEntry object
        journal_event_map: dict[tuple[str, int], JournalEvent] = {}

        # Check against journal log priority levels. emergency(0), alert(1), critical(2), error(3), warning(4), notice(5), info(6), debug(7)
        for entry in journal_content_json:
            if entry.PRIORITY > priority:
                continue

            self.result.status = ExecutionStatus.ERROR

            if not group:
                entry_dict = entry.model_dump()  # Convert JournalJsonEntry to dictionary
                entry_dict["task_name"] = self.__class__.__name__
                self._log_event(
                    category=EventCategory.OS,
                    description=f"Journal log entry with priority level {entry.PRIORITY}",
                    data=entry_dict,
                    priority=self._priority_to_entry_priority(entry.PRIORITY),
                    console_log=False,
                )
                continue

            # Handle MESSAGE as either string or list. List items are converted
            # with str() because journal JSON output may carry non-string items
            # (binary field data is serialized as an array of numbers).
            message = entry.MESSAGE
            if isinstance(message, list):
                message = " ".join(str(part) for part in message)

            # Create a tuple key from message and priority
            entry_key = (message, entry.PRIORITY)
            timestamp = entry.REALTIME_TIMESTAMP
            event = journal_event_map.get(entry_key)
            if event is None:
                # Entries without a timestamp fall back to the epoch
                occurrence = timestamp if timestamp else datetime.fromtimestamp(0)
                journal_event_map[entry_key] = {
                    "count": 1,
                    "first_occurrence": occurrence,
                    "last_occurrence": occurrence,
                }
            else:
                event["count"] += 1
                if timestamp:
                    event["last_occurrence"] = timestamp

        # log all events that were grouped
        if group:
            for (message, entry_priority), event_data in journal_event_map.items():
                self._log_event(
                    category=EventCategory.OS,
                    description="Journal entries found in OS journal log",
                    priority=self._priority_to_entry_priority(entry_priority),
                    data={
                        "message": message,
                        "priority": entry_priority,
                        "count": event_data["count"],
                        "first_occurrence": event_data["first_occurrence"],
                        "last_occurrence": event_data["last_occurrence"],
                    },
                    console_log=False,
                )

    def analyze_data(
        self, data: JournalData, args: Optional[JournalAnalyzerArgs] = None
    ) -> TaskResult:
        """Analyze the OS journal log for errors

        Parameters
        ----------
        data : JournalData
            Journal data to analyze
        args : Optional[JournalAnalyzerArgs], optional
            Analysis arguments, by default None

        Returns
        -------
        TaskResult
            A TaskResult object containing the result of the analysis
            ExecutionStatus.OK if check_priority is not set or no journal log entry
            has a priority level less than or equal to check_priority
            ExecutionStatus.ERROR if journal log entries are found with priority
            level less than or equal to check_priority
        """
        if args is None:
            args = JournalAnalyzerArgs()

        journal_content_json = data.journal_content_json

        # Filter by time range if specified
        if args.analysis_range_start or args.analysis_range_end:
            self.logger.info(
                "Filtering journal log using range %s - %s",
                args.analysis_range_start,
                args.analysis_range_end,
            )
            journal_content_json = self.filter_journal(
                journal_content_json=journal_content_json,
                analysis_range_start=args.analysis_range_start,
                analysis_range_end=args.analysis_range_end,
            )

        self.result.status = ExecutionStatus.OK

        # The priority check is skipped entirely when no threshold is configured
        if args.check_priority is not None:
            self._analyze_journal_entries_by_priority(
                journal_content_json, args.check_priority, args.group
            )

        if self.result.status == ExecutionStatus.OK:
            self.result.message = "No journal errors found"
        else:
            self.result.message = f"Found journal entries with priority <= {args.check_priority}"

        return self.result

nodescraper/plugins/inband/journal/journal_collector.py

Lines changed: 68 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
# SOFTWARE.
2424
#
2525
###############################################################################
26+
import json
2627
from typing import Optional
2728

2829
from pydantic import ValidationError
@@ -33,7 +34,7 @@
3334
from nodescraper.utils import get_exception_details
3435

3536
from .collector_args import JournalCollectorArgs
36-
from .journaldata import JournalData
37+
from .journaldata import JournalData, JournalJsonEntry
3738

3839

3940
class JournalCollector(InBandDataCollector[JournalData, JournalCollectorArgs]):
@@ -42,6 +43,7 @@ class JournalCollector(InBandDataCollector[JournalData, JournalCollectorArgs]):
4243
SUPPORTED_OS_FAMILY = {OSFamily.LINUX}
4344
DATA_MODEL = JournalData
4445
CMD = "journalctl --no-pager --system --output=short-iso"
46+
CMD_JSON = "journalctl --no-pager --system --output=json"
4547

4648
def _read_with_journalctl(self, args: Optional[JournalCollectorArgs] = None):
4749
"""Read journal logs using journalctl
@@ -50,11 +52,11 @@ def _read_with_journalctl(self, args: Optional[JournalCollectorArgs] = None):
5052
str|None: system journal read
5153
"""
5254

53-
cmd = "journalctl --no-pager --system --output=short-iso"
55+
cmd = self.CMD
5456
try:
5557
# safe check for args.boot
5658
if args is not None and getattr(args, "boot", None):
57-
cmd = f"journalctl --no-pager -b {args.boot} --system --output=short-iso"
59+
cmd = f"{self.CMD} -b {args.boot}"
5860

5961
res = self._run_sut_cmd(cmd, sudo=True, log_artifact=False, strip=False)
6062

@@ -84,6 +86,63 @@ def _read_with_journalctl(self, args: Optional[JournalCollectorArgs] = None):
8486

8587
return res.stdout
8688

89+
def _read_with_journalctl_json(
    self, args: Optional[JournalCollectorArgs] = None
) -> Optional[list[JournalJsonEntry]]:
    """Read journal logs in JSON format using journalctl

    Args:
        args (Optional[JournalCollectorArgs], optional): collector arguments; when a
            truthy ``boot`` attribute is present it is passed to journalctl with
            ``-b``. Defaults to None.

    Returns:
        list[JournalJsonEntry]|None: system journal read as JSON entries, or None
            when running the command raised a ValidationError or it exited with a
            non-zero code. Lines that cannot be parsed into an entry are logged as
            WARNING events and skipped.
    """

    cmd = self.CMD_JSON
    try:
        # safe check for args.boot
        if args is not None and getattr(args, "boot", None):
            cmd = f"{self.CMD_JSON} -b {args.boot}"

        res = self._run_sut_cmd(cmd, sudo=True, log_artifact=False, strip=False)

    except ValidationError as val_err:
        self._log_event(
            category=EventCategory.OS,
            description="Exception while running journalctl JSON",
            data=get_exception_details(val_err),
            priority=EventPriority.ERROR,
            console_log=True,
        )
        return None

    if res.exit_code != 0:
        self._log_event(
            category=EventCategory.OS,
            description="Error reading journalctl JSON",
            data={"command": res.command, "exit_code": res.exit_code},
            priority=EventPriority.ERROR,
            console_log=True,
        )
        return None

    # Parse JSON entries
    json_entries: list[JournalJsonEntry] = []
    # Split on "\n" only: str.splitlines() also breaks on characters such as
    # U+2028 or U+0085, which may appear unescaped inside a JSON string and would
    # tear a single entry into unparsable fragments.
    for line in res.stdout.split("\n"):
        if not line.strip():
            continue
        try:
            entry_dict = json.loads(line)
            if not isinstance(entry_dict, dict):
                # Valid JSON that is not an object cannot describe a journal entry
                raise TypeError(
                    f"expected a JSON object, got {type(entry_dict).__name__}"
                )
            json_entries.append(JournalJsonEntry(**entry_dict))
        except (json.JSONDecodeError, ValidationError, TypeError) as e:
            self._log_event(
                category=EventCategory.OS,
                description="Failed to parse journal JSON entry",
                data={"error": str(e), "line": line[:200]},
                priority=EventPriority.WARNING,
                console_log=False,
            )
            continue

    return json_entries
145+
87146
def collect_data(
88147
self,
89148
args: Optional[JournalCollectorArgs] = None,
@@ -100,8 +159,13 @@ def collect_data(
100159
args = JournalCollectorArgs()
101160

102161
journal_log = self._read_with_journalctl(args)
162+
journal_json = self._read_with_journalctl_json(args)
163+
103164
if journal_log:
104-
data = JournalData(journal_log=journal_log)
165+
data = JournalData(
166+
journal_log=journal_log,
167+
journal_content_json=journal_json if journal_json else [],
168+
)
105169
self.result.message = self.result.message or "Journal data collected"
106170
return self.result, data
107171
return self.result, None

0 commit comments

Comments
 (0)