|
3 | 3 | import base64 |
4 | 4 | import difflib |
5 | 5 | import re |
| 6 | +import hashlib |
| 7 | +import zipfile |
6 | 8 | from typing import Any, Dict, List, Union |
7 | 9 | from datetime import datetime |
8 | 10 |
|
@@ -404,6 +406,143 @@ def get_file_info(sandbox_id: str, file_path: str) -> str: |
404 | 406 | return f"Error getting file info: {e}" |
405 | 407 |
|
406 | 408 |
|
| 409 | + return f"Error getting file info: {e}" |
| 410 | + |
| 411 | + |
def _outlook_message_text(message) -> str:
    """Build the header-plus-body text that the query regex is matched against."""
    subject = getattr(message, 'Subject', '') or ''
    body = getattr(message, 'Body', '') or ''
    to = getattr(message, 'To', '') or ''
    try:
        sender = getattr(message, 'SenderName', '') or getattr(message, 'SenderEmailAddress', '')
    except Exception:
        # Best effort: a failing sender lookup must not hide an otherwise matching mail.
        sender = ''
    return f"Subject: {subject}\nFrom: {sender}\nTo: {to}\n\n{body}"


def _save_outlook_attachments(message, save_folder: str) -> None:
    """Save every attachment of *message* into *save_folder*, unpacking .zip files in place."""
    if not hasattr(message, 'Attachments'):
        return

    for attachment in message.Attachments:
        try:
            # Attachment names come from untrusted mail: keep only the final path
            # component so a crafted name cannot write outside save_folder.
            file_name = os.path.basename(attachment.FileName)
            if file_name in ('', '.', '..'):
                continue

            file_path = os.path.join(save_folder, file_name)
            attachment.SaveAsFile(file_path)

            if file_name.lower().endswith('.zip'):
                with zipfile.ZipFile(file_path, 'r') as zip_ref:
                    zip_ref.extractall(save_folder)
        except Exception:
            # Best effort: one unreadable or corrupt attachment must not abort the import.
            continue


def import_outlook_emails(sandbox_id: str, query: str = None, received_after: str = None) -> str:
    """
    Import emails from Outlook based on a grep-like query and optional time boundary.
    Only works on Windows.

    Every folder of every Outlook store is walked recursively. Each matching
    message is written to ``memory/memory_<sha256>/email_data.json`` inside the
    sandbox, together with its attachments; ``.zip`` attachments are also
    extracted into the same folder. Messages whose target folder already exists
    are skipped, so repeated calls only import new mail.

    Args:
        sandbox_id: Identifier passed to ``get_sandbox_path`` to locate the sandbox.
        query: Optional regular expression, matched case-insensitively against
            the subject, sender, recipients and body. ``None``/empty matches all.
        received_after: Optional ``YYYY-MM-DD`` date; messages received before
            it (or without a received time) are skipped.

    Returns:
        A human-readable status string: a summary of the imported emails, a
        "nothing found" notice, or a message starting with ``Error``.
    """
    try:
        import win32com.client
    except ImportError:
        return "Error: Outlook import tool is only available on Windows systems with pywin32 installed."

    sandbox_path = get_sandbox_path(sandbox_id)
    if not os.path.exists(sandbox_path):
        return "Error: Sandbox directory does not exist."

    # Parse the date boundary (naive datetime; message times are stripped of tz below).
    filter_date = None
    if received_after:
        try:
            filter_date = datetime.strptime(received_after, "%Y-%m-%d")
        except ValueError:
            return "Error: received_after must be in YYYY-MM-DD format."

    # Compile the query once. An invalid pattern used to raise inside the
    # per-message handler and was reported as "no emails found".
    pattern = None
    if query:
        try:
            pattern = re.compile(query, re.IGNORECASE | re.MULTILINE)
        except re.error as e:
            return f"Error: invalid query regex: {e}"

    saved_paths = []

    def process_folder(folder):
        # Depth-first: descend into subfolders before scanning this folder's items.
        for subfolder in folder.Folders:
            process_folder(subfolder)

        for message in folder.Items:
            try:
                # Filter by date first (cheapest check).
                if filter_date:
                    try:
                        msg_time = message.ReceivedTime
                        if msg_time.replace(tzinfo=None) < filter_date:
                            continue
                    except AttributeError:
                        # Some items (e.g. drafts) have no ReceivedTime.
                        continue

                # Filter by query (grep-like regex).
                if pattern is not None and not pattern.search(_outlook_message_text(message)):
                    continue

                # Content-derived ID: identical subject+body map to the same folder.
                subject = getattr(message, 'Subject', '') or ''
                body = getattr(message, 'Body', '') or ''
                unique_id = hashlib.sha256(
                    (subject + body).encode('utf-8', errors='ignore')
                ).hexdigest()

                save_folder = os.path.join(sandbox_path, "memory", f"memory_{unique_id}")
                if os.path.exists(save_folder):
                    continue  # already imported by an earlier run

                os.makedirs(save_folder, exist_ok=True)
                relative_folder = os.path.relpath(save_folder, sandbox_path)

                meta = {
                    "id": unique_id,
                    "Subject": getattr(message, 'Subject', 'No Subject'),
                    "Body": getattr(message, 'Body', ''),
                    "ReceivedTime": str(getattr(message, 'ReceivedTime', '')),
                    "Sender": getattr(message, 'SenderName', ''),
                    "To": getattr(message, 'To', ''),
                    "Memory": relative_folder,
                }

                with open(os.path.join(save_folder, "email_data.json"), 'w', encoding='utf-8') as f:
                    json.dump(meta, f, indent=2)

                _save_outlook_attachments(message, save_folder)

                saved_paths.append(relative_folder)

            except Exception:
                # Best effort: skip items that cannot be read or written.
                continue

    try:
        outlook = win32com.client.Dispatch('Outlook.Application').GetNamespace('MAPI')
        for account in outlook.Folders:
            process_folder(account)
    except Exception as e:
        return f"Error connecting to Outlook: {e}"

    if not saved_paths:
        return "No new emails found matching criteria."

    return f"Imported {len(saved_paths)} emails. Saved to: {', '.join(saved_paths[:5])}..."
| 544 | + |
| 545 | + |
407 | 546 | # --- Tool Registry --- |
408 | 547 |
|
409 | 548 | TOOL_DEFINITIONS = [ |
@@ -559,6 +698,27 @@ def get_file_info(sandbox_id: str, file_path: str) -> str: |
559 | 698 | "required": ["file_path", "edits"] |
560 | 699 | } |
561 | 700 | } |
| 701 | + }, |
| 702 | + { |
| 703 | + "type": "function", |
| 704 | + "function": { |
| 705 | + "name": "import_outlook_emails", |
| 706 | + "description": "Import emails from Outlook (Windows only) using a grep-like query and time boundary.", |
| 707 | + "parameters": { |
| 708 | + "type": "object", |
| 709 | + "properties": { |
| 710 | + "query": { |
| 711 | + "type": "string", |
| 712 | + "description": "Regex pattern to search in Subject, Body, Sender, and To fields." |
| 713 | + }, |
| 714 | + "received_after": { |
| 715 | + "type": "string", |
| 716 | + "description": "Optional: Filter emails received after this date (YYYY-MM-DD)." |
| 717 | + } |
| 718 | + }, |
| 719 | + "required": ["query"] |
| 720 | + } |
| 721 | + } |
562 | 722 | } |
563 | 723 | ] |
564 | 724 |
|
@@ -595,6 +755,8 @@ def execute_tool(name: str, args: Dict[str, Any]) -> str: |
595 | 755 | args.get("dry_run", False), |
596 | 756 | args.get("options") |
597 | 757 | )) |
| 758 | + elif name == "import_outlook_emails": |
| 759 | + return str(import_outlook_emails(args.get("sandbox_id"), args.get("query"), args.get("received_after"))) |
598 | 760 | else: |
599 | 761 | return f"Error: Tool {name} not found." |
600 | 762 | except Exception as e: |
|
0 commit comments