From c180c6be823df445d2ca4024fbc32c8fcdccd522 Mon Sep 17 00:00:00 2001
From: tforrewot <tforrewot@users.noreply.github.com>
Date: Wed, 18 Feb 2026 13:04:52 +0400
Subject: [PATCH] Completed Lab 1 with all tasks and functions

---
 ...ipython-python-basics-lab-checkpoint.ipynb | 503 ++++++++++++++++++
 .../m1_01_task_3_functions-checkpoint.py      |  12 +
 .../m1_01_task_3_functions.cpython-311.pyc    | Bin 0 -> 896 bytes
 m1-01-ipython-python-basics-lab.ipynb         | 503 ++++++++++++++++++
 m1_01_task_3_functions.py                     |  12 +
 5 files changed, 1030 insertions(+)
 create mode 100644 .ipynb_checkpoints/m1-01-ipython-python-basics-lab-checkpoint.ipynb
 create mode 100644 .ipynb_checkpoints/m1_01_task_3_functions-checkpoint.py
 create mode 100644 __pycache__/m1_01_task_3_functions.cpython-311.pyc
 create mode 100644 m1-01-ipython-python-basics-lab.ipynb
 create mode 100644 m1_01_task_3_functions.py
diff --git a/.ipynb_checkpoints/m1-01-ipython-python-basics-lab-checkpoint.ipynb b/.ipynb_checkpoints/m1-01-ipython-python-basics-lab-checkpoint.ipynb
new file mode 100644
index 0000000..7ee3ce6
--- /dev/null
+++ b/.ipynb_checkpoints/m1-01-ipython-python-basics-lab-checkpoint.ipynb
@@ -0,0 +1,503 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "f60e6c4d-468f-4a49-81da-ced20ecc3935",
+   "metadata": {},
+   "source": [
+    "Task 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "d371b9d3-67a0-4935-aea1-16c9a5dd2c47",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'int'>\n",
+      "<class 'float'>\n",
+      "<class 'str'>\n",
+      "<class 'list'>\n",
+      "<class 'dict'>\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Creating variables\n",
+    "age = 25                  # Integer\n",
+    "price = 19.99             # Float\n",
+    "name = \"IronHacker\"       # String\n",
+    "skills = [\"Python\", \"Git\"] # List\n",
+    "info = {\"track\": \"Data\"}   # Dictionary\n",
+    "\n",
+    "print(type(age))\n",
+    "print(type(price))\n",
+    "print(type(name))\n",
+    "print(type(skills))\n",
+    "print(type(info))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c1724569-1a2e-4c6a-bc80-a47eebbd00ed",
+   "metadata": {},
+   "source": [
+    "Using the type() function, I identified the following common data types in my workspace:\n",
+    "\n",
+    "age: <class 'int'> (Integer)\n",
+    "\n",
+    "price: <class 'float'> (Floating-point number)\n",
+    "\n",
+    "name: <class 'str'> (String)\n",
+    "\n",
+    "skills: <class 'list'> (List)\n",
+    "\n",
+    "info: <class 'dict'> (Dictionary)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c4406d2d-e92a-4a66-b1fb-4c472d903288",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "IRONHACKER\n",
+      "['Python', 'Git', 'SQL']\n",
+      "dict_keys(['track'])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# String method\n",
+    "name_upper = name.upper()\n",
+    "print(name_upper)\n",
+    "\n",
+    "# List method\n",
+    "skills.append(\"SQL\")\n",
+    "print(skills)\n",
+    "\n",
+    "# Dictionary method\n",
+    "keys = info.keys()\n",
+    "print(keys)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "94f566a1-27ad-4220-8875-bbc38ef28142",
+   "metadata": {},
+   "source": [
+    "I called the following methods to transform or inspect my data:\n",
+    "\n",
+    "String (.upper()): Returned a new version of the string in all capital letters.\n",
+    "\n",
+    "List (.append()): Modified the list in-place by adding a new element to the end.\n",
+    "\n",
+    "Dictionary (.keys()): Returned a dict_keys object containing all the labels (keys) currently in the dictionary."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "71cf4cda-36d9-4505-8ec9-e668bcea9eeb",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "\u001b[31mSignature:\u001b[39m name.replace(old, new, count=-\u001b[32m1\u001b[39m, /)\n",
+       "\u001b[31mDocstring:\u001b[39m\n",
+       "Return a copy with all occurrences of substring old replaced by new.\n",
+       "\n",
+       "  count\n",
+       "    Maximum number of occurrences to replace.\n",
+       "    -1 (the default value) means replace all occurrences.\n",
+       "\n",
+       "If the optional argument count is given, only the first count occurrences are\n",
+       "replaced.\n",
+       "\u001b[31mType:\u001b[39m      builtin_function_or_method"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "name.replace?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f4361f98-c833-433c-9874-c157db41be72",
+   "metadata": {},
+   "source": [
+    "By using the ? introspection command, I learned that the .replace() method does not change the original string in place. Instead, it returns a new copy with the specified changes. I also discovered the count argument, which limits how many occurrences are replaced. For example, setting count=1 would replace only the first occurrence of the substring, rather than every occurrence in the string."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5fd9bb4c-b757-4318-8a41-ae434cf1efce",
+   "metadata": {},
+   "source": [
+    "Task 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "5a34eab8-ae34-4a92-be18-af15c3f567c0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Values: [1, 2, 3, 4]\n",
+      "Alias: [1, 2, 3, 4]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 1. Creating a list and an alias\n",
+    "values = [1, 2, 3]\n",
+    "alias = values\n",
+    "\n",
+    "# 2. Appending to the alias\n",
+    "alias.append(4)\n",
+    "\n",
+    "# 3. Checking both\n",
+    "print(f\"Values: {values}\")\n",
+    "print(f\"Alias: {alias}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "06a4b2d5-6443-4ce2-a483-5bc39a409da1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Original Values: [1, 2, 3, 4]\n",
+      "Copy: [1, 2, 3, 4, 99]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 1. Creating a true copy\n",
+    "values_copy = values.copy()\n",
+    "\n",
+    "# 2. Appending a different value to the copy\n",
+    "values_copy.append(99)\n",
+    "\n",
+    "# 3. Checking both\n",
+    "print(f\"Original Values: {values}\")\n",
+    "print(f\"Copy: {values_copy}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "72ab2cd5-b4a1-403b-a276-73572077ef9b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Original Record: {'name': 'Alice', 'role': 'Data Scientist', 'status': 'Active'}\n",
+      "Original Record after copy change: {'name': 'Alice', 'role': 'Data Scientist', 'status': 'Active'}\n",
+      "Record Copy: {'name': 'Alice', 'role': 'AI Engineer', 'status': 'Active'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 1. Creating dict and alias\n",
+    "record = {\"name\": \"Alice\", \"role\": \"Data Scientist\"}\n",
+    "record_alias = record\n",
+    "record_alias[\"status\"] = \"Active\"\n",
+    "\n",
+    "print(f\"Original Record: {record}\")\n",
+    "\n",
+    "# 2. Creating a true copy\n",
+    "record_copy = record.copy()\n",
+    "record_copy[\"role\"] = \"AI Engineer\"\n",
+    "\n",
+    "print(f\"Original Record after copy change: {record}\")\n",
+    "print(f\"Record Copy: {record_copy}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9efb1232-803a-4394-a60a-2251f65cc781",
+   "metadata": {},
+   "source": [
+    "In this task, I observed that assigning a list or dictionary to a new variable name (an alias) does not create a new object. Instead, both names refer to the same object in memory. Therefore, modifying the object through the alias also modified the original values. However, using the .copy() method created a separate (shallow) copy, allowing me to change the copy without affecting the original.\n",
+    "\n",
+    "This behavior is critical when passing objects into functions. If a function modifies a mutable object (like a list) passed as an argument, those changes will persist outside the function, which can lead to \"hidden state\" bugs if not handled carefully."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bac02457-f95c-4ca4-b875-0da4ff98d636",
+   "metadata": {},
+   "source": [
+    "Task 3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "daf0af4a-deef-4457-bfae-a93aa3efea0f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Cleaned Strings: ['12.5', 'abc', '42', '', 'python']\n",
+      "Converted Floats: [12.5, None, 42.0, None, None]\n"
+     ]
+    }
+   ],
+   "source": [
+    "from m1_01_task_3_functions import to_float, clean_string\n",
+    "\n",
+    "# Creating test data\n",
+    "test_inputs = [\" 12.5 \", \"ABC\", \"  42\", \"\", \"Python  \"]\n",
+    "\n",
+    "# Testing the cleaning function\n",
+    "cleaned = [clean_string(item) for item in test_inputs]\n",
+    "print(f\"Cleaned Strings: {cleaned}\")\n",
+    "\n",
+    "# Testing the float conversion\n",
+    "floats = [to_float(item) for item in cleaned]\n",
+    "print(f\"Converted Floats: {floats}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c266ed41-3100-4dd5-9169-337626bc433d",
+   "metadata": {},
+   "source": [
+    "I implemented two utility functions in m1_01_task_3_functions.py. The to_float function safely attempts to convert inputs to floats, returning None for values that cannot be converted, to prevent errors during analysis. The clean_string function standardizes text by removing leading/trailing whitespace and converting characters to lowercase, ensuring consistency across the dataset."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "712bfd39-60ed-4cd2-92eb-860cdd76fa5b",
+   "metadata": {},
+   "source": [
+    "Task 4"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "018e233b-3632-4086-a31f-1377caa276fe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "raw_events = [\n",
+    "    {\"user_id\": 101, \"event_type\": \"login\", \"duration_seconds\": 5},\n",
+    "    {\"user_id\": 102, \"event_type\": \"upload\", \"duration_seconds\": 120},\n",
+    "    {\"user_id\": 101, \"event_type\": \"update\", \"duration_seconds\": 45},\n",
+    "    {\"user_id\": 103, \"event_type\": \"login\", \"duration_seconds\": \"ERROR\"},  # Invalid: String\n",
+    "    {\"user_id\": 104, \"event_type\": \"login\", \"duration_seconds\": 15},\n",
+    "    {\"user_id\": 102, \"event_type\": \"logout\", \"duration_seconds\": -10},     # Invalid: Negative\n",
+    "    {\"user_id\": 105, \"event_type\": \"login\", \"duration_seconds\": 8},\n",
+    "    {\"user_id\": 103, \"event_type\": \"update\", \"duration_seconds\": 300},\n",
+    "    {\"user_id\": 101, \"event_type\": \"logout\", \"duration_seconds\": 20},\n",
+    "    {\"user_id\": 104, \"event_type\": \"logout\", \"duration_seconds\": 12},\n",
+    "    {\"user_id\": 106, \"event_type\": \"login\", \"duration_seconds\": 5},\n",
+    "    {\"user_id\": 105, \"event_type\": \"logout\", \"duration_seconds\": \" \"},     # Invalid: Empty/Whitespace\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "1501ee9e-456f-4989-8efc-f4bae48809b3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Number of valid records found: 9\n",
+      "\n",
+      "Sample transformed records:\n",
+      "{'user_id': 101, 'event_type': 'login', 'duration_seconds': 5, 'duration_minutes': 0.08333333333333333}\n",
+      "{'user_id': 102, 'event_type': 'upload', 'duration_seconds': 120, 'duration_minutes': 2.0}\n"
+     ]
+    }
+   ],
+   "source": [
+    "cleaned_events = []\n",
+    "\n",
+    "for event in raw_events:\n",
+    "    duration = event[\"duration_seconds\"]\n",
+    "    \n",
+    "    # Validation: Must be an int or float AND greater than zero\n",
+    "    if isinstance(duration, (int, float)) and duration > 0:\n",
+    "        # In Task 2, we learned to use .copy() so we don't change the original.\n",
+    "        clean_item = event.copy()\n",
+    "        \n",
+    "        # Adding the new key: duration_minutes\n",
+    "        clean_item[\"duration_minutes\"] = duration / 60\n",
+    "        \n",
+    "        cleaned_events.append(clean_item)\n",
+    "\n",
+    "# Verification steps\n",
+    "print(f\"Number of valid records found: {len(cleaned_events)}\")\n",
+    "\n",
+    "# Inspecting the first two cleaned records\n",
+    "print(\"\\nSample transformed records:\")\n",
+    "for record in cleaned_events[:2]:\n",
+    "    print(record)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "09c147dc-65d2-41e5-bfe9-5d0fd10518ab",
+   "metadata": {},
+   "source": [
+    "I processed the raw activity list using a for loop to filter out invalid records. I applied conditional logic to ensure only records with positive, numeric duration_seconds were kept. For each valid record, I created a copy to avoid side effects and added a calculated duration_minutes field. This ensures the data is formatted correctly for the final summary tasks."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3ff02b89-d64e-4ae4-8d62-504e4eef81d2",
+   "metadata": {},
+   "source": [
+    "Task 5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "ad4c3214-7037-40cd-909b-67229fdc854e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Validation: Sum of counts (9) == len(cleaned_events) (9)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 1. Initializing our counters\n",
+    "counts_by_type = {}\n",
+    "total_mins_by_type = {}\n",
+    "unique_user_ids = set()\n",
+    "\n",
+    "# 2. Looping through the cleaned data once\n",
+    "for event in cleaned_events:\n",
+    "    etype = event['event_type']\n",
+    "    mins = event['duration_minutes']\n",
+    "    uid = event['user_id']\n",
+    "    \n",
+    "    # Updating counts\n",
+    "    counts_by_type[etype] = counts_by_type.get(etype, 0) + 1\n",
+    "    \n",
+    "    # Updating total minutes (for calculating average later)\n",
+    "    total_mins_by_type[etype] = total_mins_by_type.get(etype, 0) + mins\n",
+    "    \n",
+    "    # Adding user to the set (sets automatically handle uniqueness)\n",
+    "    unique_user_ids.add(uid)\n",
+    "\n",
+    "# 3. Calculating the averages\n",
+    "averages_by_type = {etype: total_mins_by_type[etype] / counts_by_type[etype] \n",
+    "                    for etype in counts_by_type}\n",
+    "\n",
+    "# 4. Validation Check\n",
+    "total_counts = sum(counts_by_type.values())\n",
+    "print(f\"Validation: Sum of counts ({total_counts}) == len(cleaned_events) ({len(cleaned_events)})\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "b5dc38b9-f04a-43ab-8726-4fd98d582e8e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--- Final CSV String ---\n",
+      "metric,key,value\n",
+      "count,login,4\n",
+      "count,upload,1\n",
+      "count,update,2\n",
+      "count,logout,2\n",
+      "average_duration,login,0.14\n",
+      "average_duration,upload,2.00\n",
+      "average_duration,update,2.88\n",
+      "average_duration,logout,0.27\n",
+      "unique_users,all,6\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Initializing the string with the header\n",
+    "csv_output = \"metric,key,value\\n\"\n",
+    "\n",
+    "# Adding count rows\n",
+    "for etype, count in counts_by_type.items():\n",
+    "    csv_output += f\"count,{etype},{count}\\n\"\n",
+    "\n",
+    "# Adding average duration rows (rounded to 2 decimal places)\n",
+    "for etype, avg in averages_by_type.items():\n",
+    "    csv_output += f\"average_duration,{etype},{avg:.2f}\\n\"\n",
+    "\n",
+    "# Adding the unique user count\n",
+    "csv_output += f\"unique_users,all,{len(unique_user_ids)}\\n\"\n",
+    "\n",
+    "print(\"--- Final CSV String ---\")\n",
+    "print(csv_output)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "77f7071e-d4b4-4b9f-84f3-95c356ce7ac0",
+   "metadata": {},
+   "source": [
+    "In this final task, I aggregated the cleaned event data to determine the frequency and average duration for each event type. I utilized a Python set to efficiently count the number of unique users. Finally, I formatted these results into a structured CSV string. To ensure accuracy, I validated that the total number of events in my summary matched the number of records in my cleaned_events list."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/.ipynb_checkpoints/m1_01_task_3_functions-checkpoint.py b/.ipynb_checkpoints/m1_01_task_3_functions-checkpoint.py
new file mode 100644
index 0000000..8ec4499
--- /dev/null
+++ b/.ipynb_checkpoints/m1_01_task_3_functions-checkpoint.py
@@ -0,0 +1,12 @@
+def to_float(value):
+    """Return float(value), or None if float() raises ValueError or TypeError."""
+    try:
+        return float(value)
+    except (ValueError, TypeError):
+        return None
+
+def clean_string(text):
+    """Return text stripped of surrounding whitespace and lowercased; None if text is not a str."""
+    if isinstance(text, str):
+        return text.strip().lower()
+    return None
\ No newline at end of file
diff --git a/__pycache__/m1_01_task_3_functions.cpython-311.pyc b/__pycache__/m1_01_task_3_functions.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4881c2ed8ff1920876127f026af1dec718aa6aec
GIT binary patch
literal 896
zcmZ{j&1(}u6u{qRvn1))Hd?&c!`6gAz|@9{haw*Q00rrx@q<cWnQkU_>1Jk`nKW$)
zQbeSRU_D4rg<h;k{+Uz?mW7_gd!U!llW(`}S}i`9H}Ac9Z{F~JyI+Tg2LR>mtIzIp
z4d7?!oC*4o99_a~3u^ENUO-Jlt<@40DC*8xMwphwrQ<r9L$w7NQ0GqDu!~+Nj#MBA
zDuIcoqQdG^sVDLvk{ZlouL`taMW2V3R!pqjy2sfv6-rvfl1jMjrKLF9b&nHu-4awa
z1(Vh+XVh}*mdloj=hh}?i+W&0Il;g);x*|5A-D)KPgeaCv#13IrZ%Yz-)9}q-JhvG
z5}Z9G&eC1Zl|veS^)XqH)ke8ARW5mCq2&6jYLT<j>9#<m>xAd_RM}QUF4@y|y~!Nq
zawaGJ)gZ079lb%Mu}9cP&Y)U{UQ##44$ZO6r|;Sy+THZk{^8J-?M!#@S`Rd1?2kGy
zFK?&~)zdNB@Au^pmcJg!!l!IO|DqNC1EY;uTmPUqKlojQ(1}5aLK}8s4@+a&MXM8g
zJ0a)`@^?-p!iU&y%+Us4rqXIHx{6AlIMgDnW;MtX1|6Inf^=|Bu2hsM<dqBtT#QOZ
zm_q{{SqDZ4+z$+ox2Qm1Nkv!G2``btmjpwOM+vh}z6)VuC>*q$Mzs#VQrXS?w!W7d
z-%pK)BhBM9Kd!xB3uperocL}|?3uZJGuKV$4hGGQn@3m%v&BS2E4u9)h*6A1&5qNz
k8@$%^=p7OM8WGJB8puY|dI>{AY_S?iYNI{4sG{qB0}z4E%m4rY

literal 0
HcmV?d00001

diff --git a/m1-01-ipython-python-basics-lab.ipynb b/m1-01-ipython-python-basics-lab.ipynb
new file mode 100644
index 0000000..7ee3ce6
--- /dev/null
+++ b/m1-01-ipython-python-basics-lab.ipynb
@@ -0,0 +1,503 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "f60e6c4d-468f-4a49-81da-ced20ecc3935",
+   "metadata": {},
+   "source": [
+    "Task 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "d371b9d3-67a0-4935-aea1-16c9a5dd2c47",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'int'>\n",
+      "<class 'float'>\n",
+      "<class 'str'>\n",
+      "<class 'list'>\n",
+      "<class 'dict'>\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Creating variables\n",
+    "age = 25                  # Integer\n",
+    "price = 19.99             # Float\n",
+    "name = \"IronHacker\"       # String\n",
+    "skills = [\"Python\", \"Git\"] # List\n",
+    "info = {\"track\": \"Data\"}   # Dictionary\n",
+    "\n",
+    "print(type(age))\n",
+    "print(type(price))\n",
+    "print(type(name))\n",
+    "print(type(skills))\n",
+    "print(type(info))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c1724569-1a2e-4c6a-bc80-a47eebbd00ed",
+   "metadata": {},
+   "source": [
+    "Using the type() function, I identified the following common data types in my workspace:\n",
+    "\n",
+    "age: <class 'int'> (Integer)\n",
+    "\n",
+    "price: <class 'float'> (Floating-point number)\n",
+    "\n",
+    "name: <class 'str'> (String)\n",
+    "\n",
+    "skills: <class 'list'> (List)\n",
+    "\n",
+    "info: <class 'dict'> (Dictionary)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c4406d2d-e92a-4a66-b1fb-4c472d903288",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "IRONHACKER\n",
+      "['Python', 'Git', 'SQL']\n",
+      "dict_keys(['track'])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# String method\n",
+    "name_upper = name.upper()\n",
+    "print(name_upper)\n",
+    "\n",
+    "# List method\n",
+    "skills.append(\"SQL\")\n",
+    "print(skills)\n",
+    "\n",
+    "# Dictionary method\n",
+    "keys = info.keys()\n",
+    "print(keys)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "94f566a1-27ad-4220-8875-bbc38ef28142",
+   "metadata": {},
+   "source": [
+    "I called the following methods to transform or inspect my data:\n",
+    "\n",
+    "String (.upper()): Returned a new version of the string in all capital letters.\n",
+    "\n",
+    "List (.append()): Modified the list in-place by adding a new element to the end.\n",
+    "\n",
+    "Dictionary (.keys()): Returned a dict_keys object containing all the labels (keys) currently in the dictionary."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "71cf4cda-36d9-4505-8ec9-e668bcea9eeb",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "\u001b[31mSignature:\u001b[39m name.replace(old, new, count=-\u001b[32m1\u001b[39m, /)\n",
+       "\u001b[31mDocstring:\u001b[39m\n",
+       "Return a copy with all occurrences of substring old replaced by new.\n",
+       "\n",
+       "  count\n",
+       "    Maximum number of occurrences to replace.\n",
+       "    -1 (the default value) means replace all occurrences.\n",
+       "\n",
+       "If the optional argument count is given, only the first count occurrences are\n",
+       "replaced.\n",
+       "\u001b[31mType:\u001b[39m      builtin_function_or_method"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "name.replace?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f4361f98-c833-433c-9874-c157db41be72",
+   "metadata": {},
+   "source": [
+    "By using the ? introspection command, I learned that the .replace() method does not change the original string in place. Instead, it returns a new copy with the specified changes. I also discovered the count argument, which limits how many occurrences are replaced. For example, setting count=1 would replace only the first occurrence of the substring, rather than every occurrence in the string."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5fd9bb4c-b757-4318-8a41-ae434cf1efce",
+   "metadata": {},
+   "source": [
+    "Task 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "5a34eab8-ae34-4a92-be18-af15c3f567c0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Values: [1, 2, 3, 4]\n",
+      "Alias: [1, 2, 3, 4]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 1. Creating a list and an alias\n",
+    "values = [1, 2, 3]\n",
+    "alias = values\n",
+    "\n",
+    "# 2. Appending to the alias\n",
+    "alias.append(4)\n",
+    "\n",
+    "# 3. Checking both\n",
+    "print(f\"Values: {values}\")\n",
+    "print(f\"Alias: {alias}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "06a4b2d5-6443-4ce2-a483-5bc39a409da1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Original Values: [1, 2, 3, 4]\n",
+      "Copy: [1, 2, 3, 4, 99]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 1. Creating a true copy\n",
+    "values_copy = values.copy()\n",
+    "\n",
+    "# 2. Appending a different value to the copy\n",
+    "values_copy.append(99)\n",
+    "\n",
+    "# 3. Checking both\n",
+    "print(f\"Original Values: {values}\")\n",
+    "print(f\"Copy: {values_copy}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "72ab2cd5-b4a1-403b-a276-73572077ef9b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Original Record: {'name': 'Alice', 'role': 'Data Scientist', 'status': 'Active'}\n",
+      "Original Record after copy change: {'name': 'Alice', 'role': 'Data Scientist', 'status': 'Active'}\n",
+      "Record Copy: {'name': 'Alice', 'role': 'AI Engineer', 'status': 'Active'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 1. Creating dict and alias\n",
+    "record = {\"name\": \"Alice\", \"role\": \"Data Scientist\"}\n",
+    "record_alias = record\n",
+    "record_alias[\"status\"] = \"Active\"\n",
+    "\n",
+    "print(f\"Original Record: {record}\")\n",
+    "\n",
+    "# 2. Creating a true copy\n",
+    "record_copy = record.copy()\n",
+    "record_copy[\"role\"] = \"AI Engineer\"\n",
+    "\n",
+    "print(f\"Original Record after copy change: {record}\")\n",
+    "print(f\"Record Copy: {record_copy}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9efb1232-803a-4394-a60a-2251f65cc781",
+   "metadata": {},
+   "source": [
+    "In this task, I observed that assigning a list or dictionary to a new variable name (an alias) does not create a new object. Instead, both names refer to the same object in memory. Therefore, modifying the object through the alias also modified the original values. However, using the .copy() method created a separate (shallow) copy, allowing me to change the copy without affecting the original.\n",
+    "\n",
+    "This behavior is critical when passing objects into functions. If a function modifies a mutable object (like a list) passed as an argument, those changes will persist outside the function, which can lead to \"hidden state\" bugs if not handled carefully."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bac02457-f95c-4ca4-b875-0da4ff98d636",
+   "metadata": {},
+   "source": [
+    "Task 3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "daf0af4a-deef-4457-bfae-a93aa3efea0f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Cleaned Strings: ['12.5', 'abc', '42', '', 'python']\n",
+      "Converted Floats: [12.5, None, 42.0, None, None]\n"
+     ]
+    }
+   ],
+   "source": [
+    "from m1_01_task_3_functions import to_float, clean_string\n",
+    "\n",
+    "# Creating test data\n",
+    "test_inputs = [\" 12.5 \", \"ABC\", \"  42\", \"\", \"Python  \"]\n",
+    "\n",
+    "# Testing the cleaning function\n",
+    "cleaned = [clean_string(item) for item in test_inputs]\n",
+    "print(f\"Cleaned Strings: {cleaned}\")\n",
+    "\n",
+    "# Testing the float conversion\n",
+    "floats = [to_float(item) for item in cleaned]\n",
+    "print(f\"Converted Floats: {floats}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c266ed41-3100-4dd5-9169-337626bc433d",
+   "metadata": {},
+   "source": [
+    "I implemented two utility functions in m1_01_task_3_functions.py. The to_float function safely attempts to convert inputs to floats, returning None for values that cannot be converted, to prevent errors during analysis. The clean_string function standardizes text by removing leading/trailing whitespace and converting characters to lowercase, ensuring consistency across the dataset."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "712bfd39-60ed-4cd2-92eb-860cdd76fa5b",
+   "metadata": {},
+   "source": [
+    "Task 4"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "018e233b-3632-4086-a31f-1377caa276fe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "raw_events = [\n",
+    "    {\"user_id\": 101, \"event_type\": \"login\", \"duration_seconds\": 5},\n",
+    "    {\"user_id\": 102, \"event_type\": \"upload\", \"duration_seconds\": 120},\n",
+    "    {\"user_id\": 101, \"event_type\": \"update\", \"duration_seconds\": 45},\n",
+    "    {\"user_id\": 103, \"event_type\": \"login\", \"duration_seconds\": \"ERROR\"},  # Invalid: String\n",
+    "    {\"user_id\": 104, \"event_type\": \"login\", \"duration_seconds\": 15},\n",
+    "    {\"user_id\": 102, \"event_type\": \"logout\", \"duration_seconds\": -10},     # Invalid: Negative\n",
+    "    {\"user_id\": 105, \"event_type\": \"login\", \"duration_seconds\": 8},\n",
+    "    {\"user_id\": 103, \"event_type\": \"update\", \"duration_seconds\": 300},\n",
+    "    {\"user_id\": 101, \"event_type\": \"logout\", \"duration_seconds\": 20},\n",
+    "    {\"user_id\": 104, \"event_type\": \"logout\", \"duration_seconds\": 12},\n",
+    "    {\"user_id\": 106, \"event_type\": \"login\", \"duration_seconds\": 5},\n",
+    "    {\"user_id\": 105, \"event_type\": \"logout\", \"duration_seconds\": \" \"},     # Invalid: Empty/Whitespace\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "1501ee9e-456f-4989-8efc-f4bae48809b3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Number of valid records found: 9\n",
+      "\n",
+      "Sample transformed records:\n",
+      "{'user_id': 101, 'event_type': 'login', 'duration_seconds': 5, 'duration_minutes': 0.08333333333333333}\n",
+      "{'user_id': 102, 'event_type': 'upload', 'duration_seconds': 120, 'duration_minutes': 2.0}\n"
+     ]
+    }
+   ],
+   "source": [
+    "cleaned_events = []\n",
+    "\n",
+    "for event in raw_events:\n",
+    "    duration = event[\"duration_seconds\"]\n",
+    "    \n",
+    "    # Validation: Must be an int or float AND greater than zero\n",
+    "    if isinstance(duration, (int, float)) and duration > 0:\n",
+    "        # In Task 2, we learned to use .copy() so we don't change the original.\n",
+    "        clean_item = event.copy()\n",
+    "        \n",
+    "        # Adding the new key: duration_minutes\n",
+    "        clean_item[\"duration_minutes\"] = duration / 60\n",
+    "        \n",
+    "        cleaned_events.append(clean_item)\n",
+    "\n",
+    "# Verification steps\n",
+    "print(f\"Number of valid records found: {len(cleaned_events)}\")\n",
+    "\n",
+    "# Inspecting the first two cleaned records\n",
+    "print(\"\\nSample transformed records:\")\n",
+    "for record in cleaned_events[:2]:\n",
+    "    print(record)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "09c147dc-65d2-41e5-bfe9-5d0fd10518ab",
+   "metadata": {},
+   "source": [
+    "I processed the raw activity list using a for loop to filter out invalid records. I applied conditional logic to ensure only records with positive, numeric duration_seconds were kept. For each valid record, I created a copy to avoid side effects and added a calculated duration_minutes field. This ensures the data is formatted correctly for the final summary tasks."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3ff02b89-d64e-4ae4-8d62-504e4eef81d2",
+   "metadata": {},
+   "source": [
+    "Task 5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "ad4c3214-7037-40cd-909b-67229fdc854e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Validation: Sum of counts (9) == len(cleaned_events) (9)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 1. Initializing our counters\n",
+    "counts_by_type = {}\n",
+    "total_mins_by_type = {}\n",
+    "unique_user_ids = set()\n",
+    "\n",
+    "# 2. Looping through the cleaned data once\n",
+    "for event in cleaned_events:\n",
+    "    etype = event['event_type']\n",
+    "    mins = event['duration_minutes']\n",
+    "    uid = event['user_id']\n",
+    "    \n",
+    "    # Updating counts\n",
+    "    counts_by_type[etype] = counts_by_type.get(etype, 0) + 1\n",
+    "    \n",
+    "    # Updating total minutes (for calculating average later)\n",
+    "    total_mins_by_type[etype] = total_mins_by_type.get(etype, 0) + mins\n",
+    "    \n",
+    "    # Adding user to the set (sets automatically handle uniqueness)\n",
+    "    unique_user_ids.add(uid)\n",
+    "\n",
+    "# 3. Calculating the averages\n",
+    "averages_by_type = {etype: total_mins_by_type[etype] / counts_by_type[etype] \n",
+    "                    for etype in counts_by_type}\n",
+    "\n",
+    "# 4. Validation Check\n",
+    "total_counts = sum(counts_by_type.values())\n",
+    "print(f\"Validation: Sum of counts ({total_counts}) == len(cleaned_events) ({len(cleaned_events)})\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "b5dc38b9-f04a-43ab-8726-4fd98d582e8e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--- Final CSV String ---\n",
+      "metric,key,value\n",
+      "count,login,4\n",
+      "count,upload,1\n",
+      "count,update,2\n",
+      "count,logout,2\n",
+      "average_duration,login,0.14\n",
+      "average_duration,upload,2.00\n",
+      "average_duration,update,2.88\n",
+      "average_duration,logout,0.27\n",
+      "unique_users,all,6\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Initializing the string with the header\n",
+    "csv_output = \"metric,key,value\\n\"\n",
+    "\n",
+    "# Adding count rows\n",
+    "for etype, count in counts_by_type.items():\n",
+    "    csv_output += f\"count,{etype},{count}\\n\"\n",
+    "\n",
+    "# Adding average duration rows (rounded to 2 decimal places)\n",
+    "for etype, avg in averages_by_type.items():\n",
+    "    csv_output += f\"average_duration,{etype},{avg:.2f}\\n\"\n",
+    "\n",
+    "# Adding the unique user count\n",
+    "csv_output += f\"unique_users,all,{len(unique_user_ids)}\\n\"\n",
+    "\n",
+    "print(\"--- Final CSV String ---\")\n",
+    "print(csv_output)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "77f7071e-d4b4-4b9f-84f3-95c356ce7ac0",
+   "metadata": {},
+   "source": [
+    "In this final task, I aggregated the cleaned event data to determine the frequency and average duration for each event type. I utilized a Python set to efficiently count the number of unique users. Finally, I formatted these results into a structured CSV string. To ensure accuracy, I validated that the total number of events in my summary matched the number of records in my cleaned_events list."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/m1_01_task_3_functions.py b/m1_01_task_3_functions.py
new file mode 100644
index 0000000..8ec4499
--- /dev/null
+++ b/m1_01_task_3_functions.py
@@ -0,0 +1,12 @@
+def to_float(value):
+    """Return value converted to a float, or None if it cannot be converted."""
+    try:
+        return float(value)
+    except (ValueError, TypeError, OverflowError):  # OverflowError: int too large
+        return None
+
+def clean_string(text):
+    """Return text stripped of surrounding whitespace and lowercased; None if not a str."""
+    if not isinstance(text, str):
+        return None
+    return text.strip().lower()
\ No newline at end of file