From c180c6be823df445d2ca4024fbc32c8fcdccd522 Mon Sep 17 00:00:00 2001 From: tforrewot Date: Wed, 18 Feb 2026 13:04:52 +0400 Subject: [PATCH] Completed Lab 1 with all tasks and functions --- ...ipython-python-basics-lab-checkpoint.ipynb | 503 ++++++++++++++++++ .../m1_01_task_3_functions-checkpoint.py | 12 + .../m1_01_task_3_functions.cpython-311.pyc | Bin 0 -> 896 bytes m1-01-ipython-python-basics-lab.ipynb | 503 ++++++++++++++++++ m1_01_task_3_functions.py | 12 + 5 files changed, 1030 insertions(+) create mode 100644 .ipynb_checkpoints/m1-01-ipython-python-basics-lab-checkpoint.ipynb create mode 100644 .ipynb_checkpoints/m1_01_task_3_functions-checkpoint.py create mode 100644 __pycache__/m1_01_task_3_functions.cpython-311.pyc create mode 100644 m1-01-ipython-python-basics-lab.ipynb create mode 100644 m1_01_task_3_functions.py diff --git a/.ipynb_checkpoints/m1-01-ipython-python-basics-lab-checkpoint.ipynb b/.ipynb_checkpoints/m1-01-ipython-python-basics-lab-checkpoint.ipynb new file mode 100644 index 0000000..7ee3ce6 --- /dev/null +++ b/.ipynb_checkpoints/m1-01-ipython-python-basics-lab-checkpoint.ipynb @@ -0,0 +1,503 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f60e6c4d-468f-4a49-81da-ced20ecc3935", + "metadata": {}, + "source": [ + "Task 1" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d371b9d3-67a0-4935-aea1-16c9a5dd2c47", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "# Creating variables\n", + "age = 25 # Integer\n", + "price = 19.99 # Float\n", + "name = \"IronHacker\" # String\n", + "skills = [\"Python\", \"Git\"] # List\n", + "info = {\"track\": \"Data\"} # Dictionary\n", + "\n", + "print(type(age))\n", + "print(type(price))\n", + "print(type(name))\n", + "print(type(skills))\n", + "print(type(info))" + ] + }, + { + "cell_type": "markdown", + "id": "c1724569-1a2e-4c6a-bc80-a47eebbd00ed", + "metadata": {}, + "source": [ + "Using the type() function, I identified the following common data types in my workspace:\n", + "\n", + "age: (Integer)\n", + "\n", + "price: (Floating-point number)\n", + "\n", + "name: (String)\n", + "\n", + "skills: (List)\n", + "\n", + "info: (Dictionary)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c4406d2d-e92a-4a66-b1fb-4c472d903288", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "IRONHACKER\n", + "['Python', 'Git', 'SQL']\n", + "dict_keys(['track'])\n" + ] + } + ], + "source": [ + "# String method\n", + "name_upper = name.upper()\n", + "print(name_upper)\n", + "\n", + "# List method\n", + "skills.append(\"SQL\")\n", + "print(skills)\n", + "\n", + "# Dictionary method\n", + "keys = info.keys()\n", + "print(keys)" + ] + }, + { + "cell_type": "markdown", + "id": "94f566a1-27ad-4220-8875-bbc38ef28142", + "metadata": {}, + "source": [ + "I called the following methods to transform or inspect my data:\n", + "\n", + "String (.upper()): Returned a new version of the string in all capital letters.\n", + "\n", + "List (.append()): Modified the list in-place by adding a new element to the end.\n", + "\n", + "Dictionary (.keys()): Returned a dict_keys object containing all the labels (keys) currently in the dictionary." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "71cf4cda-36d9-4505-8ec9-e668bcea9eeb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[31mSignature:\u001b[39m name.replace(old, new, count=-\u001b[32m1\u001b[39m, /)\n", + "\u001b[31mDocstring:\u001b[39m\n", + "Return a copy with all occurrences of substring old replaced by new.\n", + "\n", + " count\n", + " Maximum number of occurrences to replace.\n", + " -1 (the default value) means replace all occurrences.\n", + "\n", + "If the optional argument count is given, only the first count occurrences are\n", + "replaced.\n", + "\u001b[31mType:\u001b[39m builtin_function_or_method" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "name.replace?" + ] + }, + { + "cell_type": "markdown", + "id": "f4361f98-c833-433c-9874-c157db41be72", + "metadata": {}, + "source": [ + "By using the ? introspection command, I learned that the .replace() method does not actually change the original string in place. Instead, it returns a new copy with the specified changes. I also discovered the count argument, which allows me to limit how many occurrences are replaced. For example, setting count=1 would only replace the first time a character appears, rather than every instance in the string." + ] + }, + { + "cell_type": "markdown", + "id": "5fd9bb4c-b757-4318-8a41-ae434cf1efce", + "metadata": {}, + "source": [ + "Task 2" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5a34eab8-ae34-4a92-be18-af15c3f567c0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Values: [1, 2, 3, 4]\n", + "Alias: [1, 2, 3, 4]\n" + ] + } + ], + "source": [ + "# 1. Creating a list and an alias\n", + "values = [1, 2, 3]\n", + "alias = values\n", + "\n", + "# 2. Appending to the alias\n", + "alias.append(4)\n", + "\n", + "# 3. Checking both\n", + "print(f\"Values: {values}\")\n", + "print(f\"Alias: {alias}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "06a4b2d5-6443-4ce2-a483-5bc39a409da1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original Values: [1, 2, 3, 4]\n", + "Copy: [1, 2, 3, 4, 99]\n" + ] + } + ], + "source": [ + "# 1. Creating a true copy\n", + "values_copy = values.copy()\n", + "\n", + "# 2. Appending a different value to the copy\n", + "values_copy.append(99)\n", + "\n", + "# 3. Checking both\n", + "print(f\"Original Values: {values}\")\n", + "print(f\"Copy: {values_copy}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "72ab2cd5-b4a1-403b-a276-73572077ef9b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original Record: {'name': 'Alice', 'role': 'Data Scientist', 'status': 'Active'}\n", + "Original Record after copy change: {'name': 'Alice', 'role': 'Data Scientist', 'status': 'Active'}\n", + "Record Copy: {'name': 'Alice', 'role': 'AI Engineer', 'status': 'Active'}\n" + ] + } + ], + "source": [ + "# 1. Creating dict and alias\n", + "record = {\"name\": \"Alice\", \"role\": \"Data Scientist\"}\n", + "record_alias = record\n", + "record_alias[\"status\"] = \"Active\"\n", + "\n", + "print(f\"Original Record: {record}\")\n", + "\n", + "# 2. Creating a true copy\n", + "record_copy = record.copy()\n", + "record_copy[\"role\"] = \"AI Engineer\"\n", + "\n", + "print(f\"Original Record after copy change: {record}\")\n", + "print(f\"Record Copy: {record_copy}\")" + ] + }, + { + "cell_type": "markdown", + "id": "9efb1232-803a-4394-a60a-2251f65cc781", + "metadata": {}, + "source": [ + "In this task, I observed that assigning a list or dictionary to a new variable name (an alias) does not create a new object. Instead, both names point to the same location in memory. Therefore, modifying the alias also modified the original values. However, using the .copy() method created a separate instance in memory, allowing me to change the copy without affecting the original.\n", + "\n", + "This behavior is critical when passing objects into functions. If a function modifies a mutable object (like a list) passed as an argument, those changes will persist outside the function, which can lead to \"hidden state\" bugs if not handled carefully." + ] + }, + { + "cell_type": "markdown", + "id": "bac02457-f95c-4ca4-b875-0da4ff98d636", + "metadata": {}, + "source": [ + "Task 3" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "daf0af4a-deef-4457-bfae-a93aa3efea0f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cleaned Strings: ['12.5', 'abc', '42', '', 'python']\n", + "Converted Floats: [12.5, None, 42.0, None, None]\n" + ] + } + ], + "source": [ + "from m1_01_task_3_functions import to_float, clean_string\n", + "\n", + "# Creating test data\n", + "test_inputs = [\" 12.5 \", \"ABC\", \" 42\", \"\", \"Python \"]\n", + "\n", + "# Testing the cleaning function\n", + "cleaned = [clean_string(item) for item in test_inputs]\n", + "print(f\"Cleaned Strings: {cleaned}\")\n", + "\n", + "# Testing the float conversion\n", + "floats = [to_float(item) for item in cleaned]\n", + "print(f\"Converted Floats: {floats}\")" + ] + }, + { + "cell_type": "markdown", + "id": "c266ed41-3100-4dd5-9169-337626bc433d", + "metadata": {}, + "source": [ + "I implemented two utility functions in m1_01_task_3_functions.py. The to_float function safely attempts to convert inputs to decimals, returning None for non-numeric data to prevent errors during analysis. The clean_string function standardizes text by removing leading/trailing whitespace and converting characters to lowercase, ensuring consistency across the dataset." + ] + }, + { + "cell_type": "markdown", + "id": "712bfd39-60ed-4cd2-92eb-860cdd76fa5b", + "metadata": {}, + "source": [ + "Task 4" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "018e233b-3632-4086-a31f-1377caa276fe", + "metadata": {}, + "outputs": [], + "source": [ + "raw_events = [\n", + " {\"user_id\": 101, \"event_type\": \"login\", \"duration_seconds\": 5},\n", + " {\"user_id\": 102, \"event_type\": \"upload\", \"duration_seconds\": 120},\n", + " {\"user_id\": 101, \"event_type\": \"update\", \"duration_seconds\": 45},\n", + " {\"user_id\": 103, \"event_type\": \"login\", \"duration_seconds\": \"ERROR\"}, # Invalid: String\n", + " {\"user_id\": 104, \"event_type\": \"login\", \"duration_seconds\": 15},\n", + " {\"user_id\": 102, \"event_type\": \"logout\", \"duration_seconds\": -10}, # Invalid: Negative\n", + " {\"user_id\": 105, \"event_type\": \"login\", \"duration_seconds\": 8},\n", + " {\"user_id\": 103, \"event_type\": \"update\", \"duration_seconds\": 300},\n", + " {\"user_id\": 101, \"event_type\": \"logout\", \"duration_seconds\": 20},\n", + " {\"user_id\": 104, \"event_type\": \"logout\", \"duration_seconds\": 12},\n", + " {\"user_id\": 106, \"event_type\": \"login\", \"duration_seconds\": 5},\n", + " {\"user_id\": 105, \"event_type\": \"logout\", \"duration_seconds\": \" \"}, # Invalid: Empty/Whitespace\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "1501ee9e-456f-4989-8efc-f4bae48809b3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of valid records found: 9\n", + "\n", + "Sample transformed records:\n", + "{'user_id': 101, 'event_type': 'login', 'duration_seconds': 5, 'duration_minutes': 0.08333333333333333}\n", + "{'user_id': 102, 'event_type': 'upload', 'duration_seconds': 120, 'duration_minutes': 2.0}\n" + ] + } + ], + "source": [ + "cleaned_events = []\n", + "\n", + "for event in raw_events:\n", + " duration = event[\"duration_seconds\"]\n", + " \n", + " # Validation: Must be an int or float AND greater than zero\n", + " if isinstance(duration, (int, float)) and duration > 0:\n", + " # In Task 2, we learned to use .copy() so we don't change the original.\n", + " clean_item = event.copy()\n", + " \n", + " # Adding the new key: duration_minutes\n", + " clean_item[\"duration_minutes\"] = duration / 60\n", + " \n", + " cleaned_events.append(clean_item)\n", + "\n", + "# Verification steps\n", + "print(f\"Number of valid records found: {len(cleaned_events)}\")\n", + "\n", + "# Inspecting the first two cleaned records\n", + "print(\"\\nSample transformed records:\")\n", + "for record in cleaned_events[:2]:\n", + " print(record)" + ] + }, + { + "cell_type": "markdown", + "id": "09c147dc-65d2-41e5-bfe9-5d0fd10518ab", + "metadata": {}, + "source": [ + "I processed the raw activity list using a for loop to filter out invalid records. I applied conditional logic to ensure only records with positive, numeric duration_seconds were kept. For each valid record, I created a copy to avoid side effects and added a calculated duration_minutes field. This ensures the data is formatted correctly for the final summary tasks." + ] + }, + { + "cell_type": "markdown", + "id": "3ff02b89-d64e-4ae4-8d62-504e4eef81d2", + "metadata": {}, + "source": [ + "Task 5" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ad4c3214-7037-40cd-909b-67229fdc854e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Validation: Sum of counts (9) == len(cleaned_events) (9)\n" + ] + } + ], + "source": [ + "# 1. Initializing our counters\n", + "counts_by_type = {}\n", + "total_mins_by_type = {}\n", + "unique_user_ids = set()\n", + "\n", + "# 2. Looping through the cleaned data once\n", + "for event in cleaned_events:\n", + " etype = event['event_type']\n", + " mins = event['duration_minutes']\n", + " uid = event['user_id']\n", + " \n", + " # Updating counts\n", + " counts_by_type[etype] = counts_by_type.get(etype, 0) + 1\n", + " \n", + " # Updating total minutes (for calculating average later)\n", + " total_mins_by_type[etype] = total_mins_by_type.get(etype, 0) + mins\n", + " \n", + " # Adding user to the set (sets automatically handle uniqueness)\n", + " unique_user_ids.add(uid)\n", + "\n", + "# 3. Calculating the averages\n", + "averages_by_type = {etype: total_mins_by_type[etype] / counts_by_type[etype] \n", + " for etype in counts_by_type}\n", + "\n", + "# 4. Validation Check\n", + "total_counts = sum(counts_by_type.values())\n", + "print(f\"Validation: Sum of counts ({total_counts}) == len(cleaned_events) ({len(cleaned_events)})\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "b5dc38b9-f04a-43ab-8726-4fd98d582e8e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--- Final CSV String ---\n", + "metric,key,value\n", + "count,login,4\n", + "count,upload,1\n", + "count,update,2\n", + "count,logout,2\n", + "average_duration,login,0.14\n", + "average_duration,upload,2.00\n", + "average_duration,update,2.88\n", + "average_duration,logout,0.27\n", + "unique_users,all,6\n", + "\n" + ] + } + ], + "source": [ + "# Initializing the string with the header\n", + "csv_output = \"metric,key,value\\n\"\n", + "\n", + "# Adding count rows\n", + "for etype, count in counts_by_type.items():\n", + " csv_output += f\"count,{etype},{count}\\n\"\n", + "\n", + "# Adding average duration rows (rounded to 2 decimal places)\n", + "for etype, avg in averages_by_type.items():\n", + " csv_output += f\"average_duration,{etype},{avg:.2f}\\n\"\n", + "\n", + "# Adding the unique user count\n", + "csv_output += f\"unique_users,all,{len(unique_user_ids)}\\n\"\n", + "\n", + "print(\"--- Final CSV String ---\")\n", + "print(csv_output)" + ] + }, + { + "cell_type": "markdown", + "id": "77f7071e-d4b4-4b9f-84f3-95c356ce7ac0", + "metadata": {}, + "source": [ + "In this final task, I aggregated the cleaned event data to determine the frequency and average duration for each event type. I utilized a Python set to efficiently count the number of unique users. Finally, I formatted these results into a structured CSV string. To ensure accuracy, I validated that the total number of events in my summary matched the number of records in my cleaned_events list." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/.ipynb_checkpoints/m1_01_task_3_functions-checkpoint.py b/.ipynb_checkpoints/m1_01_task_3_functions-checkpoint.py new file mode 100644 index 0000000..8ec4499 --- /dev/null +++ b/.ipynb_checkpoints/m1_01_task_3_functions-checkpoint.py @@ -0,0 +1,12 @@ +def to_float(value): + """Converts a string to a float, returns None if invalid.""" + try: + return float(value) + except (ValueError, TypeError): + return None + +def clean_string(text): + """Removes whitespace and makes casing consistent.""" + if isinstance(text, str): + return text.strip().lower() + return None \ No newline at end of file diff --git a/__pycache__/m1_01_task_3_functions.cpython-311.pyc b/__pycache__/m1_01_task_3_functions.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4881c2ed8ff1920876127f026af1dec718aa6aec GIT binary patch literal 896 zcmZ{j&1(}u6u{qRvn1))Hd?&c!`6gAz|@9{haw*Q00rrx@q1Jk`nKW$) zQbeSRU_D4rgmtIzIp z4d7?!oC*4o99_a~3u^ENUO-Jlt<@40DC*8xMwphwrQ9#xAd_RM}QUF4@y|y~!Nq zawaGJ)gZ079lb%Mu}9cP&Y)U{UQ##44$ZO6r|;Sy+THZk{^8J-?M!#@S`Rd1?2kGy zFK?&~)zdNB@Au^pmcJg!!l!IO|DqNC1EY;uTmPUqKlojQ(1}5aLK}8s4@+a&MXM8g zJ0a)`@^?-p!iU&y%+Us4rqXIHx{6AlIMgDnW;MtX1|6Inf^=|Bu2hsM*q$Mzs#VQrXS?w!W7d z-%pK)BhBM9Kd!xB3uperocL}|?3uZJGuKV$4hGGQn@3m%v&BS2E4u9)h*6A1&5qNz k8@$%^=p7OM8WGJB8puY|dI>{AY_S?iYNI{4sG{qB0}z4E%m4rY literal 0 HcmV?d00001 diff --git a/m1-01-ipython-python-basics-lab.ipynb b/m1-01-ipython-python-basics-lab.ipynb new file mode 100644 index 0000000..7ee3ce6 --- /dev/null +++ b/m1-01-ipython-python-basics-lab.ipynb @@ -0,0 +1,503 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f60e6c4d-468f-4a49-81da-ced20ecc3935", + "metadata": {}, + "source": [ + "Task 1" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d371b9d3-67a0-4935-aea1-16c9a5dd2c47", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "# Creating variables\n", + "age = 25 # Integer\n", + "price = 19.99 # Float\n", + "name = \"IronHacker\" # String\n", + "skills = [\"Python\", \"Git\"] # List\n", + "info = {\"track\": \"Data\"} # Dictionary\n", + "\n", + "print(type(age))\n", + "print(type(price))\n", + "print(type(name))\n", + "print(type(skills))\n", + "print(type(info))" + ] + }, + { + "cell_type": "markdown", + "id": "c1724569-1a2e-4c6a-bc80-a47eebbd00ed", + "metadata": {}, + "source": [ + "Using the type() function, I identified the following common data types in my workspace:\n", + "\n", + "age: (Integer)\n", + "\n", + "price: (Floating-point number)\n", + "\n", + "name: (String)\n", + "\n", + "skills: (List)\n", + "\n", + "info: (Dictionary)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c4406d2d-e92a-4a66-b1fb-4c472d903288", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "IRONHACKER\n", + "['Python', 'Git', 'SQL']\n", + "dict_keys(['track'])\n" + ] + } + ], + "source": [ + "# String method\n", + "name_upper = name.upper()\n", + "print(name_upper)\n", + "\n", + "# List method\n", + "skills.append(\"SQL\")\n", + "print(skills)\n", + "\n", + "# Dictionary method\n", + "keys = info.keys()\n", + "print(keys)" + ] + }, + { + "cell_type": "markdown", + "id": "94f566a1-27ad-4220-8875-bbc38ef28142", + "metadata": {}, + "source": [ + "I called the following methods to transform or inspect my data:\n", + "\n", + "String (.upper()): Returned a new version of the string in all capital letters.\n", + "\n", + "List (.append()): Modified the list in-place by adding a new element to the end.\n", + "\n", + "Dictionary (.keys()): Returned a dict_keys object containing all the labels (keys) currently in the dictionary." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "71cf4cda-36d9-4505-8ec9-e668bcea9eeb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[31mSignature:\u001b[39m name.replace(old, new, count=-\u001b[32m1\u001b[39m, /)\n", + "\u001b[31mDocstring:\u001b[39m\n", + "Return a copy with all occurrences of substring old replaced by new.\n", + "\n", + " count\n", + " Maximum number of occurrences to replace.\n", + " -1 (the default value) means replace all occurrences.\n", + "\n", + "If the optional argument count is given, only the first count occurrences are\n", + "replaced.\n", + "\u001b[31mType:\u001b[39m builtin_function_or_method" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "name.replace?" + ] + }, + { + "cell_type": "markdown", + "id": "f4361f98-c833-433c-9874-c157db41be72", + "metadata": {}, + "source": [ + "By using the ? introspection command, I learned that the .replace() method does not actually change the original string in place. Instead, it returns a new copy with the specified changes. I also discovered the count argument, which allows me to limit how many occurrences are replaced. For example, setting count=1 would only replace the first time a character appears, rather than every instance in the string." + ] + }, + { + "cell_type": "markdown", + "id": "5fd9bb4c-b757-4318-8a41-ae434cf1efce", + "metadata": {}, + "source": [ + "Task 2" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5a34eab8-ae34-4a92-be18-af15c3f567c0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Values: [1, 2, 3, 4]\n", + "Alias: [1, 2, 3, 4]\n" + ] + } + ], + "source": [ + "# 1. Creating a list and an alias\n", + "values = [1, 2, 3]\n", + "alias = values\n", + "\n", + "# 2. Appending to the alias\n", + "alias.append(4)\n", + "\n", + "# 3. Checking both\n", + "print(f\"Values: {values}\")\n", + "print(f\"Alias: {alias}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "06a4b2d5-6443-4ce2-a483-5bc39a409da1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original Values: [1, 2, 3, 4]\n", + "Copy: [1, 2, 3, 4, 99]\n" + ] + } + ], + "source": [ + "# 1. Creating a true copy\n", + "values_copy = values.copy()\n", + "\n", + "# 2. Appending a different value to the copy\n", + "values_copy.append(99)\n", + "\n", + "# 3. Checking both\n", + "print(f\"Original Values: {values}\")\n", + "print(f\"Copy: {values_copy}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "72ab2cd5-b4a1-403b-a276-73572077ef9b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original Record: {'name': 'Alice', 'role': 'Data Scientist', 'status': 'Active'}\n", + "Original Record after copy change: {'name': 'Alice', 'role': 'Data Scientist', 'status': 'Active'}\n", + "Record Copy: {'name': 'Alice', 'role': 'AI Engineer', 'status': 'Active'}\n" + ] + } + ], + "source": [ + "# 1. Creating dict and alias\n", + "record = {\"name\": \"Alice\", \"role\": \"Data Scientist\"}\n", + "record_alias = record\n", + "record_alias[\"status\"] = \"Active\"\n", + "\n", + "print(f\"Original Record: {record}\")\n", + "\n", + "# 2. Creating a true copy\n", + "record_copy = record.copy()\n", + "record_copy[\"role\"] = \"AI Engineer\"\n", + "\n", + "print(f\"Original Record after copy change: {record}\")\n", + "print(f\"Record Copy: {record_copy}\")" + ] + }, + { + "cell_type": "markdown", + "id": "9efb1232-803a-4394-a60a-2251f65cc781", + "metadata": {}, + "source": [ + "In this task, I observed that assigning a list or dictionary to a new variable name (an alias) does not create a new object. Instead, both names point to the same location in memory. Therefore, modifying the alias also modified the original values. However, using the .copy() method created a separate instance in memory, allowing me to change the copy without affecting the original.\n", + "\n", + "This behavior is critical when passing objects into functions. If a function modifies a mutable object (like a list) passed as an argument, those changes will persist outside the function, which can lead to \"hidden state\" bugs if not handled carefully." + ] + }, + { + "cell_type": "markdown", + "id": "bac02457-f95c-4ca4-b875-0da4ff98d636", + "metadata": {}, + "source": [ + "Task 3" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "daf0af4a-deef-4457-bfae-a93aa3efea0f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cleaned Strings: ['12.5', 'abc', '42', '', 'python']\n", + "Converted Floats: [12.5, None, 42.0, None, None]\n" + ] + } + ], + "source": [ + "from m1_01_task_3_functions import to_float, clean_string\n", + "\n", + "# Creating test data\n", + "test_inputs = [\" 12.5 \", \"ABC\", \" 42\", \"\", \"Python \"]\n", + "\n", + "# Testing the cleaning function\n", + "cleaned = [clean_string(item) for item in test_inputs]\n", + "print(f\"Cleaned Strings: {cleaned}\")\n", + "\n", + "# Testing the float conversion\n", + "floats = [to_float(item) for item in cleaned]\n", + "print(f\"Converted Floats: {floats}\")" + ] + }, + { + "cell_type": "markdown", + "id": "c266ed41-3100-4dd5-9169-337626bc433d", + "metadata": {}, + "source": [ + "I implemented two utility functions in m1_01_task_3_functions.py. The to_float function safely attempts to convert inputs to decimals, returning None for non-numeric data to prevent errors during analysis. The clean_string function standardizes text by removing leading/trailing whitespace and converting characters to lowercase, ensuring consistency across the dataset." + ] + }, + { + "cell_type": "markdown", + "id": "712bfd39-60ed-4cd2-92eb-860cdd76fa5b", + "metadata": {}, + "source": [ + "Task 4" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "018e233b-3632-4086-a31f-1377caa276fe", + "metadata": {}, + "outputs": [], + "source": [ + "raw_events = [\n", + " {\"user_id\": 101, \"event_type\": \"login\", \"duration_seconds\": 5},\n", + " {\"user_id\": 102, \"event_type\": \"upload\", \"duration_seconds\": 120},\n", + " {\"user_id\": 101, \"event_type\": \"update\", \"duration_seconds\": 45},\n", + " {\"user_id\": 103, \"event_type\": \"login\", \"duration_seconds\": \"ERROR\"}, # Invalid: String\n", + " {\"user_id\": 104, \"event_type\": \"login\", \"duration_seconds\": 15},\n", + " {\"user_id\": 102, \"event_type\": \"logout\", \"duration_seconds\": -10}, # Invalid: Negative\n", + " {\"user_id\": 105, \"event_type\": \"login\", \"duration_seconds\": 8},\n", + " {\"user_id\": 103, \"event_type\": \"update\", \"duration_seconds\": 300},\n", + " {\"user_id\": 101, \"event_type\": \"logout\", \"duration_seconds\": 20},\n", + " {\"user_id\": 104, \"event_type\": \"logout\", \"duration_seconds\": 12},\n", + " {\"user_id\": 106, \"event_type\": \"login\", \"duration_seconds\": 5},\n", + " {\"user_id\": 105, \"event_type\": \"logout\", \"duration_seconds\": \" \"}, # Invalid: Empty/Whitespace\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "1501ee9e-456f-4989-8efc-f4bae48809b3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of valid records found: 9\n", + "\n", + "Sample transformed records:\n", + "{'user_id': 101, 'event_type': 'login', 'duration_seconds': 5, 'duration_minutes': 0.08333333333333333}\n", + "{'user_id': 102, 'event_type': 'upload', 'duration_seconds': 120, 'duration_minutes': 2.0}\n" + ] + } + ], + "source": [ + "cleaned_events = []\n", + "\n", + "for event in raw_events:\n", + " duration = event[\"duration_seconds\"]\n", + " \n", + " # Validation: Must be an int or float AND greater than zero\n", + " if isinstance(duration, (int, float)) and duration > 0:\n", + " # In Task 2, we learned to use .copy() so we don't change the original.\n", + " clean_item = event.copy()\n", + " \n", + " # Adding the new key: duration_minutes\n", + " clean_item[\"duration_minutes\"] = duration / 60\n", + " \n", + " cleaned_events.append(clean_item)\n", + "\n", + "# Verification steps\n", + "print(f\"Number of valid records found: {len(cleaned_events)}\")\n", + "\n", + "# Inspecting the first two cleaned records\n", + "print(\"\\nSample transformed records:\")\n", + "for record in cleaned_events[:2]:\n", + " print(record)" + ] + }, + { + "cell_type": "markdown", + "id": "09c147dc-65d2-41e5-bfe9-5d0fd10518ab", + "metadata": {}, + "source": [ + "I processed the raw activity list using a for loop to filter out invalid records. I applied conditional logic to ensure only records with positive, numeric duration_seconds were kept. For each valid record, I created a copy to avoid side effects and added a calculated duration_minutes field. This ensures the data is formatted correctly for the final summary tasks." + ] + }, + { + "cell_type": "markdown", + "id": "3ff02b89-d64e-4ae4-8d62-504e4eef81d2", + "metadata": {}, + "source": [ + "Task 5" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ad4c3214-7037-40cd-909b-67229fdc854e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Validation: Sum of counts (9) == len(cleaned_events) (9)\n" + ] + } + ], + "source": [ + "# 1. Initializing our counters\n", + "counts_by_type = {}\n", + "total_mins_by_type = {}\n", + "unique_user_ids = set()\n", + "\n", + "# 2. Looping through the cleaned data once\n", + "for event in cleaned_events:\n", + " etype = event['event_type']\n", + " mins = event['duration_minutes']\n", + " uid = event['user_id']\n", + " \n", + " # Updating counts\n", + " counts_by_type[etype] = counts_by_type.get(etype, 0) + 1\n", + " \n", + " # Updating total minutes (for calculating average later)\n", + " total_mins_by_type[etype] = total_mins_by_type.get(etype, 0) + mins\n", + " \n", + " # Adding user to the set (sets automatically handle uniqueness)\n", + " unique_user_ids.add(uid)\n", + "\n", + "# 3. Calculating the averages\n", + "averages_by_type = {etype: total_mins_by_type[etype] / counts_by_type[etype] \n", + " for etype in counts_by_type}\n", + "\n", + "# 4. Validation Check\n", + "total_counts = sum(counts_by_type.values())\n", + "print(f\"Validation: Sum of counts ({total_counts}) == len(cleaned_events) ({len(cleaned_events)})\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "b5dc38b9-f04a-43ab-8726-4fd98d582e8e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--- Final CSV String ---\n", + "metric,key,value\n", + "count,login,4\n", + "count,upload,1\n", + "count,update,2\n", + "count,logout,2\n", + "average_duration,login,0.14\n", + "average_duration,upload,2.00\n", + "average_duration,update,2.88\n", + "average_duration,logout,0.27\n", + "unique_users,all,6\n", + "\n" + ] + } + ], + "source": [ + "# Initializing the string with the header\n", + "csv_output = \"metric,key,value\\n\"\n", + "\n", + "# Adding count rows\n", + "for etype, count in counts_by_type.items():\n", + " csv_output += f\"count,{etype},{count}\\n\"\n", + "\n", + "# Adding average duration rows (rounded to 2 decimal places)\n", + "for etype, avg in averages_by_type.items():\n", + " csv_output += f\"average_duration,{etype},{avg:.2f}\\n\"\n", + "\n", + "# Adding the unique user count\n", + "csv_output += f\"unique_users,all,{len(unique_user_ids)}\\n\"\n", + "\n", + "print(\"--- Final CSV String ---\")\n", + "print(csv_output)" + ] + }, + { + "cell_type": "markdown", + "id": "77f7071e-d4b4-4b9f-84f3-95c356ce7ac0", + "metadata": {}, + "source": [ + "In this final task, I aggregated the cleaned event data to determine the frequency and average duration for each event type. I utilized a Python set to efficiently count the number of unique users. Finally, I formatted these results into a structured CSV string. To ensure accuracy, I validated that the total number of events in my summary matched the number of records in my cleaned_events list." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/m1_01_task_3_functions.py b/m1_01_task_3_functions.py new file mode 100644 index 0000000..8ec4499 --- /dev/null +++ b/m1_01_task_3_functions.py @@ -0,0 +1,12 @@ +def to_float(value): + """Converts a string to a float, returns None if invalid.""" + try: + return float(value) + except (ValueError, TypeError): + return None + +def clean_string(text): + """Removes whitespace and makes casing consistent.""" + if isinstance(text, str): + return text.strip().lower() + return None \ No newline at end of file