From 1daefd5ff7280ab9e9658cdfb8f1806a03b7881f Mon Sep 17 00:00:00 2001 From: amantechy <112rawataman@gmail.com> Date: Fri, 12 Dec 2025 14:07:32 +0530 Subject: [PATCH 1/5] Create Occupation_Improved --- 02_Filtering_&_Sorting/Occupation_Improved | 1 + 1 file changed, 1 insertion(+) create mode 100644 02_Filtering_&_Sorting/Occupation_Improved diff --git a/02_Filtering_&_Sorting/Occupation_Improved b/02_Filtering_&_Sorting/Occupation_Improved new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/02_Filtering_&_Sorting/Occupation_Improved @@ -0,0 +1 @@ + From 164fa72c9bcde1a1d2c4c1fe1e2ee67b9a73d4db Mon Sep 17 00:00:00 2001 From: amantechy <112rawataman@gmail.com> Date: Fri, 12 Dec 2025 14:17:07 +0530 Subject: [PATCH 2/5] Delete 02_Filtering_&_Sorting/Occupation_Improved --- 02_Filtering_&_Sorting/Occupation_Improved | 1 - 1 file changed, 1 deletion(-) delete mode 100644 02_Filtering_&_Sorting/Occupation_Improved diff --git a/02_Filtering_&_Sorting/Occupation_Improved b/02_Filtering_&_Sorting/Occupation_Improved deleted file mode 100644 index 8b1378917..000000000 --- a/02_Filtering_&_Sorting/Occupation_Improved +++ /dev/null @@ -1 +0,0 @@ - From 5489dbb221262957cb7222c3d718138eedfb498d Mon Sep 17 00:00:00 2001 From: amantechy <112rawataman@gmail.com> Date: Fri, 12 Dec 2025 14:18:21 +0530 Subject: [PATCH 3/5] Add files via upload --- 02_Filtering_&_Sorting/occu_exe.ipynb | 1091 +++++++++++++++++++++++++ 1 file changed, 1091 insertions(+) create mode 100644 02_Filtering_&_Sorting/occu_exe.ipynb diff --git a/02_Filtering_&_Sorting/occu_exe.ipynb b/02_Filtering_&_Sorting/occu_exe.ipynb new file mode 100644 index 000000000..c1aba9386 --- /dev/null +++ b/02_Filtering_&_Sorting/occu_exe.ipynb @@ -0,0 +1,1091 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "v_EYz24qYOSV" + }, + "source": [ + "\n", + "# **Improved Occupation Exercise – Pandas Filtering & Sorting**\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": 
"8dMSKDOMeBXK" + }, + "source": [ + "This notebook improves the original exercise by providing clearer instructions, additional tasks, and well-documented solutions. The dataset contains user demographics, occupations, and zip codes." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "collapsed": true, + "id": "qmpqFGcxZU2j", + "outputId": "73541045-12ea-4192-9e7f-eb5db3a66a9f" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"users\",\n \"rows\": 943,\n \"fields\": [\n {\n \"column\": \"user_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 272,\n \"min\": 1,\n \"max\": 943,\n \"num_unique_values\": 943,\n \"samples\": [\n 97,\n 266,\n 811\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 12,\n \"min\": 7,\n \"max\": 73,\n \"num_unique_values\": 61,\n \"samples\": [\n 24,\n 57,\n 52\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"gender\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"F\",\n \"M\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"occupation\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 21,\n \"samples\": [\n \"technician\",\n \"healthcare\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"zip_code\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 795,\n \"samples\": [\n \"90016\",\n \"15232\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe", + "variable_name": "users" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idagegenderoccupationzip_code
0124Mtechnician85711
1253Fother94043
2323Mwriter32067
3424Mtechnician43537
4533Fother15213
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " user_id age gender occupation zip_code\n", + "0 1 24 M technician 85711\n", + "1 2 53 F other 94043\n", + "2 3 23 M writer 32067\n", + "3 4 24 M technician 43537\n", + "4 5 33 F other 15213" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "# Load the dataset\n", + "users = pd.read_table(\n", + " \"https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user\",\n", + " sep='|'\n", + ")\n", + "\n", + "users.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-hGYygIoeboD" + }, + "source": [ + "## Original Tasks (Improved)\n", + "\n", + "\n", + "---\n", + "\n", + "\n", + "**Task 1**\n", + "\n", + "Display the first 10 records of the dataset and describe the column names.\n", + "\n", + "**Task 2**\n", + "\n", + "Find the most common occupation in the dataset.\n", + "\n", + "**Task 3**\n", + "\n", + "Show the number of male and female users in the dataset." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r-_xSntjfOEm" + }, + "source": [ + "## Additional Tasks Added for Clarity and Practice\n", + "\n", + "\n", + "---\n", + "\n", + "\n", + "**Additional Task 1**\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Show the top 5 most common occupations and their counts.\n", + "\n", + "\n", + "\n", + "\n", + "**Additional Task 2**\n", + "\n", + "Calculate the proportion (percentage) of each occupation in the dataset.\n", + "\n", + "**Additional Task 3**\n", + "\n", + "Group the users by gender and occupation and show the count for each combination." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mKS56wipf7qL" + }, + "source": [ + "# **Solutions**" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VgbFoJAghCn_", + "outputId": "66c53d14-23ee-431d-ab96-0f8ccf73e050" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['user_id', 'age', 'gender', 'occupation', 'zip_code'], dtype='object')" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Task 1: display() renders the preview; only a cell's last expression is shown automatically\n", + "display(users.head(10))\n", + "users.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 36 + }, + "id": "i4GKrkbvhZWb", + "outputId": "86943e20-56b3-4e73-c0c3-f989f83de4df" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'student'" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Task 2\n", + "users['occupation'].value_counts().idxmax()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 178 + }, + "id": "x4Tu2ZdJhZmU", + "outputId": "5a7619eb-fa47-4257-a9df-8606e618ef46" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
count
gender
M670
F273
\n", + "

" + ], + "text/plain": [ + "gender\n", + "M 670\n", + "F 273\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Task 3\n", + "users['gender'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 272 + }, + "collapsed": true, + "id": "K1nIDw4thZvd", + "outputId": "32c052c2-9c76-4df4-e18f-1770a713bc05" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
count
occupation
student196
other105
educator95
administrator79
engineer67
\n", + "

" + ], + "text/plain": [ + "occupation\n", + "student 196\n", + "other 105\n", + "educator 95\n", + "administrator 79\n", + "engineer 67\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Additional Task 1\n", + "users['occupation'].value_counts().head(5)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 774 + }, + "id": "tUswtzdtknne", + "outputId": "82f0d09d-8927-4cb4-ecf6-49b37cf90915" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
proportion
occupation
student20.78
other11.13
educator10.07
administrator8.38
engineer7.10
programmer7.00
librarian5.41
writer4.77
executive3.39
scientist3.29
artist2.97
technician2.86
marketing2.76
entertainment1.91
healthcare1.70
retired1.48
lawyer1.27
salesman1.27
none0.95
homemaker0.74
doctor0.74
\n", + "

" + ], + "text/plain": [ + "occupation\n", + "student 20.78\n", + "other 11.13\n", + "educator 10.07\n", + "administrator 8.38\n", + "engineer 7.10\n", + "programmer 7.00\n", + "librarian 5.41\n", + "writer 4.77\n", + "executive 3.39\n", + "scientist 3.29\n", + "artist 2.97\n", + "technician 2.86\n", + "marketing 2.76\n", + "entertainment 1.91\n", + "healthcare 1.70\n", + "retired 1.48\n", + "lawyer 1.27\n", + "salesman 1.27\n", + "none 0.95\n", + "homemaker 0.74\n", + "doctor 0.74\n", + "Name: proportion, dtype: float64" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Additional Task 2\n", + "(users['occupation'].value_counts(normalize=True)*100).round(2)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "bWvPHWHfkoGM", + "outputId": "da3c71d5-6f9d-4db6-d851-42cb150e2c1e" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
genderoccupation
Fadministrator36
artist13
educator26
engineer2
entertainment2
executive3
healthcare11
homemaker6
lawyer2
librarian29
marketing10
none4
other36
programmer6
retired1
salesman3
scientist3
student60
technician1
writer19
Madministrator43
artist15
doctor7
educator69
engineer65
entertainment16
executive29
healthcare5
homemaker1
lawyer10
librarian22
marketing16
none5
other69
programmer60
retired13
salesman9
scientist28
student136
technician26
writer26
\n", + "

" + ], + "text/plain": [ + "gender occupation \n", + "F administrator 36\n", + " artist 13\n", + " educator 26\n", + " engineer 2\n", + " entertainment 2\n", + " executive 3\n", + " healthcare 11\n", + " homemaker 6\n", + " lawyer 2\n", + " librarian 29\n", + " marketing 10\n", + " none 4\n", + " other 36\n", + " programmer 6\n", + " retired 1\n", + " salesman 3\n", + " scientist 3\n", + " student 60\n", + " technician 1\n", + " writer 19\n", + "M administrator 43\n", + " artist 15\n", + " doctor 7\n", + " educator 69\n", + " engineer 65\n", + " entertainment 16\n", + " executive 29\n", + " healthcare 5\n", + " homemaker 1\n", + " lawyer 10\n", + " librarian 22\n", + " marketing 16\n", + " none 5\n", + " other 69\n", + " programmer 60\n", + " retired 13\n", + " salesman 9\n", + " scientist 28\n", + " student 136\n", + " technician 26\n", + " writer 26\n", + "dtype: int64" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Additional Task 3\n", + "users.groupby(['gender', 'occupation']).size()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q5f31COznAua" + }, + "source": [ + "## **Summary**\n", + "\n", + "This improved exercise provides clearer tasks, additional practice questions, and well-documented solutions to help beginners understand Pandas filtering, sorting, aggregation, and groupby operations.\n" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From cafa381e7146b0ca2a392696e6dc743671351a0d Mon Sep 17 00:00:00 2001 From: amantechy <112rawataman@gmail.com> Date: Fri, 12 Dec 2025 14:21:31 +0530 Subject: [PATCH 4/5] Rename 02_Filtering_&_Sorting/occu_exe.ipynb to 02_Filtering_&_Sorting/pandas_exercises/Occupation_Improved/occu_exe.ipynb --- .../{ => pandas_exercises/Occupation_Improved}/occu_exe.ipynb 
| 0 1 file changed, 0 insertions(+), 0 deletions(-) rename 02_Filtering_&_Sorting/{ => pandas_exercises/Occupation_Improved}/occu_exe.ipynb (100%) diff --git a/02_Filtering_&_Sorting/occu_exe.ipynb b/02_Filtering_&_Sorting/pandas_exercises/Occupation_Improved/occu_exe.ipynb similarity index 100% rename from 02_Filtering_&_Sorting/occu_exe.ipynb rename to 02_Filtering_&_Sorting/pandas_exercises/Occupation_Improved/occu_exe.ipynb From d67fa7ef723468f7992b90ac5eae7271f636fc72 Mon Sep 17 00:00:00 2001 From: amantechy <112rawataman@gmail.com> Date: Fri, 12 Dec 2025 14:22:37 +0530 Subject: [PATCH 5/5] Rename 02_Filtering_&_Sorting/pandas_exercises/Occupation_Improved/occu_exe.ipynb to 02_Filtering_&_Sorting/Occupation_Improved/occu_exe.ipynb --- .../{pandas_exercises => }/Occupation_Improved/occu_exe.ipynb | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename 02_Filtering_&_Sorting/{pandas_exercises => }/Occupation_Improved/occu_exe.ipynb (100%) diff --git a/02_Filtering_&_Sorting/pandas_exercises/Occupation_Improved/occu_exe.ipynb b/02_Filtering_&_Sorting/Occupation_Improved/occu_exe.ipynb similarity index 100% rename from 02_Filtering_&_Sorting/pandas_exercises/Occupation_Improved/occu_exe.ipynb rename to 02_Filtering_&_Sorting/Occupation_Improved/occu_exe.ipynb