import numpy as np
from matplotlib import pyplot as plt
import pandas as pd


# Read the ice cream sales CSV straight from its web address into a DataFrame
url = 'https://piratepeel.github.io/GlobalStudiesQuantMethodsS1/data/ice_cream_sales.csv'
iceCreamData = pd.read_csv(filepath_or_buffer=url)
# Preview (up to) the first 10 rows
iceCreamData.head(n=10)


# Tally how many rows there are for each value in the 'Flavour' column
category_counts_f = iceCreamData['Flavour'].value_counts()

# Draw the tallies as a pie chart, printing each wedge's share to one decimal place
category_counts_f.plot.pie(autopct='%1.1f%%')

# Blank out both axis labels so only the title remains
plt.xlabel('')
plt.ylabel('')
# Title for the chart
plt.title('Pie Chart of Flavour Distribution')
# Equal aspect ratio keeps the pie circular; left as the last expression
# so the notebook shows the returned axis limits
plt.axis('equal')

(-1.1129928317635196,
 1.1006187320057477,
 -1.1006187372479035,
 1.1129928321258653)


# Tally how many rows there are for each value in the 'Flavour' column
category_counts_f = iceCreamData['Flavour'].value_counts()

# One bar per flavour, bar height = number of rows
category_counts_f.plot.bar()

# Axis labels first, then the title (kept last so its return value is displayed)
plt.ylabel('Count')
plt.xlabel('Flavours')
plt.title('Count of Categories')

Text(0.5, 1.0, 'Count of Categories')


# Tally how many rows there are for each value in the 'Cone' column
category_counts_c = iceCreamData['Cone'].value_counts()

# Pie chart of the cone tallies with percentage labels
category_counts_c.plot.pie(autopct='%1.1f%%')

# Clear the axis labels, then add the title
plt.xlabel('')
plt.ylabel('')
plt.title('Pie Chart of Cone Distribution')
# Equal aspect ratio keeps the pie circular
plt.axis('equal')

(-1.1000000009884476,
 1.100000020757401,
 -1.1129928306484105,
 1.1006187062213528)


# Tally how many rows there are for each value in the 'Cone' column
category_counts_c = iceCreamData['Cone'].value_counts()

# One bar per cone type
category_counts_c.plot.bar()

# Axis labels, then the title (last expression, so its return value is displayed)
plt.ylabel('Count')
plt.xlabel('Cone')
plt.title('Count of Categories')

Text(0.5, 1.0, 'Count of Categories')


# Display the cone counts in the order value_counts() produced them
category_counts_c

standard cornet    5
giant cone         3
mini cup           2
Name: Cone, dtype: int64

# Generic pattern for putting value counts into a chosen order.
# NOTE(review): `df`, 'category' and the 'Group*' labels are placeholders;
# `df` is not defined anywhere in the visible file, so this looks like an
# illustrative template rather than a cell meant to be run as-is - confirm.

# Get the counts of each category
values = df['category'].value_counts()

# Define the desired order of categories
desired_order = ['Group1', 'Group2', 'Group3']  # Change this order as needed
values = values.reindex(desired_order) # Reindex the values in the desired order


# Order the cone categories as mini -> standard -> giant
desired_order = ['mini cup', 'standard cornet', 'giant cone']  # Change this order as needed
# Rearrange the counts so their index follows desired_order
category_counts_c = category_counts_c.reindex(index=desired_order)


# Display the cone counts again to check they now follow desired_order
category_counts_c

mini cup           2
standard cornet    5
giant cone         3
Name: Cone, dtype: int64


# Bar chart of the cone counts, drawn in whatever order the Series currently has
category_counts_c.plot.bar()

# Axis labels, then the title (last expression, so its return value is displayed)
plt.ylabel('Count')
plt.xlabel('Cone')
plt.title('Count of Categories')

Text(0.5, 1.0, 'Count of Categories')


# Tally how many rows there are for each distinct value in the 'Temperature' column
category_counts_t = iceCreamData['Temperature'].value_counts()

# Pie chart of those tallies with percentage labels
category_counts_t.plot.pie(autopct='%1.1f%%')

# Clear the axis labels, then add the title
plt.xlabel('')
plt.ylabel('')
plt.title('Pie Chart of Temperature Distribution')
# Equal aspect ratio keeps the pie circular
plt.axis('equal')

(-1.1000000056491936,
 1.100000010561534,
 -1.100000007859747,
 1.1000000029474057)


# The temperature counts (category_counts_t) were already computed in the
# pie-chart cell above, so they are reused here instead of being recalculated.

# Create a bar chart: one bar per distinct temperature value
category_counts_t.plot(kind='bar')

# Adding labels and title
plt.xlabel('Temperature')
plt.ylabel('Count')
plt.title('Count of Temperatures')  # spelling corrected from 'Tempertures'

Text(0.5, 1.0, 'Count of Temperatures')


# Histogram of the temperatures, grouped into 3 bins with outlined, semi-transparent bars
iceCreamData['Temperature'].plot.hist(bins=3, edgecolor='black', alpha=0.7)

# Axis labels, then the title (last expression, so its return value is displayed)
plt.ylabel('Frequency')
plt.xlabel('Temperature')
plt.title('Temperature Distribution Histogram')

Text(0.5, 1.0, 'Temperature Distribution Histogram')


# Tally how many rows there are for each distinct value in the 'Scoops' column
category_counts_s = iceCreamData['Scoops'].value_counts()

# Pie chart of those tallies with percentage labels
category_counts_s.plot.pie(autopct='%1.1f%%')

# Clear the axis labels, then add the title
plt.xlabel('')
plt.ylabel('')
plt.title('Pie Chart of Scoops Distribution')
# Equal aspect ratio keeps the pie circular
plt.axis('equal')

(-1.1129928313852615,
 1.1006187216952246,
 -1.1008152309433816,
 1.1171195796322744)


# category_counts_s was already computed in the pie-chart cell above,
# so it is reused here rather than recalculated.

# One bar per distinct number of scoops
category_counts_s.plot.bar()

# Axis labels, then the title (last expression, so its return value is displayed)
plt.ylabel('Count')
plt.xlabel('Scoops')
plt.title('Count of Scoops')

Text(0.5, 1.0, 'Count of Scoops')


# Create a histogram of the 'Scoops' column, grouped into 5 bins
iceCreamData['Scoops'].plot(kind='hist', bins=5, edgecolor='black', alpha=0.7)

# Adding labels and title
plt.xlabel('Scoops')
plt.ylabel('Frequency')
# The title used to read 'Temperature Distribution Histogram' (copied from the
# temperature cell); it now names the column that is actually plotted.
plt.title('Scoops Distribution Histogram')

Text(0.5, 1.0, 'Scoops Distribution Histogram')


# Median "by hand": sort the temperatures and average the middle value(s).
sorted_temperatures = np.sort(iceCreamData['Temperature'].values)
n_temperatures = len(sorted_temperatures)
# Work out the middle position(s) from the length instead of hard-coding them.
# An even count gives the two central values (indices 4 and 5 when there are
# 10 values, i.e. the slice [4:6] used previously); an odd count gives the
# single central value.
middle_values = sorted_temperatures[(n_temperatures - 1) // 2 : n_temperatures // 2 + 1]
np.mean(middle_values)

-1.65


# Median of the temperatures using NumPy's built-in function
np.median(iceCreamData['Temperature'])

-1.65


# The 50th percentile of the temperatures; the recorded output is the same
# value as the median computed above
np.percentile(iceCreamData['Temperature'], 50)

-1.65


# "Summing" the Flavour column: the values are strings, so (as the recorded
# output shows) the result is just the flavour names joined end to end
np.sum(iceCreamData['Flavour'])

'peachvanillavanillapeachpeachpeachchocolatepeachpeachvanilla'


# Mean temperature by hand: add up all the values and divide by how many
# there are (taken from the data rather than hard-coded as 10)
np.sum(iceCreamData['Temperature']) / len(iceCreamData['Temperature'])

-1.4500000000000002


# Mean number of scoops: total scoops divided by the number of purchases
# (the count comes from the data rather than a hard-coded 10)
meanScoops = np.sum(iceCreamData['Scoops']) / len(iceCreamData['Scoops'])
print(f'The mean number of scoops is {meanScoops}')

The mean number of scoops is 3.0


# Imagine 10 purchases of exactly 3 scoops each: when every value is the
# same, each one's difference from the mean is zero
3 * np.ones(10) - meanScoops

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])


# With the real purchases the amounts differ, so each purchase sits at its
# own distance (positive or negative) from the mean
scoops_per_purchase = iceCreamData['Scoops'].values
differenceFromTheMean = scoops_per_purchase - meanScoops
differenceFromTheMean

array([ 0., -2.,  1., -1., -1., -1.,  0., -1.,  5.,  0.])


# Adding up the differences from the mean always gives zero:
# the positive and negative differences cancel each other out
differenceFromTheMean.sum()

0.0


# Square each difference by multiplying it by itself (element by element);
# squaring makes every term non-negative so they no longer cancel out
differenceFromTheMean * differenceFromTheMean

array([ 0.,  4.,  1.,  1.,  1.,  1.,  0.,  1., 25.,  0.])


# Same squaring, this time with np.square; the recorded output matches the
# element-by-element multiplication above
squaredDifference = np.square(differenceFromTheMean)
squaredDifference

array([ 0.,  4.,  1.,  1.,  1.,  1.,  0.,  1., 25.,  0.])


# Variance: the mean of the squared differences from the mean.
# The divisor is the number of values (taken from the data rather than
# hard-coded as 10), i.e. this divides by N, not N - 1.
variance = np.sum(squaredDifference) / len(squaredDifference)
variance

3.4


# Square root of the variance (the standard deviation), which brings the
# measure of spread back to the same units as the data
np.sqrt(variance)

1.8439088914585775


# The whole calculation in a single expression: subtract the mean, square,
# sum, divide by the number of values, then take the square root.
# The divisor is the data's own length rather than a hard-coded 10.
np.sqrt(
    np.sum(np.square(iceCreamData['Scoops'].values - np.mean(iceCreamData['Scoops'])))
    / len(iceCreamData['Scoops'])
)

1.8439088914585775


# Read the second sales file (the one named "..._no_outlier.csv") into its own DataFrame
url = 'https://piratepeel.github.io/GlobalStudiesQuantMethodsS1/data/ice_cream_sales_no_outlier.csv'
iceCreamData_noOutlier = pd.read_csv(filepath_or_buffer=url)
# Preview (up to) the first 10 rows
iceCreamData_noOutlier.head(n=10)


# Mean number of scoops in the original dataset
np.mean(iceCreamData['Scoops'])

3.0


# Mean number of scoops in the no-outlier dataset, for comparison with the mean above
np.mean(iceCreamData_noOutlier['Scoops'])

2.6


# Median number of scoops in the original dataset
np.median(iceCreamData['Scoops'])

2.5


# Median number of scoops in the no-outlier dataset, for comparison with the median above
np.median(iceCreamData_noOutlier['Scoops'])

2.5


# Scoops from the original dataset in ascending order, to inspect the values directly
np.sort(iceCreamData['Scoops'])

array([1, 2, 2, 2, 2, 3, 3, 3, 4, 8])


# Scoops from the no-outlier dataset in ascending order, for comparison with the array above
np.sort(iceCreamData_noOutlier['Scoops'])

array([1, 2, 2, 2, 2, 3, 3, 3, 4, 4])


# 25th and 75th percentiles of scoops in the original dataset, from one call
scoops25, scoops75 = np.percentile(iceCreamData['Scoops'], [25, 75])

# Interquartile range: the gap between the 75th and 25th percentiles
scoops75 - scoops25

1.0


# 25th and 75th percentiles of scoops in the no-outlier dataset, from one call
# (this overwrites the scoops25 / scoops75 values computed for the original data)
scoops25, scoops75 = np.percentile(iceCreamData_noOutlier['Scoops'], [25, 75])

# Interquartile range: the gap between the 75th and 25th percentiles
scoops75 - scoops25

1.0

Plotting data and interpreting it¶

Plotting Nominal data¶

Plotting Ordinal data¶

How to change the order of the bars¶

Plotting Interval data¶

What do these plots tell you?¶

Plotting Ratio data¶

Mode¶

Median¶

Mean¶

Measuring how much the values vary¶

Sensitivity to outliers¶

	Flavour	Cone	Temperature	Scoops
0	peach	giant cone	-0.4	3
1	vanilla	mini cup	-2.2	1
2	vanilla	giant cone	-2.0	4
3	peach	standard cornet	0.4	2
4	peach	standard cornet	-1.9	2
5	peach	mini cup	-1.4	2
6	chocolate	standard cornet	-2.8	3
7	peach	standard cornet	-2.5	2
8	peach	giant cone	-0.3	8
9	vanilla	standard cornet	-1.4	3