# Plain string literal: there are no placeholders, so no f-prefix is needed.
print('Hello World')

Hello World


# print() joins its arguments with a single space, so the sum follows the label.
print('Hello World three plus three is:', 3 + 3)

Hello World three plus three is: 6


# Two ints divided with "/" (true division) give a float, as the printed types show.
a, b = 24, 7
c = a / b
print(f"a is of type, {type(a)} with a value of {a},and c is of type,{type(c)} with a value of {c}")

a is of type, <class 'int'> with a value of 24,and c is of type,<class 'float'> with a value of 3.4285714285714284


# Rebinding a does not recompute c: c still holds the quotient from the earlier cell.
a = 28
print(f"a is of type, {type(a)} with a value of {a},and c is of type,{type(c)} with a value of {c}")

a is of type, <class 'int'> with a value of 28,and c is of type,<class 'float'> with a value of 3.4285714285714284


# Define a function that takes an input, squares it, adds 7, and returns the result
def x2p7(x):
    """Return ``x`` squared plus seven.

    This triple-quoted string, placed directly under the ``def`` line, is the
    function's documentation string; it is available as ``x2p7.__doc__``.
    """
    return x * x + 7

# Evaluate the function at x = 3 and show the result next to a label.
print('x^2 + 7 = ', x2p7(x=3))

x^2 + 7 =  16


print(x2p7.__doc__) # print the documentation string stored on the function's __doc__ attribute

    this is my documentation string
    it needs to be in triple double quotes
    this funciton returns x^2 + 7


import pandas as pd
import numpy as np


# Toy purchase table: each customer appears on more than one row, so the
# repeated values in the 'customer' column can be used to group rows together.
mydict = {
    'customer': [
        'Customer 1', 'Customer 1',
        'Customer 2', 'Customer 2',
        'Customer 3', 'Customer 3',
    ],
    'product1': [1.1, 2.1, 3.8, 4.2, 5.5, 6.9],
    'product2': [8.2, 9.1, 11.1, 5.2, 44.66, 983],
}
# Row labels 'Purchase 1' .. 'Purchase 6', one per entry in the lists above.
df = pd.DataFrame(data=mydict, index=[f'Purchase {i}' for i in range(1, 7)])
df  # a bare expression at the end of a notebook cell displays the table


# Load the Titanic CSV into a DataFrame; the relative path is resolved
# against the current working directory.
mydata = pd.read_csv('datasets/Titanic.csv')


# Show 5 randomly sampled rows as a quick look at the data.
mydata.sample(5)


# Print a summary of the frame: index range, columns, non-null counts, dtypes and memory usage.
mydata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


# Draw 4 random values from the 'Sex' column.
mydata['Sex'].sample(4)

408    male
579    male
185    male
648    male
Name: Sex, dtype: object


import seaborn as sns
import matplotlib.pyplot as plt # seaborn is based on matplotlib
# Apply seaborn's default theme (adds a nice background to the graphs).
# set_theme() is the preferred spelling; sns.set() is only an alias for it.
sns.set_theme(color_codes=True)
%matplotlib inline
# The %matplotlib inline magic above tells Jupyter to display the graphs inline in the notebook


# Histogram of the 'Age' column with a kernel density estimate (kde=True) drawn on top.
# NOTE(review): with histplot's documented default stat="count" the bars show counts
# rather than summing to 1; use stat="density" (area integrates to one) or
# stat="probability" (bars sum to 1) if normalisation is wanted — confirm against
# the installed seaborn version.
sns.histplot(data=mydata,x='Age',kde=True);


# Joint plot of 'Fare' (x axis) against 'Age' (y axis); the trailing semicolon
# keeps the notebook from echoing the returned object.
sns.jointplot(data=mydata, x='Fare', y='Age');

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Parch	Ticket	Fare	Cabin	Embarked
759	760	1	1	Rothes, the Countess. of (Lucy Noel Martha Dye...	female	33.0	0	0	110152	86.5000	B77	S
509	510	1	3	Lang, Mr. Fang	male	26.0	0	0	1601	56.4958	NaN	S
701	702	1	1	Silverthorne, Mr. Spencer Victor	male	35.0	0	0	PC 17475	26.2875	E24	S
559	560	1	3	de Messemaeker, Mrs. Guillaume Joseph (Emma)	female	36.0	1	0	345572	17.4000	NaN	S
423	424	0	3	Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria ...	female	28.0	1	1	347080	14.4000	NaN	S

Jupyter Notebooks¶

Shortcuts¶

Introduction¶

Anaconda Project and Anaconda Navigator¶

Visual Studio Code¶

Hello World Examples¶

Variables and Types¶

User Defined Function and Document String¶

Pandas¶

Analysis¶

Load Dataset¶

CFA Notes: This is a potential column that should be changed to a category.¶

Visualization¶

Plotting Univariate Distributions¶

Plotting bivariate distributions¶

	customer	product1	product2
Purchase 1	Customer 1	1.1	8.20
Purchase 2	Customer 1	2.1	9.10
Purchase 3	Customer 2	3.8	11.10
Purchase 4	Customer 2	4.2	5.20
Purchase 5	Customer 3	5.5	44.66
Purchase 6	Customer 3	6.9	983.00