# Importing packages!
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Loads the precipitation dataset from 'precip.csv' into a DataFrame;
# later cells read its 'precip' and 'growth' columns
precip = pd.read_csv('precip.csv')
# Previews the first few rows (only displayed when run as a notebook cell)
precip.head()

def convert_to_su(data_arr):
    """Return *data_arr* expressed in standard units.

    Each value is shifted by the mean of the data and then scaled by the
    standard deviation of the data, as computed by ``np.mean`` and
    ``np.std`` respectively.

    Parameters
    ----------
    data_arr : array-like supporting element-wise arithmetic
        The values to standardize (e.g. a numpy array or pandas Series).

    Returns
    -------
    Same container type as the arithmetic on ``data_arr`` produces,
    holding the standardized values.
    """
    # Summary statistics of the raw data
    center = np.mean(data_arr)
    spread = np.std(data_arr)
    # Shift to zero mean, then scale to unit standard deviation
    return (data_arr - center) / spread

# Express both columns in standard units so they share a common, unitless scale
precip_su, growth_su = (
    convert_to_su(precip[column]) for column in ('precip', 'growth')
)

# Pearson's r: the average of the element-wise products of the standardized data
su_products = precip_su * growth_su
r = np.mean(su_products)
print(f'The manually-calculated Pearson correlation coefficient is {r}.')

The manually-calculated Pearson correlation coefficient is 0.19418647123114102.

# Cross-check with numpy: corrcoef returns a correlation matrix, and the
# off-diagonal entry is the correlation between precipitation and growth
precip_col, growth_col = precip['precip'], precip['growth']
corr_matrix = np.corrcoef(precip_col, growth_col)
r = corr_matrix[0][1]
print(f'The numpy-calculated Pearson correlation coefficient is {r}.')

The numpy-calculated Pearson correlation coefficient is 0.19418647123114094.

# Set up a fresh figure with a single set of axes
fig, ax1 = plt.subplots()

# Scatter the standardized observations as hollow green markers
ax1.scatter(precip_su, growth_su, facecolors='none', edgecolors='g')

# In standard units the best-fit line is y = r * x; evaluate it at
# 20 evenly spaced x-values spanning -3 to 3
xvls = np.linspace(-3, 3, num=20)
yvls = xvls * r
# Draw the best-fit line in blue
ax1.plot(xvls, yvls, 'b')

# Label the axes and title the figure
ax1.set(xlabel='Standardized precipitation', ylabel='Standardized growth')
ax1.set_title('Best-Fit Line in Standard Units')

Text(0.5, 1.0, 'Best-Fit Line in Standard Units')

# Slope in original units: r rescaled by the ratio SD(growth) / SD(precip)
growth_col, precip_col = precip['growth'], precip['precip']
sd_ratio = np.std(growth_col) / np.std(precip_col)
slope = r * sd_ratio
# Intercept: mean growth minus the slope times mean precipitation
intercept = np.mean(growth_col) - slope * np.mean(precip_col)

# Set up a fresh figure with a single set of axes
fig, ax2 = plt.subplots()

# Scatter the raw observations as hollow green markers
ax2.scatter(precip['precip'], precip['growth'], facecolors='none', edgecolors='g')

# Evaluate the fitted line y = slope * x + intercept at 20 evenly spaced
# x-values spanning 0 to 3
xvls = np.linspace(0, 3, num=20)
yvls = intercept + slope * xvls
# Draw the best-fit line in blue
ax2.plot(xvls, yvls, 'b')

# Label the axes and title the figure
ax2.set(xlabel='Precipitation', ylabel='Growth')
ax2.set_title('Best-Fit Line in Original Units')

Text(0.5, 1.0, 'Best-Fit Line in Original Units')

# Use the fitted line to predict growth at a precipitation level of 3
precip_level = 3
growth_prediction = intercept + slope * precip_level
print(f'At a precipitation level of 3, the growth is predicted to be {growth_prediction}.')

At a precipitation level of 3, the growth is predicted to be 24.481837780469004.

	precip	growth
0	2.176092	25.350882
1	2.280644	17.534213
2	1.703581	28.590446
3	1.061713	21.454899
4	1.718713	14.993775

Linear Prediction Models

What are Prediction Models?

Why Create Prediction Models?

What is a Linear Prediction Model?

Creating a Linear Prediction Model

Constructing a Best-Fit Line

Best-Fit Lines with Standard Units

Best-Fit Lines with Original Units

Predicting Values with the Linear Model