import pandas as pd
import numpy as np
# Load the survey records into a DataFrame.
surveys_path = 'surveys.csv'
df1 = pd.read_csv(surveys_path)
df1  # bare expression: shows the frame when run as a notebook cell

import matplotlib.pyplot as plt
# Scatter weight against hindfoot length for every survey record.
fig, ax1 = plt.subplots()
hindfoot_all = df1['hindfoot_length']
weight_all = df1['weight']
ax1.scatter(hindfoot_all, weight_all)

<matplotlib.collections.PathCollection at 0x28abc9fa290>

# Subset the survey records to the two species being compared.
species_ids = df1['species_id']
ds_df = df1[species_ids == 'DS']
so_df = df1[species_ids == 'SO']

fig, ax2 = plt.subplots()

# DS: small filled red markers.
ax2.scatter(
    ds_df['hindfoot_length'],
    ds_df['weight'],
    s=5,
    c='r',
    label='DS',
)
# SO: slightly larger hollow markers with a cyan outline.
ax2.scatter(
    so_df['hindfoot_length'],
    so_df['weight'],
    s=7,
    facecolors='none',
    edgecolors='c',
    label='SO',
)

ax2.set_aspect(.25)
ax2.set_xlabel('hindfoot length')
ax2.set_ylabel('weight')
ax2.set_title('Hindfoot Length vs Weight in DS and SO individuals')
# The legend entries come from the label= values given to scatter above.
ax2.legend()

<matplotlib.legend.Legend at 0x28abc9f8210>

# One row, two panels: DS in the first, SO in the second.
fig, (ax3a, ax3b) = plt.subplots(nrows=1, ncols=2)
ax3a.scatter(ds_df['hindfoot_length'], ds_df['weight'], s=2, c='r')
ax3b.scatter(so_df['hindfoot_length'], so_df['weight'], s=2, c='c')

# Hide the top and right borders on both panels.
for panel in (ax3a, ax3b):
    for side in ('top', 'right'):
        panel.spines[side].set_visible(False)

# Overlay hindfoot-length histograms for DS and SO on shared bins.
fig, ax4 = plt.subplots()
length_bins = range(0, 80, 2)  # bin edges 0, 2, ..., 78
ax4.hist(ds_df['hindfoot_length'], bins=length_bins)
# Drawn as an outline so the DS bars underneath stay visible.
ax4.hist(so_df['hindfoot_length'], bins=length_bins, histtype='step')

(array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  1.,  2.,  2., 15.,
        11.,  5.,  2.,  0.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]),
 array([ 0.,  2.,  4.,  6.,  8., 10., 12., 14., 16., 18., 20., 22., 24.,
        26., 28., 30., 32., 34., 36., 38., 40., 42., 44., 46., 48., 50.,
        52., 54., 56., 58., 60., 62., 64., 66., 68., 70., 72., 74., 76.,
        78.]),
 [<matplotlib.patches.Polygon at 0x28abd5dc950>])

# Mean hindfoot length per survey year.
grouped_data = df1.groupby('year')['hindfoot_length'].mean()
# Turn the 'year' index back into an ordinary column for plotting.
year_data = grouped_data.reset_index()
years = year_data['year']
mean_lengths = year_data['hindfoot_length']

# Draw the yearly means as a connected line, then mark each year with a point.
fig, ax5 = plt.subplots()
ax5.plot(years, mean_lengths)
ax5.scatter(years, mean_lengths)

<matplotlib.collections.PathCollection at 0x28abd663850>

# Load the treatment/control measurements into a DataFrame.
alpaca_path = 'alpaca.csv'
df2 = pd.read_csv(alpaca_path)
df2  # bare expression: shows the frame when run as a notebook cell

fig, ax6 = plt.subplots(figsize=(3, 3))

treatment_vals = df2['treatment']
control_vals = df2['control']

# Each group gets its own x position: treatment at 0, control at 1.
treat_xvls = np.zeros(len(treatment_vals))
ctrl_xvls = np.ones(len(control_vals))

ax6.scatter(treat_xvls, treatment_vals)
ax6.scatter(ctrl_xvls, control_vals)

# Mark each group's mean with a '<' marker offset 0.1 to the right of its column.
ax6.plot(0.1, np.mean(treatment_vals), '<')
ax6.plot(1.1, np.mean(control_vals), '<')

# Replace the numeric x ticks with the group names.
ax6.set_xticks([0, 1])
ax6.set_xticklabels(['treatment', 'control'])

[Text(0, 0, 'treatment'), Text(1, 0, 'control')]

# Bootstrap a 95% confidence interval for the mean of the control group:
# resample the control values with replacement, record each resample's mean,
# and take the 2.5th and 97.5th percentiles of those means.
n_resamples = 1000
control_data = np.array(df2['control'])
boot_means = []

for n in range(n_resamples):
    # np.random.choice samples with replacement by default.
    boot_data = np.random.choice(control_data, len(control_data))
    boot_means.append(np.mean(boot_data))

conf_interval = np.percentile(boot_means, [2.5, 97.5])

fig, ax7 = plt.subplots()
# Keep the per-bin counts so the interval bar can be placed relative to the
# histogram's height instead of at a fixed y value that only fits one
# particular resample count / bin count.
bin_counts = ax7.hist(boot_means)[0]
bar_height = 0.9 * bin_counts.max()
ax7.plot([conf_interval[0], conf_interval[1]], [bar_height, bar_height], c='r')

[<matplotlib.lines.Line2D at 0x28abc986290>]

# Load the precipitation/growth measurements and preview the first rows.
precip_path = 'precip.csv'
df3 = pd.read_csv(precip_path)
df3.head()

# Fit a straight line growth = slope * precip + intercept from summary statistics:
#   slope     = r * (std(growth) / std(precip))
#   intercept = mean(growth) - slope * mean(precip)
# where r is the correlation coefficient between the two columns.
precip = df3['precip']
growth = df3['growth']

r = np.corrcoef(precip, growth)[0, 1]  # off-diagonal entry of the 2x2 correlation matrix
slope = r * (np.std(growth) / np.std(precip))
intercept = np.mean(growth) - slope * np.mean(precip)

# Plot the raw observations, then overlay the fitted line in red.
fig, ax8 = plt.subplots()
ax8.scatter(precip, growth)
ax8.set_xlabel('Precipitation')
ax8.set_ylabel('Growth')
xdata = precip
model_ydata = slope * xdata + intercept
ax8.plot(xdata, model_ydata, c='r')

[<matplotlib.lines.Line2D at 0x28abc8e0f90>]

	record_id	month	day	year	plot_id	species_id	sex	hindfoot_length	weight
0	1	7	16	1977	2	NL	M	32.0	NaN
1	2	7	16	1977	3	NL	M	33.0	NaN
2	3	7	16	1977	2	DM	F	37.0	NaN
3	4	7	16	1977	7	DM	M	36.0	NaN
4	5	7	16	1977	3	DM	M	35.0	NaN
...	...	...	...	...	...	...	...	...	...
35544	35545	12	31	2002	15	AH	NaN	NaN	NaN
35545	35546	12	31	2002	15	AH	NaN	NaN	NaN
35546	35547	12	31	2002	10	RM	F	15.0	14.0
35547	35548	12	31	2002	7	DO	M	36.0	51.0
35548	35549	12	31	2002	5	NaN	NaN	NaN	NaN

	treatment	control
0	7.2	4.6
1	8.3	4.6
2	8.3	5.1
3	7.1	2.8
4	4.4	5.4
5	4.1	4.4
6	4.8	4.2
7	6.2	5.6
8	7.7	5.8
9	7.4	4.1
10	4.3	3.9
11	5.8	3.8

	precip	growth
0	2.176092	25.350882
1	2.280644	17.534213
2	1.703581	28.590446
3	1.061713	21.454899
4	1.718713	14.993775

Basics of Matplotlib

Importing data to plot in Matplotlib

Creating a basic scatterplot

Creating a more complex scatterplot

Creating multiple plots

Creating a histogram

Plotting lines

Plotting data in groups

Plotting a confidence interval

Plotting a linear regression model

	treatment	control
0	7.2	4.6
1	8.3	4.6
2	8.3	5.1
3	7.1	2.8
4	4.4	5.4
5	4.1	4.4
6	4.8	4.2
7	6.2	5.6
8	7.7	5.8
9	7.4	4.1
10	4.3	3.9
11	5.8	3.8

	treatment	control
0	7.2	4.6
1	8.3	4.6
2	8.3	5.1
3	7.1	2.8
4	4.4	5.4
5	4.1	4.4
6	4.8	4.2
7	6.2	5.6
8	7.7	5.8
9	7.4	4.1
10	4.3	3.9
11	5.8	3.8

	treatment	control
0	7.2	4.6
1	8.3	4.6
2	8.3	5.1
3	7.1	2.8
4	4.4	5.4
5	4.1	4.4
6	4.8	4.2
7	6.2	5.6
8	7.7	5.8
9	7.4	4.1
10	4.3	3.9
11	5.8	3.8