Matplotlib¶

Matplotlib is a package in Python that provides functions for plotting data and creating visualizations.

In [ ]:
# Import the packages
import matplotlib.pyplot as plt
import numpy as np

Basic Matplotlib Plots¶

plt.plot() can be used to make a line plot.

In [45]:
# Sample data: the integers 1 through 6
seq_array = np.array([1, 2, 3, 4, 5, 6])
# Each y-value is twice its x-value
doubled = 2 * seq_array
# Draw the line plot (kept as the last expression so the cell echoes the Line2D list)
plt.plot(seq_array, doubled)
Out[45]:
[<matplotlib.lines.Line2D at 0x19c94fd5010>]
No description has been provided for this image

plt.xlabel() and plt.ylabel() add an x-axis label and a y-axis label, respectively, while plt.title() adds a title.

In [ ]:
# Draw the line y = 2x over the sample sequence
plt.plot(seq_array, seq_array * 2)
# Label both axes so each one says what it shows
plt.ylabel("Seq_array * 2")
plt.xlabel("Seq_array")
# Title the figure (kept last so the cell echoes the title's Text object)
plt.title("Seq_array compared to Seq_array * 2")
Out[ ]:
Text(0.5, 1.0, 'Seq_array compared to Seq_array * 2')
No description has been provided for this image

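Passing the format string 'o' as the third argument to plt.plot() draws the data as circular point markers instead of a connecting line, so a second plt.plot() call with 'o' overlays points on an existing line plot.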

In [63]:
# The y-values are shared by both layers, so compute them once
doubled = seq_array * 2
# First layer: the connecting line
plt.plot(seq_array, doubled)
# Second layer: circular markers at each data point ('o' draws markers only)
plt.plot(seq_array, doubled, 'o')
# Label both axes
plt.ylabel("Seq_array * 2")
plt.xlabel("Seq_array")
# Title the figure (kept last so the cell echoes the title's Text object)
plt.title("Seq_array compared to Seq_array * 2")
Out[63]:
Text(0.5, 1.0, 'Seq_array compared to Seq_array * 2')
No description has been provided for this image

Working with Scatterplots¶

The Student Performance Dataset is designed to examine the factors that influence students' academic performance. It consists of 10,000 student records, each containing several predictor variables and a performance index.

Variables:

  • Hours Studied: The total number of hours spent studying by each student.
  • Previous Scores: The scores obtained by students in previous tests.
  • Extracurricular Activities: Whether the student participates in extracurricular activities (Yes = 1 or No = 0).
  • Sleep Hours: The average number of hours of sleep the student had per day.
  • Sample Question Papers Practiced: The number of sample question papers the student practiced.

Target Variable:

  • Performance Index: A measure of the overall performance of each student. The performance index represents the student's academic performance and has been rounded to the nearest integer. The index ranges from 10 to 100, with higher values indicating better performance.
In [51]:
# Read the comma-separated file into a NumPy array, skipping the header row
student = np.loadtxt("Student_Performance.csv", skiprows=1, delimiter=",")
# Keep only the first 150 rows so the plots below stay readable
student = student[:150]
# Echo the array as the cell output
student
Out[51]:
array([[  7.,  99.,   1.,   9.,   1.,  91.],
       [  4.,  82.,   0.,   4.,   2.,  65.],
       [  8.,  51.,   1.,   7.,   2.,  45.],
       [  5.,  52.,   1.,   5.,   2.,  36.],
       [  7.,  75.,   0.,   8.,   5.,  66.],
       [  3.,  78.,   0.,   9.,   6.,  61.],
       [  7.,  73.,   1.,   5.,   6.,  63.],
       [  8.,  45.,   1.,   4.,   6.,  42.],
       [  5.,  77.,   0.,   8.,   2.,  61.],
       [  4.,  89.,   0.,   4.,   0.,  69.],
       [  8.,  91.,   0.,   4.,   5.,  84.],
       [  8.,  79.,   0.,   6.,   2.,  73.],
       [  3.,  47.,   0.,   9.,   2.,  27.],
       [  6.,  47.,   0.,   4.,   2.,  33.],
       [  5.,  79.,   0.,   7.,   8.,  68.],
       [  2.,  72.,   0.,   4.,   3.,  43.],
       [  8.,  73.,   1.,   8.,   4.,  67.],
       [  6.,  83.,   1.,   7.,   2.,  70.],
       [  2.,  54.,   1.,   4.,   9.,  30.],
       [  5.,  75.,   0.,   7.,   0.,  63.],
       [  1.,  99.,   1.,   4.,   3.,  71.],
       [  6.,  96.,   0.,   9.,   0.,  85.],
       [  9.,  74.,   1.,   7.,   6.,  73.],
       [  1.,  85.,   0.,   5.,   6.,  57.],
       [  3.,  61.,   0.,   6.,   3.,  35.],
       [  7.,  62.,   1.,   7.,   4.,  49.],
       [  4.,  79.,   0.,   8.,   9.,  66.],
       [  9.,  84.,   1.,   6.,   6.,  83.],
       [  3.,  94.,   1.,   6.,   5.,  74.],
       [  5.,  90.,   1.,   4.,   3.,  74.],
       [  3.,  61.,   1.,   7.,   3.,  39.],
       [  7.,  44.,   1.,   9.,   1.,  36.],
       [  5.,  70.,   1.,   6.,   9.,  58.],
       [  9.,  52.,   1.,   8.,   1.,  47.],
       [  7.,  67.,   1.,   9.,   3.,  60.],
       [  2.,  97.,   1.,   9.,   4.,  74.],
       [  4.,  59.,   0.,   8.,   3.,  42.],
       [  9.,  72.,   0.,   8.,   2.,  68.],
       [  2.,  55.,   1.,   4.,   1.,  32.],
       [  9.,  68.,   0.,   5.,   3.,  64.],
       [  5.,  62.,   0.,   7.,   4.,  45.],
       [  2.,  63.,   1.,   6.,   0.,  39.],
       [  4.,  73.,   1.,   7.,   0.,  58.],
       [  7.,  46.,   0.,   9.,   5.,  36.],
       [  8.,  77.,   1.,   6.,   4.,  71.],
       [  3.,  76.,   1.,   4.,   3.,  54.],
       [  1.,  43.,   1.,   7.,   0.,  17.],
       [  4.,  73.,   0.,   4.,   6.,  54.],
       [  2.,  81.,   1.,   4.,   3.,  58.],
       [  8.,  61.,   0.,   7.,   2.,  53.],
       [  4.,  44.,   1.,   7.,   6.,  27.],
       [  2.,  89.,   1.,   6.,   1.,  65.],
       [  6.,  81.,   0.,   9.,   9.,  75.],
       [  6.,  62.,   1.,   9.,   0.,  52.],
       [  4.,  93.,   0.,   8.,   3.,  78.],
       [  6.,  99.,   0.,   4.,   7.,  91.],
       [  2.,  54.,   1.,   9.,   5.,  33.],
       [  2.,  70.,   1.,   5.,   8.,  47.],
       [  4.,  98.,   0.,   6.,   0.,  78.],
       [  8.,  48.,   0.,   6.,   0.,  38.],
       [  7.,  82.,   1.,   8.,   4.,  70.],
       [  9.,  97.,   1.,   8.,   5.,  98.],
       [  8.,  92.,   1.,   4.,   7.,  87.],
       [  5.,  64.,   1.,   6.,   2.,  49.],
       [  2.,  64.,   1.,   6.,   5.,  41.],
       [  5.,  88.,   0.,   5.,   2.,  71.],
       [  8.,  60.,   0.,   4.,   7.,  54.],
       [  9.,  48.,   0.,   4.,   3.,  42.],
       [  9.,  94.,   0.,   9.,   1.,  91.],
       [  1.,  87.,   1.,   8.,   8.,  61.],
       [  9.,  77.,   0.,   9.,   9.,  74.],
       [  7.,  61.,   1.,   4.,   7.,  54.],
       [  9.,  82.,   1.,   5.,   5.,  81.],
       [  8.,  62.,   1.,   4.,   2.,  52.],
       [  1.,  88.,   1.,   9.,   1.,  65.],
       [  8.,  43.,   1.,   7.,   3.,  36.],
       [  8.,  68.,   0.,   7.,   3.,  61.],
       [  3.,  54.,   1.,   5.,   0.,  35.],
       [  1.,  40.,   1.,   8.,   8.,  15.],
       [  8.,  93.,   1.,   6.,   9.,  88.],
       [  3.,  69.,   0.,   4.,   7.,  45.],
       [  3.,  68.,   1.,   9.,   6.,  49.],
       [  1.,  59.,   1.,   8.,   5.,  33.],
       [  5.,  72.,   1.,   9.,   2.,  60.],
       [  7.,  80.,   0.,   8.,   3.,  71.],
       [  9.,  83.,   1.,   4.,   1.,  81.],
       [  7.,  75.,   0.,   6.,   6.,  67.],
       [  9.,  99.,   0.,   4.,   1.,  95.],
       [  8.,  61.,   1.,   6.,   9.,  58.],
       [  2.,  52.,   0.,   9.,   0.,  29.],
       [  1.,  46.,   0.,   5.,   5.,  21.],
       [  7.,  47.,   1.,   6.,   3.,  38.],
       [  7.,  67.,   1.,   8.,   7.,  60.],
       [  8.,  84.,   1.,   5.,   0.,  76.],
       [  5.,  82.,   1.,   9.,   8.,  69.],
       [  3.,  48.,   0.,   7.,   5.,  30.],
       [  8.,  64.,   0.,   5.,   1.,  57.],
       [  6.,  95.,   1.,   6.,   7.,  81.],
       [  3.,  55.,   1.,   5.,   4.,  36.],
       [  1.,  49.,   1.,   9.,   4.,  25.],
       [  3.,  50.,   0.,   5.,   4.,  27.],
       [  5.,  70.,   1.,   9.,   6.,  61.],
       [  3.,  51.,   0.,   7.,   6.,  34.],
       [  1.,  99.,   0.,   6.,   9.,  76.],
       [  5.,  70.,   0.,   8.,   6.,  57.],
       [  7.,  53.,   0.,   5.,   9.,  45.],
       [  7.,  90.,   1.,   4.,   1.,  76.],
       [  9.,  83.,   1.,   8.,   7.,  83.],
       [  3.,  71.,   1.,   5.,   0.,  50.],
       [  7.,  89.,   0.,   8.,   1.,  81.],
       [  1.,  92.,   0.,   4.,   6.,  66.],
       [  4.,  56.,   1.,   8.,   3.,  38.],
       [  4.,  74.,   0.,   8.,   9.,  56.],
       [  5.,  40.,   0.,   4.,   9.,  25.],
       [  7.,  68.,   0.,   4.,   8.,  56.],
       [  7.,  90.,   1.,   4.,   8.,  82.],
       [  4.,  43.,   1.,   7.,   2.,  23.],
       [  7.,  62.,   1.,   9.,   2.,  56.],
       [  3.,  67.,   0.,   9.,   8.,  46.],
       [  6.,  55.,   1.,   4.,   9.,  43.],
       [  2.,  51.,   1.,   9.,   7.,  30.],
       [  9.,  92.,   1.,   8.,   0.,  92.],
       [  5.,  60.,   0.,   4.,   5.,  45.],
       [  6.,  84.,   0.,   5.,   6.,  70.],
       [  4.,  56.,   0.,   4.,   7.,  36.],
       [  7.,  79.,   0.,   4.,   1.,  71.],
       [  9.,  52.,   0.,   9.,   0.,  49.],
       [  7.,  91.,   1.,   6.,   3.,  82.],
       [  1.,  97.,   1.,   8.,   0.,  71.],
       [  1.,  71.,   0.,   9.,   1.,  43.],
       [  9.,  76.,   0.,   8.,   2.,  77.],
       [  9.,  89.,   1.,   5.,   3.,  86.],
       [  4.,  54.,   0.,   7.,   5.,  34.],
       [  9.,  51.,   0.,   6.,   7.,  49.],
       [  3.,  88.,   0.,   5.,   8.,  69.],
       [  7.,  96.,   0.,   5.,   3.,  84.],
       [  6.,  54.,   0.,   8.,   0.,  41.],
       [  8.,  73.,   1.,   6.,   4.,  68.],
       [  9.,  45.,   1.,   8.,   4.,  44.],
       [  5.,  56.,   0.,   7.,   8.,  41.],
       [  1.,  87.,   0.,   8.,   5.,  58.],
       [  3.,  88.,   1.,   4.,   4.,  68.],
       [  8.,  96.,   0.,   8.,   9.,  94.],
       [  6.,  59.,   1.,   4.,   9.,  47.],
       [  8.,  48.,   0.,   5.,   4.,  40.],
       [  9.,  98.,   1.,   7.,   7., 100.],
       [  4.,  44.,   1.,   9.,   0.,  23.],
       [  1.,  62.,   0.,   4.,   4.,  36.],
       [  1.,  72.,   0.,   7.,   0.,  47.],
       [  4.,  77.,   1.,   8.,   6.,  60.]])

plt.scatter() makes a basic scatterplot.

In [56]:
# Pull out the two columns being compared (column 1 and column 5 of the array)
previous_scores, performance_index = student[:, 1], student[:, 5]
# One marker per student: previous score on x, performance index on y
plt.scatter(previous_scores, performance_index)
# Label both axes
plt.ylabel("Performance index")
plt.xlabel("Previous scores")
# Title the figure (kept last so the cell echoes the title's Text object)
plt.title("Previous scores compared to performance index in students")
Out[56]:
Text(0.5, 1.0, 'Previous scores compared to performance index in students')
No description has been provided for this image

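A more complex scatterplot can distinguish groups within the data. Here the students are split by hours studied (3 or fewer, 4 to 6, and 7 or more), each group is drawn with its own color and marker, and plt.legend() adds a legend identifying the groups.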

In [64]:
# Extract the columns used in this plot
hours_studied = student[:, 0]
previous_scores = student[:, 1]
performance_index = student[:, 5]

# Split the rows into three study-time groups using vectorized comparisons
# rather than a Python-level loop. Each comparison yields a boolean mask over
# the rows; np.flatnonzero converts a mask into the row indices where it is True.
low_hours_mask = hours_studied <= 3
high_hours_mask = hours_studied >= 7
three_less_hours_ind = np.flatnonzero(low_hours_mask)
seven_more_hours_ind = np.flatnonzero(high_hours_mask)
# Rows matching neither comparison form the middle group, exactly as the
# final "else" of an if/elif/else split would assign them.
four_to_six_hours_ind = np.flatnonzero(~(low_hours_mask | high_hours_mask))

# One entry per group: (row indices, color, marker, legend label).
# The order here is the order in which the groups are drawn and listed in the legend.
hour_groups = [
    (three_less_hours_ind, 'r', 'o', '3 or less hours studied'),
    (four_to_six_hours_ind, 'b', 's', '4 to 6 hours studied'),
    (seven_more_hours_ind, 'g', '*', '7 or more hours studied'),
]

# Draw each group with its own color and marker so the groups can be told apart
for group_ind, group_color, group_marker, group_label in hour_groups:
    plt.scatter(
        previous_scores[group_ind],
        performance_index[group_ind],
        color=group_color,
        marker=group_marker,
        label=group_label,
    )

# Add axis labels
plt.xlabel("Previous scores")
plt.ylabel("Performance index")
# Add title
plt.title("Previous scores compared to performance index in students")
# Add legend (uses the label= given to each scatter call; kept last so the
# cell echoes the Legend object)
plt.legend()
Out[64]:
<matplotlib.legend.Legend at 0x19c972e5be0>
No description has been provided for this image