Childhood is a developmental period in which students learn many skills, both academic and practical, that will serve them throughout their lives. One of the skills that begins to develop at a young age is basic math and reading literacy: most of the math that adults use day to day, such as basic algebra with money, is taught by middle school, and reading comprehension is key to navigating much of adult life - understanding forms, learning new information, searching for housing, and so on. It is therefore important that all children in this developmental stage have equitable opportunities, so that during such a key growth period they all have the tools and education necessary to build long-lasting skills like math and reading comprehension.
However, not all students are given such opportunities equally. The US education system has long been known to have varying standards of education, with differences in quality beginning as early as pre-kindergarten, but relatively little documentation exists to confirm how large these variations are. If these differences in education quality exist and persist over time, it is imperative that they be resolved at an institutional level.
The focus of this tutorial is to examine whether education inequality is reflected in differences on national math and reading examinations, whether that inequality is continuing over time, and which factors, such as gender or state, play significant roles in it (if they exist), and then to use this analysis to predict what education inequality will look like in future years if the current education system is maintained.
Our null hypothesis is that gender and state have no relationship with or impact on math or reading literacy in children at this developmental stage. Our alternative hypothesis is that gender and state have some relationship with or impact on math or reading literacy in children at this developmental stage. We define math literacy and reading literacy by the mathematics and reading scores earned by students on the NAEP Test - the National Assessment of Educational Progress Test, a standardized national educational assessment that tracks math and reading progress across US states.
The data that we will be examining can be found here - specifically, the states_all_extended.csv file found on the site. The dataset contains compiled US education data, such as the number of students enrolled in 1st through 12th grade for all states, average math scores for gendered groups by state and year, and more. We decided to focus specifically on the average math and average reading scores that 4th graders earned on the NAEP test, as research finds that 4th to 5th grade is a key developmental period for children.
First, we have to import the necessary libraries to load the dataset. We are using pandas, numpy, matplotlib.pyplot, geopandas, and statsmodels. Pandas is used for its DataFrame object, an easy way to store tabular data. Numpy is used for its math functionality, and matplotlib.pyplot is used to plot graphs demonstrating relationships between variables in our data. Geopandas will be used to create heat maps of our data, and statsmodels will be used for hypothesis testing and finding our p-values.
As an FYI, we used Jupyter Notebook through the Anaconda distribution for our work. To install and import geopandas, we had to create a new Anaconda environment with a lower Python version, install geopandas onto our machine in that environment, and then import geopandas from there. Credit for this process goes to mick-d here: https://github.com/conda/conda/issues/9367#issuecomment-628231987
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
import statsmodels.api as sm
Because we had to use an Anaconda environment with a lower Python version, we got warnings on certain commands written for our previous, higher Python version. We chose to suppress them for now - we will come back later to update those commands for this Python version!
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn
Now we have to load the data. The data is stored in the "states_all_extended.csv" file and so we have to load it into a DataFrame. This can be done using pandas "read_csv" method. We will store this data in a variable called "school_data".
school_data = pd.read_csv("states_all_extended.csv")
# display first few rows
school_data.head()
PRIMARY_KEY | STATE | YEAR | ENROLL | TOTAL_REVENUE | FEDERAL_REVENUE | STATE_REVENUE | LOCAL_REVENUE | TOTAL_EXPENDITURE | INSTRUCTION_EXPENDITURE | ... | G08_HI_A_READING | G08_HI_A_MATHEMATICS | G08_AS_A_READING | G08_AS_A_MATHEMATICS | G08_AM_A_READING | G08_AM_A_MATHEMATICS | G08_HP_A_READING | G08_HP_A_MATHEMATICS | G08_TR_A_READING | G08_TR_A_MATHEMATICS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1992_ALABAMA | ALABAMA | 1992 | NaN | 2678885.0 | 304177.0 | 1659028.0 | 715680.0 | 2653798.0 | 1481703.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | 1992_ALASKA | ALASKA | 1992 | NaN | 1049591.0 | 106780.0 | 720711.0 | 222100.0 | 972488.0 | 498362.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | 1992_ARIZONA | ARIZONA | 1992 | NaN | 3258079.0 | 297888.0 | 1369815.0 | 1590376.0 | 3401580.0 | 1435908.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | 1992_ARKANSAS | ARKANSAS | 1992 | NaN | 1711959.0 | 178571.0 | 958785.0 | 574603.0 | 1743022.0 | 964323.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4 | 1992_CALIFORNIA | CALIFORNIA | 1992 | NaN | 26260025.0 | 2072470.0 | 16546514.0 | 7641041.0 | 27138832.0 | 14358922.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 266 columns
Looking at the data, we can see that there are a few columns we will not need. For example, PRIMARY_KEY isn't a data point we need to consider when testing our hypothesis, so we can get rid of it. We can use the DataFrame method drop and specify the columns we want to drop.
school_data = school_data.drop(columns=['PRIMARY_KEY'])
We only want to use data from 2009 onward because, before 2009, our dataset did not record gender demographics for the NAEP test result averages.
# get previous number of rows
prev_rows = len(school_data.index)
school_data = school_data[school_data['YEAR'] >= 2009]
# get current number of rows
curr_rows = len(school_data.index)
print(str(prev_rows - curr_rows) + " rows were dropped.")
school_data.head()
1193 rows were dropped.
STATE | YEAR | ENROLL | TOTAL_REVENUE | FEDERAL_REVENUE | STATE_REVENUE | LOCAL_REVENUE | TOTAL_EXPENDITURE | INSTRUCTION_EXPENDITURE | SUPPORT_SERVICES_EXPENDITURE | ... | G08_HI_A_READING | G08_HI_A_MATHEMATICS | G08_AS_A_READING | G08_AS_A_MATHEMATICS | G08_AM_A_READING | G08_AM_A_MATHEMATICS | G08_HP_A_READING | G08_HP_A_MATHEMATICS | G08_TR_A_READING | G08_TR_A_MATHEMATICS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
867 | ALABAMA | 2009 | 745668.0 | 7186390.0 | 728795.0 | 4161103.0 | 2296492.0 | 7815467.0 | 3836398.0 | 2331552.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
868 | ALASKA | 2009 | 130236.0 | 2158970.0 | 312667.0 | 1357747.0 | 488556.0 | 2396412.0 | 1129756.0 | 832783.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
869 | ARIZONA | 2009 | 981303.0 | 8802515.0 | 1044140.0 | 3806064.0 | 3952311.0 | 9580393.0 | 4296503.0 | 2983729.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
870 | ARKANSAS | 2009 | 474423.0 | 4753142.0 | 534510.0 | 3530487.0 | 688145.0 | 5017352.0 | 2417974.0 | 1492691.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
871 | CALIFORNIA | 2009 | 6234155.0 | 73958896.0 | 9745250.0 | 40084244.0 | 24129402.0 | 74766086.0 | 35617964.0 | 21693675.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 265 columns
Since the column names are a little tricky to figure out, we are going to outline how to read them here:
G## - This signifies which grade this value is talking about; for example G04 is referring to grade 4.
G##_A_A - This refers to the number of all students in that grade, across all races and genders.
G##_x_g - This is read as the number of students of race x and gender g in grade ##; for example, G06_AS_M is the number of Asian male students in grade 6.
G##_x_g_test - This is the average test score for students of race x and gender g in grade ## on that test; for example, G06_AS_A_MATHEMATICS is the average math score of all Asian students in grade 6.
A in place of a gender or race signifies all genders or all races.
The different race codes provided by the dataset are defined as AM - American Indian or Alaska Native, AS - Asian, HI - Hispanic/Latino, BL - Black, WH - White, HP - Hawaiian Native/Pacific Islander and TR - two or more races. (We will not be analyzing individual races in our data analysis, but this information is useful to know for future work.)
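As a quick, optional sanity check of this convention (a minimal sketch; it assumes the column names follow the pattern described above), we can programmatically pull out the grade 4, all-race, all-gender score columns that we will rely on later:
# Illustrative check of the column naming convention described above
g04_score_cols = [col for col in school_data.columns
                  if col.startswith("G04_A_A_") and col.endswith(("READING", "MATHEMATICS"))]
print(g04_score_cols)  # we expect ['G04_A_A_READING', 'G04_A_A_MATHEMATICS']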
Our exploratory data analysis will have two parts: state-level heat maps of score changes from 2009 to 2015, and gender-based comparisons of those changes.
In order to create our heat map, we will need the grid coordinates of the United States map with state outlines, because geopandas, the geospatial data mapping Python library we will be using, requires geometric coordinates for mapping. We pulled our US states map shapefile and the other necessary mapping files from the US Census, which can be found in this zip file: https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_state_500k.zip , and the description of the mapping files can be found here: https://www.census.gov/programs-surveys/geography/technical-documentation/naming-convention/cartographic-boundary-file.html . We manually unzipped the files and added them to the folder our notebook is in, since geopandas needs access to the files from there, but this step can be automated.
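If you would rather not unzip the files by hand, a minimal sketch of automating the download and extraction (assuming the notebook's working directory is where geopandas will look for the files) could be:
import urllib.request
import zipfile
# download the Census cartographic boundary zip and extract it next to the notebook
shp_url = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_state_500k.zip"
urllib.request.urlretrieve(shp_url, "cb_2018_us_state_500k.zip")
with zipfile.ZipFile("cb_2018_us_state_500k.zip") as zf:
    zf.extractall(".")  # the .shp file and its companion files must stay together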
Specifically, we use the read_file function from the geopandas library, which takes the name of the shapefile (essentially a set of geometric coordinates) and also reads the supporting index and attribute files that come along with it.
usa_states = gpd.read_file("cb_2018_us_state_500k.shp")
usa_states.head()
STATEFP | STATENS | AFFGEOID | GEOID | STUSPS | NAME | LSAD | ALAND | AWATER | geometry | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 28 | 01779790 | 0400000US28 | 28 | MS | Mississippi | 00 | 121533519481 | 3926919758 | MULTIPOLYGON (((-88.50297 30.21523, -88.49176 ... |
1 | 37 | 01027616 | 0400000US37 | 37 | NC | North Carolina | 00 | 125923656064 | 13466071395 | MULTIPOLYGON (((-75.72681 35.93584, -75.71827 ... |
2 | 40 | 01102857 | 0400000US40 | 40 | OK | Oklahoma | 00 | 177662925723 | 3374587997 | POLYGON ((-103.00257 36.52659, -103.00219 36.6... |
3 | 51 | 01779803 | 0400000US51 | 51 | VA | Virginia | 00 | 102257717110 | 8528531774 | MULTIPOLYGON (((-75.74241 37.80835, -75.74151 ... |
4 | 54 | 01779805 | 0400000US54 | 54 | WV | West Virginia | 00 | 62266474513 | 489028543 | POLYGON ((-82.64320 38.16909, -82.64300 38.169... |
We will be matching our grid coordinates for each state provided in the usa_states dataframe with state educational information in our school_data dataframe. Let's see what states or territories are provided in both dataframes.
print("We have grid coordinates for: {}".format(list(usa_states["NAME"])))
We have grid coordinates for: ['Mississippi', 'North Carolina', 'Oklahoma', 'Virginia', 'West Virginia', 'Louisiana', 'Michigan', 'Massachusetts', 'Idaho', 'Florida', 'Nebraska', 'Washington', 'New Mexico', 'Puerto Rico', 'South Dakota', 'Texas', 'California', 'Alabama', 'Georgia', 'Pennsylvania', 'Missouri', 'Colorado', 'Utah', 'Tennessee', 'Wyoming', 'New York', 'Kansas', 'Alaska', 'Nevada', 'Illinois', 'Vermont', 'Montana', 'Iowa', 'South Carolina', 'New Hampshire', 'Arizona', 'District of Columbia', 'American Samoa', 'United States Virgin Islands', 'New Jersey', 'Maryland', 'Maine', 'Hawaii', 'Delaware', 'Guam', 'Commonwealth of the Northern Mariana Islands', 'Rhode Island', 'Kentucky', 'Ohio', 'Wisconsin', 'Oregon', 'North Dakota', 'Arkansas', 'Indiana', 'Minnesota', 'Connecticut']
print("We have state math and reading score information for: {}".format(list(school_data["STATE"].unique())))
We have state math and reading score information for: ['ALABAMA', 'ALASKA', 'ARIZONA', 'ARKANSAS', 'CALIFORNIA', 'COLORADO', 'CONNECTICUT', 'DELAWARE', 'DISTRICT_OF_COLUMBIA', 'FLORIDA', 'GEORGIA', 'HAWAII', 'IDAHO', 'ILLINOIS', 'INDIANA', 'IOWA', 'KANSAS', 'KENTUCKY', 'LOUISIANA', 'MAINE', 'MARYLAND', 'MASSACHUSETTS', 'MICHIGAN', 'MINNESOTA', 'MISSISSIPPI', 'MISSOURI', 'MONTANA', 'NEBRASKA', 'NEVADA', 'NEW_HAMPSHIRE', 'NEW_JERSEY', 'NEW_MEXICO', 'NEW_YORK', 'NORTH_CAROLINA', 'NORTH_DAKOTA', 'OHIO', 'OKLAHOMA', 'OREGON', 'PENNSYLVANIA', 'RHODE_ISLAND', 'SOUTH_CAROLINA', 'SOUTH_DAKOTA', 'TENNESSEE', 'TEXAS', 'UTAH', 'VERMONT', 'VIRGINIA', 'WASHINGTON', 'WEST_VIRGINIA', 'WISCONSIN', 'WYOMING', 'DODEA', 'NATIONAL']
Looking at our State values for both the grid coordinate data (usa_states) and the math/reading score data (school_data), we can see that the school_data dataframe does not contain information on the United States Virgin Islands, American Samoa, Puerto Rico, Guam, or the Commonwealth of the Northern Mariana Islands. Therefore, we will need to drop those territories' information from our grid data, and create a final grid data dataframe called 'states_coords.'
# Dropping territories that do not have data in the education data dataset
states_coords = usa_states[usa_states.NAME != "United States Virgin Islands"]
states_coords = states_coords[states_coords.NAME != "American Samoa"]
states_coords = states_coords[states_coords.NAME != "Puerto Rico"]
states_coords = states_coords[states_coords.NAME != "Guam"]
states_coords = states_coords[states_coords.NAME != "Commonwealth of the Northern Mariana Islands"]
states_coords = states_coords.sort_values(by = ['NAME']) # For readability
We can also notice that the formatting of the state names in the two dataframes differs. Since our geopandas library will need our mapping and statistical information to be merged together, we need to merge them on a common key value, which can be the state names. Therefore, we update the formatting of the NAME variable in states_coords to match the STATE variable formatting in school_data.
# Matching formatting of NAME variable in states_coords with STATE variable formatting in education data
for row, curr_state in states_coords.iterrows():
    states_coords.at[row, 'NAME'] = curr_state['NAME'].replace(' ', '_')
    states_coords.at[row, 'NAME'] = states_coords.at[row, 'NAME'].upper()
states_coords = states_coords.rename(columns = {"NAME": "STATE"})
states_coords.head()
STATEFP | STATENS | AFFGEOID | GEOID | STUSPS | STATE | LSAD | ALAND | AWATER | geometry | |
---|---|---|---|---|---|---|---|---|---|---|
17 | 01 | 01779775 | 0400000US01 | 01 | AL | ALABAMA | 00 | 131174048583 | 4593327154 | MULTIPOLYGON (((-88.05338 30.50699, -88.05109 ... |
27 | 02 | 01785533 | 0400000US02 | 02 | AK | ALASKA | 00 | 1478839695958 | 245481577452 | MULTIPOLYGON (((179.48246 51.98283, 179.48656 ... |
35 | 04 | 01779777 | 0400000US04 | 04 | AZ | ARIZONA | 00 | 294198551143 | 1027337603 | POLYGON ((-114.81629 32.50804, -114.81432 32.5... |
52 | 05 | 00068085 | 0400000US05 | 05 | AR | ARKANSAS | 00 | 134768872727 | 2962859592 | POLYGON ((-94.61783 36.49941, -94.61765 36.499... |
16 | 06 | 01779778 | 0400000US06 | 06 | CA | CALIFORNIA | 00 | 403503931312 | 20463871877 | MULTIPOLYGON (((-118.60442 33.47855, -118.5987... |
Vice versa, we can also see that the grid coordinate dataset does not have any DODEA or NATIONAL values (which are present in school_data), so we can drop those values from the school_data dataframe.
We can also isolate the 2009 and 2015 rows from the main school_data frame now, as we will use this information soon. Since we only care about the grade 4 average score values, we can drop extra columns not relevant to us, such as federal and state revenues, enrollment counts, etc.
# Dropping state values not relevant in education dataset for 2009
data_2009 = school_data[school_data['YEAR'] == 2009]
data_2009 = data_2009[data_2009.STATE != "DODEA"]
data_2009 = data_2009[data_2009.STATE != "NATIONAL"]
# Dropping revenue, expenditure, and enrollment columns we will not be analyzing
data_2009 = data_2009.drop(columns=['TOTAL_REVENUE', 'FEDERAL_REVENUE', 'STATE_REVENUE',
                                    'LOCAL_REVENUE', 'ENROLL', 'TOTAL_EXPENDITURE',
                                    'CAPITAL_OUTLAY_EXPENDITURE', 'INSTRUCTION_EXPENDITURE',
                                    'SUPPORT_SERVICES_EXPENDITURE', 'OTHER_EXPENDITURE'])
data_2009 = data_2009.sort_values(by = ['STATE']) # For readability
data_2009 = data_2009.reset_index(drop=True)
data_2009.head()
STATE | YEAR | A_A_A | G01_A_A | G02_A_A | G03_A_A | G04_A_A | G05_A_A | G06_A_A | G07_A_A | ... | G08_HI_A_READING | G08_HI_A_MATHEMATICS | G08_AS_A_READING | G08_AS_A_MATHEMATICS | G08_AM_A_READING | G08_AM_A_MATHEMATICS | G08_HP_A_READING | G08_HP_A_MATHEMATICS | G08_TR_A_READING | G08_TR_A_MATHEMATICS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | ALABAMA | 2009 | 748889.0 | 57821.0 | 56628.0 | 58608.0 | 59512.0 | 58656.0 | 58231.0 | 58118.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | ALASKA | 2009 | 131661.0 | 9926.0 | 9827.0 | 10032.0 | 10046.0 | 9864.0 | 9567.0 | 9657.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | ARIZONA | 2009 | 1077831.0 | 85725.0 | 84033.0 | 84060.0 | 83686.0 | 83193.0 | 81987.0 | 82050.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | ARKANSAS | 2009 | 480559.0 | 37665.0 | 36934.0 | 36903.0 | 36479.0 | 36489.0 | 35958.0 | 36113.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4 | CALIFORNIA | 2009 | 6263438.0 | 470783.0 | 459334.0 | 459813.0 | 465866.0 | 460248.0 | 461373.0 | 466893.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 255 columns
And we do the same for the 2015 education dataset.
# Dropping state values not relevant in education dataset for 2015
data_2015 = school_data[school_data['YEAR'] == 2015]
data_2015 = data_2015[data_2015.STATE != "DODEA"]
data_2015 = data_2015[data_2015.STATE != "NATIONAL"]
# Dropping revenue, expenditure, and enrollment columns we will not be analyzing
data_2015 = data_2015.drop(columns=['TOTAL_REVENUE', 'FEDERAL_REVENUE', 'STATE_REVENUE',
                                    'LOCAL_REVENUE', 'ENROLL', 'TOTAL_EXPENDITURE',
                                    'CAPITAL_OUTLAY_EXPENDITURE', 'INSTRUCTION_EXPENDITURE',
                                    'SUPPORT_SERVICES_EXPENDITURE', 'OTHER_EXPENDITURE'])
data_2015 = data_2015.sort_values(by = ['STATE']) # For readability
data_2015 = data_2015.reset_index()
data_2015.head()
index | STATE | YEAR | A_A_A | G01_A_A | G02_A_A | G03_A_A | G04_A_A | G05_A_A | G06_A_A | ... | G08_HI_A_READING | G08_HI_A_MATHEMATICS | G08_AS_A_READING | G08_AS_A_MATHEMATICS | G08_AM_A_READING | G08_AM_A_MATHEMATICS | G08_HP_A_READING | G08_HP_A_MATHEMATICS | G08_TR_A_READING | G08_TR_A_MATHEMATICS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1173 | ALABAMA | 2015 | 743789.0 | 59023.0 | 58766.0 | 57963.0 | 55808.0 | 55340.0 | 54900.0 | ... | 252.0 | 260.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | 1174 | ALASKA | 2015 | 132477.0 | 10587.0 | 10512.0 | 10441.0 | 10118.0 | 9793.0 | 9648.0 | ... | 263.0 | 279.0 | 260.0 | 282.0 | 231.0 | 257.0 | 242.0 | NaN | 270.0 | 285.0 |
2 | 1175 | ARIZONA | 2015 | 1109040.0 | 84804.0 | 87325.0 | 88194.0 | 86594.0 | 85719.0 | 85202.0 | ... | 254.0 | 273.0 | NaN | NaN | 244.0 | 260.0 | NaN | NaN | NaN | NaN |
3 | 1176 | ARKANSAS | 2015 | 492132.0 | 38160.0 | 38590.0 | 38410.0 | 35893.0 | 35850.0 | 36020.0 | ... | 255.0 | 269.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4 | 1177 | CALIFORNIA | 2015 | 6226737.0 | 444573.0 | 463881.0 | 470157.0 | 485885.0 | 476427.0 | 471467.0 | ... | 249.0 | 263.0 | 279.0 | 304.0 | NaN | NaN | NaN | NaN | 263.0 | 289.0 |
5 rows × 256 columns
Now, we will create a dataframe to hold the calculated differences between grade 4 reading and math scores from 2009 to 2015 (2015 minus 2009) per state, which we will soon use for our visualization. To do so, we will traverse the 2009 and 2015 dataframes concurrently, pull out each state's math and reading scores for all students, and subtract one from the other. These differences in average math and reading scores will be recorded for each state for the map visualization.
# Adding the states to our empty dataframe
g04_15to09_states = pd.DataFrame()
g04_15to09_states[['STATE']] = data_2015[['STATE']]
# Traversing through 2009 and 2015 score dataset and tracking differences
for ind, currstate in data_2009.iterrows():
    read_val2015 = float(data_2015.at[ind, "G04_A_A_READING"])
    read_val2009 = float(data_2009.at[ind, "G04_A_A_READING"])
    read_diff = read_val2015 - read_val2009
    g04_15to09_states.at[ind, "READ_GROWTH"] = read_diff
    math_val2015 = float(data_2015.at[ind, "G04_A_A_MATHEMATICS"])
    math_val2009 = float(data_2009.at[ind, "G04_A_A_MATHEMATICS"])
    math_diff = math_val2015 - math_val2009
    g04_15to09_states.at[ind, "MATH_GROWTH"] = math_diff
g04_15to09_states.head()
STATE | READ_GROWTH | MATH_GROWTH | |
---|---|---|---|
0 | ALABAMA | 1.0 | 3.0 |
1 | ALASKA | 2.0 | -1.0 |
2 | ARIZONA | 5.0 | 8.0 |
3 | ARKANSAS | 2.0 | -3.0 |
4 | CALIFORNIA | 3.0 | 0.0 |
Now, we will need to merge the dataframe containing the grid coordinates and the dataframe containing the math/reading difference/growth variables together for our future heat map plotting.
map_and_stats = states_coords.merge(g04_15to09_states, on="STATE")
map_and_stats.head()
STATEFP | STATENS | AFFGEOID | GEOID | STUSPS | STATE | LSAD | ALAND | AWATER | geometry | READ_GROWTH | MATH_GROWTH | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 01 | 01779775 | 0400000US01 | 01 | AL | ALABAMA | 00 | 131174048583 | 4593327154 | MULTIPOLYGON (((-88.05338 30.50699, -88.05109 ... | 1.0 | 3.0 |
1 | 02 | 01785533 | 0400000US02 | 02 | AK | ALASKA | 00 | 1478839695958 | 245481577452 | MULTIPOLYGON (((179.48246 51.98283, 179.48656 ... | 2.0 | -1.0 |
2 | 04 | 01779777 | 0400000US04 | 04 | AZ | ARIZONA | 00 | 294198551143 | 1027337603 | POLYGON ((-114.81629 32.50804, -114.81432 32.5... | 5.0 | 8.0 |
3 | 05 | 00068085 | 0400000US05 | 05 | AR | ARKANSAS | 00 | 134768872727 | 2962859592 | POLYGON ((-94.61783 36.49941, -94.61765 36.499... | 2.0 | -3.0 |
4 | 06 | 01779778 | 0400000US06 | 06 | CA | CALIFORNIA | 00 | 403503931312 | 20463871877 | MULTIPOLYGON (((-118.60442 33.47855, -118.5987... | 3.0 | 0.0 |
We can now plot heat maps of the 50 US states showing their changes in average math and reading scores on the NAEP test from 2009 to 2015. We will first plot a heat map of just the reading score changes for analysis.
fig, ax = plt.subplots(1, figsize=(14, 14))
plt.xticks(rotation=90)
map_and_stats.plot(column = "READ_GROWTH", cmap = "RdYlGn",
linewidth = 0.4, ax = ax, edgecolor = ".4")
ax.set_title('Heat Map of Growth in Reading Literacy for 4th Graders from 2009 to 2016')
bar_info = plt.cm.ScalarMappable(cmap="RdYlGn", norm=plt.Normalize(vmin= -6, vmax=6))
bar_info._A = []
#https://stackoverflow.com/questions/18195758/set-matplotlib-colorbar-size-to-match-graph
cax = fig.add_axes([ax.get_position().x1 + 0.01,ax.get_position().y0,0.05,ax.get_position().height])
cbar = fig.colorbar(bar_info, cax = cax)
ax.set_xlim(-130, -60)
ax.set_ylim(25, 50)
ax.axis("off")
(-130.0, -60.0, 25.0, 50.0)
(Our map excludes Hawaii and Alaska due to some mapping configuration issues - but just as an FYI, Alaska had a growth of 1.0 and Hawaii had a growth of 2.0).
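If you want to confirm the values for the states that are not drawn on the map, a quick lookup in the merged dataframe (a small sketch using the columns we built above) would be:
# look up the growth values for the states excluded from the plot area
print(map_and_stats.loc[map_and_stats['STATE'].isin(['ALASKA', 'HAWAII']),
                        ['STATE', 'READ_GROWTH', 'MATH_GROWTH']])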
Well, this is interesting! If education were being delivered equally across states, we would expect to see the same amount of growth, whether positive, negative, or none, in the reading literacy of 4th graders from 2009 to 2015. As a reminder, we measure reading literacy growth by taking a state's average score for fourth graders on the NAEP reading exam in 2015 and subtracting that state's average from 2009. In terms of our heat map, if education were equal, we would expect to see the same color, i.e. the same growth amount, across all states. However, as the heat map above shows, not all states are experiencing the same change in reading literacy - states such as Louisiana and North Carolina have gone up 3 to 6 points in average NAEP reading scores over the 6 years. This is the kind of positive result we would hope to see - states' reading scores improving over the years! However, other states, such as Kansas and our state, Maryland, have gone down by 6 points in reading literacy over the same 6-year span. Looking at the map, we can see that the remaining states also fluctuate in their reading literacy patterns - an indicator that reading literacy growth can depend on the state over time, which is suggestive of education inequality by state.
Let's create a similar heat map for growth in math literacy for 4th graders across the states from 2009 to 2015, and see what the results look like there.
fig, ax = plt.subplots(1, figsize=(14, 14))
plt.xticks(rotation=90)
map_and_stats.plot(column = "MATH_GROWTH", cmap = "RdYlBu",
linewidth = 0.4, ax = ax, edgecolor = ".4")
ax.set_title('Heat Map of Growth in Mathematics Literacy for 4th Graders from 2009 to 2016')
bar_info = plt.cm.ScalarMappable(cmap="RdYlBu", norm=plt.Normalize(vmin= -6, vmax=6))
bar_info._A = []
#https://stackoverflow.com/questions/18195758/set-matplotlib-colorbar-size-to-match-graph
cax = fig.add_axes([ax.get_position().x1 + 0.01,ax.get_position().y0,0.05,ax.get_position().height])
cbar = fig.colorbar(bar_info, cax = cax)
ax.set_xlim(-130, -60)
ax.set_ylim(25, 50)
ax.axis("off")
(-130.0, -60.0, 25.0, 50.0)
Interesting! Regarding math literacy growth, 4th grade students in New York, Vermont, Maryland, New Jersey, and Kansas seem to have gone down by almost 5 to 6 points in their average NAEP mathematics score from 2009 to 2015, which is a bit disheartening to see. Maryland and Kansas also had the most negative growth in reading literacy over the same period, whereas New York, Vermont, and New Jersey, though still negative, had slightly smaller decreases in reading literacy - only about 3 to 4 points downward.
Comparing the heat maps for change in math literacy versus change in reading literacy across states from 2009 to 2015, we can also notice that states such as Tennessee, Texas, and Alabama had no growth to little positive growth (0 to 2 points) in mathematics literacy over the 6 years, but had negative growth of 2 to 6 points in reading literacy - which might indicate that, even within a state's educational standards, reading standards and math standards may differ state by state.
Now, students within a state are not all one type of person. We can look at how much gendered experiences, within the dataset's bounds of "MALE" and "FEMALE", affect 4th grade students' math and reading literacy growth over the years. To compare whether gendered experiences affect growth, we can create stacked bar plots that visualize the changes in reading and mathematics scores for each state, split by gender.
The code below traverses through the STATE indices of the 2009 education dataset and the 2015 education dataset concurrently, and pulls out the average math or reading score for 4th graders on the NAEP Test for each state and gender (Female or Male) combination possible. After pulling out those values for 2009 and 2015, we subtract the 2009 average from the 2015 average, and we record these differences for each subject (math or reading) and gender (male or female) combination for graphing purposes later on.
states = []
female_math_change = []
male_math_change = []
female_read_change = []
male_read_change = []
for ind, currstate in data_2009.iterrows():
    states.append(map_and_stats["STUSPS"][ind])
    # READING CHANGE for FEMALE
    f_read_val2015 = float(data_2015.at[ind, "G04_A_F_READING"])
    f_read_val2009 = float(data_2009.at[ind, "G04_A_F_READING"])
    f_read_diff = f_read_val2015 - f_read_val2009
    female_read_change.append(f_read_diff)
    # MATH CHANGE for FEMALE
    f_math_val2015 = float(data_2015.at[ind, "G04_A_F_MATHEMATICS"])
    f_math_val2009 = float(data_2009.at[ind, "G04_A_F_MATHEMATICS"])
    f_math_diff = f_math_val2015 - f_math_val2009
    female_math_change.append(f_math_diff)
    # READING CHANGE for MALE
    m_read_val2015 = float(data_2015.at[ind, "G04_A_M_READING"])
    m_read_val2009 = float(data_2009.at[ind, "G04_A_M_READING"])
    m_read_diff = m_read_val2015 - m_read_val2009
    male_read_change.append(m_read_diff)
    # MATH CHANGE for MALE
    m_math_val2015 = float(data_2015.at[ind, "G04_A_M_MATHEMATICS"])
    m_math_val2009 = float(data_2009.at[ind, "G04_A_M_MATHEMATICS"])
    m_math_diff = m_math_val2015 - m_math_val2009
    male_math_change.append(m_math_diff)
Great! Now we can plot our growth values. The following code produces a stacked bar plot for the 50 states and the changes between the average mathematics scores from 2009 to 2015 for male and female students per state.
# MATH GROWTH
state_locs = np.arange(len(states)) # the state label locations
bar_width = 0.55 # the width of the bars
fig, ax = plt.subplots(1, figsize=(15, 10))
male_bars = ax.bar(states, male_math_change, bar_width, label = 'MALE', color = 'magenta')
female_bars = ax.bar(states, female_math_change, bar_width,
                     bottom = male_math_change, label = 'FEMALE',
                     color = (0.2, 0.7, 0.9, 0.5))
ax.bar_label(male_bars, padding = 4)
ax.bar_label(female_bars, padding = 4)
ax.set_xlabel('State', fontsize = 15)
ax.set_ylabel('Change in Average Math Score', fontsize = 15)
ax.set_title('Mathematics Growth from \'09 to \'15 in NAEP Test by State and Gender', fontsize = 15)
ax.set_xticks(state_locs, states)
ax.legend()
fig.tight_layout()
plt.show()
The following code produces a stacked bar plot for the 50 states and the changes between the average reading scores from 2009 to 2015 for male and female students per state.
# READING
state_locs = np.arange(len(states)) # the state label locations
bar_width = 0.55 # the width of the bars
fig, ax = plt.subplots(1, figsize=(15, 10))
male_bars = ax.bar(states, male_read_change, bar_width, label = 'MALE', color = 'orange')
female_bars = ax.bar(states, female_read_change, bar_width,
                     bottom = male_read_change, label = 'FEMALE',
                     color = (0.2, 0.7, 0.9, 0.5))
ax.bar_label(male_bars, padding = 4)
ax.bar_label(female_bars, padding = 4)
ax.set_xlabel('State', fontsize = 15)
ax.set_ylabel('Change in Average Reading Score', fontsize = 15)
ax.set_title('Reading Growth from \'09 to \'15 in NAEP Test by State and Gender', fontsize = 15)
ax.set_xticks(state_locs, states)
ax.legend()
fig.tight_layout()
plt.show()
Let's analyze our math growth plot first. Looking at the change in average math scores per state by gender, we can notice that female students tended to be at either extreme of math score changes, whereas male students tended to fluctuate less between extreme jumps in score from 2009 to 2015. In 38 of the 50 states/territories, female students had a larger difference - whether negative or positive - in average math scores from 2009 to 2015. This could be explained by two pressures often pushed onto students gendered as female: 1) the stereotype of being 'dumb' or 'not smart enough', which communities or individuals may respond to by pushing expectations onto such students to do better on these examinations, and 2) the expectation that they should not worry about education compared to other ideals such as homemaking, which can be pushed onto such children at an early age.
Next, let's compare that math plot with our reading growth plot. We can notice that only 15 states had at least one gender (FEMALE or MALE) with a negative change in the state's average reading score on the NAEP test for 4th graders from 2009 to 2015, which is a smaller number than the 29 states that had at least one gender with a negative change in average math scores for the same population. Specifically, the differences between FEMALE and MALE reading score changes across states seem to be a bit smaller than the spread of differences for math scores, but the positive FEMALE point jumps in reading still seem to be larger than the positive MALE point jumps. We can also notice that DC seems to have the largest positive point increase from 2009 in both MALE and FEMALE groups for both average math and average reading scores, indicating that DC's math and reading teaching standards may be a good model for other states. Still, it would be interesting to see why FEMALE students had such a higher jump compared to MALE students.
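If you would rather double-check counts like these than eyeball the plots, a small sketch over the change lists we built above (the exact numbers depend on the data) could look like this:
# count states where at least one gender had a negative change, and where the female change was larger in magnitude
read_neg = sum(1 for f, m in zip(female_read_change, male_read_change) if f < 0 or m < 0)
math_neg = sum(1 for f, m in zip(female_math_change, male_math_change) if f < 0 or m < 0)
female_larger_math = sum(1 for f, m in zip(female_math_change, male_math_change) if abs(f) > abs(m))
print("States with a negative reading change for at least one gender:", read_neg)
print("States with a negative math change for at least one gender:", math_neg)
print("States where the female math change was larger in magnitude:", female_larger_math)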
The exploratory data analysis has given us some idea of what to expect when developing our predictive models below: the FEMALE gender may carry more weight in driving the more drastic changes in average math scores for each state, and there may be fewer negative growth values for reading than for math.
The first predictive models we are going to create will predict the change in grade 4 average test scores based on previous years' data for each state. First, we are going to remove all the columns except for state, year, and the average math and reading test scores, and drop the rows that are not individual states (the DODEA and NATIONAL rows).
# get columns needed
state_avg = school_data[['STATE', 'YEAR', 'G04_A_A_READING', 'G04_A_A_MATHEMATICS']]
state_avg = state_avg[state_avg.STATE != "DODEA"] # dropping rows that are not relevant states
state_avg = state_avg[state_avg.STATE != "NATIONAL"] # dropping rows that are not relevant states
state_avg.head()
STATE | YEAR | G04_A_A_READING | G04_A_A_MATHEMATICS | |
---|---|---|---|---|
867 | ALABAMA | 2009 | 216.0 | 228.0 |
868 | ALASKA | 2009 | 211.0 | 237.0 |
869 | ARIZONA | 2009 | 210.0 | 230.0 |
870 | ARKANSAS | 2009 | 216.0 | 238.0 |
871 | CALIFORNIA | 2009 | 210.0 | 232.0 |
Similar to our exploratory data analysis, to create a metric for how the test scores have improved or worsened, we subtract each state's 2009 average reading and math scores from that state's averages in another year, defining how much the average math and reading test scores have changed since 2009. We store this metric in two new columns, "READING_GROWTH" and "MATHEMATICS_GROWTH".
# set reading growth to NaN first
state_avg['READING_GROWTH'] = np.NaN
# method to process a row and return that state's reading average in 2009
def process_reading(row):
    state = row['STATE']
    new = state_avg.loc[state_avg['STATE'] == state]
    new = new.loc[new['YEAR'] == 2009]
    return new['G04_A_A_READING'].iloc[0]
# in each row, update the reading growth value with the difference between this year's value and the 2009 value
for i, row in state_avg.iterrows():
    state_avg.at[i, 'READING_GROWTH'] = row['G04_A_A_READING'] - process_reading(row)
state_avg['MATHEMATICS_GROWTH'] = np.NaN
# similar function as reading, but for mathematics
def process_math(row):
    state = row['STATE']
    new = state_avg.loc[state_avg['STATE'] == state]
    new = new.loc[new['YEAR'] == 2009]
    return new['G04_A_A_MATHEMATICS'].iloc[0]
for i, row in state_avg.iterrows():
    state_avg.at[i, 'MATHEMATICS_GROWTH'] = row['G04_A_A_MATHEMATICS'] - process_math(row)
state_avg.head()
STATE | YEAR | G04_A_A_READING | G04_A_A_MATHEMATICS | READING_GROWTH | MATHEMATICS_GROWTH | |
---|---|---|---|---|---|---|
867 | ALABAMA | 2009 | 216.0 | 228.0 | 0.0 | 0.0 |
868 | ALASKA | 2009 | 211.0 | 237.0 | 0.0 | 0.0 |
869 | ARIZONA | 2009 | 210.0 | 230.0 | 0.0 | 0.0 |
870 | ARKANSAS | 2009 | 216.0 | 238.0 | 0.0 | 0.0 |
871 | CALIFORNIA | 2009 | 210.0 | 232.0 | 0.0 | 0.0 |
Since each state counts as a unique independent variable, we can use the pandas method get_dummies to create a dataframe where each state is represented by either 1 or 0, 1 if the data value is in that state and 0 if the data value is not in that state. Then we will drop the Alabama column because if all the other states are 0 we can assume that the data value must be in Alabama.
# get dummies
state_avg = pd.get_dummies(state_avg, columns=['STATE'])
# drop alabama and reading and mathematics averages since we no longer need them
state_avg = state_avg.drop(columns=['STATE_ALABAMA', 'G04_A_A_READING', 'G04_A_A_MATHEMATICS'])
state_avg.head()
YEAR | READING_GROWTH | MATHEMATICS_GROWTH | STATE_ALASKA | STATE_ARIZONA | STATE_ARKANSAS | STATE_CALIFORNIA | STATE_COLORADO | STATE_CONNECTICUT | STATE_DELAWARE | ... | STATE_SOUTH_DAKOTA | STATE_TENNESSEE | STATE_TEXAS | STATE_UTAH | STATE_VERMONT | STATE_VIRGINIA | STATE_WASHINGTON | STATE_WEST_VIRGINIA | STATE_WISCONSIN | STATE_WYOMING | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
867 | 2009 | 0.0 | 0.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
868 | 2009 | 0.0 | 0.0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
869 | 2009 | 0.0 | 0.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
870 | 2009 | 0.0 | 0.0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
871 | 2009 | 0.0 | 0.0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 53 columns
Now we have to split the dataset into a training and a test set so that we can use the test set to determine how accurate our predictor is. We are going to predict the years 2017, 2018, and 2019, so we will use those rows as our test data and the rest as our training data. Note that not all years between 2009 and 2019 have recorded average reading and math scores for every state. Therefore, when splitting our data, we will keep only the rows with actual numeric values, not NaN values, for both our training and test sets.
# drop the NaN rows
train_data = state_avg[state_avg['YEAR'] < 2017].dropna()
test_data = state_avg[state_avg['YEAR'] >= 2017].dropna()
We can use LinearRegression from sklearn to make a regression model that will predict the expected increase or decrease in average scores for math and reading. We will make one model for reading and one for mathematics. But first, we have to separate our independent variables (state and year) from our dependent (growth) variable.
from sklearn.linear_model import LinearRegression
X_reading = [] # independent values for reading
y_reading = [] # dependent values for reading
X_math = [] # independent values for math
y_math = [] # dependent values for math
# iterate through each row and add the year and state to the X variables and the growths to the y variables
for i, row in train_data.iterrows():
    add = row[3:].tolist()
    add.insert(0, row['YEAR'])
    X_reading.append(add)
    y_reading.append(row['READING_GROWTH'])
    X_math.append(add)
    y_math.append(row['MATHEMATICS_GROWTH'])
We fit the Linear Regression models on the X (state and year) and y (growth increase/decrease) variables to create prediction models. Then, we add the predicted growths for our test dataset as separate columns in the test_data DataFrame so we can easily compare the values.
# create reading regression and math regression
reading_regr = LinearRegression().fit(X_reading, y_reading)
mathematics_regr = LinearRegression().fit(X_math, y_math)
X_test_reading = []
X_test_math = []
# accumulate X values for reading and math
for i, row in test_data.iterrows():
    add = row[3:].tolist()
    add.insert(0, row['YEAR'])
    X_test_reading.append(add)
    X_test_math.append(add)
# predict based of X values
test_data['PREDICT_READING'] = reading_regr.predict(X_test_reading)
test_data['PREDICT_MATH'] = mathematics_regr.predict(X_test_math)
test_data.head()
YEAR | READING_GROWTH | MATHEMATICS_GROWTH | STATE_ALASKA | STATE_ARIZONA | STATE_ARKANSAS | STATE_CALIFORNIA | STATE_COLORADO | STATE_CONNECTICUT | STATE_DELAWARE | ... | STATE_TEXAS | STATE_UTAH | STATE_VERMONT | STATE_VIRGINIA | STATE_WASHINGTON | STATE_WEST_VIRGINIA | STATE_WISCONSIN | STATE_WYOMING | PREDICT_READING | PREDICT_MATH | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1281 | 2017 | 0.0 | 4.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3.642157 | 3.666667 |
1288 | 2017 | -4.0 | -7.0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.892157 | 0.166667 |
1295 | 2017 | 5.0 | 4.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4.142157 | 6.666667 |
1302 | 2017 | 0.0 | -4.0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3.142157 | 0.666667 |
1309 | 2017 | 5.0 | 0.0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3.392157 | 1.916667 |
5 rows × 55 columns
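As an optional follow-up (not part of the original comparison, just a sketch using the columns created above), we could summarize how far off the predictions are with a mean absolute error over the held-out years:
# average absolute gap between actual and predicted growth on the test years
read_mae = (test_data['READING_GROWTH'] - test_data['PREDICT_READING']).abs().mean()
math_mae = (test_data['MATHEMATICS_GROWTH'] - test_data['PREDICT_MATH']).abs().mean()
print("Reading MAE: {:.2f} points, Math MAE: {:.2f} points".format(read_mae, math_mae))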
To analyze whether being in a particular state affects the growth of reading and math test scores, we can use statsmodels to see the p-value of each coefficient (the states and the year) we pass into the model.
# create statsmodel for reading data
p_reading = sm.OLS(train_data['READING_GROWTH'].tolist(), sm.add_constant(X_reading)).fit()
p_reading.summary()
Dep. Variable: | y | R-squared: | 0.501 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.334 |
Method: | Least Squares | F-statistic: | 2.997 |
Date: | Mon, 16 May 2022 | Prob (F-statistic): | 1.12e-07 |
Time: | 15:54:25 | Log-Likelihood: | -381.05 |
No. Observations: | 204 | AIC: | 866.1 |
Df Residuals: | 152 | BIC: | 1039. |
Df Model: | 51 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
const | -658.8039 | 114.346 | -5.762 | 0.000 | -884.716 | -432.892 |
x1 | 0.3284 | 0.057 | 5.779 | 0.000 | 0.216 | 0.441 |
x2 | -2.7500 | 1.283 | -2.143 | 0.034 | -5.286 | -0.214 |
x3 | 0.5000 | 1.283 | 0.390 | 0.697 | -2.036 | 3.036 |
x4 | -0.5000 | 1.283 | -0.390 | 0.697 | -3.036 | 2.036 |
x5 | -0.2500 | 1.283 | -0.195 | 0.846 | -2.786 | 2.286 |
x6 | -3.0000 | 1.283 | -2.338 | 0.021 | -5.536 | -0.464 |
x7 | -2.2500 | 1.283 | -1.753 | 0.082 | -4.786 | 0.286 |
x8 | -2.7500 | 1.283 | -2.143 | 0.034 | -5.286 | -0.214 |
x9 | 1.2500 | 1.283 | 0.974 | 0.332 | -1.286 | 3.786 |
x10 | -1.7500 | 1.283 | -1.364 | 0.175 | -4.286 | 0.786 |
x11 | 0.7500 | 1.283 | 0.584 | 0.560 | -1.786 | 3.286 |
x12 | 0.7500 | 1.283 | 0.584 | 0.560 | -1.786 | 3.286 |
x13 | -2.2500 | 1.283 | -1.753 | 0.082 | -4.786 | 0.286 |
x14 | -1.2500 | 1.283 | -0.974 | 0.332 | -3.786 | 1.286 |
x15 | -1.0000 | 1.283 | -0.779 | 0.437 | -3.536 | 1.536 |
x16 | -0.5000 | 1.283 | -0.390 | 0.697 | -3.036 | 2.036 |
x17 | -3.0000 | 1.283 | -2.338 | 0.021 | -5.536 | -0.464 |
x18 | -2.2500 | 1.283 | -1.753 | 0.082 | -4.786 | 0.286 |
x19 | 1.7500 | 1.283 | 1.364 | 0.175 | -0.786 | 4.286 |
x20 | -2.2500 | 1.283 | -1.753 | 0.082 | -4.786 | 0.286 |
x21 | 4.125e-12 | 1.283 | 3.21e-12 | 1.000 | -2.536 | 2.536 |
x22 | -1.5000 | 1.283 | -1.169 | 0.244 | -4.036 | 1.036 |
x23 | -2.5000 | 1.283 | -1.948 | 0.053 | -5.036 | 0.036 |
x24 | -1.2500 | 1.283 | -0.974 | 0.332 | -3.786 | 1.286 |
x25 | -2.2500 | 1.283 | -1.753 | 0.082 | -4.786 | 0.286 |
x26 | -3.7500 | 1.283 | -2.922 | 0.004 | -6.286 | -1.214 |
x27 | -2.5000 | 1.283 | -1.948 | 0.053 | -5.036 | 0.036 |
x28 | -1.0000 | 1.283 | -0.779 | 0.437 | -3.536 | 1.536 |
x29 | 4.121e-12 | 1.283 | 3.21e-12 | 1.000 | -2.536 | 2.536 |
x30 | -0.2500 | 1.283 | -0.195 | 0.846 | -2.786 | 2.286 |
x31 | -1.5000 | 1.283 | -1.169 | 0.244 | -4.036 | 1.036 |
x32 | -2.7500 | 1.283 | -2.143 | 0.034 | -5.286 | -0.214 |
x33 | -2.7500 | 1.283 | -2.143 | 0.034 | -5.286 | -0.214 |
x34 | 1.0000 | 1.283 | 0.779 | 0.437 | -1.536 | 3.536 |
x35 | -2.7500 | 1.283 | -2.143 | 0.034 | -5.286 | -0.214 |
x36 | -2.5000 | 1.283 | -1.948 | 0.053 | -5.036 | 0.036 |
x37 | -1.2500 | 1.283 | -0.974 | 0.332 | -3.786 | 1.286 |
x38 | -1.7500 | 1.283 | -1.364 | 0.175 | -4.286 | 0.786 |
x39 | 4.125e-12 | 1.283 | 3.21e-12 | 1.000 | -2.536 | 2.536 |
x40 | -1.7500 | 1.283 | -1.364 | 0.175 | -4.286 | 0.786 |
x41 | -2.2500 | 1.283 | -1.753 | 0.082 | -4.786 | 0.286 |
x42 | -4.0000 | 1.283 | -3.117 | 0.002 | -6.536 | -1.464 |
x43 | -1.2500 | 1.283 | -0.974 | 0.332 | -3.786 | 1.286 |
x44 | -3.0000 | 1.283 | -2.338 | 0.021 | -5.536 | -0.464 |
x45 | 1.0000 | 1.283 | 0.779 | 0.437 | -1.536 | 3.536 |
x46 | -2.5000 | 1.283 | -1.948 | 0.053 | -5.036 | 0.036 |
x47 | -1.2500 | 1.283 | -0.974 | 0.332 | -3.786 | 1.286 |
x48 | 0.2500 | 1.283 | 0.195 | 0.846 | -2.286 | 2.786 |
x49 | -2.0000 | 1.283 | -1.558 | 0.121 | -4.536 | 0.536 |
x50 | -0.7500 | 1.283 | -0.584 | 0.560 | -3.286 | 1.786 |
x51 | 0.2500 | 1.283 | 0.195 | 0.846 | -2.286 | 2.786 |
Omnibus: | 6.101 | Durbin-Watson: | 1.995 |
---|---|---|---|
Prob(Omnibus): | 0.047 | Jarque-Bera (JB): | 8.918 |
Skew: | 0.113 | Prob(JB): | 0.0116 |
Kurtosis: | 3.999 | Cond. No. | 1.81e+06 |
We can do the same for the math data.
# create statsmodel for math data
p_math = sm.OLS(train_data['MATHEMATICS_GROWTH'].tolist(), sm.add_constant(X_math)).fit()
p_math.summary()
Dep. Variable: | y | R-squared: | 0.511 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.347 |
Method: | Least Squares | F-statistic: | 3.114 |
Date: | Mon, 16 May 2022 | Prob (F-statistic): | 4.02e-08 |
Time: | 15:54:25 | Log-Likelihood: | -403.42 |
No. Observations: | 204 | AIC: | 910.8 |
Df Residuals: | 152 | BIC: | 1083. |
Df Model: | 51 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
const | -366.1167 | 127.593 | -2.869 | 0.005 | -618.201 | -114.032 |
x1 | 0.1833 | 0.063 | 2.891 | 0.004 | 0.058 | 0.309 |
x2 | -3.5000 | 1.432 | -2.444 | 0.016 | -6.329 | -0.671 |
x3 | 3.0000 | 1.432 | 2.095 | 0.038 | 0.171 | 5.829 |
x4 | -3.0000 | 1.432 | -2.095 | 0.038 | -5.829 | -0.171 |
x5 | -1.7500 | 1.432 | -1.222 | 0.224 | -4.579 | 1.079 |
x6 | -1.7500 | 1.432 | -1.222 | 0.224 | -4.579 | 1.079 |
x7 | -5.2500 | 1.432 | -3.666 | 0.000 | -8.079 | -2.421 |
x8 | -1.5000 | 1.432 | -1.047 | 0.297 | -4.329 | 1.329 |
x9 | 3.5000 | 1.432 | 2.444 | 0.016 | 0.671 | 6.329 |
x10 | -3.0000 | 1.432 | -2.095 | 0.038 | -5.829 | -0.171 |
x11 | -1.2500 | 1.432 | -0.873 | 0.384 | -4.079 | 1.579 |
x12 | 0.2500 | 1.432 | 0.175 | 0.862 | -2.579 | 3.079 |
x13 | -3.5000 | 1.432 | -2.444 | 0.016 | -6.329 | -0.671 |
x14 | -2.5000 | 1.432 | -1.746 | 0.083 | -5.329 | 0.329 |
x15 | 0.2500 | 1.432 | 0.175 | 0.862 | -2.579 | 3.079 |
x16 | -2.0000 | 1.432 | -1.397 | 0.165 | -4.829 | 0.829 |
x17 | -3.2500 | 1.432 | -2.269 | 0.025 | -6.079 | -0.421 |
x18 | -1.0000 | 1.432 | -0.698 | 0.486 | -3.829 | 1.829 |
x19 | -0.5000 | 1.432 | -0.349 | 0.727 | -3.329 | 2.329 |
x20 | -2.7500 | 1.432 | -1.920 | 0.057 | -5.579 | 0.079 |
x21 | -3.0000 | 1.432 | -2.095 | 0.038 | -5.829 | -0.171 |
x22 | -2.5000 | 1.432 | -1.746 | 0.083 | -5.329 | 0.329 |
x23 | -2.5000 | 1.432 | -1.746 | 0.083 | -5.329 | 0.329 |
x24 | -1.5000 | 1.432 | -1.047 | 0.297 | -4.329 | 1.329 |
x25 | 0.7500 | 1.432 | 0.524 | 0.601 | -2.079 | 3.579 |
x26 | -3.7500 | 1.432 | -2.619 | 0.010 | -6.579 | -0.921 |
x27 | -3.5000 | 1.432 | -2.444 | 0.016 | -6.329 | -0.671 |
x28 | -0.2500 | 1.432 | -0.175 | 0.862 | -3.079 | 2.579 |
x29 | -2.2500 | 1.432 | -1.571 | 0.118 | -5.079 | 0.579 |
x30 | -2.5000 | 1.432 | -1.746 | 0.083 | -5.329 | 0.329 |
x31 | -3.0000 | 1.432 | -2.095 | 0.038 | -5.829 | -0.171 |
x32 | -1.0000 | 1.432 | -0.698 | 0.486 | -3.829 | 1.829 |
x33 | -4.7500 | 1.432 | -3.317 | 0.001 | -7.579 | -1.921 |
x34 | -2.2500 | 1.432 | -1.571 | 0.118 | -5.079 | 0.579 |
x35 | -2.5000 | 1.432 | -1.746 | 0.083 | -5.329 | 0.329 |
x36 | -2.2500 | 1.432 | -1.571 | 0.118 | -5.079 | 0.579 |
x37 | -1.5000 | 1.432 | -1.047 | 0.297 | -4.329 | 1.329 |
x38 | -2.5000 | 1.432 | -1.746 | 0.083 | -5.329 | 0.329 |
x39 | -2.5000 | 1.432 | -1.746 | 0.083 | -5.329 | 0.329 |
x40 | -1.7500 | 1.432 | -1.222 | 0.224 | -4.579 | 1.079 |
x41 | -2.0000 | 1.432 | -1.397 | 0.165 | -4.829 | 0.829 |
x42 | -3.7500 | 1.432 | -2.619 | 0.010 | -6.579 | -0.921 |
x43 | 1.7500 | 1.432 | 1.222 | 0.224 | -1.079 | 4.579 |
x44 | -1.0000 | 1.432 | -0.698 | 0.486 | -3.829 | 1.829 |
x45 | -0.5000 | 1.432 | -0.349 | 0.727 | -3.329 | 2.329 |
x46 | -4.2500 | 1.432 | -2.968 | 0.003 | -7.079 | -1.421 |
x47 | -0.5000 | 1.432 | -0.349 | 0.727 | -3.329 | 2.329 |
x48 | -0.7500 | 1.432 | -0.524 | 0.601 | -3.579 | 2.079 |
x49 | -0.7500 | 1.432 | -0.524 | 0.601 | -3.579 | 2.079 |
x50 | -2.5000 | 1.432 | -1.746 | 0.083 | -5.329 | 0.329 |
x51 | 0.2500 | 1.432 | 0.175 | 0.862 | -2.579 | 3.079 |
Omnibus: | 7.342 | Durbin-Watson: | 1.715 |
---|---|---|---|
Prob(Omnibus): | 0.025 | Jarque-Bera (JB): | 8.179 |
Skew: | -0.316 | Prob(JB): | 0.0167 |
Kurtosis: | 3.750 | Cond. No. | 1.81e+06 |
Looking at the statsmodels summary, we can analyze which coefficients had a notable effect on the predicted value. We can list them by looking at the P>|t| column; if a coefficient has a value less than our chosen bound of 0.05, we consider it significant.
# accumulate constant names
const_names = train_data.columns[3:].tolist()
const_names.insert(0, 'YEAR')
reading_significant = []
# iterate through p values and if less than 0.05 add const name to reading_significant
for i in range(len(p_reading.pvalues) - 1):
    if p_reading.pvalues[i + 1] < 0.05:
        reading_significant.append(const_names[i])
print("Significant reading test constants: " + str(reading_significant))
print()
math_significant = []
# iterate through p values and if less than 0.05 add const name to math_significant
for i in range(len(p_math.pvalues) - 1):
    if p_math.pvalues[i + 1] < 0.05:
        math_significant.append(const_names[i])
print("Significant math test constants: " + str(math_significant))
Significant reading test constants: ['YEAR', 'STATE_ALASKA', 'STATE_COLORADO', 'STATE_DELAWARE', 'STATE_KANSAS', 'STATE_MISSOURI', 'STATE_NEW_MEXICO', 'STATE_NEW_YORK', 'STATE_NORTH_DAKOTA', 'STATE_SOUTH_DAKOTA', 'STATE_TEXAS'] Significant math test constants: ['YEAR', 'STATE_ALASKA', 'STATE_ARIZONA', 'STATE_ARKANSAS', 'STATE_CONNECTICUT', 'STATE_DISTRICT_OF_COLUMBIA', 'STATE_FLORIDA', 'STATE_IDAHO', 'STATE_KANSAS', 'STATE_MARYLAND', 'STATE_MISSOURI', 'STATE_MONTANA', 'STATE_NEW_JERSEY', 'STATE_NEW_YORK', 'STATE_SOUTH_DAKOTA', 'STATE_VERMONT']
Based on just the p-values, we can see that these states (along with the year) are significant factors in the growth of a child's reading and math test scores.
One application of this model is to identify states that are predicted to perform poorly, so that legislation can be changed and more funding provided to help their students grow. A state is predicted to perform poorly if it has a negative coefficient in the LinearRegression model. If that coefficient is also statistically significant, the state should try to improve its education system.
print("States that have a predicted negative growth in reading:")
for i in range(len(reading_regr.coef_)):
    if(i > 1):
        if reading_regr.coef_[i] < 0 and p_reading.pvalues[i + 1] < 0.05:
            print(const_names[i])
print()
print("States that have a predicted negative growth in math:")
for i in range(len(mathematics_regr.coef_)):
    if(i > 1):
        if mathematics_regr.coef_[i] < 0 and p_math.pvalues[i + 1] < 0.05:
            print(const_names[i])
States that have a predicted negative growth in reading: STATE_COLORADO STATE_DELAWARE STATE_KANSAS STATE_MISSOURI STATE_NEW_MEXICO STATE_NEW_YORK STATE_NORTH_DAKOTA STATE_SOUTH_DAKOTA STATE_TEXAS States that have a predicted negative growth in math: STATE_ARKANSAS STATE_CONNECTICUT STATE_FLORIDA STATE_IDAHO STATE_KANSAS STATE_MARYLAND STATE_MISSOURI STATE_MONTANA STATE_NEW_JERSEY STATE_NEW_YORK STATE_SOUTH_DAKOTA STATE_VERMONT
From these lists, we can tell that a notable number of states have both a negative coefficient and a p-value below 0.05 - and are therefore predicted to have significant decreases in average points for 4th graders on the NAEP test in reading or math. Notably, Kansas, Missouri, New York, and South Dakota appear on both lists, meaning they are predicted to have significant negative growth in both average math and average reading scores for 4th graders.
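To collect these flagged states programmatically instead of reading them off the printout, a sketch that mirrors the loops above (including their index offset) could gather each list into a set and intersect them:
# states flagged as significant with a negative coefficient in each model
neg_reading = {const_names[i] for i in range(len(reading_regr.coef_))
               if i > 1 and reading_regr.coef_[i] < 0 and p_reading.pvalues[i + 1] < 0.05}
neg_math = {const_names[i] for i in range(len(mathematics_regr.coef_))
            if i > 1 and mathematics_regr.coef_[i] < 0 and p_math.pvalues[i + 1] < 0.05}
print("Predicted significant negative growth in both subjects:", sorted(neg_reading & neg_math))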
Another set of predictive models we are creating will predict the change in grade 4 average test scores based on previous years' data for each state, per gender. We will create 4 models: one each for MALE math changes per state, FEMALE math changes per state, MALE reading changes per state, and FEMALE reading changes per state. First, we are going to remove all the columns except for state, year, and the gender-specific average math and reading test scores.
# get gender-based data
gender_avg = school_data[['STATE', 'YEAR', 'G04_A_M_READING', 'G04_A_M_MATHEMATICS', 'G04_A_F_READING', 'G04_A_F_MATHEMATICS']]
gender_avg = gender_avg[gender_avg.STATE != "DODEA"] # dropping rows that are not relevant states
gender_avg = gender_avg[gender_avg.STATE != "NATIONAL"] # dropping rows that are not relevant states
# display the first few results
gender_avg.head()
STATE | YEAR | G04_A_M_READING | G04_A_M_MATHEMATICS | G04_A_F_READING | G04_A_F_MATHEMATICS | |
---|---|---|---|---|---|---|
867 | ALABAMA | 2009 | 212.0 | 228.0 | 221.0 | 228.0 |
868 | ALASKA | 2009 | 207.0 | 238.0 | 216.0 | 236.0 |
869 | ARIZONA | 2009 | 207.0 | 230.0 | 213.0 | 230.0 |
870 | ARKANSAS | 2009 | 211.0 | 239.0 | 222.0 | 236.0 |
871 | CALIFORNIA | 2009 | 207.0 | 233.0 | 213.0 | 231.0 |
To create a metric for how the test scores have improved, we will, similar to before, subtract each state's 2009 FEMALE and MALE reading and math averages from the corresponding averages in another year, defining how much the average math and reading test scores have changed since 2009. We are storing this metric in these new columns: "READING_GROWTH_M", "READING_GROWTH_F", "MATHEMATICS_GROWTH_M", and "MATHEMATICS_GROWTH_F".
First, we will create these columns and use numpy to set their values to empty (np.NaN). Typically, NaN denotes missing values, but we can also use it when we don't know the values up front.
# define columns in dataframe and set to empty
gender_avg['MATHEMATICS_GROWTH_M'] = np.NaN
gender_avg['MATHEMATICS_GROWTH_F'] = np.NaN
gender_avg['READING_GROWTH_M'] = np.NaN
gender_avg['READING_GROWTH_F'] = np.NaN
# display the first few results
gender_avg.head()
STATE | YEAR | G04_A_M_READING | G04_A_M_MATHEMATICS | G04_A_F_READING | G04_A_F_MATHEMATICS | MATHEMATICS_GROWTH_M | MATHEMATICS_GROWTH_F | READING_GROWTH_M | READING_GROWTH_F | |
---|---|---|---|---|---|---|---|---|---|---|
867 | ALABAMA | 2009 | 212.0 | 228.0 | 221.0 | 228.0 | NaN | NaN | NaN | NaN |
868 | ALASKA | 2009 | 207.0 | 238.0 | 216.0 | 236.0 | NaN | NaN | NaN | NaN |
869 | ARIZONA | 2009 | 207.0 | 230.0 | 213.0 | 230.0 | NaN | NaN | NaN | NaN |
870 | ARKANSAS | 2009 | 211.0 | 239.0 | 222.0 | 236.0 | NaN | NaN | NaN | NaN |
871 | CALIFORNIA | 2009 | 207.0 | 233.0 | 213.0 | 231.0 | NaN | NaN | NaN | NaN |
Next, we define a get_growth() function that calculates the growth from 2009 to the row's year for the specified subject and gender (FEMALE or MALE). We will use this function to assign a growth value for each combination of state, year, and gender.
# method to get growth
def get_growth(row, subject, gender):
    # get the 2009 average score data for this row's state
    state = row['STATE']
    new = gender_avg.loc[gender_avg['STATE'] == state]
    new = new.loc[new['YEAR'] == 2009]
    # get the data based on subject and gender and subtract the 2009 average score from it
    if subject == 'MATH':
        if gender == 'M':
            return row['G04_A_M_MATHEMATICS'] - new['G04_A_M_MATHEMATICS'].iloc[0]
        else:
            return row['G04_A_F_MATHEMATICS'] - new['G04_A_F_MATHEMATICS'].iloc[0]
    else:
        if gender == 'M':
            return row['G04_A_M_READING'] - new['G04_A_M_READING'].iloc[0]
        else:
            return row['G04_A_F_READING'] - new['G04_A_F_READING'].iloc[0]
# iterate through each row in our dataframe and get the growth based off of subject and gender
for i, row in gender_avg.iterrows():
    gender_avg.at[i, 'READING_GROWTH_M'] = get_growth(row, 'READ', 'M')
    gender_avg.at[i, 'READING_GROWTH_F'] = get_growth(row, 'READ', 'F')
    gender_avg.at[i, 'MATHEMATICS_GROWTH_M'] = get_growth(row, 'MATH', 'M')
    gender_avg.at[i, 'MATHEMATICS_GROWTH_F'] = get_growth(row, 'MATH', 'F')
gender_avg.head()
STATE | YEAR | G04_A_M_READING | G04_A_M_MATHEMATICS | G04_A_F_READING | G04_A_F_MATHEMATICS | MATHEMATICS_GROWTH_M | MATHEMATICS_GROWTH_F | READING_GROWTH_M | READING_GROWTH_F | |
---|---|---|---|---|---|---|---|---|---|---|
867 | ALABAMA | 2009 | 212.0 | 228.0 | 221.0 | 228.0 | 0.0 | 0.0 | 0.0 | 0.0 |
868 | ALASKA | 2009 | 207.0 | 238.0 | 216.0 | 236.0 | 0.0 | 0.0 | 0.0 | 0.0 |
869 | ARIZONA | 2009 | 207.0 | 230.0 | 213.0 | 230.0 | 0.0 | 0.0 | 0.0 | 0.0 |
870 | ARKANSAS | 2009 | 211.0 | 239.0 | 222.0 | 236.0 | 0.0 | 0.0 | 0.0 | 0.0 |
871 | CALIFORNIA | 2009 | 207.0 | 233.0 | 213.0 | 231.0 | 0.0 | 0.0 | 0.0 | 0.0 |
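As an aside, the same growth columns could also be computed without a row-by-row loop. The sketch below (not what we ran above; baseline and merged are just illustrative names, and the merge produces a fresh frame with a reset index) merges each state's 2009 row back onto the full table and subtracts:
# keep only the 2009 rows as the per-state baseline and tag their score columns with a suffix
score_cols = ['G04_A_M_MATHEMATICS', 'G04_A_F_MATHEMATICS', 'G04_A_M_READING', 'G04_A_F_READING']
baseline = gender_avg.loc[gender_avg['YEAR'] == 2009, ['STATE'] + score_cols]
baseline = baseline.rename(columns={c: c + '_2009' for c in score_cols})
# attach the 2009 baseline to every row of the same state, then subtract to get the growth
merged = gender_avg.merge(baseline, on='STATE', how='left')
merged['MATHEMATICS_GROWTH_M'] = merged['G04_A_M_MATHEMATICS'] - merged['G04_A_M_MATHEMATICS_2009']
merged['MATHEMATICS_GROWTH_F'] = merged['G04_A_F_MATHEMATICS'] - merged['G04_A_F_MATHEMATICS_2009']
merged['READING_GROWTH_M'] = merged['G04_A_M_READING'] - merged['G04_A_M_READING_2009']
merged['READING_GROWTH_F'] = merged['G04_A_F_READING'] - merged['G04_A_F_READING_2009']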
We will be creating separate models for STATE+FEMALE math growth, STATE+MALE math growth, STATE+FEMALE reading growth, and STATE+MALE reading growth. Because of this, gender will not be an independent variable; only the state will be, since each model already assumes which gender it is trained on. Since state is a categorical variable, we can use the pandas method get_dummies on the STATE column to create a dataframe where each state gets its own indicator column, which is 1 if the row belongs to that state and 0 if it does not. We will then drop the Alabama column from our dataset, because if all of the other state indicators are 0 we can infer that the row must be an Alabama value.
# get dummies
gender_avg = pd.get_dummies(gender_avg, columns=['STATE'])
# drop alabama and reading and mathematics averages since we no longer need them
gender_avg = gender_avg.drop(columns=['STATE_ALABAMA', 'G04_A_M_READING', 'G04_A_F_READING', 'G04_A_M_MATHEMATICS', 'G04_A_F_MATHEMATICS'])
# display the first few results
gender_avg.head()
YEAR | MATHEMATICS_GROWTH_M | MATHEMATICS_GROWTH_F | READING_GROWTH_M | READING_GROWTH_F | STATE_ALASKA | STATE_ARIZONA | STATE_ARKANSAS | STATE_CALIFORNIA | STATE_COLORADO | ... | STATE_SOUTH_DAKOTA | STATE_TENNESSEE | STATE_TEXAS | STATE_UTAH | STATE_VERMONT | STATE_VIRGINIA | STATE_WASHINGTON | STATE_WEST_VIRGINIA | STATE_WISCONSIN | STATE_WYOMING | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
867 | 2009 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
868 | 2009 | 0.0 | 0.0 | 0.0 | 0.0 | 1 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
869 | 2009 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
870 | 2009 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
871 | 2009 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0 | 1 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 55 columns
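For reference, pandas can do this reference-category drop for us: get_dummies accepts drop_first=True, which removes the alphabetically first indicator column (here, ALABAMA). A sketch, assuming gender_avg_pre (a hypothetical name) holds the frame as it looked before we called get_dummies above:
# equivalent to get_dummies followed by manually dropping STATE_ALABAMA
gender_avg_alt = pd.get_dummies(gender_avg_pre, columns=['STATE'], drop_first=True)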
When creating predictive models, it is standard practice to split our dataset into a train set and a test set. The test set contains data the model has never seen before, so we can get a more accurate picture of how it performs on real-world data. A common convention is to split datasets into 60% train and 40% test (more info here: how to split a dataset).
Because our models are being trained to predict growth from 2009 to later years, we will split our dataset with a different convention. As before, we will predict on the years 2017, 2018, and 2019, so we will use those rows as our test data and the rest as our train data. We will also drop any rows that have NaN growth values, which correspond to years where a state has no recorded FEMALE and MALE average reading and math scores.
train_data = gender_avg[gender_avg['YEAR'] < 2017].dropna()
# .copy() so that adding prediction columns to test_data later does not raise a SettingWithCopyWarning
test_data = gender_avg[gender_avg['YEAR'] >= 2017].dropna().copy()
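For comparison, if we did want the conventional random split described above instead of a year-based split, sklearn provides a one-liner (a sketch, not used in the rest of this tutorial):
from sklearn.model_selection import train_test_split
# randomly hold out 40% of the rows as a test set; random_state makes the split reproducible
rand_train, rand_test = train_test_split(gender_avg.dropna(), test_size=0.4, random_state=42)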
For our regression model, we will use Linear Regression from sklearn (link for more information: sklearn library). We will make one model per subject, per gender for a total of four models.
First, we will define our X (state with year) and y values (growth for assumed gender) by iterating through each row and adding the year and state to the X variable and the growths to the y variables.
from sklearn.linear_model import LinearRegression
# create X and y lists by respective subject and gender
X_reading_M = []
X_reading_F = []
y_reading_M = []
y_reading_F = []
X_math_M = []
X_math_F = []
y_math_M = []
y_math_F = []
# iterate through each row and update X and y
for i, row in train_data.iterrows():
    # X holds the year followed by every column from index 3 onward
    # (the reading growth columns and the state indicator columns)
    X = row[3:].tolist()
    X.insert(0, row['YEAR'])
    # for each y (growth for the assumed gender), append its respective value
    X_reading_M.append(X)
    y_reading_M.append(row['READING_GROWTH_M'])
    X_reading_F.append(X)
    y_reading_F.append(row['READING_GROWTH_F'])
    X_math_M.append(X)
    y_math_M.append(row['MATHEMATICS_GROWTH_M'])
    X_math_F.append(X)
    y_math_F.append(row['MATHEMATICS_GROWTH_F'])
Using the LinearRegression model, we will fit each model on the X and y variables we defined above to create a prediction model. Then, we will test our models using the test dataset we defined above.
# create models based on subject and assumed gender and then fit the data
reading_M = LinearRegression().fit(X_reading_M, y_reading_M)
reading_F = LinearRegression().fit(X_reading_F, y_reading_F)
mathematics_M = LinearRegression().fit(X_math_M, y_math_M)
mathematics_F = LinearRegression().fit(X_math_F, y_math_F)
# create X values for state and subject
X_test_reading_M = []
X_test_reading_F = []
X_test_math_M = []
X_test_math_F = []
# accumulate X values for reading and math, built the same way as the training features
for i, row in test_data.iterrows():
    X = row[3:].tolist()
    X.insert(0, row['YEAR'])
    X_test_reading_M.append(X)
    X_test_reading_F.append(X)
    X_test_math_M.append(X)
    X_test_math_F.append(X)
# predict based on the X values
test_data['PREDICT_READING_M'] = reading_M.predict(X_test_reading_M)
test_data['PREDICT_READING_F'] = reading_F.predict(X_test_reading_F)
test_data['PREDICT_MATH_M'] = mathematics_M.predict(X_test_math_M)
test_data['PREDICT_MATH_F'] = mathematics_F.predict(X_test_math_F)
test_data.head()
YEAR | MATHEMATICS_GROWTH_M | MATHEMATICS_GROWTH_F | READING_GROWTH_M | READING_GROWTH_F | STATE_ALASKA | STATE_ARIZONA | STATE_ARKANSAS | STATE_CALIFORNIA | STATE_COLORADO | ... | STATE_VERMONT | STATE_VIRGINIA | STATE_WASHINGTON | STATE_WEST_VIRGINIA | STATE_WISCONSIN | STATE_WYOMING | PREDICT_READING_M | PREDICT_READING_F | PREDICT_MATH_M | PREDICT_MATH_F | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1281 | 2017 | 4.0 | 4.0 | 1.0 | -1.0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1.0 | -1.0 | 1.349624 | 1.955009 |
1288 | 2017 | -8.0 | -6.0 | -4.0 | -5.0 | 1 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | -4.0 | -5.0 | -2.933476 | -2.043989 |
1295 | 2017 | 6.0 | 3.0 | 6.0 | 5.0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 6.0 | 5.0 | 7.960281 | 6.425050 |
1302 | 2017 | -4.0 | -3.0 | 1.0 | -1.0 | 0 | 0 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1.0 | -1.0 | -2.173749 | 0.917987 |
1309 | 2017 | 0.0 | 0.0 | 4.0 | 7.0 | 0 | 0 | 0 | 1 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 4.0 | 7.0 | 3.015859 | 3.018895 |
5 rows × 59 columns
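Before digging into the p-values, it is worth quantifying how close these predictions are to the actual 2017-2019 growth values. A quick sketch using sklearn's built-in metrics (not part of our original analysis):
from sklearn.metrics import mean_absolute_error, r2_score
# compare each model's predictions against the observed growth in the held-out years
pairs = [('READING_GROWTH_M', 'PREDICT_READING_M'),
         ('READING_GROWTH_F', 'PREDICT_READING_F'),
         ('MATHEMATICS_GROWTH_M', 'PREDICT_MATH_M'),
         ('MATHEMATICS_GROWTH_F', 'PREDICT_MATH_F')]
for actual, predicted in pairs:
    mae = mean_absolute_error(test_data[actual], test_data[predicted])
    r2 = r2_score(test_data[actual], test_data[predicted])
    print(actual + ": MAE = " + str(round(mae, 2)) + " points, R^2 = " + str(round(r2, 2)))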
To analyze whether being in a particular state affects the growth of reading and math test scores for the MALE and FEMALE groups, we can use statsmodels to look at the p-value of each coefficient in our fitted models.
import statsmodels.api as sm
# create statsmodel for reading data male
p_reading_M = sm.OLS(train_data['READING_GROWTH_M'].tolist(), sm.add_constant(X_reading_M)).fit()
p_reading_M.summary()
Dep. Variable: | y | R-squared: | 1.000 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 1.000 |
Method: | Least Squares | F-statistic: | 2.040e+27 |
Date: | Mon, 16 May 2022 | Prob (F-statistic): | 0.00 |
Time: | 15:54:27 | Log-Likelihood: | 5840.7 |
No. Observations: | 204 | AIC: | -1.157e+04 |
Df Residuals: | 150 | BIC: | -1.139e+04 |
Df Model: | 53 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
const | 1.08e-12 | 7.26e-12 | 0.149 | 0.882 | -1.33e-11 | 1.54e-11 |
x1 | -4.927e-16 | 3.61e-15 | -0.136 | 0.892 | -7.63e-15 | 6.64e-15 |
x2 | 1.0000 | 5.31e-15 | 1.88e+14 | 0.000 | 1.000 | 1.000 |
x3 | -6.87e-16 | 5.48e-15 | -0.125 | 0.900 | -1.15e-14 | 1.01e-14 |
x4 | -2.526e-15 | 7.47e-14 | -0.034 | 0.973 | -1.5e-13 | 1.45e-13 |
x5 | -5.052e-15 | 7.46e-14 | -0.068 | 0.946 | -1.52e-13 | 1.42e-13 |
x6 | 1.887e-15 | 7.37e-14 | 0.026 | 0.980 | -1.44e-13 | 1.47e-13 |
x7 | 1.665e-15 | 7.37e-14 | 0.023 | 0.982 | -1.44e-13 | 1.47e-13 |
x8 | -2.22e-16 | 7.48e-14 | -0.003 | 0.998 | -1.48e-13 | 1.48e-13 |
x9 | 1.665e-16 | 7.42e-14 | 0.002 | 0.998 | -1.46e-13 | 1.47e-13 |
x10 | -7.216e-16 | 7.43e-14 | -0.010 | 0.992 | -1.48e-13 | 1.46e-13 |
x11 | 1.332e-15 | 7.77e-14 | 0.017 | 0.986 | -1.52e-13 | 1.55e-13 |
x12 | -9.714e-16 | 7.45e-14 | -0.013 | 0.990 | -1.48e-13 | 1.46e-13 |
x13 | 4.441e-16 | 7.42e-14 | 0.006 | 0.995 | -1.46e-13 | 1.47e-13 |
x14 | 1.11e-15 | 7.36e-14 | 0.015 | 0.988 | -1.44e-13 | 1.47e-13 |
x15 | -3.608e-16 | 7.44e-14 | -0.005 | 0.996 | -1.47e-13 | 1.47e-13 |
x16 | 0 | 7.38e-14 | 0 | 1.000 | -1.46e-13 | 1.46e-13 |
x17 | 6.106e-16 | 7.34e-14 | 0.008 | 0.993 | -1.44e-13 | 1.46e-13 |
x18 | 5.551e-16 | 7.37e-14 | 0.008 | 0.994 | -1.45e-13 | 1.46e-13 |
x19 | -3.331e-16 | 7.75e-14 | -0.004 | 0.997 | -1.54e-13 | 1.53e-13 |
x20 | -5.551e-16 | 7.37e-14 | -0.008 | 0.994 | -1.46e-13 | 1.45e-13 |
x21 | 4.58e-16 | 7.46e-14 | 0.006 | 0.995 | -1.47e-13 | 1.48e-13 |
x22 | -1.416e-15 | 7.42e-14 | -0.019 | 0.985 | -1.48e-13 | 1.45e-13 |
x23 | -1.11e-15 | 7.51e-14 | -0.015 | 0.988 | -1.5e-13 | 1.47e-13 |
x24 | -2.22e-16 | 7.4e-14 | -0.003 | 0.998 | -1.46e-13 | 1.46e-13 |
x25 | 0 | 7.4e-14 | 0 | 1.000 | -1.46e-13 | 1.46e-13 |
x26 | 9.992e-16 | 7.46e-14 | 0.013 | 0.989 | -1.46e-13 | 1.48e-13 |
x27 | -9.298e-16 | 7.47e-14 | -0.012 | 0.990 | -1.49e-13 | 1.47e-13 |
x28 | -1.998e-15 | 7.52e-14 | -0.027 | 0.979 | -1.51e-13 | 1.47e-13 |
x29 | 5.412e-16 | 7.48e-14 | 0.007 | 0.994 | -1.47e-13 | 1.48e-13 |
x30 | -9.437e-16 | 7.44e-14 | -0.013 | 0.990 | -1.48e-13 | 1.46e-13 |
x31 | 7.91e-16 | 7.37e-14 | 0.011 | 0.991 | -1.45e-13 | 1.46e-13 |
x32 | 9.021e-16 | 7.46e-14 | 0.012 | 0.990 | -1.47e-13 | 1.48e-13 |
x33 | 8.327e-17 | 7.43e-14 | 0.001 | 0.999 | -1.47e-13 | 1.47e-13 |
x34 | -3.886e-16 | 7.43e-14 | -0.005 | 0.996 | -1.47e-13 | 1.46e-13 |
x35 | -1.776e-15 | 7.48e-14 | -0.024 | 0.981 | -1.5e-13 | 1.46e-13 |
x36 | 1.554e-15 | 7.36e-14 | 0.021 | 0.983 | -1.44e-13 | 1.47e-13 |
x37 | 1.221e-15 | 7.45e-14 | 0.016 | 0.987 | -1.46e-13 | 1.48e-13 |
x38 | -4.996e-16 | 7.44e-14 | -0.007 | 0.995 | -1.47e-13 | 1.46e-13 |
x39 | 1.166e-15 | 7.41e-14 | 0.016 | 0.987 | -1.45e-13 | 1.48e-13 |
x40 | 1.665e-16 | 7.39e-14 | 0.002 | 0.998 | -1.46e-13 | 1.46e-13 |
x41 | -3.886e-16 | 7.67e-14 | -0.005 | 0.996 | -1.52e-13 | 1.51e-13 |
x42 | 0 | 7.36e-14 | 0 | 1.000 | -1.46e-13 | 1.46e-13 |
x43 | 2.776e-17 | 7.68e-14 | 0.000 | 1.000 | -1.52e-13 | 1.52e-13 |
x44 | -1.166e-15 | 7.66e-14 | -0.015 | 0.988 | -1.53e-13 | 1.5e-13 |
x45 | -1.11e-16 | 7.52e-14 | -0.001 | 0.999 | -1.49e-13 | 1.49e-13 |
x46 | 1.178e-15 | 7.46e-14 | 0.016 | 0.987 | -1.46e-13 | 1.48e-13 |
x47 | 8.882e-16 | 7.5e-14 | 0.012 | 0.991 | -1.47e-13 | 1.49e-13 |
x48 | -2.581e-15 | 7.61e-14 | -0.034 | 0.973 | -1.53e-13 | 1.48e-13 |
x49 | -2.914e-16 | 7.36e-14 | -0.004 | 0.997 | -1.46e-13 | 1.45e-13 |
x50 | 8.327e-16 | 7.42e-14 | 0.011 | 0.991 | -1.46e-13 | 1.48e-13 |
x51 | -6.661e-16 | 7.46e-14 | -0.009 | 0.993 | -1.48e-13 | 1.47e-13 |
x52 | -2.22e-16 | 7.38e-14 | -0.003 | 0.998 | -1.46e-13 | 1.46e-13 |
x53 | 7.008e-16 | 7.37e-14 | 0.010 | 0.992 | -1.45e-13 | 1.46e-13 |
Omnibus: | 7.634 | Durbin-Watson: | 0.002 |
---|---|---|---|
Prob(Omnibus): | 0.022 | Jarque-Bera (JB): | 7.586 |
Skew: | 0.469 | Prob(JB): | 0.0225 |
Kurtosis: | 3.117 | Cond. No. | 2.01e+06 |
# create statsmodel for reading data female
p_reading_F = sm.OLS(train_data['READING_GROWTH_F'].tolist(), sm.add_constant(X_reading_F)).fit()
p_reading_F.summary()
Dep. Variable: | y | R-squared: | 1.000 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 1.000 |
Method: | Least Squares | F-statistic: | 6.839e+27 |
Date: | Mon, 16 May 2022 | Prob (F-statistic): | 0.00 |
Time: | 15:54:27 | Log-Likelihood: | 5952.7 |
No. Observations: | 204 | AIC: | -1.180e+04 |
Df Residuals: | 150 | BIC: | -1.162e+04 |
Df Model: | 53 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
const | 1.734e-12 | 4.2e-12 | 0.413 | 0.680 | -6.56e-12 | 1e-11 |
x1 | -8.882e-16 | 2.09e-15 | -0.426 | 0.671 | -5.01e-15 | 3.23e-15 |
x2 | 5.135e-16 | 3.07e-15 | 0.167 | 0.867 | -5.55e-15 | 6.58e-15 |
x3 | 1.0000 | 3.16e-15 | 3.16e+14 | 0.000 | 1.000 | 1.000 |
x4 | 2.054e-15 | 4.31e-14 | 0.048 | 0.962 | -8.32e-14 | 8.73e-14 |
x5 | -7.772e-16 | 4.31e-14 | -0.018 | 0.986 | -8.59e-14 | 8.44e-14 |
x6 | -2.776e-16 | 4.26e-14 | -0.007 | 0.995 | -8.44e-14 | 8.38e-14 |
x7 | 1.499e-15 | 4.26e-14 | 0.035 | 0.972 | -8.26e-14 | 8.56e-14 |
x8 | 9.992e-16 | 4.32e-14 | 0.023 | 0.982 | -8.44e-14 | 8.64e-14 |
x9 | 2.359e-15 | 4.28e-14 | 0.055 | 0.956 | -8.23e-14 | 8.7e-14 |
x10 | 1.776e-15 | 4.29e-14 | 0.041 | 0.967 | -8.3e-14 | 8.66e-14 |
x11 | 4.441e-16 | 4.49e-14 | 0.010 | 0.992 | -8.83e-14 | 8.92e-14 |
x12 | -2.22e-16 | 4.3e-14 | -0.005 | 0.996 | -8.52e-14 | 8.48e-14 |
x13 | -1.221e-15 | 4.29e-14 | -0.028 | 0.977 | -8.59e-14 | 8.35e-14 |
x14 | -7.772e-16 | 4.25e-14 | -0.018 | 0.985 | -8.48e-14 | 8.32e-14 |
x15 | 1.554e-15 | 4.3e-14 | 0.036 | 0.971 | -8.33e-14 | 8.64e-14 |
x16 | 9.714e-16 | 4.26e-14 | 0.023 | 0.982 | -8.33e-14 | 8.52e-14 |
x17 | 7.355e-16 | 4.24e-14 | 0.017 | 0.986 | -8.31e-14 | 8.45e-14 |
x18 | 9.992e-16 | 4.26e-14 | 0.023 | 0.981 | -8.31e-14 | 8.51e-14 |
x19 | 8.882e-16 | 4.48e-14 | 0.020 | 0.984 | -8.76e-14 | 8.94e-14 |
x20 | 1.138e-15 | 4.26e-14 | 0.027 | 0.979 | -8.3e-14 | 8.53e-14 |
x21 | -1.665e-15 | 4.31e-14 | -0.039 | 0.969 | -8.68e-14 | 8.35e-14 |
x22 | 0 | 4.28e-14 | 0 | 1.000 | -8.46e-14 | 8.46e-14 |
x23 | -1.11e-15 | 4.34e-14 | -0.026 | 0.980 | -8.68e-14 | 8.46e-14 |
x24 | 0 | 4.27e-14 | 0 | 1.000 | -8.45e-14 | 8.45e-14 |
x25 | 4.025e-16 | 4.28e-14 | 0.009 | 0.993 | -8.41e-14 | 8.49e-14 |
x26 | 1.665e-15 | 4.31e-14 | 0.039 | 0.969 | -8.35e-14 | 8.68e-14 |
x27 | 1.11e-15 | 4.32e-14 | 0.026 | 0.980 | -8.42e-14 | 8.64e-14 |
x28 | 1.554e-15 | 4.35e-14 | 0.036 | 0.972 | -8.43e-14 | 8.74e-14 |
x29 | 1.332e-15 | 4.32e-14 | 0.031 | 0.975 | -8.41e-14 | 8.67e-14 |
x30 | -7.772e-16 | 4.3e-14 | -0.018 | 0.986 | -8.57e-14 | 8.42e-14 |
x31 | 5.412e-16 | 4.26e-14 | 0.013 | 0.990 | -8.36e-14 | 8.47e-14 |
x32 | 1.443e-15 | 4.31e-14 | 0.033 | 0.973 | -8.37e-14 | 8.66e-14 |
x33 | 9.992e-16 | 4.29e-14 | 0.023 | 0.981 | -8.38e-14 | 8.58e-14 |
x34 | 1.61e-15 | 4.29e-14 | 0.038 | 0.970 | -8.31e-14 | 8.64e-14 |
x35 | 1.055e-15 | 4.32e-14 | 0.024 | 0.981 | -8.44e-14 | 8.65e-14 |
x36 | -5.551e-16 | 4.25e-14 | -0.013 | 0.990 | -8.46e-14 | 8.35e-14 |
x37 | 1.069e-15 | 4.3e-14 | 0.025 | 0.980 | -8.4e-14 | 8.61e-14 |
x38 | 4.996e-16 | 4.3e-14 | 0.012 | 0.991 | -8.44e-14 | 8.54e-14 |
x39 | 1.11e-15 | 4.28e-14 | 0.026 | 0.979 | -8.35e-14 | 8.57e-14 |
x40 | 8.882e-16 | 4.27e-14 | 0.021 | 0.983 | -8.34e-14 | 8.52e-14 |
x41 | -1.776e-15 | 4.43e-14 | -0.040 | 0.968 | -8.93e-14 | 8.58e-14 |
x42 | 3.331e-16 | 4.25e-14 | 0.008 | 0.994 | -8.37e-14 | 8.44e-14 |
x43 | 8.882e-16 | 4.44e-14 | 0.020 | 0.984 | -8.68e-14 | 8.86e-14 |
x44 | 1.776e-15 | 4.43e-14 | 0.040 | 0.968 | -8.57e-14 | 8.92e-14 |
x45 | 8.327e-16 | 4.35e-14 | 0.019 | 0.985 | -8.5e-14 | 8.67e-14 |
x46 | 1.887e-15 | 4.31e-14 | 0.044 | 0.965 | -8.32e-14 | 8.7e-14 |
x47 | -1.665e-16 | 4.33e-14 | -0.004 | 0.997 | -8.58e-14 | 8.55e-14 |
x48 | 1.11e-16 | 4.39e-14 | 0.003 | 0.998 | -8.67e-14 | 8.69e-14 |
x49 | 2.359e-16 | 4.25e-14 | 0.006 | 0.996 | -8.38e-14 | 8.42e-14 |
x50 | -1.11e-15 | 4.29e-14 | -0.026 | 0.979 | -8.58e-14 | 8.36e-14 |
x51 | 4.441e-16 | 4.31e-14 | 0.010 | 0.992 | -8.47e-14 | 8.56e-14 |
x52 | 4.857e-17 | 4.26e-14 | 0.001 | 0.999 | -8.41e-14 | 8.42e-14 |
x53 | 4.441e-16 | 4.26e-14 | 0.010 | 0.992 | -8.37e-14 | 8.46e-14 |
Omnibus: | 21.730 | Durbin-Watson: | 0.014 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 48.054 |
Skew: | -0.479 | Prob(JB): | 3.67e-11 |
Kurtosis: | 5.176 | Cond. No. | 2.01e+06 |
Creating the statsmodels model for the MALE math data:
p_math_M = sm.OLS(train_data['MATHEMATICS_GROWTH_M'].tolist(), sm.add_constant(X_math_M)).fit()
p_math_M.summary()
Dep. Variable: | y | R-squared: | 0.670 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.554 |
Method: | Least Squares | F-statistic: | 5.752 |
Date: | Mon, 16 May 2022 | Prob (F-statistic): | 1.56e-17 |
Time: | 15:54:27 | Log-Likelihood: | -376.61 |
No. Observations: | 204 | AIC: | 861.2 |
Df Residuals: | 150 | BIC: | 1040. |
Df Model: | 53 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
const | 117.5397 | 125.080 | 0.940 | 0.349 | -129.607 | 364.686 |
x1 | -0.0576 | 0.062 | -0.926 | 0.356 | -0.180 | 0.065 |
x2 | 0.3180 | 0.091 | 3.476 | 0.001 | 0.137 | 0.499 |
x3 | 0.3177 | 0.094 | 3.369 | 0.001 | 0.131 | 0.504 |
x4 | -1.4225 | 1.285 | -1.107 | 0.270 | -3.962 | 1.117 |
x5 | 3.1146 | 1.284 | 2.425 | 0.016 | 0.577 | 5.652 |
x6 | -3.5234 | 1.268 | -2.778 | 0.006 | -6.030 | -1.017 |
x7 | -1.8293 | 1.268 | -1.442 | 0.151 | -4.336 | 0.677 |
x8 | -0.1490 | 1.288 | -0.116 | 0.908 | -2.695 | 2.397 |
x9 | -3.8198 | 1.277 | -2.991 | 0.003 | -6.343 | -1.296 |
x10 | -1.1491 | 1.279 | -0.898 | 0.371 | -3.677 | 1.379 |
x11 | 2.2971 | 1.339 | 1.716 | 0.088 | -0.348 | 4.942 |
x12 | -2.8757 | 1.282 | -2.243 | 0.026 | -5.409 | -0.342 |
x13 | -2.3738 | 1.278 | -1.858 | 0.065 | -4.899 | 0.151 |
x14 | -0.1356 | 1.267 | -0.107 | 0.915 | -2.640 | 2.369 |
x15 | -1.9904 | 1.280 | -1.555 | 0.122 | -4.520 | 0.540 |
x16 | -2.6261 | 1.271 | -2.067 | 0.040 | -5.137 | -0.115 |
x17 | 0.6589 | 1.264 | 0.521 | 0.603 | -1.839 | 3.157 |
x18 | -0.7734 | 1.268 | -0.610 | 0.543 | -3.280 | 1.733 |
x19 | -1.5014 | 1.335 | -1.125 | 0.263 | -4.139 | 1.137 |
x20 | -0.0465 | 1.270 | -0.037 | 0.971 | -2.556 | 2.463 |
x21 | -1.8506 | 1.285 | -1.440 | 0.152 | -4.389 | 0.688 |
x22 | -2.6491 | 1.277 | -2.075 | 0.040 | -5.172 | -0.126 |
x23 | -2.3291 | 1.293 | -1.801 | 0.074 | -4.884 | 0.226 |
x24 | -1.6936 | 1.274 | -1.329 | 0.186 | -4.211 | 0.824 |
x25 | -1.9787 | 1.274 | -1.553 | 0.123 | -4.497 | 0.539 |
x26 | -1.7051 | 1.285 | -1.327 | 0.186 | -4.243 | 0.833 |
x27 | 1.4538 | 1.286 | 1.130 | 0.260 | -1.088 | 3.995 |
x28 | -1.0252 | 1.295 | -0.792 | 0.430 | -3.584 | 1.534 |
x29 | -3.3990 | 1.288 | -2.638 | 0.009 | -5.945 | -0.853 |
x30 | 0.8181 | 1.282 | 0.638 | 0.524 | -1.714 | 3.351 |
x31 | -2.7382 | 1.269 | -2.158 | 0.033 | -5.245 | -0.231 |
x32 | -2.4202 | 1.285 | -1.884 | 0.062 | -4.959 | 0.118 |
x33 | -1.7168 | 1.279 | -1.343 | 0.181 | -4.243 | 0.810 |
x34 | 0.5097 | 1.279 | 0.399 | 0.691 | -2.017 | 3.036 |
x35 | -3.1723 | 1.289 | -2.462 | 0.015 | -5.719 | -0.626 |
x36 | -2.6356 | 1.267 | -2.079 | 0.039 | -5.140 | -0.131 |
x37 | -1.5813 | 1.283 | -1.233 | 0.220 | -4.116 | 0.953 |
x38 | -1.5579 | 1.281 | -1.216 | 0.226 | -4.089 | 0.973 |
x39 | -1.0230 | 1.276 | -0.802 | 0.424 | -3.545 | 1.499 |
x40 | -2.5464 | 1.272 | -2.001 | 0.047 | -5.060 | -0.032 |
x41 | -3.2262 | 1.321 | -2.442 | 0.016 | -5.836 | -0.616 |
x42 | -1.4554 | 1.268 | -1.148 | 0.253 | -3.961 | 1.050 |
x43 | -0.6487 | 1.323 | -0.491 | 0.624 | -3.262 | 1.964 |
x44 | -1.7071 | 1.319 | -1.294 | 0.198 | -4.314 | 0.900 |
x45 | 2.4655 | 1.295 | 1.903 | 0.059 | -0.094 | 5.025 |
x46 | 0.2481 | 1.284 | 0.193 | 0.847 | -2.288 | 2.785 |
x47 | -1.1353 | 1.292 | -0.879 | 0.381 | -3.688 | 1.417 |
x48 | -3.3077 | 1.310 | -2.525 | 0.013 | -5.896 | -0.720 |
x49 | -0.9320 | 1.267 | -0.735 | 0.463 | -3.436 | 1.572 |
x50 | -0.3292 | 1.278 | -0.258 | 0.797 | -2.855 | 2.196 |
x51 | -0.4551 | 1.285 | -0.354 | 0.724 | -2.993 | 2.083 |
x52 | -2.4436 | 1.270 | -1.924 | 0.056 | -4.953 | 0.066 |
x53 | -0.7150 | 1.269 | -0.563 | 0.574 | -3.223 | 1.793 |
Omnibus: | 8.941 | Durbin-Watson: | 1.507 |
---|---|---|---|
Prob(Omnibus): | 0.011 | Jarque-Bera (JB): | 9.440 |
Skew: | -0.408 | Prob(JB): | 0.00891 |
Kurtosis: | 3.667 | Cond. No. | 2.01e+06 |
Creating the statsmodels model for the FEMALE math data:
p_math_F = sm.OLS(train_data['MATHEMATICS_GROWTH_F'].tolist(), sm.add_constant(X_math_F)).fit()
p_math_F.summary()
Dep. Variable: | y | R-squared: | 0.559 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.403 |
Method: | Least Squares | F-statistic: | 3.585 |
Date: | Mon, 16 May 2022 | Prob (F-statistic): | 5.14e-10 |
Time: | 15:54:27 | Log-Likelihood: | -388.09 |
No. Observations: | 204 | AIC: | 884.2 |
Df Residuals: | 150 | BIC: | 1063. |
Df Model: | 53 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
const | -57.9757 | 132.324 | -0.438 | 0.662 | -319.436 | 203.484 |
x1 | 0.0298 | 0.066 | 0.453 | 0.651 | -0.100 | 0.160 |
x2 | 0.1847 | 0.097 | 1.909 | 0.058 | -0.007 | 0.376 |
x3 | 0.3334 | 0.100 | 3.343 | 0.001 | 0.136 | 0.531 |
x4 | -1.7418 | 1.360 | -1.281 | 0.202 | -4.429 | 0.945 |
x5 | 1.5459 | 1.359 | 1.138 | 0.257 | -1.139 | 4.231 |
x6 | -1.0370 | 1.342 | -0.773 | 0.441 | -3.688 | 1.614 |
x7 | -2.1577 | 1.342 | -1.608 | 0.110 | -4.809 | 0.494 |
x8 | -1.0663 | 1.363 | -0.782 | 0.435 | -3.760 | 1.627 |
x9 | -3.0842 | 1.351 | -2.283 | 0.024 | -5.754 | -0.415 |
x10 | -0.2419 | 1.353 | -0.179 | 0.858 | -2.916 | 2.432 |
x11 | 1.6009 | 1.416 | 1.131 | 0.260 | -1.197 | 4.399 |
x12 | -1.9549 | 1.356 | -1.441 | 0.152 | -4.635 | 0.725 |
x13 | -2.0926 | 1.352 | -1.548 | 0.124 | -4.764 | 0.579 |
x14 | 0.1947 | 1.341 | 0.145 | 0.885 | -2.455 | 2.844 |
x15 | -2.7136 | 1.355 | -2.003 | 0.047 | -5.390 | -0.037 |
x16 | -1.2318 | 1.344 | -0.916 | 0.361 | -3.888 | 1.424 |
x17 | 0.1295 | 1.337 | 0.097 | 0.923 | -2.513 | 2.772 |
x18 | -1.5370 | 1.342 | -1.145 | 0.254 | -4.188 | 1.114 |
x19 | -1.9098 | 1.412 | -1.352 | 0.178 | -4.701 | 0.881 |
x20 | -0.2228 | 1.343 | -0.166 | 0.868 | -2.877 | 2.432 |
x21 | -1.9812 | 1.359 | -1.458 | 0.147 | -4.667 | 0.704 |
x22 | -0.7047 | 1.351 | -0.522 | 0.603 | -3.374 | 1.964 |
x23 | -3.0565 | 1.368 | -2.234 | 0.027 | -5.760 | -0.353 |
x24 | -2.4268 | 1.348 | -1.800 | 0.074 | -5.090 | 0.237 |
x25 | -1.4637 | 1.348 | -1.086 | 0.279 | -4.127 | 1.200 |
x26 | -0.7883 | 1.359 | -0.580 | 0.563 | -3.474 | 1.897 |
x27 | 1.0913 | 1.361 | 0.802 | 0.424 | -1.597 | 3.780 |
x28 | -1.6865 | 1.370 | -1.231 | 0.220 | -4.394 | 1.021 |
x29 | -2.5663 | 1.363 | -1.883 | 0.062 | -5.260 | 0.127 |
x30 | -0.9269 | 1.356 | -0.684 | 0.495 | -3.606 | 1.752 |
x31 | -2.5373 | 1.342 | -1.890 | 0.061 | -5.190 | 0.115 |
x32 | -2.8898 | 1.359 | -2.126 | 0.035 | -5.576 | -0.204 |
x33 | -1.5382 | 1.353 | -1.137 | 0.257 | -4.211 | 1.135 |
x34 | 0.2120 | 1.353 | 0.157 | 0.876 | -2.461 | 2.885 |
x35 | -2.3533 | 1.363 | -1.726 | 0.086 | -5.047 | 0.340 |
x36 | -3.5553 | 1.341 | -2.652 | 0.009 | -6.205 | -0.906 |
x37 | -1.1957 | 1.357 | -0.881 | 0.380 | -3.877 | 1.486 |
x38 | -0.6587 | 1.355 | -0.486 | 0.628 | -3.336 | 2.019 |
x39 | -1.0102 | 1.350 | -0.748 | 0.456 | -3.678 | 1.658 |
x40 | -0.7972 | 1.346 | -0.592 | 0.555 | -3.457 | 1.862 |
x41 | -2.2233 | 1.398 | -1.591 | 0.114 | -4.985 | 0.538 |
x42 | 0.3977 | 1.342 | 0.296 | 0.767 | -2.253 | 3.048 |
x43 | 0.0349 | 1.399 | 0.025 | 0.980 | -2.730 | 2.799 |
x44 | -1.6134 | 1.396 | -1.156 | 0.250 | -4.371 | 1.144 |
x45 | 2.3412 | 1.370 | 1.708 | 0.090 | -0.367 | 5.049 |
x46 | 0.1377 | 1.358 | 0.101 | 0.919 | -2.546 | 2.821 |
x47 | -1.9913 | 1.367 | -1.457 | 0.147 | -4.692 | 0.709 |
x48 | -3.3074 | 1.386 | -2.387 | 0.018 | -6.045 | -0.569 |
x49 | 0.6847 | 1.341 | 0.511 | 0.610 | -1.965 | 3.334 |
x50 | -1.2321 | 1.352 | -0.911 | 0.364 | -3.904 | 1.440 |
x51 | 0.4617 | 1.359 | 0.340 | 0.735 | -2.224 | 3.147 |
x52 | -1.1396 | 1.344 | -0.848 | 0.398 | -3.795 | 1.515 |
x53 | -0.3887 | 1.343 | -0.290 | 0.773 | -3.041 | 2.264 |
Omnibus: | 0.595 | Durbin-Watson: | 1.221 |
---|---|---|---|
Prob(Omnibus): | 0.743 | Jarque-Bera (JB): | 0.729 |
Skew: | -0.083 | Prob(JB): | 0.695 |
Kurtosis: | 2.758 | Cond. No. | 2.01e+06 |
Okay - we now have our four models, each with its statistical summary. We can interpret these statistics by checking, for each gender-specific model, which states had p-values below our chosen significance level of 0.05, as those indicate that for that FEMALE or MALE classification, the state carries significant weight in the change in growth of the average reading or math scores.
# accumulate the coefficient names, in the same order as the X columns
const_names = train_data.columns[3:].tolist()
const_names.insert(0, 'YEAR')
M_reading_significant = []
F_reading_significant = []
# iterate through the p-values and, if below 0.05, add the coefficient name to the significant list
for i in range(len(p_reading_M.pvalues) - 1):
    if p_reading_M.pvalues[i + 1] < 0.05:
        M_reading_significant.append(const_names[i])
for i in range(len(p_reading_F.pvalues) - 1):
    if p_reading_F.pvalues[i + 1] < 0.05:
        F_reading_significant.append(const_names[i])
print("Significant Male reading test constants: " + str(M_reading_significant))
print()
print("Significant Female reading test constants: " + str(F_reading_significant))
print()
M_math_significant = []
F_math_significant = []
# iterate through the p-values and, if below 0.05, add the coefficient name to the significant list
for i in range(len(p_math_M.pvalues) - 1):
    if p_math_M.pvalues[i + 1] < 0.05:
        M_math_significant.append(const_names[i])
for i in range(len(p_math_F.pvalues) - 1):
    if p_math_F.pvalues[i + 1] < 0.05:
        F_math_significant.append(const_names[i])
print("Significant Male math test constants: " + str(M_math_significant))
print()
print("Significant Female math test constants: " + str(F_math_significant))
Significant Male reading test constants: ['READING_GROWTH_M'] Significant Female reading test constants: ['READING_GROWTH_F'] Significant Male math test constants: ['READING_GROWTH_M', 'READING_GROWTH_F', 'STATE_ARIZONA', 'STATE_ARKANSAS', 'STATE_CONNECTICUT', 'STATE_FLORIDA', 'STATE_ILLINOIS', 'STATE_MAINE', 'STATE_MONTANA', 'STATE_NEVADA', 'STATE_NEW_YORK', 'STATE_NORTH_CAROLINA', 'STATE_OREGON', 'STATE_PENNSYLVANIA', 'STATE_VERMONT'] Significant Female math test constants: ['READING_GROWTH_F', 'STATE_CONNECTICUT', 'STATE_IDAHO', 'STATE_MARYLAND', 'STATE_NEW_HAMPSHIRE', 'STATE_NORTH_CAROLINA', 'STATE_VERMONT']
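Since several states show up in both gendered math lists, we can pull out the overlap directly as a quick cross-check of the printed output above:
# states flagged as significant in both the MALE and FEMALE math models
both_math = sorted(name for name in set(M_math_significant) & set(F_math_significant)
                   if name.startswith('STATE_'))
print(both_math)  # from the output above: STATE_CONNECTICUT, STATE_NORTH_CAROLINA, STATE_VERMONT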
It seems that for reading, state does not have a significant effect for either the FEMALE or MALE group in any state. For the math models, certain states within both the FEMALE and MALE models have a significant impact on math literacy growth in the United States.
As a reminder, we define growth in math literacy as the change in average math scores for 4th graders on the NAEP test from 2009 to some later year, and we define growth in reading literacy analogously. We wanted to explore how state and gender play a role in math and reading literacy education standards, which our growth metric measures for the different subsets of our population defined by state and gender.
Based on our exploratory analysis of the data, we found that both math and reading literacy are affected by the state, and by the gender and state, in which a student is located, which indicates that education inequality has been persisting since 2009, as the growth metric differs significantly depending on the state, or on the assumed gender (FEMALE or MALE) within a state. It is important to note that this dataset also omits people outside of the male/female classification, so it may not accurately represent their gendered experiences in the American education system.
Analyzing our predictive models based on state for math and reading literacy, our results indicate that the state where a student resides plays a role in determining growth in reading and math literacy. We found that students in states such as Arkansas, Indiana, and Colorado are predicted to have significant negative growth in reading and math, based on the p-values for those states being below 0.05 for both math and reading literacy growth. Hence, we can be 95% confident in rejecting our null hypothesis of there being no relationship between state and math/reading literacy growth, and we can conclude that state has played some significant role in differentiating the growth of literacy in these subjects across the US - indicating that education inequality is persisting across the United States, state by state. This may be because these states contain more rural areas, which have less access to the opportunities that more urban states typically have. Performing this type of analysis can be useful when determining curriculums (especially Common Core, which applies nationwide), because there are vast differences in the quality of education administered (see more here). If students in a particular state are less likely to perform well on math and reading tests, state-specific policies could and should be introduced to change the current standards of education to push toward significant positive growth.
Analyzing our predictive models based on state and gender for math/reading literacy, our results indicate that there is no significant state effect on reading literacy growth for either the FEMALE or MALE group in the United States. Since the p-values for the state coefficients were all above the 0.05 significance level for reading, we cannot reject the null hypothesis that state (within each gender) has no relationship with reading literacy growth - indicating that the state and gender interaction may not play a large role in reading differences in education inequality. This does differ slightly from our exploratory data analysis, which suggested otherwise. For our math models, we found that under both the MALE and FEMALE classifications certain states have a significant impact on math literacy growth, which can be in either direction. Since the p-values for multiple states fell below the 0.05 significance level in both gendered math models, we can reject the null hypothesis that state (within each gender) has no relationship with math literacy growth - once again indicating that education inequality in math literacy still exists within gender and state interactions.
We hope that the models we have curated, and the exploratory data analysis we have done, can be used by policymakers when determining which states, and which gendered demographics within states, need more support in developing math and reading curriculums and hiring educators. State, and gender within state, play roles in math and reading literacy that must be addressed to provide equitable education to everyone.