👤 Dylan A. Lucia
🎒 CMPS6160 - Intro to Data Science
🏫 Tulane University
The project webpage is live here.
Did you know that the World Health Organization found that one in five men in the Americas won't even live to see the age of 50? Many factors contribute to this, but an undeniable one is masculinity.
This project is intended to explore the societal concept of masculinity. Masculinity is defined by Merriam-Webster as "the degree of being masculine or manly." [1]
mas·cu·lin·i·ty /ma-skyə-ˈli-nə-tē/ [noun] : the quality or nature of the male sex : the quality, state, or degree of being masculine or manly
However, just by perusing the internet or speaking with the general public, you may hear in these modern times that traditional views of masculinity are unhealthy and perpetuate dangerous stereotypes, behaviors, and attitudes in male-identifying or male-presenting people. Collectively, this is commonly referred to as toxic masculinity. According to the New York Times, "toxic masculinity is what can come of teaching boys that they can't express emotion openly; that they have to be 'tough all the time'; that anything other than that makes them 'feminine' or weak. (No, it doesn't mean that all men are inherently toxic.)" [2]
Relatedly, even though men across the world benefit from many aspects of male privilege, such as better opportunities, access to wealth & power, and other artifacts of the history of women's oppression, the World Health Organization (WHO) points out that male privilege does not lead to better health outcomes. They cite that men's poorer survival rates stem from many factors, including higher levels of occupational hazards, greater risk-taking, and (due to unhealthy norms regarding masculinity) an aversion to seeking medical help or reporting disease symptoms during medical check-ups. [3]
"[Toxic masculinity is] contributing to higher rates of suicide, homicide, addictions, and road traffic accidents, as well as chronic noncommunicable diseases among men." —PAHO
A report from the Pan American Health Organization (PAHO), an agency within the United Nations (UN) system that serves as the World Health Organization's (WHO) regional office for the Americas, even found that men in the Americas live nearly 6 years less than women. PAHO writes that "societal expectations of men to be providers, to engage in risk-seeking behaviors, to be sexually dominant, and to avoid discussing their emotions or seeking help—behaviors commonly referred to as 'toxic masculinity'—are contributing to higher rates of suicide, homicide, addictions, and road traffic accidents, as well as chronic noncommunicable diseases among men." [4]
Given the effect toxic masculinity has on public health, it is important to explore its nature, where it stems from, and what we can learn moving forward.
The primary data for this project comes from a survey conducted by FiveThirtyEight. [5]
Additionally, state politics will need to be explored. A simplified way to represent state politics is presidential election results. The Federal Election Commission (FEC) publishes data on election finances and results for the country. [6]
These datasets are trustworthy because each comes directly from its original publisher: FiveThirtyEight hosts its own raw survey responses, and the FEC is the authoritative source for federal election results.
As I was registered for this course at the graduate level, I was tasked with completing this project individually. However, that did not mean I did not collaborate with others throughout the course of the project. For example, I greatly utilized the instructional knowledge of my course instructor, Dr. Aron Culotta. I also spoke with some other students in the class and garnered feedback during a presentation I gave about my progress after Milestone 2.
# ╔═════════════════════════════════════════════════════════════════════════╗
# ║ FINAL PROJECT                                                            ║
# ╙─────────────────────────────────────────────────────────────────────────╜
# │ Name: Dylan A. Lucia                                                     │
# │ Course: CMPS6160 - Intro to Data Science                                 │
# │ Semester: Fa23                                                           │
# └─────────────────────────────────────────────────────────────────────────┘
import pandas as pd # we will need pandas for use of dataframes, etc.
# in this project; reference [7]
!pip install openpyxl # Need this library to handle Excel files; reference [14]
from openpyxl import load_workbook
import numpy as np # we will need numpy; reference [8]
import matplotlib.pyplot as plt # additionally, we will need plotting
# capabilities; reference [9]
import seaborn as sns # additional plotting capabilities; reference [10]
!pip install us # get state naming data from the us library; reference [11]
import os
os.environ['DC_STATEHOOD'] = '1' # the us library reads this environment variable at import time, so set it before importing to treat Washington, D.C. as a state
import us # import it
!pip install plotly-express # additional plotting capabilities; reference [13]
import plotly.express as px # import plotly express
#from plotly.offline import init_notebook_mode, iplot, plot
#import plotly.offline as py
#py.init_notebook_mode(connected=True)
We first need to get the published data into our Python Notebook.
The masculinity survey conducted by FiveThirtyEight asked more than 1,600 men over the age of 18 thirty questions related to masculinity, their demographics, the #MeToo movement, and more. FiveThirtyEight publishes all of their data on GitHub, which is how we will access it.
I originally began by cloning the FiveThirtyEight repo, but loading the data took a long time each time because the repo contains so much data. Instead, in the following line, I use Pandas to read the CSV directly from the host URL.
masc_raw_df = pd.read_csv('https://github.com/fivethirtyeight/data/raw/master/masculinity-survey/raw-responses.csv') # save the raw survey data in a DataFrame
masc_raw_df.head(1) # Check if import worked
Unnamed: 0 | StartDate | EndDate | q0001 | q0002 | q0004_0001 | q0004_0002 | q0004_0003 | q0004_0004 | q0004_0005 | ... | q0035 | q0036 | race2 | racethn4 | educ3 | educ4 | age3 | kids | orientation | weight | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 5/10/18 4:01 | 5/10/18 4:06 | Somewhat masculine | Somewhat important | Not selected | Not selected | Not selected | Pop culture | Not selected | ... | Middle Atlantic | Windows Desktop / Laptop | Non-white | Hispanic | College or more | College or more | 35 - 64 | No children | Gay/Bisexual | 1.714026 |
1 rows × 98 columns
The data seems to have imported correctly! However, the headers are hard to understand (what does "q0004_0003" mean, for example?). We will need to clean up the data so we can understand it a little better.
The FEC publishes election results as a PDF booklet or as an Excel file with many worksheets. In this Excel file, we will start with the data in a single worksheet, titled Table 2. Electoral & Pop Vote.
This worksheet is structured like the following sample.
2016 PRESIDENTIAL ELECTORAL AND POPULAR VOTE
STATE | ELECTORAL VOTE: Trump (R) | ELECTORAL VOTE: Clinton (D) | POPULAR VOTE: Trump (R) | POPULAR VOTE: Clinton (D) | POPULAR VOTE: All Others | POPULAR VOTE: Total Vote
---|---|---|---|---|---|---
AL | 9 | | 1,318,255 | 729,547 | 75,570 | 2,123,372
AK | 3 | | 163,387 | 116,454 | 38,767 | 318,608
AZ | 11 | | 1,252,401 | 1,161,167 | 159,597 | 2,573,165
AR | 6 | | 684,872 | 380,494 | 65,310 | 1,130,676
CA | | 55 | 4,483,814 | 8,753,792 | 943,998 | 14,181,604
$\vdots$ | $\vdots$ | $\vdots$ | $\vdots$ | $\vdots$ | $\vdots$ | $\vdots$
elec16_raw_df = pd.read_excel('https://www.fec.gov/documents/1890/federalelections2016.xlsx',
                              sheet_name='Table 2. Electoral & Pop Vote',
                              header=3) # save the raw excel 2016 election
                                        # data in a DataFrame, using the row at
                                        # index 3 (the 4th row) as the header
elec16_raw_df.head(3) # let's check if the data imported correctly
Unnamed: 0 | Trump (R) | Clinton (D) | Trump (R).1 | Clinton (D).1 | All Others | Total Vote | |
---|---|---|---|---|---|---|---|
0 | AL | 9 | NaN | 1318255.0 | 729547.0 | 75570.0 | 2123372.0 |
1 | AK | 3 | NaN | 163387.0 | 116454.0 | 38767.0 | 318608.0 |
2 | AZ | 11 | NaN | 1252401.0 | 1161167.0 | 159597.0 | 2573165.0 |
This DataFrame is also a little "ugly." It has strange headers, and the integers were interpreted as floats; that is a side effect of the blank cells, since pandas represents missing values as NaN, which is a float, forcing the whole column to a float dtype.
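A quick standalone demonstration of that upcasting behavior:
print(pd.Series([1, 2, 3]).dtype) # int64: a clean integer column stays integer
print(pd.Series([1, 2, None]).dtype) # float64: None becomes NaN, which is a float, so the column is upcast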
The raw data's DataFrames are a little unwieldy. Let's tidy/clean them up.
masc_raw_df
Some of the things we need to do to tidy this are: replace non-answers ("Not selected" and "No answer") with NaN, convert the date columns to datetimes, rename the cryptic question headers to something readable, compute each respondent's survey duration, and drop the columns we no longer need.
masc_raw_df.replace('Not selected', np.NaN, inplace=True) # replace instances where someone didn't answer the question with a NaN
masc_raw_df.replace('No answer', np.NaN, inplace=True) # Same as previous line
masc_raw_df['StartDate'] = pd.to_datetime(masc_raw_df['StartDate']) #convert to a datetime
masc_raw_df['EndDate'] = pd.to_datetime(masc_raw_df['EndDate']) # convert to a datetime
renames = {'q0001' : 'manly_you_feel', #create a dict to rename headers with
'q0002' : 'important_seen_manly',
'q0004_0001' : 'where_good_man_father',
'q0004_0002' : 'where_good_man_mother',
'q0004_0003' : 'where_good_man_family',
'q0004_0004' : 'where_good_man_popculture',
'q0004_0005' : 'where_good_man_friends',
'q0004_0006' : 'where_good_man_other',
'q0005' : 'society_unhealthy_pressure',
'q0007_0001' : 'often_friend_prof_advice',
'q0007_0002' : 'often_friend_pers_advice',
'q0007_0003' : 'often_phys_affect_friends',
'q0007_0004' : 'often_cry',
'q0007_0005' : 'often_phys_fight',
'q0007_0006' : 'often_sex_women',
'q0007_0007' : 'often_sex_men',
'q0007_0008' : 'often_watch_sports',
'q0007_0009' : 'often_exercise',
'q0007_0010' : 'often_therapist',
'q0007_0011' : 'often_feel_lonely',
'q0008_0001' : 'worry_height',
'q0008_0002' : 'worry_weight',
'q0008_0003' : 'worry_hair',
'q0008_0004' : 'worry_physique',
'q0008_0005' : 'worry_genitals',
'q0008_0006' : 'worry_style',
'q0008_0007' : 'worry_sex',
'q0008_0008' : 'worry_ment_health',
'q0008_0009' : 'worry_phys_health',
'q0008_0010' : 'worry_finance',
'q0008_0011' : 'worry_provide',
'q0008_0012' : 'worry_none',
'q0009' : 'employment_status',
'q0010_0001' : 'advantage_money',
'q0010_0002' : 'advantage_seriously',
'q0010_0003' : 'advantage_choice',
'q0010_0004' : 'advantage_promotion',
'q0010_0005' : 'advantage_praise',
'q0010_0006' : 'advantage_support',
'q0010_0007' : 'advantage_other',
'q0010_0008' : 'advantage_none',
'q0011_0001' : 'disadvantage_hire_women',
'q0011_0002' : 'disadvantage_accused_sexharass',
'q0011_0003' : 'disadvantage_accused_sexistracist',
'q0011_0004' : 'disadvantage_other',
'q0011_0005' : 'disadvantage_none',
'q0012_0001' : 'seenharassment_confront',
'q0012_0002' : 'seenharassment_hr',
'q0012_0003' : 'seenharassment_manager',
'q0012_0004' : 'seenharassment_victim',
'q0012_0005' : 'seenharassment_nothing',
'q0012_0006' : 'seenharassment_neversee',
'q0012_0007' : 'seenharassment_other',
'q0013' : 'why_no_response',
'q0014' : 'heard_metoo',
'q0015' : 'behavior_work_metoo',
'q0017' : 'first_move',
'q0018' : 'try_pay_date',
'q0019_0001' : 'whypay_right',
'q0019_0002' : 'whypay_makemore',
'q0019_0003' : 'whypay_feelgood',
'q0019_0004' : 'whypay_societalexpectations',
'q0019_0005' : 'whypay_inviterobligation',
'q0019_0006' : 'whypay_seeifdateoffers',
'q0019_0007' : 'whypay_other',
'q0020_0001' : 'physinterest_bodylanguage',
'q0020_0002' : 'physinterest_verbalconsent',
'q0020_0003' : 'physinterest_makemove',
'q0020_0004' : 'physinterest_different',
'q0020_0005' : 'physinterest_unclear',
'q0020_0006' : 'physinterest_other',
'q0021_0001' : 'sexboundaries_wonderpushedtoofar',
'q0021_0002' : 'sexboundaries_talkedpushedtoofar',
'q0021_0003' : 'sexboundaries_askedpartnerpushedtoofar',
'q0021_0004' : 'sexboundaries_none',
'q0022' : 'changed_romantic_behavior_metoo',
'q0024' : 'demog_married',
'q0025_0001' : 'demog_minorchildren',
'q0025_0002' : 'demog_adultchildren',
'q0025_0003' : 'demog_nokids',
'q0026' : 'demog_sexualorientation',
'q0028' : 'demog_race',
'q0029' : 'demog_education',
'q0030' : 'demog_state',
'q0034' : 'demog_income',
'q0035' : 'demog_region',
'q0036' : 'demog_device'}
masc_df = masc_raw_df.rename(columns=renames) # rename the columns
masc_df['duration']=(masc_df['EndDate']-masc_df['StartDate']).dt.total_seconds()/60 # calc duration of survey in min
masc_df.drop(['Unnamed: 0','StartDate','EndDate','weight'], axis=1, inplace=True) # drop unneeded columns
masc_df # check
manly_you_feel | important_seen_manly | where_good_man_father | where_good_man_mother | where_good_man_family | where_good_man_popculture | where_good_man_friends | where_good_man_other | society_unhealthy_pressure | often_friend_prof_advice | ... | demog_region | demog_device | race2 | racethn4 | educ3 | educ4 | age3 | kids | orientation | duration | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Somewhat masculine | Somewhat important | NaN | NaN | NaN | Pop culture | NaN | NaN | Yes | Often | ... | Middle Atlantic | Windows Desktop / Laptop | Non-white | Hispanic | College or more | College or more | 35 - 64 | No children | Gay/Bisexual | 5.0 |
1 | Somewhat masculine | Somewhat important | Father or father figure(s) | NaN | NaN | NaN | NaN | NaN | Yes | Rarely | ... | East North Central | iOS Phone / Tablet | White | White | Some college | Some college | 65 and up | Has children | Straight | 23.0 |
2 | Very masculine | Not too important | Father or father figure(s) | NaN | NaN | NaN | NaN | Other (please specify) | No | Sometimes | ... | East North Central | Windows Desktop / Laptop | White | White | College or more | College or more | 35 - 64 | Has children | Straight | 7.0 |
3 | Very masculine | Not too important | Father or father figure(s) | Mother or mother figure(s) | Other family members | NaN | NaN | NaN | No | Rarely | ... | East North Central | Windows Desktop / Laptop | White | White | Some college | Some college | 65 and up | Has children | NaN | 4.0 |
4 | Very masculine | Very important | NaN | NaN | Other family members | NaN | NaN | NaN | Yes | Sometimes | ... | East North Central | Windows Desktop / Laptop | White | White | College or more | College or more | 35 - 64 | No children | Straight | 7.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1610 | Not very masculine | Not at all important | Father or father figure(s) | Mother or mother figure(s) | Other family members | Pop culture | Friends | NaN | Yes | Rarely | ... | West North Central | iOS Phone / Tablet | White | White | Some college | Some college | 18 - 34 | No children | Straight | 4.0 |
1611 | Very masculine | Very important | Father or father figure(s) | NaN | Other family members | NaN | NaN | NaN | Yes | Often | ... | NaN | iOS Phone / Tablet | Non-white | Hispanic | High school or less | High school or less | 35 - 64 | Has children | Straight | 9.0 |
1612 | Somewhat masculine | Somewhat important | NaN | Mother or mother figure(s) | NaN | NaN | NaN | NaN | Yes | NaN | ... | South Atlantic | Windows Desktop / Laptop | White | White | Some college | Some college | 35 - 64 | Has children | Straight | 17.0 |
1613 | Somewhat masculine | Somewhat important | NaN | Mother or mother figure(s) | NaN | NaN | NaN | NaN | Yes | Sometimes | ... | Mountain | Windows Desktop / Laptop | White | White | High school or less | High school or less | 18 - 34 | No children | Straight | 6.0 |
1614 | Very masculine | Not at all important | Father or father figure(s) | Mother or mother figure(s) | NaN | NaN | NaN | NaN | No | Often | ... | Middle Atlantic | iOS Phone / Tablet | White | White | Some college | Some college | 18 - 34 | No children | Straight | 5.0 |
1615 rows × 95 columns
masc_df.often_cry.value_counts()
Rarely                       684
Sometimes                    525
Never, but open to it        152
Never, and not open to it     98
Often                         65
Name: often_cry, dtype: int64
We now have a (very wide, but) much more understandable DataFrame to work from.
elec16_raw_df
Some of the things we need to do to clean this are: rename the unhelpful column headers, drop the non-state rows at the bottom, fill the empty electoral-vote cells with 0, and convert the vote counts (which contain stray asterisks) to integers.
elec16_df = elec16_raw_df.drop(index=list(range(51, 58))).reset_index(drop=True) # drop the bottom rows
renames2 = {'Unnamed: 0' : 'State', # create a dictionary of values to rename
'Trump (R)' : 'Electoral_Trump',
'Clinton (D)' : 'Electoral_Clinton',
'Trump (R).1' : 'Popular_Trump',
'Clinton (D).1' : 'Popular_Clinton',
'All Others' : 'Popular_Others',
'Total Vote' : 'Popular_Total'}
elec16_df.rename(columns=renames2, inplace=True) # rename the cols inplace
# elec16_df.drop(['Electoral_Trump','Electoral_Clinton'], axis=1, inplace=True) # drop the cols we don't need
cols = [col for col in elec16_df.columns if col != 'State']
elec16_df.fillna(0, inplace=True)
for c in cols:
    elec16_df[c] = pd.to_numeric(elec16_df[c].astype(str).str.replace('*', '', regex=False), errors='coerce').fillna(0).astype(int) # strip footnote asterisks literally (regex=False avoids a FutureWarning), then convert to int
floats = elec16_df.select_dtypes(include='float').columns # select columns that
# are floats
elec16_df[floats] = elec16_df[floats].astype(int) # set them to integer
elec16_df.head(51) # check the DataFrame
State | Electoral_Trump | Electoral_Clinton | Popular_Trump | Popular_Clinton | Popular_Others | Popular_Total | |
---|---|---|---|---|---|---|---|
0 | AL | 9 | 0 | 1318255 | 729547 | 75570 | 2123372 |
1 | AK | 3 | 0 | 163387 | 116454 | 38767 | 318608 |
2 | AZ | 11 | 0 | 1252401 | 1161167 | 159597 | 2573165 |
3 | AR | 6 | 0 | 684872 | 380494 | 65310 | 1130676 |
4 | CA | 0 | 55 | 4483814 | 8753792 | 943998 | 14181604 |
5 | CO | 0 | 9 | 1202484 | 1338870 | 238893 | 2780247 |
6 | CT | 0 | 7 | 673215 | 897572 | 74133 | 1644920 |
7 | DE | 0 | 3 | 185127 | 235603 | 23084 | 443814 |
8 | DC | 0 | 3 | 12723 | 282830 | 15715 | 311268 |
9 | FL | 29 | 0 | 4617886 | 4504975 | 297178 | 9420039 |
10 | GA | 16 | 0 | 2089104 | 1877963 | 147665 | 4114732 |
11 | HI | 0 | 3 | 128847 | 266891 | 33199 | 428937 |
12 | ID | 4 | 0 | 409055 | 189765 | 91435 | 690255 |
13 | IL | 0 | 20 | 2146015 | 3090729 | 299680 | 5536424 |
14 | IN | 11 | 0 | 1557286 | 1033126 | 144546 | 2734958 |
15 | IA | 6 | 0 | 800983 | 653669 | 111379 | 1566031 |
16 | KS | 6 | 0 | 671018 | 427005 | 86379 | 1184402 |
17 | KY | 8 | 0 | 1202971 | 628854 | 92324 | 1924149 |
18 | LA | 8 | 0 | 1178638 | 780154 | 70240 | 2029032 |
19 | ME | 1 | 3 | 335593 | 357735 | 54599 | 747927 |
20 | MD | 0 | 10 | 943169 | 1677928 | 160349 | 2781446 |
21 | MA | 0 | 11 | 1090893 | 1995196 | 238957 | 3325046 |
22 | MI | 16 | 0 | 2279543 | 2268839 | 250902 | 4799284 |
23 | MN | 0 | 10 | 1322951 | 1367716 | 254146 | 2944813 |
24 | MS | 6 | 0 | 700714 | 485131 | 23512 | 1209357 |
25 | MO | 10 | 0 | 1594511 | 1071068 | 143026 | 2808605 |
26 | MT | 3 | 0 | 279240 | 177709 | 40198 | 497147 |
27 | NE | 5 | 0 | 495961 | 284494 | 63772 | 844227 |
28 | NV | 0 | 6 | 512058 | 539260 | 74067 | 1125385 |
29 | NH | 0 | 4 | 345790 | 348526 | 49980 | 744296 |
30 | NJ | 0 | 14 | 1601933 | 2148278 | 123835 | 3874046 |
31 | NM | 0 | 5 | 319667 | 385234 | 93418 | 798319 |
32 | NY | 0 | 29 | 2819533 | 4556118 | 345791 | 7721442 |
33 | NC | 15 | 0 | 2362631 | 2189316 | 189617 | 4741564 |
34 | ND | 3 | 0 | 216794 | 93758 | 33808 | 344360 |
35 | OH | 18 | 0 | 2841005 | 2394164 | 261318 | 5496487 |
36 | OK | 7 | 0 | 949136 | 420375 | 83481 | 1452992 |
37 | OR | 0 | 7 | 782403 | 1002106 | 216827 | 2001336 |
38 | PA | 20 | 0 | 2970733 | 2926441 | 268304 | 6165478 |
39 | RI | 0 | 4 | 180543 | 252525 | 31076 | 464144 |
40 | SC | 9 | 0 | 1155389 | 855373 | 92265 | 2103027 |
41 | SD | 3 | 0 | 227721 | 117458 | 24914 | 370093 |
42 | TN | 11 | 0 | 1522925 | 870695 | 114407 | 2508027 |
43 | TX | 36 | 0 | 4685047 | 3877868 | 406311 | 8969226 |
44 | UT | 6 | 0 | 515231 | 310676 | 305523 | 1131430 |
45 | VT | 0 | 3 | 95369 | 178573 | 41125 | 315067 |
46 | VA | 0 | 13 | 1769443 | 1981473 | 233715 | 3984631 |
47 | WA | 0 | 8 | 1221747 | 1742718 | 352554 | 3317019 |
48 | WV | 5 | 0 | 489371 | 188794 | 36258 | 714423 |
49 | WI | 10 | 0 | 1405284 | 1382536 | 188330 | 2976150 |
50 | WY | 3 | 0 | 174419 | 55973 | 25457 | 255849 |
After checking this DataFrame, we can see it has 51 rows and all of the numbers imported correctly! It is now much more usable.
Let's begin by exploring the Masculinity Survey data.
We can first explore the bare question about whether the respondent feels that society puts pressure on them that they feel is unhealthy. We can make a simple bar plot to visualize.
masc_df['society_unhealthy_pressure'].value_counts().plot.bar() # create a bar plot for a binary question about whether society puts pressure on men
plt.title('Do you think society puts pressure on men that is unhealthy for them?') # title the plot
plt.xlabel('Responses')
plt.ylabel('Count')
[Figure: bar plot of response counts for "Do you think society puts pressure on men that is unhealthy for them?"]
It is interesting to see that a not-insignificant number of respondents feel that society does not put unhealthy pressure on them.
Next, we can explore what the distribution of respondents were, by age.
ages=masc_df['age3'] # save ages as var
ages.value_counts() # 538 grouped ages rather than keeping them a raw number, so let's look at the value counts
# From the value counts, we can see that age has been grouped into 3 different categories,
# and as such, we don't have as much fine granularity with it as a metric.
35 - 64      855
65 and up    627
18 - 34      133
Name: age3, dtype: int64
We can see that a large majority of respondents are middle-aged, a relatively large share are of retirement age, and only a small share are younger.
Now, do we have a decent representation of the country? Well, let's see if we can figure out a way to visualize that. We can get the value counts of each state in the responses from the demographic questions.
masc_df['demog_state'].value_counts() # get the value counts of the states.
California                   174
Texas                        127
Florida                      116
New York                      98
Illinois                      77
Pennsylvania                  70
Washington                    61
New Jersey                    49
Michigan                      46
Georgia                       45
Ohio                          44
Arizona                       43
North Carolina                42
Virginia                      42
Oregon                        41
Wisconsin                     39
Minnesota                     35
Maryland                      31
Indiana                       30
Massachusetts                 29
Colorado                      29
Missouri                      27
Nevada                        25
Utah                          21
Connecticut                   20
South Carolina                20
New Mexico                    18
Kentucky                      17
Alabama                       17
Tennessee                     17
Kansas                        16
Iowa                          15
Louisiana                     12
West Virginia                 11
Nebraska                      11
Montana                       10
Oklahoma                      10
Idaho                         10
Maine                          9
Hawaii                         9
New Hampshire                  8
Arkansas                       8
Vermont                        7
Mississippi                    7
Delaware                       6
Wyoming                        5
Rhode Island                   3
Alaska                         3
South Dakota                   2
North Dakota                   2
District of Columbia (DC)      1
Name: demog_state, dtype: int64
Intuitively, it makes sense that California, Texas, and Florida have some of the highest response counts. However, a Series like this is hard to visualize, so we can try attaching the data to a map of the United States. We will need Plotly Express, a Plotly package that lets us create live/interactive plots!
states=masc_df['demog_state'] # save states as a var
statemap = us.states.mapping('abbr', 'name') # create a map of state names/abbreviations
statemap['DC'] = 'District of Columbia (DC)' # the survey labels D.C. as 'District of Columbia (DC)', so override the mapping to match its spelling
statemap_inv = {v: k for k, v in statemap.items()} # this inverts the map created by the us package [ref 12]
masc_df['demog_state2'] = masc_df['demog_state'].map(statemap_inv) # create another column of the state abbreviations
response_counts = masc_df['demog_state2'].value_counts().reset_index() # get response rates
response_counts.columns = ['state_abbrev', 'response_count'] # rename the columns
px.choropleth(response_counts, #create a map of response counts for the data
locations='state_abbrev',
locationmode='USA-states',
scope='usa',
color='response_count',
color_continuous_scale='Greens', # this color scale seems to make it easiest to see
labels={'response_count' : 'Responses', 'state_abbrev' : 'State'},
title='Response Counts by State'
)
# fig1 = px.choropleth(response_counts, #create a map of response counts for the data
# locations='state_abbrev',
# locationmode='USA-states',
# scope='usa',
# color='response_count',
# color_continuous_scale='Greens', # this color scale seems to make it easiest to see
# labels={'response_count' : 'Responses', 'state_abbrev' : 'State'},
# title='Response Counts by State'
# )
# plot(fig1,show_link = False) # this creates a temp html file to download and manually insert in the overall notebook html export
Well, we can see that there is a heavy skew toward the higher-population states, but it still seems to be a reasonably decent distribution across the country.
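To put a rough number on that skew, here is a sketch of a representativeness check, using each state's share of total 2016 votes cast (the FEC's Popular_Total column) as a crude stand-in for adult population:
resp_share = response_counts.set_index('state_abbrev')['response_count'] # survey responses per state
resp_share = resp_share / resp_share.sum() # each state's share of all responses
vote_share = elec16_df.set_index('State')['Popular_Total'] # total 2016 votes per state
vote_share = vote_share / vote_share.sum() # each state's share of all votes cast
(resp_share - vote_share).sort_values().head() # the most under-represented states relative to turnout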
# sns.countplot(x='society_unhealthy_pressure', hue='age3', data=masc_df) # a seaborn alternative; the original attempt used the pre-rename column 'q0005' and a DataFrame named df, which no longer exist
pressure = pd.crosstab(masc_df['society_unhealthy_pressure'],masc_df['age3']) # compute the frequencies
                                                                              # between age and the response to the question
pressure_yes_no = pressure.div(pressure.sum(axis=0), axis=1) # calc the y/n percentages for each age group
pressure_yes = pressure.loc['Yes'].div(pressure.sum(axis=0)) # calc just the y percent for each age group
pressure_yes.T.plot.bar() # plot
plt.title('Do you think society puts pressure on men that is unhealthy for them?') # title
plt.xlabel('Response'), plt.ylabel('Response Percents') # axis labels
[Figure: bar plot of the percentage of "Yes" responses by age group]
pressure_yes
age3
18 - 34      0.765152
35 - 64      0.603306
65 and up    0.550562
dtype: float64
From a plot like this, we can see that the younger the respondent, the more likely he is to feel that society puts unhealthy pressure on men. Most noticeably, the retirement-age group is fairly evenly split, compared with the younger groups.
masc_df['try_pay_date'].value_counts()[["Always","Often","Sometimes","Rarely", "Never"]].plot.bar()
plt.title('How often do you try to be the one who pays on a date?')
[Figure: bar plot of responses to "How often do you try to be the one who pays on a date?"]
This is interesting because it shows that a significant majority of men try to be the person who pays on a date. Hopefully this can be explored further to find out WHY; perhaps whether the motivation is extrinsic or intrinsic.
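As a first, hedged look at that question, the whypay_* columns from the renaming above record each respondent's selected reasons (NaN when not selected), so counting non-null entries roughly ranks intrinsic reasons (e.g., it makes them feel good) against extrinsic ones (e.g., societal expectations):
whypay_cols = [c for c in masc_df.columns if c.startswith('whypay_')] # the reason columns from q0019
masc_df[whypay_cols].notna().sum().sort_values(ascending=False) # how often each reason was selected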
Later, let's see if we can identify some correlations in survey responses.
masc_df[['duration','demog_state2']].corr(numeric_only=True) # only 'duration' is numeric here, so this yields a trivial 1x1 matrix #TK
duration | |
---|---|
duration | 1.0 |
Let's begin exploring the data from the election by figuring out some measures of central tendency.
elec16_df.describe()
Electoral_Trump | Electoral_Clinton | Popular_Trump | Popular_Clinton | Popular_Others | Popular_Total | |
---|---|---|---|---|---|---|
count | 51.000000 | 51.000000 | 5.100000e+01 | 5.100000e+01 | 51.000000 | 5.100000e+01 |
mean | 5.960784 | 4.450980 | 1.234997e+06 | 1.291245e+06 | 153547.725490 | 2.679790e+06 |
std | 7.720002 | 9.297986 | 1.142383e+06 | 1.548085e+06 | 153314.934414 | 2.752630e+06 |
min | 0.000000 | 0.000000 | 1.272300e+04 | 5.597300e+04 | 15715.000000 | 2.558490e+05 |
25% | 0.000000 | 0.000000 | 3.774225e+05 | 2.975850e+05 | 52289.500000 | 7.461115e+05 |
50% | 3.000000 | 0.000000 | 9.491360e+05 | 7.801540e+05 | 93418.000000 | 2.001336e+06 |
75% | 9.000000 | 5.500000 | 1.575898e+06 | 1.810340e+06 | 236304.000000 | 3.321032e+06 |
max | 36.000000 | 55.000000 | 4.685047e+06 | 8.753792e+06 | 943998.000000 | 1.418160e+07 |
elec16_df.sum(numeric_only=True)
Electoral_Trump             304
Electoral_Clinton           227
Popular_Trump          62984828
Popular_Clinton        65853514
Popular_Others          7830934
Popular_Total         136669276
dtype: int64
So we can see that the electoral vote and the popular vote did not favor the same candidate, which was a point of hot debate in the 2016 election; it raised a lot of awareness of the antiquated Electoral College system in the United States and spurred some movement toward ideas like a ranked-choice system.
Now, let's create some measures of how conservatively or liberally a particular state voted, based on the popular vote.
elec16_df['Popular_Red_Score'] = elec16_df['Popular_Trump']/elec16_df['Popular_Total']
elec16_df['Popular_Blu_Score'] = 1 - elec16_df['Popular_Red_Score']
elec16_df.head(1)
State | Electoral_Trump | Electoral_Clinton | Popular_Trump | Popular_Clinton | Popular_Others | Popular_Total | Popular_Red_Score | Popular_Blu_Score | |
---|---|---|---|---|---|---|---|---|---|
0 | AL | 9 | 0 | 1318255 | 729547 | 75570 | 2123372 | 0.620831 | 0.379169 |
elec16_df.loc[elec16_df['Popular_Red_Score'].idxmax(),
['State','Popular_Red_Score']] # Print the state with the highest percentage of
# voters voting for Trump
State                      WV
Popular_Red_Score    0.684988
Name: 48, dtype: object
elec16_df.loc[elec16_df['Popular_Blu_Score'].idxmax(),
['State','Popular_Blu_Score']] # Print the state with the highest percentage of
# voters voting for Clinton
State                      DC
Popular_Blu_Score    0.959125
Name: 8, dtype: object
Let's do the same thing with electoral votes, out of curiosity (since we've got them!).
elec16_df['Electoral_Total'] = elec16_df['Electoral_Trump']+elec16_df['Electoral_Clinton']
elec16_df['Electoral_Red_Score'] = elec16_df['Electoral_Trump']/elec16_df['Electoral_Total']
elec16_df['Electoral_Blu_Score'] = 1 - elec16_df['Electoral_Red_Score']
elec16_df.head(1)
State | Electoral_Trump | Electoral_Clinton | Popular_Trump | Popular_Clinton | Popular_Others | Popular_Total | Popular_Red_Score | Popular_Blu_Score | Electoral_Total | Electoral_Red_Score | Electoral_Blu_Score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | AL | 9 | 0 | 1318255 | 729547 | 75570 | 2123372 | 0.620831 | 0.379169 | 9 | 1.0 | 0.0 |
px.choropleth(elec16_df, #create a map of how states voted based off pop vote
locations='State',
locationmode='USA-states',
scope='usa',
color='Popular_Blu_Score',
color_continuous_scale='Rdbu', # this color scale seems to make it easiest to see
labels={'Popular_Blu_Score' : '% Dem'},
title='Popular Vote',
range_color=(0, 1)
)
We seem to have replicated the election maps that were widely published around the 2016 election, so we can be confident that we properly imported and processed this data so far. While we're at it, we may as well do the same thing for the electoral vote.
px.choropleth(elec16_df, #create a map of how states voted based off electoral vote
locations='State',
locationmode='USA-states',
scope='usa',
color='Electoral_Blu_Score',
color_continuous_scale='Rdbu', # this color scale seems to make it easiest to see
labels={'Electoral_Blu_Score' : '% Dem'},
title='Electoral Vote',
range_color=(0, 1)
)
This also seems to match the election results published around the 2016 election.
Now, let's connect the two sources of data by mapping each respondent's state-level likelihood of voting Trump vs. Clinton into the masculinity data.
both_df = masc_df.merge(elec16_df[['State', 'Popular_Red_Score', 'Popular_Blu_Score']],
how='left',
left_on='demog_state2',
right_on='State').drop(columns=['State']).rename(columns={'Popular_Red_Score':'chance_Red','Popular_Blu_Score':'chance_Blu'})
both_df.head(1)
manly_you_feel | important_seen_manly | where_good_man_father | where_good_man_mother | where_good_man_family | where_good_man_popculture | where_good_man_friends | where_good_man_other | society_unhealthy_pressure | often_friend_prof_advice | ... | racethn4 | educ3 | educ4 | age3 | kids | orientation | duration | demog_state2 | chance_Red | chance_Blu | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Somewhat masculine | Somewhat important | NaN | NaN | NaN | Pop culture | NaN | NaN | Yes | Often | ... | Hispanic | College or more | College or more | 35 - 64 | No children | Gay/Bisexual | 5.0 | NY | 0.365156 | 0.634844 |
1 rows × 98 columns
Now we can look at all of the columns we have, and select the ones we are most interested in. We can then clean up and try looking at a correlation matrix.
list(both_df.columns)
['manly_you_feel', 'important_seen_manly', 'where_good_man_father', 'where_good_man_mother', 'where_good_man_family', 'where_good_man_popculture', 'where_good_man_friends', 'where_good_man_other', 'society_unhealthy_pressure', 'often_friend_prof_advice', 'often_friend_pers_advice', 'often_phys_affect_friends', 'often_cry', 'often_phys_fight', 'often_sex_women', 'often_sex_men', 'often_watch_sports', 'often_exercise', 'often_therapist', 'often_feel_lonely', 'worry_height', 'worry_weight', 'worry_hair', 'worry_physique', 'worry_genitals', 'worry_style', 'worry_sex', 'worry_ment_health', 'worry_phys_health', 'worry_finance', 'worry_provide', 'worry_none', 'employment_status', 'advantage_money', 'advantage_seriously', 'advantage_choice', 'advantage_promotion', 'advantage_praise', 'advantage_support', 'advantage_other', 'advantage_none', 'disadvantage_hire_women', 'disadvantage_accused_sexharass', 'disadvantage_accused_sexistracist', 'disadvantage_other', 'disadvantage_none', 'seenharassment_confront', 'seenharassment_hr', 'seenharassment_manager', 'seenharassment_victim', 'seenharassment_nothing', 'seenharassment_neversee', 'seenharassment_other', 'why_no_response', 'heard_metoo', 'behavior_work_metoo', 'first_move', 'try_pay_date', 'whypay_right', 'whypay_makemore', 'whypay_feelgood', 'whypay_societalexpectations', 'whypay_inviterobligation', 'whypay_seeifdateoffers', 'whypay_other', 'physinterest_bodylanguage', 'physinterest_verbalconsent', 'physinterest_makemove', 'physinterest_different', 'physinterest_unclear', 'physinterest_other', 'sexboundaries_wonderpushedtoofar', 'sexboundaries_talkedpushedtoofar', 'sexboundaries_askedpartnerpushedtoofar', 'sexboundaries_none', 'changed_romantic_behavior_metoo', 'demog_married', 'demog_minorchildren', 'demog_adultchildren', 'demog_nokids', 'demog_sexualorientation', 'demog_race', 'demog_education', 'demog_state', 'demog_income', 'demog_region', 'demog_device', 'race2', 'racethn4', 'educ3', 'educ4', 'age3', 'kids', 'orientation', 'duration', 'demog_state2', 'chance_Red', 'chance_Blu']
selection= ['manly_you_feel',
'important_seen_manly',
'disadvantage_accused_sexharass',
'disadvantage_accused_sexistracist',
'society_unhealthy_pressure',
'often_phys_affect_friends',
'often_cry',
'often_therapist',
'often_feel_lonely',
'worry_height',
'worry_weight',
'worry_ment_health',
'seenharassment_confront',
'seenharassment_nothing',
'changed_romantic_behavior_metoo',
'demog_married',
'demog_sexualorientation',
'demog_race',
'demog_education',
'age3',
'chance_Red',
'chance_Blu'] # select interesting columns to analyze
selected=both_df[selection].copy() # copy into a new df (an explicit .copy() avoids a SettingWithCopyWarning below)
cols=['worry_height','worry_weight','worry_ment_health','seenharassment_confront','seenharassment_nothing'] # cols that are already dummies from 538
selected[cols] = selected[cols].notna().astype(int) # convert the preceding cols to binary since they were already dummied from 538
selected = pd.get_dummies(selected) # create dummies for the categorical answers
selected.corr() # create a correlation matrix
worry_height | worry_weight | worry_ment_health | seenharassment_confront | seenharassment_nothing | chance_Red | chance_Blu | manly_you_feel_Not at all masculine | manly_you_feel_Not very masculine | manly_you_feel_Somewhat masculine | ... | demog_race_White | demog_education_Associate's degree | demog_education_College graduate | demog_education_Did not complete high school | demog_education_High school or G.E.D. | demog_education_Post graduate degree | demog_education_Some college | age3_18 - 34 | age3_35 - 64 | age3_65 and up | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
worry_height | 1.000000 | 0.075604 | 0.081325 | 0.041220 | 0.094876 | 0.017997 | -0.017997 | -0.005698 | 0.045565 | -0.007816 | ... | -0.101198 | 0.002512 | 0.019970 | 0.005104 | -0.012139 | 0.001380 | -0.019866 | 0.149044 | -0.002261 | -0.081756 |
worry_weight | 0.075604 | 1.000000 | 0.139601 | 0.005132 | 0.040521 | 0.038940 | -0.038940 | -0.070816 | 0.025916 | 0.091440 | ... | 0.039716 | 0.026617 | -0.000729 | -0.052352 | -0.016341 | 0.057788 | -0.059789 | -0.033790 | 0.056007 | -0.038301 |
worry_ment_health | 0.081325 | 0.139601 | 1.000000 | -0.019759 | 0.035724 | 0.024612 | -0.024612 | 0.024045 | 0.071813 | 0.071196 | ... | 0.005586 | 0.017297 | -0.030251 | -0.000578 | 0.035621 | -0.034205 | 0.037085 | 0.184772 | 0.059781 | -0.165452 |
seenharassment_confront | 0.041220 | 0.005132 | -0.019759 | 1.000000 | 0.033803 | -0.017310 | 0.017310 | -0.024616 | -0.051439 | 0.007086 | ... | 0.006803 | -0.016689 | -0.015710 | -0.019872 | -0.007076 | 0.056133 | -0.023908 | 0.015134 | 0.037769 | -0.047219 |
seenharassment_nothing | 0.094876 | 0.040521 | 0.035724 | 0.033803 | 1.000000 | 0.008548 | -0.008548 | 0.000754 | 0.053227 | 0.014001 | ... | -0.068239 | -0.018648 | 0.049378 | -0.020303 | 0.027254 | -0.052260 | 0.001318 | 0.052064 | 0.094448 | -0.126100 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
demog_education_Post graduate degree | 0.001380 | 0.057788 | -0.034205 | 0.056133 | -0.052260 | -0.064685 | 0.064685 | -0.044185 | -0.005438 | 0.047317 | ... | 0.076083 | -0.207169 | -0.446289 | -0.074864 | -0.214032 | 1.000000 | -0.306422 | -0.096944 | -0.160433 | 0.218996 |
demog_education_Some college | -0.019866 | -0.059789 | 0.037085 | -0.023908 | 0.001318 | 0.039140 | -0.039140 | -0.009071 | -0.004039 | -0.007547 | ... | -0.049023 | -0.149220 | -0.321454 | -0.053923 | -0.154164 | -0.306422 | 1.000000 | 0.023135 | 0.062571 | -0.077135 |
age3_18 - 34 | 0.149044 | -0.033790 | 0.184772 | 0.015134 | 0.052064 | -0.007517 | 0.007517 | 0.102869 | 0.051249 | -0.027142 | ... | -0.153853 | -0.040504 | 0.022175 | 0.005380 | 0.122187 | -0.096944 | 0.023135 | 1.000000 | -0.317744 | -0.238648 |
age3_35 - 64 | -0.002261 | 0.056007 | 0.059781 | 0.037769 | 0.094448 | 0.061491 | -0.061491 | -0.035083 | -0.015235 | 0.019124 | ... | -0.108140 | 0.093076 | 0.019572 | 0.009662 | 0.041382 | -0.160433 | 0.062571 | -0.317744 | 1.000000 | -0.844951 |
age3_65 and up | -0.081756 | -0.038301 | -0.165452 | -0.047219 | -0.126100 | -0.058738 | 0.058738 | -0.022095 | -0.013304 | -0.004276 | ... | 0.197540 | -0.072479 | -0.032554 | -0.012930 | -0.111306 | 0.218996 | -0.077135 | -0.238648 | -0.844951 | 1.000000 |
64 rows × 64 columns
It's a little hard to interpret this. Let's try visualizing it!
corr_matr = selected.corr()
np.fill_diagonal(corr_matr.values, np.nan)
plt.figure(figsize=(12, 10))
sns.heatmap(corr_matr, annot=False, cmap='coolwarm', fmt=".2f")
plt.show()
This is a little tough to explore, so we can reproduce the plot using Plotly-express to make it interactive. This way we can hover and zoom to find interesting points.
fig = px.imshow(corr_matr,
labels=dict(x="Feature", y="Feature", color="Correlation"),
x=corr_matr.columns,
y=corr_matr.columns)
fig.show()
It's interesting to point out that "I never cry and am not open to it," is quite highly correlated (compared to everything else) with "I never show physical affection to my friends and am not open to it," with a correlation of 0.271. Some other interesting correlations are listed below.
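Rather than hunting for these by hovering, we can also sketch a small helper that ranks the strongest off-diagonal correlations (corr_matr already has its diagonal set to NaN, which stack() conveniently drops):
pairs = corr_matr.stack().rename('r').reset_index() # long format: one row per (feature, feature) pair
pairs = pairs[pairs['level_0'] < pairs['level_1']] # keep each unordered pair only once
pairs.reindex(pairs['r'].abs().sort_values(ascending=False).index).head(10) # top 10 pairs by |correlation|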
We can also look at the strongest positive and negative correlations for specific variables of interest:
interested_columns = ['chance_Red',
'chance_Blu',
'demog_education_Did not complete high school',
'demog_education_High school or G.E.D.',
'demog_education_College graduate',
'demog_education_Post graduate degree',
'important_seen_manly_Not at all important',
'important_seen_manly_Very important',
'often_cry_Often',
'demog_married_Divorced',
'society_unhealthy_pressure_Yes',
'society_unhealthy_pressure_No']
for interested in interested_columns:
# Find the column name with the maximum/min correlation to interested:
max_corr_column = corr_matr[interested].idxmax()
min_corr_column = corr_matr[interested].idxmin()
# Find the corresponding maximum/min correlation value:
max_corr_value = corr_matr.loc[interested, max_corr_column]
min_corr_value = corr_matr.loc[interested, min_corr_column]
print(f"The max corr. w/ '{interested}' is found with '{max_corr_column}' with a correlation of {max_corr_value:.2f}.")
print(f"The min corr. w/ '{interested}' is found with '{min_corr_column}' with a correlation of {min_corr_value:.2f}.\n-")
The max corr. w/ 'chance_Red' is found with 'demog_married_Married' with a correlation of 0.06.
The min corr. w/ 'chance_Red' is found with 'chance_Blu' with a correlation of -1.00.
-
The max corr. w/ 'chance_Blu' is found with 'often_phys_affect_friends_Often' with a correlation of 0.08.
The min corr. w/ 'chance_Blu' is found with 'chance_Red' with a correlation of -1.00.
-
The max corr. w/ 'demog_education_Did not complete high school' is found with 'changed_romantic_behavior_metoo_Yes' with a correlation of 0.10.
The min corr. w/ 'demog_education_Did not complete high school' is found with 'changed_romantic_behavior_metoo_No' with a correlation of -0.08.
-
The max corr. w/ 'demog_education_High school or G.E.D.' is found with 'age3_18 - 34' with a correlation of 0.12.
The min corr. w/ 'demog_education_High school or G.E.D.' is found with 'demog_education_College graduate' with a correlation of -0.22.
-
The max corr. w/ 'demog_education_College graduate' is found with 'disadvantage_accused_sexistracist_Greater risk of being accused of being sexist or racist' with a correlation of 0.06.
The min corr. w/ 'demog_education_College graduate' is found with 'demog_education_Post graduate degree' with a correlation of -0.45.
-
The max corr. w/ 'demog_education_Post graduate degree' is found with 'age3_65 and up' with a correlation of 0.22.
The min corr. w/ 'demog_education_Post graduate degree' is found with 'demog_education_College graduate' with a correlation of -0.45.
-
The max corr. w/ 'important_seen_manly_Not at all important' is found with 'manly_you_feel_Not at all masculine' with a correlation of 0.24.
The min corr. w/ 'important_seen_manly_Not at all important' is found with 'important_seen_manly_Somewhat important' with a correlation of -0.33.
-
The max corr. w/ 'important_seen_manly_Very important' is found with 'manly_you_feel_Very masculine' with a correlation of 0.27.
The min corr. w/ 'important_seen_manly_Very important' is found with 'important_seen_manly_Somewhat important' with a correlation of -0.30.
-
The max corr. w/ 'often_cry_Often' is found with 'often_therapist_Often' with a correlation of 0.16.
The min corr. w/ 'often_cry_Often' is found with 'often_cry_Rarely' with a correlation of -0.18.
-
The max corr. w/ 'demog_married_Divorced' is found with 'age3_35 - 64' with a correlation of 0.11.
The min corr. w/ 'demog_married_Divorced' is found with 'demog_married_Married' with a correlation of -0.50.
-
The max corr. w/ 'society_unhealthy_pressure_Yes' is found with 'often_feel_lonely_Often' with a correlation of 0.14.
The min corr. w/ 'society_unhealthy_pressure_Yes' is found with 'society_unhealthy_pressure_No' with a correlation of -0.98.
-
The max corr. w/ 'society_unhealthy_pressure_No' is found with 'often_feel_lonely_Never, and not open to it' with a correlation of 0.15.
The min corr. w/ 'society_unhealthy_pressure_No' is found with 'society_unhealthy_pressure_Yes' with a correlation of -0.98.
-
It is perhaps most interesting to me that the strongest correlation for men who did not complete high school is with having changed their romantic behavior in the wake of #MeToo.
Another interesting thing to point out is that men who think it is very important to be seen as masculine also tend to describe themselves as very masculine.
It's also interesting to point out that men who feel society puts unhealthy pressure on men also tend to report often feeling lonely, and vice versa.
Let's try to build a classification model using the data we have been exploring. We can try to predict someone's feelings/behaviors about masculinity based off of things we know about them.
Let's start off by defining our training data. We should pick characteristics of a hypothetical man we are trying to categorize.
list(selected.columns) # list the columns we were previously working with
['worry_height', 'worry_weight', 'worry_ment_health', 'seenharassment_confront', 'seenharassment_nothing', 'chance_Red', 'chance_Blu', 'manly_you_feel_Not at all masculine', 'manly_you_feel_Not very masculine', 'manly_you_feel_Somewhat masculine', 'manly_you_feel_Very masculine', 'important_seen_manly_Not at all important', 'important_seen_manly_Not too important', 'important_seen_manly_Somewhat important', 'important_seen_manly_Very important', 'disadvantage_accused_sexharass_Greater risk of being accused of sexual harassment', 'disadvantage_accused_sexistracist_Greater risk of being accused of being sexist or racist', 'society_unhealthy_pressure_No', 'society_unhealthy_pressure_Yes', 'often_phys_affect_friends_Never, and not open to it', 'often_phys_affect_friends_Never, but open to it', 'often_phys_affect_friends_Often', 'often_phys_affect_friends_Rarely', 'often_phys_affect_friends_Sometimes', 'often_cry_Never, and not open to it', 'often_cry_Never, but open to it', 'often_cry_Often', 'often_cry_Rarely', 'often_cry_Sometimes', 'often_therapist_Never, and not open to it', 'often_therapist_Never, but open to it', 'often_therapist_Often', 'often_therapist_Rarely', 'often_therapist_Sometimes', 'often_feel_lonely_Never, and not open to it', 'often_feel_lonely_Never, but open to it', 'often_feel_lonely_Often', 'often_feel_lonely_Rarely', 'often_feel_lonely_Sometimes', 'changed_romantic_behavior_metoo_No', 'changed_romantic_behavior_metoo_Yes', 'demog_married_Divorced', 'demog_married_Married', 'demog_married_Never married', 'demog_married_Separated', 'demog_married_Widowed', 'demog_sexualorientation_Bisexual', 'demog_sexualorientation_Gay', 'demog_sexualorientation_Other', 'demog_sexualorientation_Straight', 'demog_race_Asian', 'demog_race_Black', 'demog_race_Hispanic', 'demog_race_Other', 'demog_race_White', "demog_education_Associate's degree", 'demog_education_College graduate', 'demog_education_Did not complete high school', 'demog_education_High school or G.E.D.', 'demog_education_Post graduate degree', 'demog_education_Some college', 'age3_18 - 34', 'age3_35 - 64', 'age3_65 and up']
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
# Defining the training data:
X_train = selected[['chance_Red',
'demog_married_Divorced',
'demog_married_Married',
'demog_married_Never married',
'demog_married_Separated',
'demog_married_Widowed',
'demog_sexualorientation_Bisexual',
'demog_sexualorientation_Gay',
'demog_sexualorientation_Other',
'demog_sexualorientation_Straight',
'demog_race_Asian',
'demog_race_Black',
'demog_race_Hispanic',
'demog_race_Other',
'demog_race_White',
"demog_education_Associate's degree",
'demog_education_College graduate',
'demog_education_Did not complete high school',
'demog_education_High school or G.E.D.',
'demog_education_Post graduate degree',
'demog_education_Some college',
'age3_18 - 34',
'age3_35 - 64',
'age3_65 and up']]
y_train = masc_df[['manly_you_feel','society_unhealthy_pressure']]
Some people didn't answer these questions, so we need to impute their responses. There are only 14 missing values, so imputing with the most frequent response should be safe.
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy='most_frequent') # choosing frequency as the strategy
y_train_imp = imputer.fit_transform(y_train) # Impute the missing values
We need to standardize our data since the columns are on various scales.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_sc = scaler.transform(X_train)
Next, we can attempt to fit a 15-Nearest-Neighbors model to the training data.
model = KNeighborsClassifier(n_neighbors=15)
model.fit(X_train_sc, y_train_imp)
KNeighborsClassifier(n_neighbors=15)
Now, we can create a hypothetical man to test with this model. We have to scale him with the same scaler we used on the training data.
# Creating the test man
x_test = pd.DataFrame()
x_test['chance_Red']=[.7]
x_test['demog_married_Divorced']=[1]
x_test['demog_married_Married']=[0]
x_test['demog_married_Never married']=[0]
x_test['demog_married_Separated']=[0]
x_test['demog_married_Widowed']=[0]
x_test['demog_sexualorientation_Bisexual']=[0]
x_test['demog_sexualorientation_Gay']=[0]
x_test['demog_sexualorientation_Other']=[0]
x_test['demog_sexualorientation_Straight']=[1]
x_test['demog_race_Asian']=[0]
x_test['demog_race_Black']=[0]
x_test['demog_race_Hispanic']=[0]
x_test['demog_race_Other']=[0]
x_test['demog_race_White']=[1]
x_test["demog_education_Associate's degree"]=[1]
x_test['demog_education_College graduate']=[0]
x_test['demog_education_Did not complete high school']=[0]
x_test['demog_education_High school or G.E.D.']=[0]
x_test['demog_education_Post graduate degree']=[0]
x_test['demog_education_Some college']=[0]
x_test['age3_18 - 34']=[0]
x_test['age3_35 - 64']=[1]
x_test['age3_65 and up']=[0]
x_test_sc = scaler.transform(x_test) # Scale with the same scaler.
We can now use the model to predict what this man's response would be to the question, "How manly do you feel?"
prediction = model.predict(x_test_sc)
print(f"Asked how manly he feels: {prediction[:,0]}")
print(f"Asked if society puts unhealthy pressure on men: {prediction[:,1]}")
Asked how manly he feels: ['Very masculine'] Asked if society puts unhealthy pressure on men: ['Yes']
What's really interesting about this is that we can change some of the demographic information and re-run the prediction (the two preceding cells) to update it for a different man. If we simply change this man from Straight to Bisexual, for example, his predicted response for how manly he feels changes from "Very masculine" to "Somewhat masculine." This can be seen below.
x_test['demog_sexualorientation_Bisexual']=[1] # Changed to Bisexual
x_test['demog_sexualorientation_Straight']=[0]
x_test_sc = scaler.transform(x_test) # Scale with the same scaler.
prediction = model.predict(x_test_sc)
print(f"Asked how manly he feels: {prediction[:,0]}")
print(f"Asked if society puts unhealthy pressure on men: {prediction[:,1]}")
Asked how manly he feels: ['Somewhat masculine'] Asked if society puts unhealthy pressure on men: ['Yes']
What if we go back and take the first man, but change his odds of voting for Trump to those of a more liberal place, such as California?
x_test['demog_sexualorientation_Bisexual']=[0]
x_test['demog_sexualorientation_Straight']=[1] # Switched back to Straight
x_test['chance_Red']=[.32] # Change to a more CA-like likelihood
x_test_sc = scaler.transform(x_test) # Scale with the same scaler.
prediction = model.predict(x_test_sc)
print(f"Asked how manly he feels: {prediction[:,0]}")
print(f"Asked if society puts unhealthy pressure on men: {prediction[:,1]}")
Asked how manly he feels: ['Somewhat masculine'] Asked if society puts unhealthy pressure on men: ['Yes']
His response changes to "Somewhat masculine" as well. What if he were from somewhere like Washington, D.C.?
x_test['demog_sexualorientation_Bisexual']=[0]
x_test['demog_sexualorientation_Straight']=[1]
x_test['chance_Red']=[.04] # Change to a more DC-like likelihood
x_test_sc = scaler.transform(x_test) # Scale with the same scaler.
prediction = model.predict(x_test_sc)
print(f"Asked how manly he feels: {prediction[:,0]}")
print(f"Asked if society puts unhealthy pressure on men: {prediction[:,1]}")
Asked how manly he feels: ['Very masculine'] Asked if society puts unhealthy pressure on men: ['No']
Interestingly, his response changes for society putting unhealthy pressure on men to "No."
Let's figure out what the training accuracy is. This will help us evaluate the model we have built.
from sklearn.metrics import accuracy_score
y_train_pred = model.predict(X_train_sc)
#accuracy_score(y_train_imp, y_train_pred) # We can't do this because it's predicting two features. Must do indiv. and average.
accuracies = [accuracy_score(y_train_imp[:, i], y_train_pred[:, i]) for i in range(y_train_imp.shape[1])]
train_acc=np.mean(accuracies)
train_acc
0.6058823529411765
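For context, here is a quick sketch of the majority-class baseline: the accuracy a model would get by always predicting each question's most common answer.
majority_acc = np.mean([pd.Series(y_train_imp[:, i]).value_counts(normalize=True).iloc[0]
                        for i in range(y_train_imp.shape[1])]) # modal answer share for each target, averaged
majority_acc # compare against the 15-NN training accuracy above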
However, we have to remember that accuracy is heavily influenced by the distribution of the data: when one answer dominates, a model can score well simply by always predicting it. Recall, too, that our data is relatively skewed toward higher-population states. So let's use the F1 score instead.
from sklearn.metrics import f1_score
f1 = np.mean([f1_score(y_train_imp[:, i], y_train_pred[:, i], average='weighted') for i in range(y_train_imp.shape[1])])
f1
0.5624405782197712
We can also cross-validate our model over a handful of folds. Since we are working with a multi-output target, this is a lot more complicated than a typical couple-line cross-validation. We have to instead create a custom scorer to average the F1 scores of the different outputs in our model. Then that custom scorer can be used in a pipeline to cross-validate the model and see its average-average F1 score.
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer
def cust_f1(y_true, y_pred): # create custom scorer for averaging average f1
f1_scores = [f1_score(y_true[:, t], y_pred[:, t], average='weighted') for t in range(y_true.shape[1])]
return np.mean(f1_scores)
pipe = Pipeline([ # since we had to impute, we should add it to our pipeline, to cover our bases
('imputer', SimpleImputer(strategy='most_frequent')),
('scaler', StandardScaler()),
('classifier', KNeighborsClassifier(n_neighbors=15))
])
# cross-validation with custom scorer:
cust_scorer = make_scorer(cust_f1)
cv = cross_val_score(pipe, X_train, y_train_imp, cv=7, scoring=cust_scorer)
avg_f1 = np.mean(cv) # average the average scores
avg_f1
0.5131048797161305
Unfortunately, our model doesn't perform especially well. An average F1 score of about 0.51 is better than coin-flip territory, but for a multi-class, multi-output problem like this, something closer to 0.6 would be preferable.
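One natural next step, sketched below under the assumption that the pipe and cust_scorer objects defined above are still in scope, is to tune the number of neighbors with a grid search and see whether a different neighborhood size improves the cross-validated F1 score.
from sklearn.model_selection import GridSearchCV

param_grid = {'classifier__n_neighbors': [5, 10, 15, 25, 50]} # candidate k values for the KNN step of the pipeline
search = GridSearchCV(pipe, param_grid, scoring=cust_scorer, cv=7) # same folds and custom scorer as before
search.fit(X_train, y_train_imp) # refits the whole pipeline for each candidate k
print(search.best_params_, search.best_score_) # best k and its mean cross-validated F1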
However, we have still gleaned some important insights!
According to our data, most men in the United States do believe that society places unhealthy pressure upon them. However, a lot of them still hold unhealthy opinions/attitudes regarding masculinity.
We also learned that men who did not complete high school correlated more strongly with changing their romantic behavior in the wake of #MeToo than with not changing it.
Another interesting point was that men who feel society puts unhealthy pressure upon them also tended to report feeling lonely. This points toward what PAHO identified regarding men being less likely to discuss their feelings and seek professional help.
Data exploration, and the insights that can be gleaned from exercises like this, are greatly important for the future of public health in the Americas, especially regarding men and masculinity. Further exploration, along with experimental and longitudinal studies, could yield much deeper discoveries about behavior in our modern era, and hopefully result in fewer men dying so early.
from google.colab import drive
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
!ls drive/MyDrive/Colab\ Notebooks/
CMPSFinal.html Lab01.ipynb Lab03.ipynb Lab05.ipynb Lab07.ipynb Lab09.ipynb Lab11.ipynb CMPSFinal.ipynb Lab02.ipynb Lab04.ipynb Lab06.ipynb Lab08.ipynb Lab10.ipynb Lab12.ipynb
%%shell
jupyter nbconvert --to html drive/MyDrive/Colab\ Notebooks/CMPSFinal.ipynb
[NbConvertApp] Converting notebook drive/MyDrive/Colab Notebooks/CMPSFinal.ipynb to html [NbConvertApp] Writing 1424847 bytes to drive/MyDrive/Colab Notebooks/CMPSFinal.html