How do people across the United States spend the 24 hours available to them each day? Demographics and other factors such as time zone, culture, population and socio-economic conditions play a vital role in determining people's lifestyles. This article examines the time-use behaviour of people living in the United States.
Using data from the American Time Use Survey (ATUS) [1], it is possible to uncover patterns in time utilization of people across the United States. For instance, the dashboard below depicts how much time people spend in various activity categories across states and the distribution.
#nbi:hide_in
import pandas as pd
import numpy as np
import re
import bqplot as bq
import ipywidgets
from ipywidgets import Layout
#nbi:hide_in
#nbi:hide_out
# Load the 2017 ATUS activity summary, plus only the case id and GESTFIPS
# code from the ATUS-CPS file (GESTFIPS is the key used for state grouping).
ats_sum = pd.read_csv(
    'https://raw.githubusercontent.com/sarvaniputta/nbinteract_tutorial/master/atussum_2017.csv'
)
cps = pd.read_csv(
    'https://raw.githubusercontent.com/sarvaniputta/nbinteract_tutorial/master/atuscps_2017.csv',
    usecols=['TUCASEID', 'GESTFIPS'],
)
# Collapse the CPS rows to a single GESTFIPS per case id (first non-null
# value) so that the join below yields at most one row per respondent.
cps_ = cps.groupby('TUCASEID')['GESTFIPS'].first().to_frame()
merged = pd.merge(ats_sum, cps_, left_on='TUCASEID', right_index=True)
def activity_columns(data, activity_code):
    """Return the column names of *data* that belong to one activity code.

    A column belongs to the activity when its name starts with
    ``"t" + activity_code`` (e.g. code ``'0501'`` matches ``'t050101'``).

    Args:
        data: A DataFrame (anything exposing ``.columns``).
        activity_code: Digit string identifying the activity group.

    Returns:
        A list of matching column labels, in the order they appear in
        ``data.columns``. Empty when nothing matches.
    """
    col_prefix = "t" + activity_code
    # Use a true prefix test: a substring test would also pick up columns
    # that merely contain the code somewhere in the middle of their name.
    # Non-string labels can never match and are skipped instead of raising.
    return [
        column
        for column in data.columns
        if isinstance(column, str) and column.startswith(col_prefix)
    ]
# Column groups for the seven activities shown in the dashboard.
work_cols = activity_columns(merged, '0501')
travel_cols = activity_columns(merged, '1805')
sleep_cols = activity_columns(merged, '0101')
religion_cols = activity_columns(merged, '1401')
leisure_cols = activity_columns(merged, '1203')
sports_cols = activity_columns(merged, '1301')
housework_cols = activity_columns(merged, '0201')

# Per-state (GESTFIPS) mean of every column in each activity group.
work_statewise = merged.groupby('GESTFIPS')[work_cols].mean()
travel_statewise = merged.groupby('GESTFIPS')[travel_cols].mean()
sleep_statewise = merged.groupby('GESTFIPS')[sleep_cols].mean()
religion_statewise = merged.groupby('GESTFIPS')[religion_cols].mean()
leisure_statewise = merged.groupby('GESTFIPS')[leisure_cols].mean()
sports_statewise = merged.groupby('GESTFIPS')[sports_cols].mean()
housework_statewise = merged.groupby('GESTFIPS')[housework_cols].mean()

# Keyed by position so the dropdown's `index` can be used as the lookup key.
activity_cols_dict = dict(enumerate([
    work_cols,
    travel_cols,
    sleep_cols,
    religion_cols,
    leisure_cols,
    sports_cols,
    housework_cols,
]))
#nbi:hide_in
#nbi:hide_out
# Dropdown labels; their order must match the keys of `activity_cols_dict`,
# because the callbacks look columns up by the dropdown's selected index.
activity_list = [
    'Average Working Time',
    'Average Travel Time',
    'Average Sleeping Time',
    'Average Religious Time',
    'Average Leisure Time',
    'Average Sports Time',
    'Average Housework Time',
]
act_dd = ipywidgets.Dropdown(
    description='Select activity',
    options=activity_list,
    style={'description_width': 'initial'},
)
######################### Map ############################
# Choropleth of the selected activity; starts out showing working time.
cscale = bq.ColorScale(scheme='Blues')
# NOTE(review): an earlier comment here said to reverse the colour scale so
# that Hawaii stays visible; no reversal is actually applied -- confirm.
sc_geo = bq.AlbersUSA(scale=2700)
map_tt = bq.Tooltip(fields=['name', 'color'], labels=['State', 'Time (minutes)'])
states_map = bq.Map(
    map_data=bq.topo_load('map_data/USStatesMap.json'),
    # Per-state means of the work columns, added up and keyed by GESTFIPS.
    color=work_statewise.sum(axis=1).round(1).to_dict(),
    scales={'projection': sc_geo, 'color': cscale},
    tooltip=map_tt,
    interactions={'click': 'select', 'hover': 'tooltip'},
    anchor_style={'fill': 'red'},
    # Same opacity for both states of selection, so selecting never dims the map.
    selected_style={'opacity': 1.0},
    unselected_style={'opacity': 1.0},
)
cax = bq.ColorAxis(scale=cscale, orientation='vertical', side='left', label='Time(minutes)')
fig_map = bq.Figure(
    marks=[states_map],
    axes=[cax],
    title='Average Working Time',
    layout=Layout(min_width='800px', min_height='800px'),
    background_style={'fill': 'white'},
    title_style={
        'font-size': '20px',
        'text-transform': 'uppercase',
        'font-weight': '700',
        'color': '#13294a',
    },
    fig_margin={'bottom': 0, 'top': 0, 'left': 65, 'right': 0},
)
fig_map.aspect_ratio = 1920/1080
###########################################################
######################### Bar #############################
# Total of the work columns for every respondent, indexed by case id.
time_spent = merged.set_index('TUCASEID')[work_cols].sum(axis=1)
def normalized_hist(data, bins=10):
    """Histogram *data* and express each bin as a percentage of all samples.

    Args:
        data: Array-like of numeric values.
        bins: Forwarded to ``numpy.histogram`` (bin count or explicit edges).

    Returns:
        ``(edges, percentages)``. ``edges`` has one more element than
        ``percentages``; callers that plot bars should drop the last edge.
        When no sample falls in any bin, every percentage is ``0.0``
        instead of NaN.
    """
    counts, edges = np.histogram(data, bins=bins)
    total = counts.sum()
    if total == 0:
        # Empty input: avoid 0/0, which would fill the result with NaN.
        return edges, np.zeros(len(counts), dtype=float)
    return edges, counts * 100 / total
# Histogram of time spent on the current activity, as a share of respondents.
sc_x2 = bq.LinearScale()
sc_y2 = bq.LinearScale()
ax_x2 = bq.Axis(scale=sc_x2, orientation='horizontal', label='Time spent(minutes)')
ax_y2 = bq.Axis(
    scale=sc_y2,
    orientation='vertical',
    label='Population%',
    grid_color='gray',
    grid_lines='dashed',
)
x, y = normalized_hist(time_spent, bins=24)
# `x` holds the bin edges (one more than the bars), hence the dropped last edge.
bars_hist = bq.Bars(
    x=x[:-1],
    y=y,
    align='right',
    scales={'x': sc_x2, 'y': sc_y2},
    colors=['#13294a'],
)
fig_bar1 = bq.Figure(
    marks=[bars_hist],
    axes=[ax_x2, ax_y2],
    background_style={'fill': 'white'},
    title='Population Distribution of Time Spent',
    layout=Layout(max_width='99%', max_height='60%'),
)
###########################################################
##################### Age Line Plot #######################
# Average of the work columns per respondent, for every age (TEAGE).
# Only the needed columns are aggregated rather than the whole frame.
age_wise_act_sum = merged.groupby('TEAGE')[work_cols].sum().sum(axis=1)
age_wise_count = merged.groupby('TEAGE')['TUCASEID'].count()
y_data_line = (age_wise_act_sum / age_wise_count).values
# Take the ages from the same grouped result as the y values: `merged` is a
# join and may hold fewer ages than `ats_sum`, which would leave x and y
# with different lengths.
x_data_line = age_wise_act_sum.index.tolist()

x_sc_line = bq.OrdinalScale()
y_sc_line = bq.LinearScale()
x_ax_line = bq.Axis(scale=x_sc_line, num_ticks=10, label='Age', color='#13294a')
y_ax_line = bq.Axis(
    scale=y_sc_line,
    num_ticks=10,
    orientation='vertical',
    label='Time spent in minutes',
    grid_color='gray',
    grid_lines='dashed',
)
lines = bq.Lines(
    x=x_data_line,
    y=y_data_line,
    scales={'x': x_sc_line, 'y': y_sc_line},
    colors=['#13294a'],
    interpolation='basis',
)
fig_line = bq.Figure(
    marks=[lines],
    axes=[x_ax_line, y_ax_line],
    title='Average Working Time across US',
    background_style={'fill': 'white'},
)
fig_line.layout = Layout(max_width='99%', max_height='60%')
###########################################################
##################### Interactivity #######################
def on_select_map(change):
    """Redraw the histogram for the state most recently selected on the map.

    Args:
        change: Change notification for ``states_map.selected``; only
            ``change['new']`` (the list of selected GESTFIPS codes) is read.

    When the selection is empty the histogram covers every state. Otherwise
    the selection is collapsed to the last code in the list and the
    histogram covers that state only.
    """
    selected_fips = change['new']
    if selected_fips:
        latest = selected_fips[-1]
        # Keep a single highlighted state even when several were clicked.
        states_map.selected = [latest]
        rows = merged.GESTFIPS == latest
    else:
        # Nothing selected: use all respondents rather than one arbitrary state.
        rows = slice(None)

    columns = activity_cols_dict[act_dd.index]
    x, y = normalized_hist(merged.loc[rows, columns].sum(axis=1), bins=24)
    # `x` holds bin edges (one more than the bars), so drop the last edge,
    # as done when the bars were first created. Batch the two assignments so
    # the front end never receives x and y of different generations.
    with bars_hist.hold_sync():
        bars_hist.x = x[:-1]
        bars_hist.y = y
    # TODO: the age line plot is not yet filtered by the selected state.


# Observe above changes
states_map.observe(on_select_map, 'selected')
def on_activity_change(change):
    """Refresh the map, line plot and histogram for the chosen activity.

    Args:
        change: Change notification from the dropdown. It is not read; the
            current choice is taken from ``act_dd`` directly.
    """
    # Drop any state selection so all three views show nationwide data.
    states_map.selected = []
    columns = activity_cols_dict[act_dd.index]

    # Map: per-state mean of each column, added up across the activity.
    filtered = merged.loc[:, columns].groupby(merged.GESTFIPS).mean().sum(axis=1).round(1).to_dict()
    states_map.color = filtered
    fig_map.title = act_dd.value

    # Line plot: average per respondent for every age. x is taken from the
    # same grouped result as y so the two always have the same length.
    age_wise_act_sum = merged.groupby('TEAGE')[columns].sum().sum(axis=1)
    age_wise_count = merged.groupby('TEAGE')['TUCASEID'].count()
    with lines.hold_sync():
        lines.x = age_wise_act_sum.index.tolist()
        lines.y = (age_wise_act_sum / age_wise_count).values
    fig_line.title = act_dd.value + ' across US'

    # Histogram: `x` holds bin edges (one more than the bars), so drop the
    # last edge, as done when the bars were first created.
    x, y = normalized_hist(merged.loc[:, ['TUCASEID']+columns].set_index('TUCASEID').sum(axis=1), bins=24)
    with bars_hist.hold_sync():
        bars_hist.x = x[:-1]
        bars_hist.y = y


# Observe above changes
act_dd.observe(on_activity_change, 'value')
###########################################################
# ipywidgets.VBox([act_dd, ipywidgets.HBox([fig_map, ipywidgets.VBox([fig_bar1, fig_line])])])
#nbi:left
#nbi:hide_in
# Left pane: activity selector stacked above the choropleth map.
ipywidgets.VBox([act_dd, fig_map])
#nbi:right
#nbi:hide_in
# Right pane: histogram stacked above the age line plot.
ipywidgets.VBox([fig_bar1, fig_line])
Insights
Let’s take a closer look at the statistics for 7 specific activities and visualise how they affect the lifestyle of people in certain states.
NOTE: Curiously, in the plot of time spent (minutes) vs. population %, the tallest bar is at 0 for every activity category. A ‘0’ value indicates that a person spent 0 minutes on that particular activity. ATUS uses the convention of blanking improper data to '0', which could inflate the proportion of the population that shows up with no time spent on an activity.
People in some states, like South Dakota, seem to work significantly longer than others. Let us dive deeper to check for plausible causal factors. The chart below shows the average working time, overall and by gender, of respondents from the states with the longest working hours, namely South Dakota, New Mexico, Nebraska, Utah, Tennessee, Oregon, Alaska, Idaho, Louisiana and Georgia.
#nbi:hide_in
# Per-respondent total of the work columns, stored on `merged` itself.
merged['work_total'] = merged[work_cols].sum(axis=1)

# Only respondents with a non-zero work total count towards these averages.
has_work = merged['work_total'] != 0
is_male = merged['TESEX'] == 1
is_female = merged['TESEX'] == 2

m_work = merged[has_work]
m_work_men = merged[is_male & has_work]
m_work_women = merged[is_female & has_work]

# Per-state (GESTFIPS) mean of every work column: overall, men, women.
m_work = m_work.groupby('GESTFIPS')[work_cols].mean()
m_work_men = m_work_men.groupby('GESTFIPS')[work_cols].mean()
m_work_women = m_work_women.groupby('GESTFIPS')[work_cols].mean()
#nbi:hide_in
def _avg_hours_desc(statewise):
    """Add up the columns per state, convert to hours (2 d.p.), sort descending."""
    hours = statewise.sum(axis=1).sort_values(ascending=False) / 60
    return hours.round(2).to_frame().reset_index()


mer_work = _avg_hours_desc(m_work)
mer_work_male = _avg_hours_desc(m_work_men)
mer_work_female = _avg_hours_desc(m_work_women)

# Each table's value column is labelled 0. The first merge therefore yields
# '0_x' / '0_y'; the second finds no clash and keeps the plain 0 label.
merge1 = mer_work.merge(mer_work_male, how='left', on='GESTFIPS')
merge2 = merge1.merge(mer_work_female, how='left', on='GESTFIPS')
merge2 = merge2.rename(columns={'0_x': 'Overall', '0_y': 'Male', 0: 'Female'})
merge2 = merge2.sort_values(by='Overall', ascending=False)
#nbi:hide_in
# NOTE(review): the slice starts at position 1, so the first row of the
# sorted table is skipped and rows 2-11 are plotted -- confirm this is
# intentional. The hard-coded state names below are aligned with this slice.
top_worktimes = merge2['Overall'].iloc[1:11].values
top_worktimes_male = merge2['Male'].iloc[1:11].values
top_worktimes_female = merge2['Female'].iloc[1:11].values
top_workstates = [
    'South Dakota', 'New Mexico', 'Nebraska', 'Utah', 'Tennessee',
    'Oregon', 'Alaska', 'Idaho', 'Louisiana', 'Georgia',
]

################### Top 10 work States Bar #######################
sc_x3 = bq.OrdinalScale()
sc_y3 = bq.LinearScale(min=5, max=9.7)
ax_x3 = bq.Axis(scale=sc_x3, orientation='horizontal', label='States', color='#13294a')
ax_y3 = bq.Axis(
    scale=sc_y3,
    orientation='vertical',
    label='Avg. Work Time in Hours',
    color='#13294a',
    grid_color='gray',
    grid_lines='dashed',
)
shared_scales = {'x': sc_x3, 'y': sc_y3}

# Bars give the overall average; the two lines overlay the per-gender averages.
bars_topwork = bq.Bars(
    x=top_workstates,
    y=top_worktimes,
    padding=0.35,
    scales=shared_scales,
    colors=['#13294a'],
)
lines_male = bq.Lines(
    x=top_workstates,
    y=top_worktimes_male,
    scales=shared_scales,
    colors=['#22A8DB'],
    stroke_width=3,
    marker='circle',
    display_legend=True,
    labels=['Male'],
)
lines_female = bq.Lines(
    x=top_workstates,
    y=top_worktimes_female,
    scales=shared_scales,
    colors=['#FC0F3A'],
    stroke_width=3,
    marker='circle',
    display_legend=True,
    labels=['Female'],
)
fig_bar2 = bq.Figure(
    marks=[bars_topwork, lines_male, lines_female],
    axes=[ax_x3, ax_y3],
    background_style={'fill': 'white'},
    layout={'flex': '1'},
    title='States that work the longest hours and comparison of gender work gap',
    title_style={
        'font-size': '20px',
        'text-transform': 'uppercase',
        'font-weight': '700',
        'color': '#13294a',
    },
)
#nbi:hide_in
# Flex container that centres the chart horizontally. The Layout trait for
# this is `justify_content`; `justify` is not a Layout trait, so the
# centring was never applied.
bar2box = ipywidgets.Box(children=[fig_bar2], layout=Layout(display='flex', justify_content='center'))
bar2box
Women in general seem to work fewer hours than men in most of the states. This might be attributed to the wage inequality faced by women. A recent study from the Institute for Women’s Policy Research noted that the 80 cents on the dollar comparative wage figure usually cited is actually an overestimation for many women. In many segments of the population, women earn just 49 cents on a white man’s dollar. This pay disparity might be a reason for the lower hours women work. Also, women typically still face greater commitments at home (childcare etc.), which could also lead to the trend of fewer hours worked.
Alaska seems to be an extreme outlier in terms of gender disparity. This can be explained from the high percentage of jobs in oil and gas industry in Alaska which is predominantly male worker focused. Even experts opine so [3].
"Almost certainly the biggest factor — we have a higher percentage of oil and gas employment than other states do, and then that industry is the highest-paid in Alaska, and … the percentages are high for males," said Dan Robinson, chief of research and analysis at the Alaska Department of Labor and Workforce Development.
We do not see much correlation between hours worked and the gender distribution. Intuition says that hours worked will rise as hourly wages fall. Let us see if this is really the case. The chart below shows the average hourly wage of each state.
#nbi:hide_in
respond = pd.read_csv(
    'https://raw.githubusercontent.com/sarvaniputta/nbinteract_tutorial/master/atusresp_2017.dat',
    usecols=['TUCASEID', 'TRERNHLY'],
)
# Attach the hourly-earnings field to every respondent, matched on case id.
wage_merged = merged.merge(respond, how='left', on='TUCASEID')
# Rows whose TRERNHLY equals -1 are left out of the average.
# NOTE(review): presumably -1 marks "no hourly wage reported" -- confirm.
new_wage_merged = wage_merged[wage_merged['TRERNHLY'] != -1]
# NOTE(review): the division by 100 suggests the field is stored in cents -- confirm.
salary_per_hr = new_wage_merged.groupby('GESTFIPS')['TRERNHLY'].mean() / 100
# Choropleth of the average hourly wage per state.
cscale2 = bq.ColorScale(scheme='Blues')
# NOTE(review): an earlier comment here said to reverse the colour scale so
# that Hawaii stays visible; no reversal is actually applied -- confirm.
map_tt2 = bq.Tooltip(labels=['State', 'Hourly Wage ($)'], fields=['name', 'color'])
sc_geo2 = bq.AlbersUSA(scale=2700)
states_map2 = bq.Map(
    color=(salary_per_hr).round(1).to_dict(),
    map_data=bq.topo_load('map_data/USStatesMap.json'),
    scales={'projection': sc_geo2, 'color': cscale2},
    tooltip=map_tt2,
)
cax2 = bq.ColorAxis(scale=cscale2, orientation='vertical', side='left', label='Wage ($)')
fig_map2 = bq.Figure(
    marks=[states_map2],
    axes=[cax2],
    title='Average hourly wage',
    layout={'flex': '1'},
    background_style={'fill': 'white'},
    title_style={
        'font-size': '20px',
        'text-transform': 'uppercase',
        'font-weight': '700',
        'color': '#13294a',
    },
)
fig_map2.fig_margin = {'bottom': 0, 'top': 0, 'left': 65, 'right': 0}
fig_map2.aspect_ratio = 1920/1080
# Flex container that centres the map horizontally. The Layout trait for
# this is `justify_content`; `justify` is not a Layout trait, so the
# centring was never applied.
bar2wage = ipywidgets.Box(children=[fig_map2], layout=Layout(display='flex', justify_content='center'))
bar2wage
This chart explains South Dakota's position in working hours more clearly. The state has the third-lowest average wage for employed people in the country behind only Arkansas and Mississippi [2]. Roughly 21 percent of employed South Dakota residents, about 87,000 people, make under \$30,000 a year; 41 percent of employed South Dakota residents, about 169,400 people, make under \$35,000 a year, and 71 percent of employed South Dakota residents, about 292,000 people, make under \$40,000 a year. Despite attempts to lure new employers and improve pay for workers, the state has made little progress over the past few decades. After languishing near the bottom for years, the state had the lowest average pay in the nation in 2008 and has moved up only two spots since then. This could be the reason why the residents of South Dakota have to put in extra hours to make ends meet [4]. However, correlation is not causation. In this regard, it is interesting that Mississippi ranks lower in hours worked despite the lower hourly wage there.
“Behind every successful man, there is a woman”. We put this oft-quoted maxim to the test. Based on this presumption, we set out to observe visually whether relationships really bring about a change in one’s life. From the chart below, it is evident that people in a relationship tend to spend more time working, not only at the office but also at home! Man, that must be very tiring. Single people also tend to sleep in longer and have more time for leisure.
#nbi:hide_in
# TRSPPRES codes 1 and 2 form the "Spouse/Unmarried Partner" group used in
# the chart; code 3 forms the "No spouse/partner" group.
has_partner = merged['TRSPPRES'].isin([1, 2])
spouse_res = merged[has_partner]
single_res = merged[merged['TRSPPRES'] == 3]

# Axis labels, in the same order as the activity column groups.
list_acts = [
    'Avg. Work Time',
    'Avg. Travel Time',
    'Avg. Sleep Time',
    'Avg. Religion Time',
    'Avg. Leisure Time',
    'Avg. Sports Time',
    'Avg. Housework Time',
]
def collect_avg_times(list_act_cols, data):
    """Average per-row total of each column group, divided by 60.

    Args:
        list_act_cols: Sequence of column-name lists, one per activity.
        data: DataFrame holding those columns.

    Returns:
        Dict mapping the 1-based position of each group to the mean of its
        row-wise column sums divided by 60.
    """
    return {
        position: data[cols].sum(axis=1).mean() / 60
        for position, cols in enumerate(list_act_cols, start=1)
    }
list_act_cols = [
    work_cols,
    travel_cols,
    sleep_cols,
    religion_cols,
    leisure_cols,
    sports_cols,
    housework_cols,
]
spouse_dict1 = collect_avg_times(list_act_cols, spouse_res)
single_dict2 = collect_avg_times(list_act_cols, single_res)

############ With vs. without spouse/partner grouped bar ############
sc_x4 = bq.OrdinalScale()
sc_y4 = bq.LinearScale()
# The bars are horizontal, so the ordinal (activity) axis is the vertical one.
ax_x4 = bq.Axis(scale=sc_x4, orientation='vertical', color='#13294a')
ax_y4 = bq.Axis(
    scale=sc_y4,
    orientation='horizontal',
    label='Avg. Time in Hours',
    color='#13294a',
    grid_color='lightgray',
    grid_lines='dashed',
)
# Series order (single first, then partnered) must match `labels` and `colors`.
single_hours = list(single_dict2.values())
partnered_hours = list(spouse_dict1.values())
bars_grouped = bq.Bars(
    x=list_acts,
    y=[single_hours, partnered_hours],
    padding=0.1,
    scales={'x': sc_x4, 'y': sc_y4},
    colors=['#13294a', '#e84a27'],
    display_legend=True,
    labels=['No spouse/partner', 'Spouse/Unmarried Partner'],
    orientation='horizontal',
    type='grouped',
)
fig_bar3 = bq.Figure(
    marks=[bars_grouped],
    axes=[ax_x4, ax_y4],
    background_style={'fill': 'white'},
    layout={'flex': '1'},
    title='Activity avg. times with and w/o spouse or unmarried partner',
    title_style={
        'font-family': 'Inherit',
        'font-size': '20px',
        'text-transform': 'uppercase',
        'font-weight': '600',
        'color': '#13294a',
    },
    fig_margin={'bottom': 60, 'top': 60, 'left': 130, 'right': 20},
)
# Flex container that centres the chart horizontally. The Layout trait for
# this is `justify_content`; `justify` is not a Layout trait, so the
# centring was never applied.
bar2relation = ipywidgets.Box(children=[fig_bar3], layout=Layout(display='flex', justify_content='center'))
bar2relation
On second thought, a possible confounding factor for this analysis is that single respondents are typically younger and have a different lifestyle compared to older respondents in a relationship. A longitudinal study across ages would be necessary to obtain more insights into this extremely interesting question.
We have considered some questions on how Americans spend their time. While most of the observations are interesting, deeper dives are necessary to understand causal factors. As they say "correlation is not causation". On the other hand, if you are looking to relocate, know that there are some places more suited to your lifestyle than others.
Citations
[1] American Time Use Survey — 2017 Microdata Files https://www.bls.gov/tus/datafiles_2017.htm
[2] Current Population Survey (CPS) https://www.census.gov/programs-surveys/cps/data-detail.html