A Day in an American's Life

Beena Balakrishna, Pranay Parmar, Sarvani Putta and Smriiti Singhal
The School of Information Sciences,
University of Illinois - Urbana Champaign

It is interesting to see how people across the United States spend the 24 hours available to them each day. Demographics and other factors such as time zone, culture, population and socio-economic conditions play a vital role in determining people's lifestyles. This article examines the behaviour of people living in the United States.

Using data from the American Time Use Survey (ATUS) [1], it is possible to uncover patterns in how people across the United States use their time. For instance, the dashboard below depicts how much time people in each state spend on various activity categories and how that time is distributed across the population.

#nbi:hide_in
import pandas as pd
import numpy as np
import re
import bqplot as bq
import ipywidgets
from ipywidgets import Layout
#nbi:hide_in
#nbi:hide_out
# Activity-summary table, plus the two CPS columns needed to attach a state
# code (GESTFIPS) to each case id (TUCASEID).
ats_sum = pd.read_csv(
    'https://raw.githubusercontent.com/sarvaniputta/nbinteract_tutorial/master/atussum_2017.csv'
)
cps = pd.read_csv(
    'https://raw.githubusercontent.com/sarvaniputta/nbinteract_tutorial/master/atuscps_2017.csv',
    usecols=['TUCASEID', 'GESTFIPS'],
)
# Keep the first GESTFIPS seen for every case id, then join it onto the
# summary rows (default inner join on TUCASEID).
cps_ = cps.groupby('TUCASEID')['GESTFIPS'].first().to_frame()
merged = pd.merge(ats_sum, cps_, left_on='TUCASEID', right_index=True)

def activity_columns(data, activity_code):
    """Return the columns of ``data`` that belong to one activity code.

    Args:
        data: DataFrame whose activity columns are named ``t<code>...``.
        activity_code: Leading digits of the activity code, e.g. ``'0501'``.

    Returns:
        List of matching column labels, in the order they appear in ``data``.
    """
    col_prefix = "t" + activity_code
    # Anchor the match at the start of the name: a plain substring test would
    # also pick up unrelated columns that merely contain the prefix. Labels
    # that are not strings can never match and are skipped.
    return [
        column for column in data.columns
        if isinstance(column, str) and column.startswith(col_prefix)
    ]



# Column groups for the seven activities shown in the dashboard; each code is
# the leading part of that activity's column names.
(work_cols, travel_cols, sleep_cols, religion_cols,
 leisure_cols, sports_cols, housework_cols) = [
    activity_columns(merged, code)
    for code in ('0501', '1805', '0101', '1401', '1203', '1301', '0201')
]

# Per-state (GESTFIPS) mean of every column in each group.
work_statewise = merged[work_cols].groupby(merged['GESTFIPS']).mean()
travel_statewise = merged[travel_cols].groupby(merged['GESTFIPS']).mean()
sleep_statewise = merged[sleep_cols].groupby(merged['GESTFIPS']).mean()
religion_statewise = merged[religion_cols].groupby(merged['GESTFIPS']).mean()
leisure_statewise = merged[leisure_cols].groupby(merged['GESTFIPS']).mean()
sports_statewise = merged[sports_cols].groupby(merged['GESTFIPS']).mean()
housework_statewise = merged[housework_cols].groupby(merged['GESTFIPS']).mean()

# Integer key -> column group, numbered 0..6 in the order listed here.
activity_cols_dict = dict(enumerate([
    work_cols,
    travel_cols,
    sleep_cols,
    religion_cols,
    leisure_cols,
    sports_cols,
    housework_cols,
]))
#nbi:hide_in
#nbi:hide_out
# Labels offered by the activity dropdown, one per activity.
activity_list = [
    'Average Working Time',
    'Average Travel Time',
    'Average Sleeping Time',
    'Average Religious Time',
    'Average Leisure Time',
    'Average Sports Time',
    'Average Housework Time',
]

# 'initial' description width keeps the full "Select activity" caption visible.
act_dd = ipywidgets.Dropdown(
    description='Select activity',
    options=activity_list,
    style={'description_width': 'initial'},
)


######################### Map ############################
# Colour scale shared by the map mark and the colour axis below.
cscale = bq.ColorScale(scheme='Blues')
# NOTE(review): an earlier note here said to reverse the colorscale or Hawaii
# is not visible; no reversal is applied in this code - confirm whether it is
# still needed.

# Hover tooltip: state name plus the value that drives the fill colour.
map_tt = bq.Tooltip(labels = ['State', 'Time (minutes)'], fields = ['name', 'color'])
sc_geo = bq.AlbersUSA(scale=2700)
# Initial colouring: per-state mean of each work column, summed across the
# work columns and rounded to one decimal, keyed by GESTFIPS.
states_map = bq.Map(color = work_statewise.sum(axis=1).round(1).to_dict(), 
                     map_data=bq.topo_load('map_data/USStatesMap.json'),
                    scales = {'projection':sc_geo, 'color':cscale},tooltip = map_tt,
                    interactions = {'click': 'select', 'hover':'tooltip', },
                    anchor_style = {'fill':'red'}, 
                    selected_style = {'opacity': 1.0},
                    unselected_style = {'opacity': 1.0})

# Vertical colour legend on the left; fig_margin below reserves 65px for it.
cax = bq.ColorAxis(scale=cscale, orientation='vertical', side='left', label='Time(minutes)')
fig_map = bq.Figure(marks=[states_map],axes=[cax], title = 'Average Working Time',
                   layout=Layout(min_width='800px', min_height='800px'),
                   background_style = {'fill': 'white'},
                   title_style = {'font-size': '20px', 'text-transform': 'uppercase',
                                    'font-weight': '700', 'color': '#13294a'},)
fig_map.fig_margin = {'bottom': 0, 'top': 0, 'left': 65, 'right': 0}
fig_map.aspect_ratio = 1920/1080

###########################################################

######################### Bar #############################
# Total minutes of work per respondent, indexed by case id.
time_spent = merged.set_index('TUCASEID')[work_cols].sum(axis=1)

def normalized_hist(data, bins=10):
    """Histogram ``data`` and express each bin as a percentage of the total.

    Args:
        data: Array-like of values to bin.
        bins: Bin specification forwarded to ``numpy.histogram``.

    Returns:
        Tuple ``(edges, percentages)``: the bin edges (one more entry than
        there are bins) and the share of observations falling in each bin,
        in percent. When ``data`` holds no observations every percentage
        is 0.
    """
    counts, edges = np.histogram(data, bins=bins)
    total = counts.sum()
    if total == 0:
        # Avoid 0/0 -> NaN (and a RuntimeWarning) for an empty selection.
        return edges, np.zeros(len(counts))
    return edges, counts * 100 / total

# Initial histogram: share of all respondents in each of 24 work-time bins.
x, y = normalized_hist(time_spent, bins=24)

sc_x2 = bq.LinearScale()
sc_y2 = bq.LinearScale()
ax_x2 = bq.Axis(scale=sc_x2, orientation='horizontal', label='Time spent(minutes)')
ax_y2 = bq.Axis(scale=sc_y2, orientation='vertical', label='Population%',
                grid_lines='dashed', grid_color='gray')

# x holds the bin edges, one more than there are bars, so the last edge is dropped.
bars_hist = bq.Bars(x=x[:-1], y=y, align='right', colors=['#13294a'],
                    scales={'x': sc_x2, 'y': sc_y2})

fig_bar1 = bq.Figure(
    title='Population Distribution of Time Spent',
    marks=[bars_hist],
    axes=[ax_x2, ax_y2],
    background_style={'fill': 'white'},
    layout=Layout(max_width='99%', max_height='60%'),
)
#fig_bar1.fig_margin = {'bottom':50, 'top':50, 'left':20, 'right':10}

###########################################################

##################### Age Line Plot #######################
# Average minutes of work per respondent at each age (TEAGE).
# The work columns are selected before aggregating so only those columns are
# summed, and the x values are taken from the same grouped result as the y
# values so both series always have the same length and order.
age_wise_act_sum = merged.groupby('TEAGE')[work_cols].sum().sum(axis=1)
age_wise_count = merged.groupby('TEAGE')['TUCASEID'].count()
y_data_line = (age_wise_act_sum / age_wise_count).values
x_data_line = age_wise_act_sum.index.tolist()

# Ages are plotted on an ordinal axis, one position per distinct age.
x_sc_line = bq.OrdinalScale()
y_sc_line = bq.LinearScale()

x_ax_line = bq.Axis(scale = x_sc_line, num_ticks=10, label='Age', color='#13294a')
y_ax_line = bq.Axis(scale = y_sc_line, num_ticks=10,
                    orientation = 'vertical', label='Time spent in minutes',
                    grid_color='gray', grid_lines='dashed')

# 'basis' interpolation draws a smoothed curve through the per-age averages.
lines = bq.Lines(x = x_data_line, y = y_data_line, scales = {'x': x_sc_line, 'y': y_sc_line},
                 colors=['#13294a'], interpolation = 'basis')

fig_line = bq.Figure(marks = [lines], axes = [x_ax_line, y_ax_line],
                     title='Average Working Time across US',
                     background_style = {'fill': 'white'})
fig_line.layout = Layout(max_width='99%', max_height='60%')

###########################################################

##################### Interactivity #######################

def on_select_map(change):
    """Refresh the histogram when the state selection on the map changes.

    Args:
        change: Trait-change dict delivered by ``states_map.observe``;
            ``change['new']`` is the new value of ``states_map.selected``
            (falsy when nothing is selected).
    """
    selection = change['new']
    if not selection:
        # Nothing selected: show the distribution over every state.
        selected_fips = list(merged.GESTFIPS.unique())
    else:
        # Only one state is shown at a time: keep the most recent pick and
        # collapse the map's selection to it.
        selected_fips = [selection[-1]]
        states_map.selected = selected_fips
    columns = activity_cols_dict[act_dd.index]
    per_person = (merged
                  .loc[merged.GESTFIPS.isin(selected_fips), ['TUCASEID'] + columns]
                  .set_index('TUCASEID')
                  .sum(axis=1))
    x, y = normalized_hist(per_person, bins=24)
    # x holds the bin edges (one more than there are bars); drop the last one
    # so x and y have equal length, as when the bars were first created.
    bars_hist.x = x[:-1]
    bars_hist.y = y
    # TODO: also restrict the age line plot to the selected state.

# Observe above changes
states_map.observe(on_select_map, 'selected')

def on_activity_change(change):
    """Redraw the map, line plot and histogram for the newly chosen activity.

    Args:
        change: Trait-change dict from ``act_dd.observe``. It is not read;
            the current choice is taken from ``act_dd`` directly.
    """
    # Drop any state selection before switching activity.
    states_map.selected = []
    columns = activity_cols_dict[act_dd.index]

    # Map: per-state mean of each column, summed across the activity's columns.
    states_map.color = (merged.loc[:, columns]
                        .groupby(merged.GESTFIPS)
                        .mean()
                        .sum(axis=1)
                        .round(1)
                        .to_dict())
    fig_map.title = act_dd.value

    # Line plot: average minutes per respondent at each age. Only the needed
    # columns are aggregated, and x is taken from the same grouped result as
    # y so the two series cannot differ in length.
    age_wise_act_sum = merged.groupby('TEAGE')[columns].sum().sum(axis=1)
    age_wise_count = merged.groupby('TEAGE')['TUCASEID'].count()
    lines.x = age_wise_act_sum.index.tolist()
    lines.y = (age_wise_act_sum / age_wise_count).values
    fig_line.title = act_dd.value + ' across US'

    # Histogram over all respondents. x holds the bin edges (one more than
    # there are bars), so the last edge is dropped to match y.
    x, y = normalized_hist(merged.loc[:, ['TUCASEID'] + columns].set_index('TUCASEID').sum(axis=1), bins=24)
    bars_hist.x = x[:-1]
    bars_hist.y = y


# Observe above changes
act_dd.observe(on_activity_change, 'value')

###########################################################

# ipywidgets.VBox([act_dd, ipywidgets.HBox([fig_map, ipywidgets.VBox([fig_bar1, fig_line])])])
#nbi:left
#nbi:hide_in
ipywidgets.VBox(children=[act_dd, fig_map])  # dropdown stacked above the map
#nbi:right
#nbi:hide_in
ipywidgets.VBox(children=[fig_bar1, fig_line])  # histogram stacked above the age line plot

Insights

Let’s take a closer look at the statistics for 7 specific activities and visualise how they shape the lifestyle of people in certain states.

  1. Work: It is surprising that people from South Dakota are the real hard workers, spending an average of 301.7 minutes on work and work-related activities. By contrast, people from states like California and New York, whose Bay Area and East Coast regions are known as IT and corporate hubs and who might be expected to spend the most time working, turn out to be smart workers who allot their time wisely.
  2. Travel: Nowadays, commuting from one place to another takes up a large portion of the day, and it seems that people from Maryland travel the most. However, it is also well known that New Yorkers spend much of their time commuting from one part of the city to another.
  3. Sleeping: It looks like people from Rhode Island love to sleep, as they spend an average of 593.1 minutes on this activity. Islanders tend to have a very relaxed lifestyle, and this visualization adds more evidence that they live a very stress-free life. Sadly, people from Alaska, Nebraska and South Dakota get comparatively less sleep. Wonder what’s keeping them up at night!
  4. Religion: Two words: The South. It comes as no surprise that people from the southern regions of the United States are the most involved in religious activities. Residents of Arkansas, Mississippi, Alabama, North Carolina, South Carolina, Texas, Louisiana and Tennessee spend a good 17-24 minutes paying respect and praying to the Lord.
  5. Leisure: Industrious Utah residents are reported to spend very little time on leisure activities each day. Contrasting behaviour is observed in West Virginia, where people spend 364.2 minutes on leisure activities like socializing, relaxing, playing games, watching TV, etc.
  6. Sports: The tiny state of Vermont is a big cradle of Olympians and is home to famous athletes such as Harry Blanchard, Jimmy Cochran, Paul Hackett, Billy Kidd and Ross Powers.
  7. Household work: From the observations made, it looks like Alaskans are neat freaks, because they spend an average of 82.5 minutes daily on household activities. On the other hand, residents of Wyoming barely spare 20 minutes on maintaining a neat house.

NOTE: It is curious that in the plot of time spent (minutes) vs. population %, the bar at 0 is the highest for every activity category. A value of ‘0’ indicates that a person spent 0 minutes on that particular activity. ATUS uses the convention of blanking improper data to '0', which could inflate the proportion of the population that shows up with no time spent on an activity.

Hard workers by choice?

People in some states, like South Dakota, seem to work significantly longer than others. Let us dive deeper to check for plausible causal factors. The chart below shows the gender distribution of respondents from the top working states, namely South Dakota, New Mexico, Nebraska, Utah, Tennessee, Oregon, Alaska, Idaho, Louisiana and Georgia.

#nbi:hide_in
# Total work minutes per respondent; stored on `merged` as a new column.
merged['work_total'] = merged[work_cols].sum(axis=1)

# Respondents with non-zero work time, overall and split by TESEX
# (1 is plotted as 'Male', 2 as 'Female').
m_work = merged[merged['work_total'] != 0]
m_work_men = m_work[m_work['TESEX'] == 1]
m_work_women = m_work[m_work['TESEX'] == 2]

# Per-state (GESTFIPS) mean of every work column for each group.
m_work_men = m_work_men[work_cols].groupby(m_work_men['GESTFIPS']).mean()
m_work_women = m_work_women[work_cols].groupby(m_work_women['GESTFIPS']).mean()
m_work = m_work[work_cols].groupby(m_work['GESTFIPS']).mean()
#nbi:hide_in
# Per-state work time in hours (minutes / 60, two decimals), highest first.
# to_frame() on the unnamed Series yields a column labelled 0.
mer_work = m_work.sum(axis=1).sort_values(ascending=False).div(60).round(2).to_frame().reset_index()
mer_work_male = m_work_men.sum(axis=1).sort_values(ascending=False).div(60).round(2).to_frame().reset_index()
mer_work_female = m_work_women.sum(axis=1).sort_values(ascending=False).div(60).round(2).to_frame().reset_index()

# The first merge has two columns labelled 0, which pandas suffixes to
# '0_x'/'0_y'; the female column then joins without a clash and keeps the
# integer label 0. The rename depends on exactly that.
merge1 = mer_work.merge(mer_work_male, on='GESTFIPS', how='left')
merge2 = merge1.merge(mer_work_female, on='GESTFIPS', how='left')
merge2 = (
    merge2
    .rename(columns={'0_x': 'Overall', '0_y': 'Male', 0: 'Female'})
    .sort_values(by='Overall', ascending=False)
)
#nbi:hide_in
# Rows at positions 1..10 of the ranking sorted by 'Overall'.
# NOTE(review): the slice starts at position 1, so the highest-ranked row
# (position 0) is skipped - confirm this is intentional and that the
# hard-coded state names below line up with these rows.
top_worktimes = merge2['Overall'].iloc[1:11].to_numpy()
top_worktimes_male = merge2['Male'].iloc[1:11].to_numpy()
top_worktimes_female = merge2['Female'].iloc[1:11].to_numpy()

# Axis labels for the ten plotted rows, in plotting order.
top_workstates = [
    'South Dakota', 'New Mexico', 'Nebraska', 'Utah', 'Tennessee',
    'Oregon', 'Alaska', 'Idaho', 'Louisiana', 'Georgia',
]

################### Top 10 work States Bar #######################

# Scales shared by the bars and both gender lines: states on an ordinal
# x-axis, hours on a linear y-axis.
sc_x3 = bq.OrdinalScale()
# The y-axis is limited to 5-9.7 hours, so bar heights are not drawn from zero.
sc_y3 = bq.LinearScale(min=5, max=9.7)
ax_x3 = bq.Axis(label='States',scale=sc_x3, orientation='horizontal', color='#13294a')
ax_y3 = bq.Axis(label = 'Avg. Work Time in Hours', scale=sc_y3, orientation='vertical', color='#13294a',
               grid_color='gray', grid_lines='dashed')

# Bars: overall average work time for each listed state.
bars_topwork = bq.Bars(x = top_workstates, y=top_worktimes, padding=0.35,
                    scales={'x': sc_x3, 'y': sc_y3}, colors=['#13294a'])

# Lines drawn over the bars: the male and female averages for the same states.
lines_male = bq.Lines(x = top_workstates, y=top_worktimes_male, scales={'x': sc_x3, 'y': sc_y3}, 
                      colors=['#22A8DB'], stroke_width = 3, marker='circle', display_legend = True, 
                      labels=['Male'])

lines_female = bq.Lines(x = top_workstates, y=top_worktimes_female, scales={'x': sc_x3, 'y': sc_y3}, 
                        colors=['#FC0F3A'], stroke_width = 3, marker='circle', display_legend = True, 
                        labels=['Female'])


# Marks are listed bars first, then the two lines.
fig_bar2 = bq.Figure(marks = [bars_topwork, lines_male, lines_female], axes=[ax_x3, ax_y3], 
                     background_style = {'fill': 'white'},
                     layout={'flex': '1'},
                     title = 'States that work the longest hours and comparison of gender work gap',
                     title_style = {'font-size': '20px', 'text-transform': 'uppercase',
                                    'font-weight': '700', 'color': '#13294a'})
#nbi:hide_in
# Flex container that centres the figure horizontally. The flexbox property
# on ipywidgets' Layout is `justify_content`; `justify` is not one of its
# documented properties.
bar2box = ipywidgets.Box(children=[fig_bar2], layout=Layout(display='flex', justify_content='center'))
bar2box

Women in general seem to work fewer hours than men in most of the states. This might be attributed to the wage inequality faced by women. A recent study from the Institute for Women’s Policy Research noted that the commonly cited comparative wage figure of 80 cents on the dollar is actually an overestimation for many women. In many segments of the population, women earn just 49 cents on a white man’s dollar. This pay disparity might be a reason for the lower number of hours women work. Also, women typically still face greater commitments at home (childcare etc.), which could also contribute to the trend of fewer hours worked.

Alaska seems to be an extreme outlier in terms of gender disparity. This can be explained by the high percentage of jobs in Alaska's oil and gas industry, which predominantly employs men. Experts share this view [3].

"Almost certainly the biggest factor — we have a higher percentage of oil and gas employment than other states do, and then that industry is the highest-paid in Alaska, and … the percentages are high for males," said Dan Robinson, chief of research and analysis at the Alaska Department of Labor and Workforce Development.

We do not see much correlation between hours worked and the gender distribution. Intuition says that hours worked will rise as hourly wages fall. Let us see if this is really the case. The chart below shows the average hourly wage of each state.

#nbi:hide_in
# Respondent file: only the case id and the hourly-earnings column are read.
respond = pd.read_csv(
    'https://raw.githubusercontent.com/sarvaniputta/nbinteract_tutorial/master/atusresp_2017.dat',
    usecols=['TUCASEID', 'TRERNHLY'],
)
# Attach hourly earnings to every merged row by case id (left join).
wage_merged = merged.merge(respond, how='left', on='TUCASEID')
# Rows whose TRERNHLY is -1 are excluded from the average.
new_wage_merged = wage_merged.loc[wage_merged['TRERNHLY'] != -1]
# Per-state mean, divided by 100 before being shown as dollars
# (presumably TRERNHLY is stored in cents - confirm against the data dictionary).
salary_per_hr = new_wage_merged.groupby('GESTFIPS')['TRERNHLY'].mean().div(100)

# Colour scale shared by the wage map and its colour axis.
cscale2 = bq.ColorScale(scheme = 'Blues')
# NOTE(review): an earlier note here said to reverse the colorscale or Hawaii
# is not visible; no reversal is applied in this code - confirm whether it is
# still needed.

# Hover tooltip: state name plus the value that drives the fill colour.
map_tt2 = bq.Tooltip(labels = ['State', 'Hourly Wage ($)'], fields = ['name', 'color'])
sc_geo2 = bq.AlbersUSA(scale=2700)
# States are coloured by the per-state average computed in salary_per_hr,
# rounded to one decimal and keyed by GESTFIPS.
states_map2 = bq.Map(color = (salary_per_hr).round(1).to_dict(), 
                     map_data=bq.topo_load('map_data/USStatesMap.json'),
                    scales = {'projection':sc_geo2, 'color':cscale2}, 
                   tooltip = map_tt2)

# Vertical colour legend on the left; fig_margin below reserves 65px for it.
cax2 = bq.ColorAxis(scale=cscale2, orientation='vertical', side='left', label='Wage ($)',)
fig_map2 = bq.Figure(marks=[states_map2],axes=[cax2], title = 'Average hourly wage',
                layout={'flex': '1'},
                background_style = {'fill': 'white'},
                title_style = {'font-size': '20px', 'text-transform': 'uppercase',
                               'font-weight': '700', 'color': '#13294a'},)
fig_map2.fig_margin = {'bottom': 0, 'top': 0, 'left': 65, 'right': 0}
fig_map2.aspect_ratio = 1920/1080
               
# Flex container that centres the wage map horizontally. The flexbox property
# on ipywidgets' Layout is `justify_content`; `justify` is not one of its
# documented properties.
bar2wage = ipywidgets.Box(children=[fig_map2], layout=Layout(display='flex', justify_content='center'))
bar2wage

This chart explains South Dakota's position in working hours more clearly. The state has the third-lowest average wage for employed people in the country, behind only Arkansas and Mississippi [2]. Roughly 21 percent of employed South Dakota residents, about 87,000 people, make under \$30,000 a year; 41 percent of employed South Dakota residents, about 169,400 people, make under \$35,000 a year; and 71 percent of employed South Dakota residents, about 292,000 people, make under \$40,000 a year. Despite attempts to lure new employers and improve pay for workers, the state has made little progress over the past few decades. After languishing near the bottom for years, the state had the lowest average pay in the nation in 2008 and has moved up only two spots since then. This could be the reason why the residents of South Dakota have to put in extra hours to make ends meet [4]. However, correlation is not causation. In this regard, it is interesting that Mississippi ranks lower in hours worked despite its lower hourly wage.

Stay single to have more leisure

“Behind every successful man, there is a woman”. We put this oft-quoted maxim to the test. Based on this presumption, we set out to observe visually whether relationships really bring about a change in one’s life. From the chart below, it is evident that people with a spouse or partner tend to spend more time working, not only at the office but also at home! Man, that must be very tiring. Single people also tend to sleep in longer and have more time for leisure.

#nbi:hide_in
# Split respondents on TRSPPRES: codes 1 and 2 form the group plotted as
# 'Spouse/Unmarried Partner', code 3 the group plotted as 'No spouse/partner'.
spouse_res = merged[merged['TRSPPRES'].isin([1, 2])]
single_res = merged[merged['TRSPPRES'] == 3]

# Category labels for the grouped bar chart, one per activity.
list_acts = [
    'Avg. Work Time',
    'Avg. Travel Time',
    'Avg. Sleep Time',
    'Avg. Religion Time',
    'Avg. Leisure Time',
    'Avg. Sports Time',
    'Avg. Housework Time',
]

def collect_avg_times(list_act_cols, data):
    """Average hours per row for each group of activity columns.

    Args:
        list_act_cols: Sequence of column-name lists, one list per activity.
        data: DataFrame holding those columns, with values in minutes.

    Returns:
        Dict mapping the 1-based position of each group to the mean over
        rows of that group's summed minutes, divided by 60.
    """
    return {
        position: data[group].sum(axis=1).mean() / 60
        for position, group in enumerate(list_act_cols, start=1)
    }

# Column groups in the same order as the labels in list_acts.
list_act_cols = [
    work_cols,
    travel_cols,
    sleep_cols,
    religion_cols,
    leisure_cols,
    sports_cols,
    housework_cols,
]
# Average hours per activity for each of the two groups.
spouse_dict1 = collect_avg_times(list_act_cols=list_act_cols, data=spouse_res)
single_dict2 = collect_avg_times(list_act_cols=list_act_cols, data=single_res)

################### Single vs. Partnered Grouped Bar #######################

# The bars are horizontal, so the ordinal (activity) axis is drawn vertically
# and the linear (hours) axis horizontally.
sc_x4 = bq.OrdinalScale()
sc_y4 = bq.LinearScale()
ax_x4 = bq.Axis(scale=sc_x4, orientation='vertical', color='#13294a')
ax_y4 = bq.Axis(label = 'Avg. Time in Hours', scale=sc_y4, orientation='horizontal', color='#13294a',
               grid_color='lightgray', grid_lines='dashed')

# Two series per activity: the single group first, then the spouse/partner
# group, matching the order of `colors` and `labels`.
bars_grouped = bq.Bars(x = list_acts, y=[list(single_dict2.values()), list(spouse_dict1.values())], padding=0.1,
                    scales={'x': sc_x4, 'y': sc_y4}, colors=['#13294a','#e84a27'], 
                       display_legend = True, labels=['No spouse/partner','Spouse/Unmarried Partner'], 
                       orientation='horizontal', type='grouped')


# The 130px left margin leaves room for the activity labels on the vertical axis.
fig_bar3 = bq.Figure(marks = [bars_grouped], axes=[ax_x4, ax_y4], 
                     background_style = {'fill': 'white'},
                     layout={'flex':'1'},
                     title = 'Activity avg. times with and w/o spouse or unmarried partner',
                     title_style = {'font-family': 'Inherit', 'font-size': '20px', 'text-transform': 'uppercase',
                                    'font-weight': '600', 'color': '#13294a'},
                     fig_margin = {'bottom': 60, 'top': 60, 'left': 130, 'right': 20})
# Flex container that centres the grouped bar chart horizontally. The flexbox
# property on ipywidgets' Layout is `justify_content`; `justify` is not one of
# its documented properties.
bar2relation = ipywidgets.Box(children=[fig_bar3], layout=Layout(display='flex', justify_content='center'))
bar2relation

On second thought, a possible confounding factor for this analysis is that single respondents are typically younger and have a different lifestyle compared to older respondents in a relationship. A longitudinal study across ages would be necessary to obtain more insights into this extremely interesting question.

We have considered some questions on how Americans spend their time. While most of the observations are interesting, deeper dives are necessary to understand causal factors. As they say "correlation is not causation". On the other hand, if you are looking to relocate, know that there are some places more suited to your lifestyle than others.