Exploratory Data Analysis

Note

See the beginning of this chapter to load the required libraries and the data.

1

A quick way to check the data types is as follows:

# Compact overview of `dt`: its dimensions, then each column's type and values.
dt %>% glimpse()
Rows: 753
Columns: 22
$ inlf     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ hours    <dbl> 1610, 1656, 1980, 456, 1568, 2032, 1440, 1020, 1458, 1600, 1969, 1960, 240, 997, 1848, 1224, 1400, 640, 2000, 1324, 2215, 1680, 1600, 800, 1955, 660, 525, 1904, 1516, 346, 1040, 732, 1880, 1680, 2081, 690, 4210, 2205, 1952, 1302, 112, 893, 583, 480, 1900, 576, 2056, 1984, 2640, 240, 1173, 3640, 340, 500, 1599, 1830, 1920, 2052, 2312, 196, 2500, 1980, 1840, 320, 419, 1880, 72, 120, 1885, 240, 1729, 1850, 2033, 608, 1153, 2208, 252, 337, 90, 1174, 372, 30, 1800, 282, 720, 1440, 2100, 1000, 952, 1413, 2100, 120, 3000, 1000, 336, 1216, 988, 2581, 2030, 413, 782, 1388, 1450, 1720, 800, 360, 2000, 1176, 2058, 900, 215, 2000, 757, 1264, 2064, 1280, 1715, 2000, 12, 1528, 561, 2058, 1823, 1680, 1440, 4950, 1176, 1100, 1516, 900, 1080, 480, 288, 1875, 630, 234, 1600, 960, 120, 2025, 1809, 3087, 910, 1840, 784, 400, 1000, 1904, 1771, 1486, 740, 1820, 1275, 450, 1221, 1550, 180, 2090, 1960, 1440, 794, 993, 160, 105, 1200, 450, 996, 1052, 1128, 1840, 1910, 980, 2317, 1089, 800, …
$ kidslt6  <dbl> 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ kidsge6  <dbl> 0, 2, 3, 3, 2, 0, 2, 0, 2, 2, 1, 1, 2, 2, 1, 3, 2, 5, 0, 4, 2, 0, 0, 0, 1, 2, 0, 3, 0, 0, 0, 0, 2, 1, 3, 1, 0, 1, 0, 0, 2, 0, 2, 2, 3, 2, 2, 0, 0, 2, 1, 0, 8, 0, 0, 0, 0, 0, 0, 2, 3, 4, 0, 0, 4, 0, 2, 0, 3, 1, 0, 0, 0, 4, 1, 1, 2, 0, 2, 0, 0, 0, 1, 0, 0, 1, 0, 2, 1, 3, 2, 0, 2, 2, 4, 1, 1, 2, 0, 2, 1, 1, 2, 1, 2, 3, 0, 3, 1, 4, 0, 0, 1, 0, 3, 0, 0, 2, 3, 0, 3, 0, 2, 1, 0, 2, 3, 0, 1, 4, 2, 0, 2, 1, 1, 1, 2, 0, 2, 0, 0, 0, 2, 1, 0, 2, 2, 2, 1, 3, 2, 1, 3, 0, 0, 0, 3, 0, 1, 3, 3, 2, 3, 0, 0, 3, 0, 0, 2, 0, 2, 1, 1, 0, 1, 0, 1, 0, 2, 1, 4, 1, 2, 2, 2, 5, 0, 1, 3, 0, 0, 4, 3, 3, 3, 3, 1, 0, 1, 3, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0, 0, 0, 2, 0, 0, 0, 5, 2, 0, 2, 1, 1, 0, 2, 4, 3, 2, 2, 0, 3, 1, 0, 0, 1, 1, 3, 1, 0, 2, 0, 0, 2, 1, 2, 3, 3, 4, 1, 0, 0, 0, 5, 0, 2, 4, 0, 3, 0, 3, 3, 0, 1, 1, 1, 0, 1, 1, 4, 2, 2, 0, 1, 3, 2, 0, 0, 3, 3, 0, 2, 0, 1, 0, 0, 1, 0, 0, 2, 1, 1, 0, 0, 2, 0, 1, 0, 3, 0, 0, 1, 0, 2, 3, 2, 1, 2, 1, 3, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 4, 0, 1, 1, 5,…
$ age      <dbl> 32, 30, 35, 34, 31, 54, 37, 54, 48, 39, 33, 42, 30, 43, 43, 35, 43, 39, 45, 35, 42, 30, 48, 45, 31, 43, 59, 32, 31, 42, 50, 59, 36, 51, 45, 42, 46, 46, 51, 30, 30, 57, 31, 48, 30, 34, 48, 45, 51, 30, 46, 58, 37, 52, 52, 31, 55, 34, 55, 39, 40, 43, 48, 47, 41, 36, 46, 34, 41, 51, 33, 52, 58, 34, 31, 48, 32, 49, 32, 58, 50, 60, 50, 56, 51, 54, 59, 46, 46, 39, 44, 33, 33, 48, 31, 45, 45, 32, 47, 34, 37, 36, 47, 48, 42, 33, 46, 47, 44, 36, 31, 55, 45, 47, 46, 49, 49, 45, 38, 47, 54, 41, 43, 31, 47, 35, 45, 33, 54, 35, 31, 55, 34, 38, 45, 47, 39, 36, 33, 50, 58, 49, 41, 51, 53, 36, 46, 36, 53, 40, 42, 33, 43, 31, 47, 54, 33, 43, 46, 35, 37, 37, 34, 43, 46, 35, 46, 46, 43, 30, 41, 54, 31, 44, 32, 47, 46, 37, 51, 49, 36, 39, 48, 38, 40, 39, 37, 49, 33, 30, 54, 39, 43, 31, 33, 40, 36, 51, 44, 42, 40, 34, 30, 54, 51, 44, 43, 34, 45, 39, 50, 52, 41, 59, 52, 46, 41, 33, 45, 36, 48, 47, 45, 37, 46, 43, 42, 34, 52, 37, 37, 52, 30, 31, 38, 43, 49, 55, 38, 52, 48, 32, 32, 38, 46, …
$ educ     <dbl> 12, 12, 12, 12, 14, 12, 16, 12, 12, 12, 12, 11, 12, 12, 10, 11, 12, 12, 12, 12, 16, 12, 13, 12, 12, 17, 12, 12, 17, 12, 11, 16, 13, 12, 16, 11, 12, 10, 14, 17, 12, 12, 16, 12, 12, 12, 16, 12, 12, 12, 12, 12, 12, 8, 10, 16, 14, 17, 14, 12, 14, 12, 8, 12, 12, 8, 17, 12, 12, 12, 12, 12, 9, 10, 12, 12, 12, 17, 15, 12, 6, 14, 12, 14, 9, 17, 13, 9, 15, 12, 12, 12, 12, 12, 12, 12, 12, 13, 12, 13, 12, 12, 12, 16, 12, 13, 11, 12, 12, 12, 17, 14, 16, 17, 12, 11, 12, 12, 17, 10, 13, 11, 12, 16, 17, 12, 16, 12, 16, 8, 12, 12, 12, 13, 11, 12, 12, 14, 12, 12, 12, 17, 14, 12, 9, 12, 12, 12, 14, 16, 17, 15, 12, 16, 17, 17, 12, 16, 13, 12, 11, 16, 14, 16, 12, 9, 17, 14, 12, 12, 11, 12, 12, 10, 12, 5, 17, 11, 12, 12, 14, 11, 12, 14, 12, 10, 16, 13, 12, 12, 12, 11, 12, 9, 13, 12, 12, 12, 13, 16, 12, 16, 17, 12, 12, 9, 12, 12, 13, 12, 12, 12, 12, 10, 12, 16, 12, 11, 12, 10, 12, 12, 12, 12, 16, 17, 12, 17, 12, 12, 12, 8, 12, 13, 12, 12, 8, 12, 17, 17, 12, 13, 12, 12, 12, 12, 9, 10, 12, …
$ wage     <dbl> 3.3540, 1.3889, 4.5455, 1.0965, 4.5918, 4.7421, 8.3333, 7.8431, 2.1262, 4.6875, 4.0630, 4.5918, 2.0833, 2.2668, 3.6797, 1.3472, 3.2143, 5.1750, 2.0000, 7.5529, 3.5052, 3.5714, 3.2500, 3.2500, 2.1545, 3.7879, 4.0000, 4.7269, 7.2559, 5.8671, 1.5385, 2.4590, 5.8511, 3.5714, 3.8068, 2.4638, 2.3753, 4.5351, 5.6183, 14.6310, 2.6786, 3.9194, 2.5729, 4.5375, 2.0000, 3.4722, 2.0161, 4.5716, 2.2727, 2.6375, 2.2899, 1.0989, 1.1765, 1.6000, 1.8762, 4.0437, 9.6354, 8.0409, 4.5990, 2.1429, 4.4000, 3.5354, 2.7174, 6.2500, 11.9330, 3.5931, 6.9444, 2.9167, 3.0769, 3.7500, 5.7259, 3.6757, 5.1648, 8.2237, 4.3365, 4.9819, 0.3571, 2.9674, 1.0000, 2.5554, 0.8602, 1.0000, 2.9261, 3.5461, 1.6264, 8.3333, 3.0952, 2.7000, 5.2521, 1.4154, 4.7986, 1.6667, 1.1217, 0.5000, 0.7143, 2.7961, 4.8583, 1.7435, 2.4631, 2.4213, 1.5345, 2.8818, 2.4069, 5.2326, 3.7500, 1.3889, 4.0000, 3.2313, 3.4014, 1.3333, 9.3023, 4.5000, 4.6235, 3.9557, 5.8140, 0.5000, 4.0816, 6.0000, 3.6667, 3.8613, 2.7629, 2.9310, 4.…
$ repwage  <dbl> 2.65, 2.65, 4.04, 3.25, 3.60, 4.70, 5.95, 9.98, 0.00, 4.15, 4.30, 4.58, 0.00, 3.50, 3.38, 0.00, 4.00, 2.25, 2.30, 3.94, 3.30, 3.80, 3.26, 2.20, 2.30, 0.00, 3.18, 6.07, 6.00, 6.39, 0.00, 2.50, 5.20, 3.29, 4.19, 0.00, 4.63, 4.55, 5.60, 9.53, 0.00, 3.50, 9.98, 4.65, 2.23, 3.84, 0.00, 4.82, 0.00, 0.00, 2.50, 0.00, 0.00, 0.00, 2.80, 4.20, 8.75, 8.25, 5.58, 2.50, 5.50, 3.75, 4.80, 6.25, 6.30, 3.75, 0.00, 0.00, 2.90, 0.00, 4.76, 3.40, 4.32, 3.00, 4.52, 5.31, 0.00, 0.00, 0.00, 4.87, 2.25, 0.00, 0.00, 2.50, 2.20, 6.00, 2.95, 2.35, 0.00, 2.20, 4.85, 2.37, 0.00, 0.00, 0.00, 2.90, 4.50, 2.60, 2.80, 3.00, 2.30, 1.87, 3.00, 4.68, 2.50, 0.00, 4.61, 3.35, 3.80, 2.20, 9.98, 4.37, 4.50, 0.00, 6.00, 0.00, 3.75, 4.95, 0.00, 2.95, 2.53, 2.90, 4.20, 4.70, 7.10, 2.75, 3.75, 3.58, 2.40, 3.95, 2.25, 0.00, 4.62, 8.50, 0.00, 2.50, 0.00, 0.00, 0.00, 4.80, 6.45, 9.98, 0.00, 2.50, 2.25, 0.00, 5.50, 4.20, 9.98, 6.25, 1.65, 0.00, 3.00, 4.00, 5.99, 6.00, 0.00, 3.85, 4.20, 4.10, 3.85, 8.30, 0.00, 0.…
$ hushrs   <dbl> 2708, 2310, 3072, 1920, 2000, 1040, 2670, 4120, 1995, 2100, 2450, 2375, 2830, 3317, 2024, 1694, 2156, 2250, 2024, 2123, 4160, 2000, 2420, 1150, 2024, 1904, 2448, 2000, 2390, 1920, 1840, 3360, 2284, 1875, 2140, 1896, 1040, 2200, 1952, 1560, 4030, 2570, 1530, 3149, 2690, 3096, 2552, 2040, 2180, 1864, 2068, 2010, 2152, 1496, 2100, 1960, 1985, 2020, 2178, 3684, 5010, 1880, 1904, 2083, 2125, 1985, 2640, 2070, 2107, 2250, 2880, 1848, 1927, 1304, 3000, 1892, 3644, 1430, 2350, 1948, 1804, 2326, 1739, 1176, 1100, 1528, 2250, 1927, 2414, 768, 1984, 2246, 3024, 2921, 2045, 1928, 1920, 2280, 2300, 2480, 1135, 1384, 1848, 2499, 2390, 2400, 1920, 2301, 1944, 2100, 1920, 2880, 1932, 3234, 2805, 2272, 2227, 1720, 2300, 3410, 2304, 1984, 1890, 1970, 2400, 2504, 2398, 1960, 2550, 2500, 2164, 2640, 1936, 2136, 1955, 1980, 2550, 2058, 2263, 1763, 2096, 2059, 1820, 2832, 1990, 2000, 1885, 2860, 1913, 1800, 2880, 1993, 2250, 2286, 1880, 2350, 3640, 1770, 1875, 2200, 2033, 2739, 1626, 224…
$ husage   <dbl> 34, 30, 40, 53, 32, 57, 37, 53, 52, 43, 34, 47, 33, 46, 45, 38, 45, 40, 51, 40, 48, 35, 52, 53, 31, 43, 53, 33, 30, 47, 53, 57, 35, 50, 47, 44, 49, 52, 58, 30, 33, 58, 34, 48, 32, 33, 53, 47, 50, 37, 46, 58, 40, 54, 47, 35, 55, 33, 56, 39, 42, 47, 56, 47, 44, 37, 48, 51, 48, 54, 34, 53, 53, 38, 35, 52, 32, 47, 31, 59, 42, 51, 55, 57, 55, 58, 52, 47, 47, 49, 43, 36, 30, 52, 37, 44, 44, 37, 47, 35, 34, 39, 50, 46, 43, 44, 53, 48, 43, 37, 32, 55, 48, 48, 47, 52, 52, 49, 38, 50, 57, 42, 33, 32, 46, 37, 48, 39, 55, 41, 37, 60, 36, 41, 51, 48, 42, 37, 32, 54, 59, 47, 40, 53, 60, 40, 46, 37, 48, 54, 45, 32, 47, 30, 47, 58, 33, 47, 49, 38, 39, 39, 38, 44, 48, 42, 59, 48, 46, 30, 46, 54, 41, 47, 31, 53, 57, 47, 50, 49, 37, 44, 50, 43, 42, 36, 43, 47, 35, 32, 56, 40, 43, 32, 37, 39, 39, 51, 43, 48, 40, 33, 41, 53, 54, 46, 42, 37, 52, 47, 48, 53, 45, 59, 60, 49, 48, 35, 38, 37, 56, 47, 45, 48, 49, 46, 46, 39, 53, 52, 40, 59, 30, 32, 46, 48, 51, 58, 42, 56, 51, 33, 33, 41, 47, …
$ huseduc  <dbl> 12, 9, 12, 10, 12, 11, 12, 8, 4, 12, 12, 14, 16, 12, 17, 12, 12, 12, 11, 10, 16, 12, 17, 17, 12, 17, 16, 13, 17, 10, 10, 17, 13, 8, 17, 8, 16, 12, 12, 17, 16, 12, 16, 8, 12, 12, 16, 11, 13, 12, 12, 12, 10, 11, 4, 14, 15, 17, 16, 12, 13, 12, 8, 12, 12, 12, 17, 8, 8, 10, 16, 12, 7, 9, 12, 12, 12, 17, 14, 16, 12, 12, 11, 17, 8, 17, 15, 10, 16, 8, 14, 12, 17, 12, 17, 12, 10, 17, 17, 13, 10, 12, 14, 17, 12, 14, 8, 13, 10, 17, 16, 14, 16, 17, 12, 9, 14, 6, 13, 12, 16, 12, 14, 12, 12, 12, 17, 12, 12, 12, 10, 12, 12, 12, 10, 10, 11, 13, 14, 13, 14, 13, 17, 8, 10, 7, 12, 12, 16, 16, 16, 16, 14, 16, 16, 17, 11, 14, 13, 9, 12, 17, 14, 17, 12, 12, 17, 16, 10, 12, 9, 15, 9, 11, 16, 5, 17, 8, 11, 12, 13, 12, 8, 17, 12, 17, 17, 12, 17, 16, 8, 12, 12, 12, 14, 14, 9, 12, 12, 14, 8, 16, 16, 12, 12, 11, 9, 8, 12, 7, 10, 12, 12, 7, 8, 12, 16, 8, 12, 8, 10, 11, 12, 17, 14, 16, 17, 17, 12, 9, 12, 7, 14, 12, 12, 16, 8, 10, 17, 16, 11, 12, 17, 17, 12, 9, 10, 12, 12, 17, 15, 8, 16, 12, 12, …
$ huswage  <dbl> 4.0288, 8.4416, 3.5807, 3.5417, 10.0000, 6.7106, 3.4277, 2.5485, 4.2206, 5.7143, 9.7959, 8.0000, 5.3004, 4.3413, 10.8700, 9.1499, 6.1224, 6.1498, 6.9170, 4.7103, 3.1310, 4.0000, 7.2227, 7.9652, 4.0884, 14.1810, 6.5359, 8.5000, 6.2762, 5.2083, 2.7821, 4.9107, 5.8669, 7.5200, 7.5449, 5.5380, 6.9231, 5.0000, 7.3064, 11.2180, 3.8462, 5.8366, 13.7250, 6.3493, 5.2528, 1.3075, 2.7998, 2.6961, 7.5688, 3.4077, 6.5401, 7.2139, 6.2732, 5.8824, 3.8095, 6.3776, 6.0453, 8.8119, 8.8765, 3.3420, 3.1836, 6.9149, 5.5147, 5.2808, 3.2000, 5.8791, 6.2500, 7.4879, 6.9767, 8.0000, 4.1319, 14.4760, 5.7343, 3.3742, 1.8333, 5.6025, 4.2975, 3.9161, 4.8787, 9.2402, 6.6519, 4.6217, 9.9741, 20.9180, 1.9400, 8.8351, 4.6667, 5.1894, 18.7240, 10.4170, 8.8458, 7.5690, 4.4507, 9.3204, 9.1687, 6.4834, 7.8125, 11.4040, 5.0870, 6.9758, 5.2863, 11.5620, 8.6061, 12.8050, 6.6946, 8.3333, 4.1667, 5.4759, 5.1440, 11.6670, 7.2917, 4.8611, 12.1640, 10.8230, 12.4780, 6.1620, 7.1846, 7.0930, 17.8260, 6.3930, 11.…
$ faminc   <dbl> 16310, 21800, 21040, 7300, 27300, 19495, 21152, 18900, 20405, 20425, 32300, 28700, 15500, 16860, 31431, 19180, 18600, 19151, 18100, 20300, 30419, 14090, 22679, 12160, 12487, 29850, 18100, 26000, 26100, 17730, 6719, 18550, 24600, 23100, 24656, 15897, 20320, 21384, 25561, 36550, 15810, 25500, 24000, 22172, 17930, 7000, 25300, 16212, 22650, 6985, 30000, 18500, 16658, 10300, 11000, 19900, 32500, 37300, 30018, 12807, 39500, 22050, 15500, 13810, 11950, 19175, 17900, 15850, 27017, 18900, 21800, 33552, 22650, 15200, 13120, 21660, 18190, 9600, 13755, 35350, 12405, 12180, 22962, 25700, 3305, 32950, 17000, 13250, 50750, 15632, 28316, 17290, 33600, 29200, 19870, 16225, 28600, 30800, 25700, 27000, 12077, 29612, 24479, 79750, 20050, 21500, 16120, 24686, 24669, 26400, 16245, 23300, 27200, 51000, 55000, 15389, 23150, 29774, 91044, 28200, 36150, 15652, 18900, 23600, 36200, 18700, 24125, 15800, 19742, 22600, 17500, 19820, 20155, 37300, 24860, 21450, 29650, 23000, 21120, 26000, 28000,…
$ mtr      <dbl> 0.7215, 0.6615, 0.6915, 0.7815, 0.6215, 0.6915, 0.6915, 0.6915, 0.7515, 0.6915, 0.5815, 0.6215, 0.7215, 0.7215, 0.5815, 0.7215, 0.6915, 0.7215, 0.6915, 0.6915, 0.6215, 0.7215, 0.6615, 0.7215, 0.7515, 0.5815, 0.6915, 0.6615, 0.6215, 0.7215, 0.7515, 0.6915, 0.6615, 0.6915, 0.6615, 0.7515, 0.6915, 0.6615, 0.6215, 0.5800, 0.7215, 0.6215, 0.6615, 0.6615, 0.7215, 0.7815, 0.7215, 0.7215, 0.6615, 0.7815, 0.6915, 0.6915, 0.7515, 0.7515, 0.7515, 0.6915, 0.6400, 0.5800, 0.5815, 0.7515, 0.5515, 0.6915, 0.7215, 0.7215, 0.7515, 0.6915, 0.6915, 0.7215, 0.6915, 0.6915, 0.6915, 0.5515, 0.6615, 0.7915, 0.7215, 0.6615, 0.7215, 0.7515, 0.7515, 0.6615, 0.7215, 0.7515, 0.6615, 0.6215, 0.7915, 0.5815, 0.6915, 0.7515, 0.4415, 0.7715, 0.6215, 0.7215, 0.6215, 0.6215, 0.6915, 0.7215, 0.6915, 0.5815, 0.6915, 0.6215, 0.7715, 0.6615, 0.6915, 0.4415, 0.6915, 0.6915, 0.6915, 0.7215, 0.6915, 0.6615, 0.7215, 0.6615, 0.6215, 0.4615, 0.4615, 0.7215, 0.6615, 0.6215, 0.4415, 0.5815, 0.5515, 0.7215, 0.75…
$ motheduc <dbl> 12, 7, 12, 7, 12, 14, 14, 3, 7, 7, 12, 14, 16, 10, 7, 16, 10, 12, 7, 12, 10, 12, 7, 7, 12, 16, 3, 3, 12, 12, 7, 3, 12, 7, 12, 10, 3, 10, 7, 14, 12, 9, 14, 3, 12, 12, 14, 10, 7, 12, 7, 7, 12, 7, 7, 12, 7, 17, 17, 12, 14, 12, 7, 7, 7, 12, 12, 12, 7, 12, 12, 10, 7, 0, 7, 12, 7, 3, 10, 7, 12, 12, 7, 7, 7, 7, 7, 7, 7, 10, 7, 12, 10, 12, 7, 7, 7, 14, 7, 12, 12, 7, 7, 14, 12, 10, 7, 7, 7, 7, 12, 7, 12, 10, 10, 7, 7, 7, 12, 7, 7, 12, 14, 12, 7, 10, 7, 7, 12, 10, 7, 7, 12, 10, 7, 12, 7, 7, 7, 7, 3, 12, 16, 7, 3, 12, 7, 12, 12, 16, 12, 12, 7, 14, 7, 10, 7, 14, 7, 7, 12, 12, 17, 7, 7, 3, 12, 7, 7, 7, 3, 7, 10, 10, 12, 7, 14, 10, 7, 7, 10, 12, 12, 7, 7, 7, 12, 7, 12, 12, 12, 10, 12, 10, 12, 12, 12, 7, 12, 12, 12, 12, 16, 7, 16, 7, 7, 10, 12, 10, 0, 7, 12, 12, 10, 12, 3, 7, 12, 10, 7, 7, 7, 7, 12, 12, 7, 12, 7, 10, 10, 7, 12, 17, 7, 7, 7, 7, 12, 14, 7, 12, 7, 7, 16, 7, 10, 12, 7, 16, 10, 3, 16, 7, 12, 7, 7, 7, 12, 12, 7, 10, 14, 16, 7, 10, 7, 14, 14, 12, 7, 7, 3, 7, 7, 7, 12, 10…
$ fatheduc <dbl> 7, 7, 7, 7, 14, 7, 7, 3, 7, 7, 3, 7, 16, 10, 7, 10, 7, 12, 7, 7, 16, 10, 3, 7, 7, 14, 7, 7, 12, 12, 7, 3, 10, 14, 12, 3, 3, 3, 7, 17, 12, 9, 16, 3, 7, 7, 16, 10, 7, 7, 7, 3, 7, 7, 3, 12, 7, 17, 7, 7, 3, 12, 7, 7, 7, 12, 16, 7, 7, 7, 12, 10, 9, 0, 10, 14, 7, 3, 12, 12, 7, 17, 3, 7, 7, 12, 7, 7, 12, 10, 0, 12, 10, 7, 7, 7, 3, 12, 7, 12, 7, 7, 10, 14, 7, 12, 7, 7, 10, 7, 12, 7, 7, 17, 7, 7, 7, 10, 10, 12, 7, 12, 7, 10, 7, 10, 7, 7, 7, 7, 7, 7, 16, 12, 7, 3, 7, 7, 7, 12, 7, 12, 12, 14, 7, 7, 7, 12, 12, 14, 10, 12, 7, 16, 7, 17, 3, 10, 9, 7, 3, 16, 12, 7, 7, 7, 12, 3, 7, 7, 7, 7, 10, 10, 7, 12, 17, 10, 7, 7, 12, 7, 12, 7, 7, 7, 7, 14, 7, 12, 7, 7, 12, 3, 7, 12, 12, 7, 7, 14, 12, 17, 17, 7, 7, 10, 7, 7, 7, 3, 0, 7, 12, 7, 7, 12, 7, 7, 12, 7, 7, 3, 7, 7, 10, 12, 7, 12, 7, 7, 7, 7, 12, 7, 7, 7, 7, 7, 14, 17, 7, 10, 7, 7, 12, 7, 7, 9, 7, 14, 7, 3, 16, 3, 16, 7, 16, 12, 7, 7, 12, 12, 12, 16, 7, 9, 7, 12, 12, 10, 7, 12, 7, 7, 3, 10, 17, 7, 3, 3, 12, 7, 7, 7, 10, 7, 10, 7, 9, 9…
$ unem     <dbl> 5.0, 11.0, 5.0, 5.0, 9.5, 7.5, 5.0, 5.0, 3.0, 5.0, 5.0, 5.0, 5.0, 7.5, 7.5, 7.5, 7.5, 7.5, 5.0, 5.0, 7.5, 3.0, 5.0, 11.0, 5.0, 9.5, 9.5, 11.0, 5.0, 9.5, 7.5, 14.0, 7.5, 5.0, 5.0, 7.5, 11.0, 11.0, 14.0, 5.0, 3.0, 7.5, 9.5, 5.0, 5.0, 3.0, 5.0, 11.0, 3.0, 14.0, 11.0, 9.5, 5.0, 5.0, 7.5, 7.5, 14.0, 7.5, 11.0, 14.0, 7.5, 9.5, 14.0, 9.5, 5.0, 7.5, 7.5, 5.0, 5.0, 5.0, 7.5, 7.5, 7.5, 7.5, 7.5, 5.0, 3.0, 14.0, 7.5, 7.5, 7.5, 9.5, 7.5, 9.5, 3.0, 5.0, 5.0, 5.0, 5.0, 14.0, 11.0, 9.5, 7.5, 9.0, 11.0, 11.0, 5.0, 7.5, 3.0, 3.0, 9.5, 11.0, 9.5, 7.5, 11.0, 11.0, 5.0, 9.5, 9.5, 7.5, 7.5, 9.5, 9.5, 9.5, 14.0, 7.5, 9.5, 14.0, 14.0, 14.0, 9.5, 7.5, 11.0, 7.5, 7.5, 7.5, 11.0, 7.5, 3.0, 9.5, 7.5, 7.5, 11.0, 7.5, 7.5, 7.5, 5.0, 11.0, 9.0, 5.0, 7.5, 11.0, 14.0, 11.0, 14.0, 11.0, 9.5, 11.0, 14.0, 11.0, 9.0, 11.0, 7.5, 14.0, 7.5, 11.0, 3.0, 11.0, 3.0, 14.0, 9.5, 14.0, 7.5, 9.5, 9.5, 5.0, 9.5, 11.0, 14.0, 7.5, 5.0, 14.0, 7.5, 9.5, 11.0, 14.0, 7.5, 7.5, 7.5, 14.0, 9.5, 9.5, 9.5, 5.0, 7.5, 3.0, …
$ city     <dbl> 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1,…
$ exper    <dbl> 14, 5, 15, 6, 7, 33, 11, 35, 24, 21, 15, 14, 0, 14, 6, 9, 20, 6, 23, 9, 5, 11, 18, 15, 4, 21, 31, 9, 7, 7, 32, 11, 16, 14, 27, 0, 17, 28, 24, 11, 1, 14, 6, 10, 6, 4, 10, 22, 16, 6, 12, 32, 15, 17, 34, 9, 37, 10, 35, 6, 19, 10, 11, 15, 12, 12, 14, 11, 9, 24, 12, 13, 29, 11, 13, 19, 2, 24, 9, 6, 22, 30, 10, 6, 29, 29, 36, 19, 8, 13, 16, 11, 15, 6, 13, 22, 24, 2, 6, 2, 2, 14, 9, 11, 9, 6, 19, 26, 19, 3, 7, 28, 13, 9, 15, 20, 29, 9, 1, 8, 19, 23, 3, 13, 8, 17, 4, 15, 11, 7, 0, 0, 10, 8, 2, 4, 6, 18, 3, 22, 33, 28, 23, 27, 11, 6, 11, 14, 17, 17, 14, 11, 7, 8, 6, 8, 4, 25, 24, 11, 19, 9, 19, 14, 22, 6, 23, 15, 6, 11, 2, 22, 10, 14, 12, 9, 13, 18, 8, 11, 9, 9, 14, 9, 2, 12, 15, 11, 7, 9, 19, 11, 8, 13, 4, 7, 19, 14, 14, 3, 9, 7, 7, 14, 29, 19, 14, 16, 10, 12, 24, 6, 9, 14, 26, 7, 4, 15, 23, 1, 29, 9, 6, 11, 17, 6, 7, 2, 24, 4, 11, 25, 11, 2, 19, 7, 2, 20, 10, 19, 17, 12, 11, 6, 10, 4, 2, 13, 21, 9, 4, 2, 19, 4, 9, 14, 6, 24, 1, 13, 3, 10, 16, 9, 19, 4, 10, 5, 7, 3, 38, 16,…
$ nwifeinc <dbl> 10.910060, 19.499981, 12.039910, 6.799996, 20.100060, 9.859054, 9.152048, 10.900040, 17.305000, 12.925000, 24.299950, 19.700069, 15.000010, 14.600000, 24.630911, 17.531031, 14.099980, 15.839000, 14.100000, 10.299960, 22.654980, 8.090048, 17.479000, 9.560000, 8.274953, 27.349991, 16.000000, 16.999981, 15.100060, 15.699980, 5.118960, 16.750010, 13.599930, 17.100050, 16.734051, 14.196980, 10.319990, 11.384100, 14.594080, 17.500441, 15.510000, 21.999981, 22.500000, 19.993999, 14.130000, 5.000013, 21.154900, 7.141946, 16.650070, 6.352000, 27.313950, 14.500000, 16.257990, 9.500000, 7.999956, 12.500030, 14.000030, 20.800070, 19.385111, 12.386990, 28.500000, 15.049910, 10.499980, 11.810000, 6.950073, 12.419970, 17.400000, 15.500000, 21.217039, 18.000000, 11.899920, 26.751961, 12.149960, 10.199990, 8.120015, 10.659960, 18.100010, 8.599986, 13.665000, 32.349960, 12.085010, 12.150000, 17.695021, 24.700001, 2.133992, 20.950050, 10.500080, 10.550000, 45.750000, 13.632040, 18.238…
$ lwage    <dbl> 1.2101541, 0.3285121, 1.5141380, 0.0921233, 1.5242720, 1.5564801, 2.1202600, 2.0596340, 0.7543364, 1.5448990, 1.4019220, 1.5242720, 0.7339532, 0.8183691, 1.3028311, 0.2980284, 1.1676100, 1.6438390, 0.6931472, 2.0219319, 1.2542480, 1.2729580, 1.1786550, 1.1786550, 0.7675587, 1.3318120, 1.3862940, 1.5532700, 1.9818150, 1.7693599, 0.4308079, 0.8997548, 1.7666301, 1.2729580, 1.3367890, 0.9017048, 0.8651237, 1.5118470, 1.7260290, 2.6831419, 0.9852943, 1.3659390, 0.9450337, 1.5123760, 0.6931472, 1.2447881, 0.7011649, 1.5198630, 0.8209686, 0.9698315, 0.8285082, 0.0943096, 0.1625439, 0.4700036, 0.6292484, 1.3971601, 2.2654440, 2.0845411, 1.5258390, 0.7621601, 1.4816051, 1.2628260, 0.9996756, 1.8325820, 2.4793079, 1.2790149, 1.9379359, 1.0704530, 1.1239229, 1.3217560, 1.7450000, 1.3017440, 1.6418660, 2.1070199, 1.4670680, 1.6058110, -1.0297390, 1.0876859, 0.0000000, 0.9382087, -0.1505904, 0.0000000, 1.0736710, 1.2658480, 0.4863690, 2.1202600, 1.1298530, 0.9932518, 1.6586280,…
$ expersq  <dbl> 196, 25, 225, 36, 49, 1089, 121, 1225, 576, 441, 225, 196, 0, 196, 36, 81, 400, 36, 529, 81, 25, 121, 324, 225, 16, 441, 961, 81, 49, 49, 1024, 121, 256, 196, 729, 0, 289, 784, 576, 121, 1, 196, 36, 100, 36, 16, 100, 484, 256, 36, 144, 1024, 225, 289, 1156, 81, 1369, 100, 1225, 36, 361, 100, 121, 225, 144, 144, 196, 121, 81, 576, 144, 169, 841, 121, 169, 361, 4, 576, 81, 36, 484, 900, 100, 36, 841, 841, 1296, 361, 64, 169, 256, 121, 225, 36, 169, 484, 576, 4, 36, 4, 4, 196, 81, 121, 81, 36, 361, 676, 361, 9, 49, 784, 169, 81, 225, 400, 841, 81, 1, 64, 361, 529, 9, 169, 64, 289, 16, 225, 121, 49, 0, 0, 100, 64, 4, 16, 36, 324, 9, 484, 1089, 784, 529, 729, 121, 36, 121, 196, 289, 289, 196, 121, 49, 64, 36, 64, 16, 625, 576, 121, 361, 81, 361, 196, 484, 36, 529, 225, 36, 121, 4, 484, 100, 196, 144, 81, 169, 324, 64, 121, 81, 81, 196, 81, 4, 144, 225, 121, 49, 81, 361, 121, 64, 169, 16, 49, 361, 196, 196, 9, 81, 49, 49, 196, 841, 361, 196, 256, 100, 144, 576, 36, 81, 19…
1
1

We will divide our dataset into training and test sets as follows:

# Reproducible 80/20 split of the rows of `dt` into training and test sets.
set.seed(123)
# seq_len() stays empty for a zero-row table, whereas 1:nrow(dt) would give
# c(1, 0) and index rows that do not exist.
dt_index   <- seq_len(nrow(dt))
# Draw 80% of the row indices (rounded down) without replacement.
smpl_index <- sample(
  dt_index,
  size = floor(length(dt_index) * 0.8),
  replace = FALSE
)
# Sampled rows form the training set; all remaining rows form the test set.
dt_train <- dt[smpl_index, ]
dt_test  <- dt[setdiff(dt_index, smpl_index), ]
1
1

We first take continuous explanatory variables and plot their boxplots against the values of our binary response variable:

# Boxplots of every selected variable, split by the binary response `inlf`.
# The table is melted to long format so that each variable gets its own facet.
dt_train[, .SD, .SDcols = c("inlf", "hours", "kidslt6", "kidsge6", "age",
                            "educ", "wage", "repwage", "hushrs", "husage")] %>%
  melt.data.table(id.vars = "inlf") %>%
  ggplot(mapping = aes(x = factor(inlf), y = value)) +
  geom_boxplot(mapping = aes(fill = variable), na.rm = TRUE) +
  # the y-axis is set per facet
  facet_wrap(facets = ~variable, scales = "free_y") +
  theme_bw() +
  # the fill colour only repeats the facet label, so the legend is dropped
  theme(legend.position = "none")

1
1
# Density curves of every selected variable, overlaid for the two `inlf` groups.
dt_train[, .SD, .SDcols = c("inlf", "kidslt6", "kidsge6", "age", "educ",
                            "hushrs", "husage")] %>%
  melt.data.table(id.vars = "inlf") %>%
  ggplot(mapping = aes(x = value, fill = factor(inlf), color = factor(inlf))) +
  geom_density(alpha = 0.1) +
  # both axes are set per facet
  facet_wrap(facets = ~variable, scales = "free") +
  theme_bw()

#1

We note that:

In order to meaningfully compare two binary (or categorical) variables, we can use a contingency table or a stacked bar chart. For example, we examine the differences between returning to the labor force in a city versus in a non-city as follows:

# Contingency table of counts: rows are `city`, columns are `inlf`.
with(dt_train, table(city, inlf))
    inlf
city   0   1
   0  91 120
   1 164 227
1
1

To make it easier to understand where the data falls in this barchart, we will change the categorization of the variables:

# Two views of the same counts: `city` on the x-axis, bars filled by `inlf`.
# Version 1 relies on geom_bar()'s default placement of the fill groups.
p1 <- ggplot(
  data = dt_train,
  mapping = aes(x = factor(city), fill = factor(inlf))
) +
  geom_bar(stat = "count") +
  theme_bw() +
  labs(x = "Is in the city?", title = "barchart (version 1)") +
  guides(fill = guide_legend(title = "In labor force?"))
# Version 2 puts the `inlf` groups side by side instead.
p2 <- ggplot(
  data = dt_train,
  mapping = aes(x = factor(city), fill = factor(inlf))
) +
  geom_bar(stat = "count", position = "dodge") +
  theme_bw() +
  labs(x = "Is in the city?", title = "barchart (version 2)") +
  guides(fill = guide_legend(title = "In labor force?"))
# Show both charts next to each other with one shared legend.
p1 + p2 + plot_layout(guides = "collect")

1
1

Alternatively, we might be interested in the proportion of city and non-city women returning to the workforce:

# Alternative normalisations, left commented out:
# - share of the grand total:
#prop.table(table(dt_train[, .(city, inlf)]))
# - share within each column (i.e. within each `inlf` group):
#prop.table(table(dt_train[, .(city, inlf)]), margin = 2)
# Share within each row (i.e. within each `city` group):
prop.table(with(dt_train, table(city, inlf)), margin = 1)
    inlf
city         0         1
   0 0.4312796 0.5687204
   1 0.4194373 0.5805627
1
1

Note that each row sums up to 1 (i.e. \(100\%\) of city and \(100\%\) of non-city women are split into returning to the labor force and staying at home):

# 100% stacked bar chart: position = "fill" rescales every bar to height 1, so
# the colours show the share of each `inlf` group within a `city` group.
dt_train %>%
  ggplot(aes(x = factor(city), fill = factor(inlf))) +
  geom_bar(stat = "count", position = "fill") +
  theme_bw() +
  # The y-axis shows proportions, so the default "count" axis title is replaced.
  labs(
    x = "Is in the city?",
    y = "Proportion",
    title = "barchart (version 3)"
  ) +
  scale_y_continuous(labels = scales::percent) +
  guides(fill = guide_legend(title = "In labor force?"))

1
1

So, while there are fewer women from non-cities in this data sample, the proportion of women who decide to return to the workforce is very similar, regardless of whether the woman lives in a city or not.


  1. Try this yourself - you should find that these variables almost always perfectly describe inlf, if included in the model. However, they wouldn’t really be useful in identifying whether someone will return to the labor force or not, as their hours and wages already answer this question.↩︎