Here is another possible approach:
#ensure that order is correct before using the most recent for each company
setorder(table2, individual_id, company_id, date2)
table1[, mean_alpha :=
#perform non-equi join
table2[table1, on=.(individual_id, date2<=date),
#for each row of table1,
by=.EACHI,
#get most recent alpha by company_id and average the alphas
mean(.SD[, last(alpha), by=.(company_id)]$V1)]$V1
]
output:
individual_id date mean_alpha
1: 1 2018-01-02 1.00
2: 1 2018-01-04 1.50
3: 1 2018-01-05 1.25
4: 2 2018-01-02 1.50
5: 2 2018-01-05 3.00
data:
library(data.table)
table1 <- fread(
"individual_id | date
1 | 2018-01-02
1 | 2018-01-04
1 | 2018-01-05
2 | 2018-01-02
2 | 2018-01-05",
sep ="|"
)
table1[, date := as.IDate(date)]
table2 <- fread(
"individual_id | date2 | company_id | alpha
1 | 2018-01-02 | 62 | 1
1 | 2018-01-04 | 62 | 1.5
1 | 2018-01-05 | 63 | 1
2 | 2018-01-01 | 71 | 2
2 | 2018-01-02 | 74 | 1
2 | 2018-01-05 | 74 | 4",
sep = "|"
)
table2[, date2 := as.IDate(date2)]
Using `by = .EACHI` you could do something like the following:
table2[table1,
on = .(`individual id`),
.(date = i.date, mean_alpha = mean(alpha[date2 <= i.date])),
by = .EACHI]
# individual id date mean_alpha
# 1: 1 2018-01-02 1.0
# 2: 1 2018-01-03 1.0
# 3: 2 2018-01-02 1.5
# 4: 2 2018-01-03 1.5
*Edit:*
# Assign by reference as a new column
table1[, mean_alpha := table2[table1,
on = .(`individual id`),
mean(alpha[date2 <= i.date]),
by = .EACHI][["V1"]]]
*Edit 2*:
Here is slightly more elegant way suggested by Frank in the comment section.
# In this solution our date columns can't be type character
table1[, date := as.Date(date)]
table2[, date2 := as.Date(date2)]
table1[, mean_alpha := table2[table1, # or equivalently .SD instead of table1
on = .(`individual id`, date2 <= date),
mean(alpha),
by = .EACHI][["V1"]]]
*Reproducible data*
table1 <- fread(
"individual id | date
1 | 2018-01-02
1 | 2018-01-03
2 | 2018-01-02
2 | 2018-01-03",
sep ="|"
)
table2 <- fread(
"individual id | date2 | alpha
1 | 2018-01-02 | 1
1 | 2018-01-04 | 1.5
1 | 2018-01-05 | 1
2 | 2018-01-01 | 2
2 | 2018-01-02 | 1
2 | 2018-01-05 | 4",
sep = "|"
)