A reasonable statistical approach is linear regression. You can do this in Excel, R, or Python.
Typical steps:
- Try a simple linear model
- Examine the residuals and determine if there are necessary variable transforms
- Try to engineer variables
  - difference between outstanding and owned shares
  - percent of outstanding shares owned
  - percent of total possible shares outstanding
- Try a variable transform on the dependent variable (log)
- Try a non-linear model
A good, but not perfect, model is:
$$\mathbf{Rent} = e^{0.96 + 0.01\,\mathbf{Value} - 2.42\,\frac{\mathbf{owned}}{\mathbf{shares}} - 0.244\,\frac{\mathbf{shares}}{9} + 0.71\,\mathbf{Monopoly}}$$
To get a better model this way, you need more examples of different values and some examples with houses.
Update
I also tried the symbolic regression procedure that was mentioned in the comments and in other answers. The results you get are very dependent on the allowed functions and operators. I could not easily beat the linear regression approach with the data presented.
Here is R code to illustrate:
# Example data as a dput()-style literal: a data.frame with 63 rows
# (row.names = c(NA, -63L)) and six numeric columns:
#   value, shares, owned, houses, monopoly, rent.
# `rent` is the response modeled below; `monopoly` only takes the values 0/1
# and `houses` is 0 in every row.
# NOTE(review): `shares` presumably counts outstanding shares and `owned` the
# shares already owned -- confirm against the original question.
dat <- structure(list(value = c(60, 60, 60, 60, 160, 320, 320, 320,
320, 320, 320, 320, 320, 320, 320, 320, 320, 320, 320, 320, 320,
320, 320, 320, 320, 320, 320, 320, 320, 320, 320, 320, 320, 320,
320, 320, 320, 320, 320, 320, 320, 320, 320, 320, 320, 320, 320,
320, 320, 320, 320, 320, 320, 320, 320, 320, 320, 320, 320, 320,
320, 320, 320), shares = c(1, 2, 2, 5, 7, 1, 2, 2, 3, 3, 4, 4,
4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 9,
9, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8,
8, 8, 8, 8, 9, 9, 9, 9, 9), owned = c(0, 0, 1, 0, 0, 0, 0, 1,
0, 1, 0, 1, 2, 0, 1, 2, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4,
0, 1, 2, 3, 4, 0, 0, 1, 0, 1, 0, 1, 2, 0, 1, 2, 0, 1, 2, 3, 0,
1, 2, 3, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4), houses = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), monopoly = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
rent = c(4, 4, 1, 5, 21, 56, 56, 24, 54, 32, 56, 36, 20,
55, 36, 24, 54, 40, 28, 15, 56, 42, 30, 20, 56, 42, 30, 20,
12, 54, 40, 28, 18, 15, 112, 112, 49, 111, 64, 112, 72, 42,
110, 76, 48, 108, 80, 56, 33, 112, 84, 60, 40, 112, 84, 60,
40, 28, 108, 80, 63, 42, 30)), class = "data.frame", row.names = c(NA,
-63L))
# no examples provided with houses
# (`houses` is 0 in every row of `dat`, so it is left out of the model).

# Baseline: additive linear model of rent on the raw predictors.
lm1 <- lm(rent ~ value + shares + owned + monopoly, data = dat)
summary(lm1)
#>
#> Call:
#> lm(formula = rent ~ value + shares + owned + monopoly, data = dat)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -25.488 -5.911 -2.093 4.843 21.256
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -8.52642 6.10570 -1.396 0.1679
#> value 0.18937 0.02116 8.950 1.6e-12 ***
#> shares 1.39035 0.60704 2.290 0.0257 *
#> owned -17.71002 1.16736 -15.171 < 2e-16 ***
#> monopoly 37.34621 2.63746 14.160 < 2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 10.05 on 58 degrees of freedom
#> Multiple R-squared: 0.9056, Adjusted R-squared: 0.8991
#> F-statistic: 139.1 on 4 and 58 DF, p-value: < 2.2e-16

# Residuals-vs-fitted diagnostic (which = 1), used to judge whether a
# variable transform is needed.
plot(lm1, which = 1)

# try to engineer some variables
# The divisor 9 is the largest `shares` value in `dat` (the total possible
# number of shares).
dat$sharediff <- dat$shares - dat$owned    # outstanding minus owned shares
dat$sharepct <- dat$shares / 9             # outstanding shares as a fraction of 9
dat$sharediffpct <- dat$sharediff / 9      # not used by the model below
dat$pctowned <- dat$owned / dat$shares     # fraction of outstanding shares owned

# try a multiplicative model
# Regressing log(rent) makes each term act multiplicatively on rent:
# rent = exp(b0 + b1 * value + b2 * pctowned + b3 * sharepct + b4 * monopoly).
lm4 <- lm(log(rent) ~ value + pctowned + sharepct + monopoly, data = dat)
summary(lm4)
#>
#> Call:
#> lm(formula = log(rent) ~ value + pctowned + sharepct + monopoly,
#> data = dat)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -0.30260 -0.10133 0.01289 0.07176 0.65647
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 0.9601256 0.0904284 10.618 3.21e-15 ***
#> value 0.0101118 0.0003152 32.078 < 2e-16 ***
#> pctowned -2.4199149 0.1020827 -23.705 < 2e-16 ***
#> sharepct -0.2442338 0.0736702 -3.315 0.00158 **
#> monopoly 0.7076224 0.0392421 18.032 < 2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.1496 on 58 degrees of freedom
#> Multiple R-squared: 0.9739, Adjusted R-squared: 0.9721
#> F-statistic: 540.3 on 4 and 58 DF, p-value: < 2.2e-16

# Residuals-vs-fitted plot with points colored by number of shares, to see
# whether the remaining misfit is tied to `shares`.
plot(lm4, which = 1, col = dat$shares, pch = 19)

################################################################################
# Symbolic regression via grammatical evolution.
# library() stops with an error if the package is missing; require() would
# only return FALSE and let the script fail later at the first gramEvol:: call.
library(gramEvol)
#> Warning: package 'gramEvol' was built under R version 4.3.2

# Grammar of candidate formulas: an expression is a binary operation on two
# expressions, a unary function of an expression, or one of the raw columns.
ruleDef <- list(
  expr = gramEvol::grule(op(expr, expr), func(expr), var),
  func = gramEvol::grule(exp, sqrt),
  op = gramEvol::grule("+", "-", "*", "/"),
  var = gramEvol::grule(dat$value, dat$shares, dat$owned, dat$monopoly)
)
grammarDef <- gramEvol::CreateGrammar(ruleDef)
grammarDef
#> <expr> ::= <op>(<expr>, <expr>) | <func>(<expr>) | <var>
#> <func> ::= exp | sqrt
#> <op> ::= "+" | "-" | "*" | "/"
#> <var> ::= dat$value | dat$shares | dat$owned | dat$monopoly
# Fix the RNG seed so the random expressions printed below are reproducible.
# NOTE(review): the later GrammaticalEvolution() search presumably draws from
# the same RNG stream, so inserting or reordering RNG calls here would change
# its recorded result -- confirm before editing.
set.seed(123)
# Draw 6 random expressions from the grammar to check that it produces
# sensible candidate formulas.
gramEvol::GrammarRandomExpression(grammarDef, 6)
#> [[1]]
#> expression(sqrt(exp(dat$value + dat$owned)))
#>
#> [[2]]
#> expression(exp(exp(dat$value/dat$monopoly)) + dat$owned)
#>
#> [[3]]
#> expression((dat$value - sqrt(dat$owned))/dat$monopoly)
#>
#> [[4]]
#> expression(dat$monopoly)
#>
#> [[5]]
#> expression(exp(dat$shares))
#>
#> [[6]]
#> expression(dat$shares + dat$value)
# Cost function for the symbolic-regression search.
#
# expr: an R expression; it is evaluated with eval() and any warnings raised
#       during evaluation are suppressed.
# Returns Inf when the evaluated values contain a NaN; otherwise the mean
# squared difference between `dat$rent` and the evaluated values.
SymRegFitFunc <- function(expr) {
  predicted <- suppressWarnings(eval(expr))
  if (!any(is.nan(predicted))) {
    return(mean((dat$rent - predicted)^2))
  }
  Inf
}
# Sanity-check the cost function on one hand-written candidate expression.
SymRegFitFunc(expression(exp(dat$shares)))
#> [1] 11838780
# Search the grammar for the expression with the lowest SymRegFitFunc cost.
# The recorded run used all 2500 generations and its best cost (about 693.6)
# stayed far above terminationCost = 0.1.
# NOTE(review): terminationCost is presumably an early-stopping threshold on
# the cost -- confirm in the gramEvol documentation.
ge <- gramEvol::GrammaticalEvolution(grammarDef, SymRegFitFunc,
terminationCost = 0.1,
iterations = 2500,
max.depth = 5)
ge
#> Grammatical Evolution Search Results:
#> No. Generations: 2500
#> Best Expression: sqrt(dat$value) * (exp(dat$monopoly) + dat$monopoly) + dat$owned
#> Best Cost: 693.648325144468
# Predictions and residuals of the best symbolic-regression expression,
# in rent units.
yhat <- eval(ge$best$expressions)
resid <- dat$rent - yhat
plot(yhat, resid, xlab = "Predicted Rent", ylab = "Residuals")

# error of linear model
# NOTE: lm4 was fit to log(rent), so these two values are on the log scale
# and cannot be compared directly with the symbolic-regression errors below.
var(lm4$residuals)
#> [1] 0.02093519
mean(lm4$residuals^2)
#> [1] 0.02060289
# Same-scale comparison: back-transform the lm4 fit to rent units before
# taking the mean squared error (this line was not part of the recorded run,
# so no output is shown). exp(fitted) is the naive back-transform.
mean((dat$rent - exp(fitted(lm4)))^2)
# error of symbolic regression
var(resid)
#> [1] 600.0785
mean(resid^2)
#> [1] 693.6483
Created on 2023-12-01 with reprex v2.0.2