calculation of the correlation between an index and the etf
hi,
I want to calculate the correlation between an index and an etf, but there is a mistace in my code and I don't know how remedy the defect.
this is my code:
library("quantmod")
library("fPortfolio")
library("RBloomberg")
conn <- blpConnect()
start.date = as.POSIXct("2012-10-01")
end.date = as.POSIXct(Sys.Date())
periodicity <- "DAILY"
price <- NULL
etf <- c("GLD US Equity")
etf_x <- etf_bla
ticker <- c("INDU Index")
get_index_value <- function(index, date) {
for(r in 1:nrow(index)) {
if(index[r,1] == date)
{
return(as.numeric(index[r,2]))
}
}
return(NA)
}
calculate_missing_days <- function(revenues) {
print("calculating missing days ...")
# remove all NA's
revenues_no_na <- revenues[which(!is.na(revenues))]
n <- length(revenues_no_na)
v <- var(revenues_no_na)
m <- mean(revenues_no_na)
sumvar <- 0
for(i in 1:length(revenues_no_na))
{
sumvar <- sumvar + (revenues_no_na[i] - m) ^ 2
}
x = sqrt((v * n - sumvar)) + m
print(sprintf("replacing NAs with x=%f ...", x))
revenues[which(is.na(revenues))] = x
return(revenues)
}
calc_correlation_to_index <- function(etf, ticker) {
load_index_data <- function(etf) {
for(i in 1:length(etf)) {
download <- bdh(conn, etf[i], "PX_LAST", start.date, end.date, option_names = "periodicitySelection", option_values = periodicity)
a <- matrix(data=download$PX_LAST, ncol=1)
rownames(a) <- unique(download$date)
colnames(a) <- etf[i]
price <- merge.xts(price,as.xts(a))
}
}
etf_data <- load_index_data # r cannot find "etf_data"
combined_data <- matrix(, ncol=2, nrow=0)
#print(etf_data)
#print(index_data)
#first walk through all etf data and find the fitting index data to it
for(i in 1:nrow(etf_data)) {
date <- time(etf_data[i])
date_string <- format(date, format="%Y-%m-%d")
row <- matrix(, ncol=2)
#row[1,1] = date_string
row[1,1] = as.numeric(etf_data[i,6])
row[1,2] = get_index_value(index_data, date_string)
combined_data <- rbind(combined_data, row)
}
#print(combined_data)
#print(cor(combined_data[,1], combined_data[,2]))
# calculate revenues now
combined_revenues <- matrix(,ncol=2, nrow=nrow(combined_data)-1)
# calcuclate revenues for etf data
combined_revenues[,1] <- (as.numeric(combined_data[2:nrow(combined_data),1]) / as.numeric(combined_data[1:nrow(combined_data)-1,1])) - 1
# calculate revenues for index data
combined_revenues[,2] <- (as.numeric(combined_data[2:nrow(combined_data),2]) / as.numeric(combined_data[1:nrow(combined_data)-1,2])) - 1
#print(combined_revenues)
sumnas = length(which(is.na(combined_revenues[,2])))
combined_revenues[,2] = calculate_missing_days(combined_revenues[,2])
#print(combined_revenues)
print(sprintf("calculating correlation of %s ...", etf))
correlation <- cor(combined_revenues[,1], combined_revenues[,2])
ret <- matrix(,ncol=3, nrow=1)
ret[1,1] <- etf
ret[1,2] <- correlation
ret[1,3] <- sumnas
return(ret)
}
print_etf_highscore <- function(indexfile, etf_list) {
index_data <- load_index_data(indexfile)
etf_highscore <- matrix(, ncol=3, nrow=0)
for(etf in etf_list) {
print(sprintf("############### %s ###############", etf))
row <- calc_correlation_to_index(etf, ticker)
etf_highscore <- rbind(etf_highscore, row)
}
print(sprintf("############### HIGHSCORE ###############"))
print(etf_highscore[rev(order(etf_highscore[,2])),])
}
print_etf_highscore(ticker, etf)
I think the main problem is, that R can't find "etf_data". And as a result R tells me, that "1:nrow(etf_data)" has the length zero. Does sombody how to fix this?
I would be deeply greatfull.
brigit
Posting Permissions
You may not post new threads
You may not post replies
You may not post attachments
You may not edit your posts
Forum Rules
Advertise on Talk Stats