[PortfolioOptimization] Hierarchical Risk Parity


Overview

In this post, we implement Hierarchical Risk Parity (HRP).

import datetime
import sys

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline

import pandas_datareader.data as web
import backtrader as bt
import pyfolio as pf
import quantstats

from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.optimize import minimize
from scipy.stats import rankdata
from scipy import stats

We now implement Hierarchical Risk Parity. The universe is US equities, developed-market equities, emerging-market equities, long-term US Treasuries, long-term US TIPS, long-term US corporate bonds, gold, commodities, and the US sector ETFs. With intermediate-term bonds the equity side ends up with too little weight, so we use long-term bonds instead.
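
For reference, the tickers downloaded below map to the asset classes just listed. This dictionary is descriptive only and is not used by the code that follows:

# ticker -> asset class (descriptive only)
universe = {
    'VTI': 'US equities',
    'VEA': 'developed-market equities',
    'VWO': 'emerging-market equities',
    'TLT': 'long-term US Treasuries',
    'LTPZ': 'long-term US TIPS',
    'VCLT': 'long-term US corporate bonds',
    'IAU': 'gold',
    'DBC': 'commodities',
    # XLB, XLC, XLE, XLF, XLI, XLK, XLP, XLU, XLV, XLY, XLRE:
    # the eleven SPDR US sector ETFs
}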

start = '2018-06-18'
end = '2021-05-21'

tickers = ['VTI', 'VEA', 'VWO', 'TLT', 'LTPZ', 'VCLT', 'IAU', 'DBC',
           'XLB', 'XLC', 'XLE', 'XLF', 'XLI', 'XLK', 'XLP', 'XLU',
           'XLV', 'XLY', 'XLRE']

# download adjusted closes and use lower-case tickers as column names
price_df = pd.concat(
    [web.DataReader(t, 'yahoo', start, end)['Adj Close'].to_frame(t.lower())
     for t in tickers],
    axis=1)
return_df = price_df.pct_change().dropna(axis=0)
return_df
(Output: return_df — daily returns, 736 rows × 19 columns, indexed by Date from 2018-06-20 to 2021-05-21, one column per ticker.)

# correlation matrix
corr = return_df.corr()
# distance matrix: d(i, j) = sqrt(0.5 * (1 - corr(i, j)))
d_corr = np.sqrt(0.5*(1-corr))
# Note: d_corr is a square matrix, so linkage treats each row as an
# observation vector and clusters on Euclidean distances between rows
# (hence the warning below); to cluster on the pairwise distances
# themselves, pass scipy.spatial.distance.squareform(d_corr.values).
link = linkage(d_corr, 'single')
Z = pd.DataFrame(link)
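
As a quick sanity check on the distance transform (a minimal sketch): perfectly correlated assets map to distance 0, uncorrelated assets to about 0.707, and perfectly anti-correlated assets to 1.

# sqrt(0.5 * (1 - rho)) maps correlation to a distance in [0, 1]
for rho in (1.0, 0.0, -1.0):
    print(rho, np.sqrt(0.5 * (1 - rho)))
# 1.0 0.0
# 0.0 0.7071067811865476
# -1.0 1.0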

fig = plt.figure(figsize=(25, 10))
dn = dendrogram(Z)
# plt.show()
ClusterWarning: scipy.cluster: The symmetric non-negative hollow observation matrix looks suspiciously like an uncondensed distance matrix

(Figure: dendrogram of the hierarchical clustering of the 19 assets)

The figure above shows the clustering result. Z below records one merge per row: the indices of the two clusters being merged, the distance between them, and the number of original items in the new cluster. With the hierarchical clustering done, we move on to quasi-diagonalization, which reorders the covariance matrix so that the largest values lie along the diagonal: similar assets are placed next to each other and dissimilar assets far apart.

Z
       0     1         2     3
0    8.0  12.0  0.308292   2.0
1    0.0  17.0  0.315043   2.0
2   11.0  19.0  0.318654   3.0
3    9.0  13.0  0.346881   2.0
4   20.0  22.0  0.356569   4.0
5    1.0  23.0  0.373140   5.0
6   21.0  24.0  0.386952   8.0
7    2.0  25.0  0.442520   9.0
8   15.0  18.0  0.472410   2.0
9   14.0  27.0  0.475503   3.0
10  16.0  26.0  0.479955  10.0
11  28.0  29.0  0.494418  13.0
12   3.0   4.0  0.546767   2.0
13  10.0  30.0  0.547617  14.0
14   7.0  32.0  0.765066  15.0
15   5.0  31.0  0.862971   3.0
16   6.0  34.0  0.875766   4.0
17  33.0  35.0  1.052965  19.0
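
To make the encoding concrete: ids 0 to 18 are the original assets, and the cluster created at merge step k is referred to as id 19 + k in later rows. A minimal sketch decoding the first row of Z:

# row 0 merges leaves 8 and 12 at distance ~0.308; ids below 19
# are original assets, so we can look up their tickers
left, right = int(link[0, 0]), int(link[0, 1])
print(return_df.columns[left], return_df.columns[right])  # xlb xli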
def get_quasi_diag(link):
    
    # sort clustered items by distance
    
    link = link.astype(int)
    
    # start from the last merge: its two members are the roots of the tree
    sort_ix = pd.Series([link[-1, 0], link[-1, 1]])
    
    # the total number of original items is the fourth entry of the last row
    num_items = link[-1, 3]
    
    # ids >= num_items are clusters; keep replacing them with their members
    while sort_ix.max() >= num_items:

        sort_ix.index = range(0, sort_ix.shape[0]*2, 2)  # make space
        
        df0 = sort_ix[sort_ix >= num_items]  # find clusters
        
        # df0 holds the even positions (index) and the cluster ids (values)
        i = df0.index
        j = df0.values - num_items  # row of link that created each cluster
        
        sort_ix[i] = link[j, 0]  # item 1
        
        df0 = pd.Series(link[j, 1], index=i+1)
        
        sort_ix = pd.concat([sort_ix, df0])  # item 2 (Series.append is deprecated)
        sort_ix = sort_ix.sort_index()
        
        sort_ix.index = range(sort_ix.shape[0])
    
    return sort_ix.tolist()
sort_ix = get_quasi_diag(link)
sort_ix
[7, 10, 14, 15, 18, 16, 2, 11, 8, 12, 1, 0, 17, 9, 13, 6, 5, 3, 4]
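
To verify the expansion logic, here is a toy check on a hand-written three-asset linkage matrix (not produced by linkage): step 0 merges leaves 0 and 1 into cluster 3, and step 1 merges leaf 2 with cluster 3.

# hand-written linkage matrix: [left id, right id, distance, cluster size]
toy = np.array([[0, 1, 0.2, 2],
                [2, 3, 0.5, 3]])
print(get_quasi_diag(toy))  # [2, 0, 1]

The same ordering can also be used to inspect the quasi-diagonalized correlation matrix of the real data, e.g. corr.iloc[sort_ix, sort_ix].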
def get_cluster_var(cov, c_items):
    cov_ = cov.iloc[c_items, c_items]  # covariance sub-matrix of the cluster
    # inverse-variance portfolio weights within the cluster
    ivp = 1./np.diag(cov_)
    ivp /= ivp.sum()
    w_ = ivp.reshape(-1, 1)
    # cluster variance: w' * cov * w
    c_var = np.dot(np.dot(w_.T, cov_), w_)[0, 0]
    return c_var
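
As a quick illustration with the objects defined above: for the two-asset cluster of tlt and ltpz (columns 3 and 4, merged in row 12 of Z), get_cluster_var returns the variance of the inverse-variance portfolio over those two assets.

# variance of the inverse-variance portfolio over tlt (col 3) and ltpz (col 4)
print(get_cluster_var(return_df.cov(), [3, 4]))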

def get_rec_bipart(cov, sort_ix):
    # compute the HRP allocation by recursive bisection
    # initialize all weights to 1
    w = pd.Series(1, index=sort_ix)
    
    # initialize all items in one cluster
    c_items = [sort_ix]
    while len(c_items) > 0:
        # bisection: split every cluster with more than one item in half
        c_items = [i[int(j):int(k)] for i in c_items for j, k in 
                   ((0, len(i)/2), (len(i)/2, len(i))) if len(i) > 1]
        
        # parse in pairs
        for i in range(0, len(c_items), 2):
            
            c_items0 = c_items[i]    # cluster 1
            c_items1 = c_items[i+1]  # cluster 2
            
            c_var0 = get_cluster_var(cov, c_items0)
            c_var1 = get_cluster_var(cov, c_items1)
            
            # the lower-variance cluster receives the larger share
            alpha = 1 - c_var0/(c_var0+c_var1)
            
            w[c_items0] *= alpha
            w[c_items1] *= 1-alpha
            
    return w
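
For intuition, a toy calculation of the split factor with hypothetical cluster variances (the numbers are made up):

# if cluster 0 has twice the variance of cluster 1, it receives
# alpha = c_var1 / (c_var0 + c_var1) = 1/3 of the weight at this split
c_var0, c_var1 = 0.02, 0.01
alpha = 1 - c_var0 / (c_var0 + c_var1)
print(alpha)  # 0.333...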
cov = return_df.cov()
weights = get_rec_bipart(cov, sort_ix)
new_index = [return_df.columns[i] for i in weights.index]
weights.index = new_index
weights
dbc     0.069414
xle     0.014006
xlp     0.052168
xlu     0.029592
xlre    0.021390
xlv     0.032335
vwo     0.024277
xlf     0.008209
xlb     0.010514
xli     0.029322
vea     0.047293
vti     0.037005
xly     0.018139
xlc     0.017726
xlk     0.060270
iau     0.232143
vclt    0.143141
tlt     0.079744
ltpz    0.073310
dtype: float64
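A final sanity check (a small sketch): each bisection multiplies existing weights by alpha and 1 - alpha, so the weights stay non-negative and keep summing to one.

# HRP weights are non-negative and sum to 1 by construction
print(weights.sum())         # 1.0 up to floating-point error
print((weights >= 0).all())  # True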
plt.figure(figsize = (12, 8))
plt.bar(list(weights.index), weights)

(Figure: bar chart of the HRP weights by ticker)
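
Optionally, sorting before plotting makes the concentration in gold and long-duration bonds easier to see (a cosmetic variation):

# same data, sorted for readability
weights.sort_values(ascending=False).plot(kind='bar', figsize=(12, 8))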

References

López de Prado, M. (2016). Building diversified portfolios that outperform out of sample. The Journal of Portfolio Management, 42(4), 59-69.



