代码之家  ›  专栏  ›  技术社区  ›  Rafael Díaz

用RSTAN拟合poisson-HMM-JAGS模型

  •  0
  • Rafael Díaz  · 技术社区  · 5 年前

    沃尔特·西葫芦在他的书中 Hidden Markov Models for Time Series An Introduction Using R ,在第8章第129页,使用R2OpenBUGS调整泊松HMM,然后我展示了代码。我对使用rstan调整这个相同的模型很感兴趣,但是由于我是新使用这个软件包的,所以我不清楚它的语法和任何建议。

    数据

    dat <- read.table("http://www.hmms-for-time-series.de/second/data/earthquakes.txt")
    

    RJAGS

    library(R2jags)
    library(rjags)
    
    HMM <- function(){
      for(i in 1:m){
        delta[i] <- 1/m
        v[i] <- 1}
      s[1] ~ dcat(delta[])
      for(i in 2:100){
        s[i] ~ dcat(Gamma[s[i-1],])}
      states[1] ~ dcat(Gamma[s[100],])
      x[1]~dpois(lambda[states[1]])
      for(i in 2:n){
        states[i]~dcat(Gamma[states[i-1],])
        x[i]~dpois(lambda[states[i]])}
      for(i in 1:m){
        tau[i]~dgamma(1,0.08)
        Gamma[i,1:m]~ddirch(v[])}
      lambda[1]<-tau[1]
      for(i in 2:m){
        lambda[i]<-lambda[i-1]+tau[i]}}
    
    x = dat[,2]
    n = dim(dat)[1]
    m = 2
    
    mod = jags(data = list("x", "n", "m" ), inits = NULL, parameters.to.save = c("lambda","Gamma"), 
               model.file = HMM, n.iter = 10000, n.chains = 1)
    

    输出

    mod
    Inference for Bugs model at "C:/Users/USER/AppData/Local/Temp/RtmpOkrM6m/model36c8429c5442.txt", fit using jags,
     1 chains, each with 10000 iterations (first 5000 discarded), n.thin = 5
     n.sims = 1000 iterations saved
               mu.vect sd.vect    2.5%     25%     50%     75%   97.5%
    Gamma[1,1]   0.908   0.044   0.805   0.884   0.915   0.940   0.971
    Gamma[2,1]   0.155   0.071   0.045   0.105   0.144   0.195   0.325
    Gamma[1,2]   0.092   0.044   0.029   0.060   0.085   0.116   0.195
    Gamma[2,2]   0.845   0.071   0.675   0.805   0.856   0.895   0.955
    lambda[1]   15.367   0.763  13.766  14.877  15.400  15.894  16.752
    lambda[2]   26.001   1.321  23.418  25.171  25.956  26.843  28.717
    deviance   645.351   8.697 630.338 639.359 644.512 650.598 665.405
    
    DIC info (using the rule, pD = var(deviance)/2)
    pD = 37.8 and DIC = 683.2
    DIC is an estimate of expected predictive error (lower deviance is better).
    

    library("rstan")
    rstan_options(auto_write = TRUE)
    options(mc.cores = parallel::detectCores())
    
    HMM <- '
    data{
      int<lower=0> n;    // number of observations (length)
      int<lower=0> x[n]; // observations  
      int<lower=1> m;    // number of hidden states
     }
    
    parameters{
      simplex[m] Gamma[n]; // t.p.m  
      vector[m] lambda;  // mean of poisson ordered
     }
    
    model{
      vector[m] delta[m];
      vector[m] v[m];
      vector[100] s[100];
      vector[n] states[n];
      vector[m] tau;
    
      for(i in 1:m){
        delta[i] = 1/m;
        v[i] = 1;}
      s[1] ~ categorical(delta[]);
    
      for(i in 2:100){
        s[i] ~ categorical(Gamma[s[i-1],]);}
      states[1] ~ categorical(Gamma[s[100],]);
      x[1] ~ poisson(lambda[states[1]]);
    
      for(i in 2:n){
        states[i] ~ categorical(Gamma[states[i-1],]);
      x[i] ~ poisson(lambda[states[i]])};
    
      for(i in 1:m){
        tau[i] ~ gamma(1,0.08);
        Gamma[i,1:m] ~ dirichlet(v[]);}
      lambda[1] = tau[1];
    
      for(i in 2:m){
        lambda[i] = lambda[i-1] + tau[i]};}'
    
    data <- list(n = dim(dat)[1], x = dat[,2], m = 2)
    system.time(mod2 <- stan(model_code = HMM, data = data, chains = 1, iter = 1000, thin = 4))
    mod2
    

    但是,运行stan模型时会发生错误。

    1 回复  |  直到 5 年前
        1
  •  1
  •   Rafael Díaz    5 年前

    使用前向算法,作为先验伽马分布,求相依态的均值向量,并对 simplex[m] 对象,对于概率转移矩阵,其中行和等于1,得到以下估计。

    dat <- read.table("http://www.hmms-for-time-series.de/second/data/earthquakes.txt")
    stan.data <- list(n=dim(dat)[1], m=2, x=dat$V2)
    
    PHMM <- '
    data {
      int<lower=0> n; // length of the time series
      int<lower=0> x[n]; // data
      int<lower=1> m; // number of states 
    }
    
    parameters{
      simplex[m] Gamma[m]; // tpm
      positive_ordered[m] lambda; // mean of poisson - ordered
    }  
    
    model{
      vector[m] log_Gamma_tr[m]; // log, transposed tpm 
      vector[m] lp; // for forward variables
      vector[m] lp_p1; // for forward variables
    
      lambda ~ gamma(0.1, 0.01); // assigning exchangeable priors 
      //(lambdas´s are ordered for sampling purposes)
    
      // transposing tpm and taking the log of each entry
      for(i in 1:m)
        for(j in 1:m)
          log_Gamma_tr[j, i] = log(Gamma[i, j]);
    
      lp = rep_vector(-log(m), m); // 
    
        for(i in 1:n) {
          for(j in 1:m)
            lp_p1[j] = log_sum_exp(log_Gamma_tr[j] + lp) + poisson_lpmf(x[i] | lambda[j]); 
    
          lp = lp_p1;
        }
    
      target += log_sum_exp(lp);
    }'
    
    model <- stan(model_code = PHMM, data = stan.data, iter = 1000, chains = 1)
    print(model,digits_summary = 3)
    

    输出

    Inference for Stan model: 11fa5b74e5bea2ca840fe5068cb01b7b.
    1 chains, each with iter=1000; warmup=500; thin=1; 
    post-warmup draws per chain=500, total post-warmup draws=500.
    
                   mean se_mean    sd     2.5%      25%      50%      75%    97.5% n_eff  Rhat
    Gamma[1,1]    0.907   0.002 0.047    0.797    0.882    0.913    0.941    0.972   387 0.998
    Gamma[1,2]    0.093   0.002 0.047    0.028    0.059    0.087    0.118    0.203   387 0.998
    Gamma[2,1]    0.147   0.004 0.077    0.041    0.090    0.128    0.190    0.338   447 0.999
    Gamma[2,2]    0.853   0.004 0.077    0.662    0.810    0.872    0.910    0.959   447 0.999
    lambda[1]    15.159   0.044 0.894   13.208   14.570   15.248   15.791   16.768   407 1.005
    lambda[2]    25.770   0.083 1.604   22.900   24.581   25.768   26.838   28.940   371 0.998
    lp__       -350.267   0.097 1.463 -353.856 -351.091 -349.948 -349.155 -348.235   230 1.001
    
    Samples were drawn using NUTS(diag_e) at Wed Jan 16 00:35:06 2019.
    For each parameter, n_eff is a crude measure of effective sample size,
    and Rhat is the potential scale reduction factor on split chains (at 
    convergence, Rhat=1).