A hansl implementation of exogenous-regressor standardization for arima,
with some testing:
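
The idea, as implemented below: each regressor x_i is standardized as
z_i = (x_i - m_i)/s_i before estimation; the slope on the original scale
is then recovered as b_i = c_i/s_i and, when a constant is present, the
intercept as a = a' - sum_i m_i*b_i. The covariance matrix is carried
back through the Jacobian of this mapping (the delta method), which is
what the matrix difmat below implements.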
<hansl>
function void my_arima(string spec,
                      list y,
                      list xlist[null],
                      string opt[null])
    list biglist = y xlist
    smpl biglist --contiguous
    bigtext = "arima "~spec~"; y"
   bigtext2 = bigtext
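    # bigtext will run arima on the standardized regressors (zlist);
    # bigtext2 runs it on the original ones (xlist) as a benchmark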
   nx = 0
   matrix mxs = {}
   matrix sdxs = {}
   list zlist = null
    if nelem(xlist) > 0
        iscst = inlist(xlist, const)  # position of const in xlist, 0 if absent
        xlist = xlist - 0             # drop the constant (series 0)
        nx = nelem(xlist)
        matrix mxs = zeros(1, nx)     # regressor means
        matrix sdxs = mxs             # regressor standard deviations
        loop i = 1..nx -q
            mi = mean(xlist[i])
            sdi = sd(xlist[i])
            vni = sprintf("oleh%d", i)           # name for the standardized series
            genseries(vni, (xlist[i] - mi)/sdi)  # z_i = (x_i - m_i)/s_i
            zlist += @vni                        # append it by name via @-substitution
            mxs[i] = mi
            sdxs[i] = sdi
        endloop
       if iscst
           zlist = 0 zlist
           xlist = 0 xlist
       endif
       bigtext = bigtext~" zlist"
       bigtext2 = bigtext2~" xlist"
   endif

   if !isnull(opt)
       opt = " "~strstrip(opt)
       bigtext = bigtext~opt
       bigtext2 = bigtext2~opt
   endif
    # original arima command, run with catch so failure is non-fatal
    catch @bigtext2
    err = $error
    if err
        em = errmsg(err)
        printf "\n"
        if strlen(em) == 0
            print "traditional arima failed with empty errmsg(err)"
        else
            print em
        endif
        printf "\n"
    else
        matrix b0 = $coeff
        lnl0 = $lnl
        matrix se0 = $stderr
        matrix z0 = b0 ./ se0
    endif
    # arima command for transformed data
    @bigtext
    matrix b1 = $coeff
    matrix se1 = $stderr  # kept as-is if there are no regressors to rescale
    matrix z1 = b1 ./ se1
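    # Back-transformation: with z_i = (x_i - m_i)/s_i the original-scale
    # slope is b_i = c_i/s_i and the intercept is a = a' - sum_i m_i*b_i;
    # difmat is the Jacobian of this mapping, used below to carry the
    # vcv (and hence the standard errors) back via the delta method.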
    if nx > 0
        # restore coefficients and vcv for the original data
        nc = $ncoeff
        difmat = I(nc)
        b1[(nc-nx+1):] = b1[(nc-nx+1):] ./ sdxs'
        loop i = 1..nx -q
            difmat[(nc-nx+i),(nc-nx+i)] = 1/sdxs[i]
        endloop
        if iscst
            matrix vec1 = b1[(nc-nx+1):]
            b1[1] = b1[1] - mxs*vec1
            difmat[1,(nc-nx+1):] = -mxs ./ sdxs
        endif
        matrix se1 = sqrt(diag(qform(difmat, $vcv)))
    endif
    lnl1 = $lnl
    print "estimates, with standard errors beneath:"
    eval b1'
    eval se1'
    printf "\n"
    if !err
        print "traditional estimates, with standard errors beneath:"
        eval b0'
        eval se0'
        printf "\n"
        print "lnl1:"
        eval lnl1
        printf "\n"
        print "difference in lnl (transformed - traditional):"
        eval lnl1 - lnl0
        printf "\n"
        print "difference in estimates:"
        eval (b1 - b0)'
        printf "\n"
        print "difference in ses:"
        eval (se1 - se0)'
    else
        printf "\nlnl = %g\n", lnl1
    endif
end function
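
# --- testing: the same model via my_arima and via native arima with
# the --x-12-arima option, shifting one regressor by progressively
# larger constants; a pure shift should in principle move only the
# intercept estimate ---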

open bad_data.gdt

z10 = y_two+2
list zlic10 = 0 y_one z10
my_arima("3 0 0; 1 0 0",diff_series,zlic10,"-q")
arima 3 0 0; 1 0 0; diff_series zlic10 --x-12-arima -q
eval transp($coeff)
eval transp($stderr)
eval $lnl
z10 = y_two+4
list zlic10 = 0 y_one z10
my_arima("3 0 0; 1 0 0",diff_series,zlic10,"-q")
arima 3 0 0; 1 0 0; diff_series zlic10 --x-12-arima -q
eval transp($coeff)
eval transp($stderr)
eval $lnl

z10 = y_two+1000
list zlic10 = 0 y_one z10
my_arima("3 0 0; 1 0 0",diff_series,zlic10,"-q")
arima 3 0 0; 1 0 0; diff_series zlic10 --x-12-arima -q
eval transp($coeff)
eval transp($stderr)
eval $lnl

z10 = y_two+10000
list zlic10 = 0 y_one z10
my_arima("3 0 0; 1 0 0",diff_series,zlic10,"-q")
arima 3 0 0; 1 0 0; diff_series zlic10 --x-12-arima -q
eval transp($coeff)
eval transp($stderr)
eval $lnl

z10 = y_two+100000
list zlic10 = 0 y_one z10
my_arima("3 0 0; 1 0 0",diff_series,zlic10,"-q")
arima 3 0 0; 1 0 0; diff_series zlic10 --x-12-arima -q
eval transp($coeff)
eval transp($stderr)
eval $lnl

</hansl>
Oleh



28 October 2018, 20:52:10, from "Riccardo (Jack) Lucchetti" <r.lucchetti@univpm.it>:

On Sun, 28 Oct 2018, Riccardo (Jack) Lucchetti wrote:

> On Sun, 28 Oct 2018, oleg_komashko@ukr.net wrote:
>
>> The current scaling factor makes the mean ~10;
>> with the sample 1..196 the mean is ~10^-8,
>> hence a scaling factor of ~10^18.
>
> I just pushed to git a one-liner that seems to solve the first issue you 
> raised. Now
>
> <hansl>
> open bad_data.gdt
> smpl 1 194
> arima 3 0 0; 1 0 0; diff_series const y_one y_two
> </hansl>
>
> produces sensible results. Allin: the fix is rather trivial, but please take 
> a look.

As for the second problem, I guess the solution is quite easy but I'd wait 
for Allin's opinion before committing any code to git.

In some cases, we initialise ARMA via NLS; however, we use NLS as if we 
were estimating a "real" model, and therefore we employ our usual (rather 
strict) convergence criterion. Of course, there's no need to be picky 
about convergence, since nls is just meant to provide sensible starting 
values. If you modify the nls_toler setting before running the arima 
command, things go back to normal; example:

<hansl>
open bad_data.gdt
smpl 1 194
series sty = diff_series/sd(diff_series)
list zli = y_one y_two

set nls_toler 1.0e-5
series y = sty + 6.48
arima 3 0 0; 1 0 0; y 0 zli
</hansl>

Now the question is: should we raise NLS tolerance by default, when it's 
used for ARMA initialisation, or do we have a better strategy?

-------------------------------------------------------
   Riccardo (Jack) Lucchetti
   Dipartimento di Scienze Economiche e Sociali (DiSES)

   Università Politecnica delle Marche
   (formerly known as Università di Ancona)

   r.lucchetti@univpm.it
   http://www2.econ.univpm.it/servizi/hpp/lucchetti
-------------------------------------------------------