Hi all,
it seems that we don't have a function for resampling _without_ replacement
so far -- at least I could not find one. I was inspired by the following
example from John D. Cook:
https://stackoverflow.com/a/311716
I translated it to hansl and wrote two versions. The first one simply
draws n distinct integers from the sequence 1 to N, which could be
used for selecting rows of a matrix in a later step. The second
function does the same job but directly returns n randomly drawn rows
from an input matrix X. I am not happy with the functions' names -- maybe
you have a better idea?
Also, maybe one of the functions could be another candidate for extra.gfn?
Best,
Artur
<hansl>
function matrix SampleWithoutReplacement (int N[1::] "Size of set sampling from", \
                                          int draws[1::] "No. of draws or size of each sample")
    /* Draws `draws` distinct integers, without replacement, from the
       sequence 1, 2, ..., N using selection sampling: the candidates
       are visited once in ascending order and each is accepted with
       probability (still needed) / (still available).

       N:     size of the set to sample from (>= 1)
       draws: number of integers to draw (>= 1, must not exceed N)

       Returns a (draws x 1) column vector holding the selected integers
       in ascending order. Raises an error if draws > N. */
    if draws > N
        funcerr "Number of draws cannot exceed the size of the set to sample from"
    endif

    scalar t = 1    # 1-based index of the candidate currently considered
    scalar m = 0    # number of items selected so far
    matrix sample = zeros(draws, 1)

    loop while m < draws --quiet
        scalar unif = randgen1(u, 0, 1)
        # Candidates t, t+1, ..., N are still available, i.e. N-t+1 of
        # them (t is 1-based), while draws-m are still needed. Using N-t
        # here would over-select the early candidates. Once the two
        # counts are equal every remaining candidate is accepted
        # (assuming the uniform draw is strictly below 1), so t never
        # runs past N.
        if (N - t + 1) * unif < (draws - m)
            m++
            sample[m] = t
        endif
        t++
    endloop

    return sample
end function
function matrix SampleMatrixWithoutReplacement (matrix X "Matrix to sample rows from", \
                                                int draws[1::] "No. of draws or size of each sample")
    /* Draws `draws` distinct rows of X at random, without replacement,
       using selection sampling over the row indices 1, ..., rows(X).

       X:     matrix whose rows are sampled
       draws: number of rows to draw (>= 1, must not exceed rows(X))

       Returns a (draws x cols(X)) matrix with the selected rows, kept in
       their original relative order. Raises an error if draws > rows(X). */
    scalar N = rows(X)
    if draws > N
        funcerr "Number of draws cannot exceed the size of the set to sample from"
    endif

    scalar t = 1    # 1-based index of the row currently considered
    scalar m = 0    # number of rows selected so far
    matrix sample = zeros(draws, 1)

    loop while m < draws --quiet
        scalar unif = randgen1(u, 0, 1)
        # Rows t, ..., N are still available (N-t+1 of them, since t is
        # 1-based) and draws-m are still needed; accept row t with
        # probability (draws-m)/(N-t+1). Using N-t here would over-select
        # the early rows.
        if (N - t + 1) * unif < (draws - m)
            m++
            sample[m] = t
        endif
        t++
    endloop

    # retrieve all sampled rows from X in one step via the index vector
    return X[sample,]
end function
# Examples
#----------
# Draw 5 distinct integers from the sequence 1..10.
matrix sample = SampleWithoutReplacement(10, 5)
# NOTE(review): a bare variable name is presumably meant to display the
# matrix -- confirm this prints in the gretl version in use.
sample
# Build a 10 x 2 matrix via mnormal() to sample rows from.
matrix M = mnormal(10,2)
# Draw 5 of the 10 rows of M without replacement.
matrix sampled_mat = SampleMatrixWithoutReplacement(M, 5)
sampled_mat
</hansl>