function scalar store_days (scalar yr1, scalar mon1, scalar yr2, scalar mon2) /* based on the starting year and month and ending year and month, determine the total number of daily observations */ scalar days = 0 loop i=yr1..yr2 --quiet loop j=1..12 --quiet if (i==yr1 && jmon2) break # finished else # change the final '7' if not using a 7-day week days += monthlen(j, i, 7) endif endloop endloop return days end function function matrix date_limits (matrix *m) /* for a data matrix that is not necessarily sorted by year and month, determine the starting and ending (year, month) pairs */ scalar n = rows(m) # startyear, startmon, endyear, endmon matrix limits = {9999, 12, 0, 1} # find first and last years loop i=2..n --quiet yr = m[i,3] if yr < limits[1] limits[1] = yr endif if yr > limits[3] limits[3] = yr endif endloop # find first and last months loop i=2..n --quiet yr = m[i,3] mon = m[i,4] if yr == limits[1] && mon < limits[2] limits[2] = mon endif if yr == limits[3] && mon > limits[4] limits[4] = mon endif endloop return limits end function # main script # note: the input data may be in arbitrary order open supermarkets.xls --quiet # sort by supermarket and put into matrix form dataset sortby Supermarket matrix pdata = {dataset} n = rows(pdata) # count the number of supermarkets scalar nsups = 1 loop i=2..n --quiet if pdata[i,1] != pdata[i-1,1] nsups++ endif endloop printf "The dataset contains %d supermarkets\n", nsups matrix limits = date_limits(&pdata) printf "The time series run from %d/%d to %d/%d\n", limits[1], limits[2], limits[3], limits[4] ndays = store_days(limits[1], limits[2], limits[3], limits[4]) printf "Time-series length is %d days\n", ndays # total panel size nT = nsups * ndays printf "Total panel size: n*T = %d\n", nT # create panel dataset of the correct length nulldata nT --preserve setobs ndays 1:1 --stacked-time-series # initialize, declare stuff series supermkt = pdata[1,1] series yr = NA series mon = NA series day = NA scalar current_sup = -1 scalar s0 = 0 scalar k0 = 1 scalar k string vname /* loop across rows of data matrix, distributing the data to the right place in the panel structure */ loop i=1..n --quiet if pdata[i,1] != current_sup # set up a supermarket record: we write out year, month and # day as a check that the time dimension is not messed up if i > 1 printf "*** new supermkt = %d (previous = %d)\n", pdata[i,1], current_sup # shift the base writing position forwards s0 += ndays endif thisyear = limits[1] thismonth = limits[2] mdays = monthlen(thismonth, thisyear, 7) k = 1 loop j=1..ndays --quiet idx = s0 + j supermkt[idx] = pdata[i,1] yr[idx] = thisyear mon[idx] = thismonth day[idx] = k if k == mdays # got to the end of a month if thismonth == 12 thisyear++ thismonth = 1 else thismonth++ endif mdays = monthlen(thismonth, thisyear, 7) k = 1 else k++ endif endloop current_sup = pdata[i,1] endif # now process the price information on this row thisprod = pdata[i,2] # construct the product-price series name sprintf vname, "prod_%d", thisprod if !isseries(@vname) # must be a 'new' product: create series series @vname = NA endif thisyear = pdata[i,3] thismonth = pdata[i,4] thisprice = pdata[i,5] start = pdata[i,6] stop = pdata[i,7] # figure where to start writing the data if thismonth == 1 prevmonth = 12 prevyear = thisyear - 1 else prevmonth = thismonth - 1 prevyear = thisyear endif # starting row for current price k = s0 + store_days(2007, 3, prevyear, prevmonth) + start # write the data into the relevant rows loop j=start..stop --quiet yr[k] = thisyear mon[k] = thismonth @vname[k] = thisprice k++ endloop endloop # write out the panel dataset store supermkt_panel.gdt supermkt yr mon day prod* # print supermkt yr mon day prod* -o