C:/programs/etirm/src/EStepDiscrete.h

Go to the documentation of this file.
00001 /*! \file EStepDiscrete.h
00002  
00003   \brief 
00004   Class definitions of EStepDiscrete and NMatrixVec.
00005   
00006   Used to perform E-step calculation and store results for IRT model with a 
00007   discrete latent variable distribution. For use with dichotomous items where 
00008   n and r are stored for each latent variable category.
00009   
00010   Note that the main definitions of the ExamineePosterior and DoEStep member 
00011   templates of the EStepDiscrete class are outside of the class definition, 
00012   although there are duplicate definitions given inside the class definition 
00013   for use by compilers which do not allow member templates to be defined outside
00014   the class definition. 
00015   
00016   If the symbol BOOST_MSVC6_MEMBER_TEMPLATES is defined the member template 
00017   definitions inside the class definition are used, otherwise the member 
00018   template definitions outside the class definition are used. If the 
00019   ExamineePosterior and DoEStep member templates are modified, the 
00020   modifications must be made in both definitions.
00021  
00022   http://www.smallwaters.com/software/cpp/etirm.html
00023  
00024   Author(s): 
00025   Werner Wothke, maintenance (http://www.smallwaters.com)
00026   Brad Hanson (http://www.b-a-h.com/)
00027   See the file LICENSE for information on usage and redistribution.
00028  
00029   Copyright (C) 2008, Werner Wothke
00030   Copyright (c) 2000-2001, Bradley A. Hanson
00031  */
00032 
00033 #ifndef ETIRM_ESTEPDISCRETE_H_
00034 #define ETIRM_ESTEPDISCRETE_H_
00035 
00036 #ifdef ETIRM_NO_DIR_PREFIX
00037 #include "etirmtypes.h"
00038 #include "ItemParamPrior.h"
00039 #else
00040 #include "etirm/etirmtypes.h"
00041 #include "etirm/ItemParamPrior.h"
00042 #endif
00043 
00044 #include <cmath> // for exp and log
00045 #include <vector>
00046 
00047 // for compilers which do not put C library functions in std namespace
00048 #ifdef BOOST_NO_STDC_NAMESPACE
00049 namespace std
00050 { using ::exp; using ::log;}
00051 #endif
00052 
00053 namespace etirm
00054 {
00055   //! Sentinel stored in place of log(0); a log probability equal to logZero is treated as a zero probability.
00056   const double logZero = -1021.0;
00057 
00058   /*!
00059     \brief
00060     Helper class template used by EStepDiscrete; owns a vector of pointers
00061     to heap-allocated RealMatrix objects, one matrix per item.
00062  
00063     Copying is disabled: the destructor deletes the matrices, so a copy
00064     sharing the same raw pointers would delete them a second time.
00065     \param II  Iterator over item objects.
00066    */
00067   template <class II> class NMatrixVec
00068   {
00069 
00070   public:
00071 
00072     //! Type of iterator over item matrices.
00073     typedef std::vector<RealMatrix *>::iterator iterator;
00074 
00075     NMatrixVec(II bitem, II eitem, int nlatentcat);
00076     ~NMatrixVec();
00077 
00078     //! Iterator to the pointer to the first item's matrix.
00079     iterator begin()
00080     {
00081       return mVector.begin();
00082     }
00083 
00084     //! Return pointer to matrix for item Index+1 (Index is zero-offset and is not range checked).
00085     RealMatrix *operator[](int Index)
00086     {
00087       return mVector[Index];
00088     }
00089 
00090   private:
00091 
00092     NMatrixVec(const NMatrixVec &);            // declared but not implemented: copying is disabled
00093     NMatrixVec &operator=(const NMatrixVec &); // declared but not implemented: copying is disabled
00094 
00095     //! Owned matrices, one per item; each row holds a discrete latent variable distribution for one response.
00096     std::vector<RealMatrix *> mVector;
00097   };
00098 
00099   /*!
00100     \brief Constructor: allocates one RealMatrix (NumRespCat() rows by nlatentcat
00101     columns) per item. If an allocation throws, the matrices created so far are
00102     deleted before the exception is rethrown, so nothing is leaked.
00103     \param[in]  bitem Iterator to first item.
00104     \param[in]  eitem Iterator to one past last item.
00105     \param[in]  nlatentcat  Number of discrete categories of latent variable distribution.
00106    */
00107   template <class II> NMatrixVec<II>::NMatrixVec(II bitem, II eitem, int nlatentcat) :
00108   mVector(eitem-bitem) // every element starts out as a null pointer
00109   {
00110     try
00111     {
00112       iterator ri = mVector.begin();
00113       for (II i = bitem; i != eitem; ++ri, ++i)
00114         *ri = new RealMatrix((*i)->NumRespCat(), nlatentcat);
00115     }
00116     catch (...)
00117     {
00118       for (iterator ri = mVector.begin(); ri != mVector.end(); ++ri)
00119         delete *ri; // deleting a null pointer is a no-op
00120       throw;
00121     }
00122   }
00123 
00124   /*! Destructor: releases every matrix held in mVector. */
00125   template <class II> NMatrixVec<II>::~NMatrixVec()
00126   {
00127     // delete applied to a null pointer does nothing, so no explicit null test is needed
00128     const iterator last = mVector.end();
00129     for (iterator cur = mVector.begin(); cur != last; ++cur)
00130     {
00131       delete *cur;
00132     }
00133   }
00134 
00135   /*!
00136     \brief
00137     Class template to perform E-step calculation and store results
00138     for IRT model with a discrete latent variable distribution.
00139  
00140     \section template_args Template Parameters
00141     
00142     \param E Examinee type.
00143     \param I Item type. 
00144     \param II  Iterator over item objects.
00145     \param D  Class for discrete latent variable distribution.
00146    */
00147   template <class E, class I, class II, class D> class EStepDiscrete
00148   {
00149 
00150   public:
00151 
00152     typedef RealMatrix::row_iterator ngroup_iterator;
00153     //!< Type of iterator over marginal group probabilities.
00154 
00155     typedef typename D::point_iterator point_iterator;
00156     //!< Type of iterator over points of latent variable distribution.
00157 
00158     EStepDiscrete(II bitem, II eitem, D &dist);
00159     //!< Constructor.
00160 
00161     ~EStepDiscrete();
00162     // Destructor.
00163 
00164 #ifndef BOOST_MSVC6_MEMBER_TEMPLATES
00165     /*!
00166       \brief
00167       Computes the E-step of the EM algorithm for IRT models with a
00168       discrete latent variable distribution.
00169       
00170       Returns marginal loglikelihood of examinees' responses
00171       (sum over examinees of the marginal loglikelihood of an examinee's responses)
00172       plus sum of prior likelihoods over all item parameters.
00173       This is the value of the marginal posterior density that the
00174       EM algorithm is maximizing at the values of the item parameters
00175       computed in the last M-step. The log of the priors for the item
00176       parameters are added to this value for the items for which
00177       n and r are calculated.
00178       
00179       Results of the E-step are stored in data member nGroups. The posterior
00180       distributions for examinees are stored in the examinee objects
00181       if storeExamineePosterior is true, and updated n's and r's are computed
00182       for the items given by itemsNR_begin and itemsNR_end.
00183       
00184       A duplicate of this definition is given inside the class definition
00185       for use when BOOST_MSVC6_MEMBER_TEMPLATES is defined.
00186  
00187       \section template_args Template Parameters
00188       
00189       \param D  Class for discrete latent variable distribution.
00190       \param E  Examinee type.
00191       \param EI Iterator over pointers to examinee objects.
00192       \param I  Item type. 
00193       \param II Iterator over item objects.
00194  
00195       \section function_args Function Parameters
00196       
00197       \param[in]  examinees_begin Iterator to pointer to first examinee
00198       \param[in]  examinees_end Iterator to pointer to one past last examinee
00199       \param[in]  itemsNR_begin Iterator to first item pointer for which n and r will be updated.
00200           The items for which n and r are updated can be different from the items
00201           used to compute the posterior distribution of the latent variable for
00202           each examinee. If itemsNR_end - itemsNR_begin == 0 then n and r are not
00203           updated for any items.
00204       \param[in]  itemsNR_end Iterator to one past last item pointer for which n and r will be 
00205           updated.
00206       \param[in]  computeExamineePosterior If TRUE posterior latent variable distribution
00207           for each examinee is computed. If FALSE previously stored posterior
00208           latent variable distribution for each examinee is used.
00209       \param[in]  storeExamineePosterior If TRUE posterior latent variable distribution
00210           is stored for each examinee. If 'computeExamineePosterior' is
00211           FALSE the value of the argument is not used (in that case a
00212           previously stored posterior distribution for each examinee is
00213           being used in this function).
00214      */
00215     template <class EI>
00216     Real DoEStep(EI examinees_begin, EI examinees_end, II itemsNR_begin, II itemsNR_end,
00217         bool computeExamineePosterior, bool storeExamineePosterior);
00218 
00219     /*!
00220       \brief
00221       Version of DoEStep in which the items that are used to compute the
00222       posterior distributions for examinees are also the items for which
00223       n and r are updated, i.e., itemsNR_begin == items_begin and
00224       itemsNR_end == items_end.
00225  
00226       A duplicate of this definition is given inside the class definition
00227       for use when BOOST_MSVC6_MEMBER_TEMPLATES is defined.
00228  
00229       \section template_args Template Parameters
00230       
00231       \param D  Class for discrete latent variable distribution.
00232       \param E  Examinee type.
00233       \param EI Iterator over pointers to examinee objects.
00234       \param I  Item type. 
00235       \param II Iterator over item objects.
00236  
00237       \section function_args Function Parameters
00238       
00239       \param[in]  examinees_begin Iterator to pointer to first examinee
00240       \param[in]  examinees_end Iterator to pointer to one past last examinee
00241       \param[in]  computeExamineePosterior If TRUE posterior latent variable distribution
00242           for each examinee is computed. If FALSE previously stored posterior
00243           latent variable distribution for each examinee is used.
00244       \param[in]  storeExamineePosterior If TRUE posterior latent variable distribution
00245           is stored for each examinee. If 'computeExamineePosterior' is
00246           FALSE the value of the argument is not used (in that case a
00247           previously stored posterior distribution for each examinee is
00248           being used in this function).
00249      */
00250     template <class EI>
00251     Real DoEStep(EI examinees_begin, EI examinees_end, bool computeExamineePosterior,
00252         bool storeExamineePosterior);
00253 
00254     /*!
00255      \brief
00256      Computes posterior distribution of discrete latent variable for an examinee.
00257      Returns marginal likelihood of the examinee's responses.
00258      
00259      A duplicate of this definition is given inside the class definition
00260      for use when BOOST_MSVC6_MEMBER_TEMPLATES is defined.
00261 
00262      \section template_args Template Parameters
00263      
00264      \param PI Iterator over posterior probabilities.
00265 
00266      \section function_args Function Parameters
00267      
00268      \param[in]  examinee Object holding information about an examinee's item responses
00269      \param[in]  begin_posterior Iterator pointing to first element of container holding posterior
00270          probabilities.
00271      \param[in]  end_posterior Iterator pointing to one past last element of container holding
00272          posterior probabilities. This argument is only used to make sure there
00273          is enough space in the container which will hold the posterior probabilities.
00274      */
00275     template <class PI>
00276     Real ExamineePosterior(E &examinee, PI begin_posterior, PI end_posterior);
00277     // Calculates posterior latent variable distribution for an examinee.
00278 #endif
00279 
00280     void CalcResponseProb();
00281     //!< Calculates matrices of response probabilities used in DoEStep.
00282 
00283     ngroup_iterator GetNGroup(int group)
00284     {
00285       return (*nGroups).begin_row(group);
00286     }
00287     //!< Returns iterator to the start of row \a group of nGroups (the latent variable distribution for that group).
00288 
00289     int size()
00290     {
00291       return numLatentVarCat;
00292     }
00293     //!< Returns numLatentVarCat, the number of categories in the discrete latent variable distribution.
00294 
00295     point_iterator GetPoints()
00296     {
00297       return latentvar_dist.begin_points();
00298     }
00299     //!< Returns iterator to the points of latentvar_dist (begin_points() is called without a group argument).
00300 
00301   protected:
00302 
00303     RealMatrix *nGroups;
00304     /*!< 
00305       Row i gives the expected number of examinees at each category of the latent variable
00306       for group i computed in DoEStep.
00307      */
00308 
00309     std::vector<NMatrixVec<II> *> mRespProb;
00310     /*!<  
00311       Element i is a pointer to a NMatrixVec object for examinee group i.
00312       Different NMatrixVec objects are needed for different examinee
00313       groups if unique latent distribution points are used for each group.
00314       The NMatrixVec object for each examinee group holds the log 
00315       of the response probabilities for each response of each item in each
00316       latent variable category as computed in CalcResponseProb.
00317       The k-th element is a pointer to a matrix containing log response probabilities
00318       for item k. Row i of the k-th matrix contains the log response probabilities
00319       for response i of item k over the latent variable categories.
00320      */
00321 
00322     II items_begin;
00323     //!< Iterator to first item.
00324 
00325     II items_end;
00326     //!< Iterator to one past last item.
00327 
00328     D &latentvar_dist;
00329     //!< Object containing discrete latent variable distribution.
00330 
00331     RealMatrix logLatentProb;
00332     /*!< 
00333       Stores logarithms of probabilities in latentvar_dist
00334       for each examinee group for use in ExamineePosterior().
00335       Row i contains log of latent probabilities for examinee
00336       group i.
00337      */
00338 
00339     int numItems; //!< number of items.
00340 
00341     int numLatentVarCat; //!< number of categories in discrete latent variable distribution.
00342 
00343     int numGroupUnique; //!< Number of groups with unique latent distribution points.
00344 
00345     std::vector<int> *itemIndices;
00346     /*!< This vector contains zero-offset indices in the examinee response
00347       vector corresponding to the sequence of items given by items_begin and items_end.
00348      */
00349 
00350     Response notPresentedResponse;
00351     /*!< Response indicating an item was not presented to an examinee.
00352       Assumed to be the same for all items.
00353      */
00354 
00355     /***************
00356      Define member templates in class declaration for Visual C++ 6
00357      ****************/
00358 #ifdef BOOST_MSVC6_MEMBER_TEMPLATES
00359   public:
00360 
00361     /*! 
00362      \brief
00363       Computes posterior distribution of discrete latent variable for an examinee.
00364      
00365       Returns the marginal likelihood of the examinee's responses; n and r are
00366       not updated by this function. Throws InvalidArgument if the posterior
00367       range does not hold exactly numLatentVarCat elements.
00368      \section template_args Template Parameters
00369      \param E  Examinee type.
00370      \param PI Iterator over posterior distribution vector.
00371      
00372      \section function_args Function Parameters
00373      
00374      \param[in]  &examinee Address of examinee object.
00375      \param[out] begin_posterior Iterator pointing to first element of posterior vector.
00376      \param[in]  end_posterior Iterator pointing to one past last element of posterior vector.
00377      */
00378     template <class PI> Real ExamineePosterior(E &examinee, PI begin_posterior, PI end_posterior)
00379     {
00380       // i and il are loop counters reused by the loops below
00381       int i, il;
00382 
00383       int group = examinee.Group();
00384       // the destination range must have exactly one element per latent variable category
00385       if ((end_posterior - begin_posterior) != numLatentVarCat)
00386       {
00387         throw InvalidArgument("Incorrect size of vector to hold posterior probabilities",
00388             "EStepDiscrete::ExamineePosterior");
00389       }
00390 
00391       /* Constants used for loop unrolling: number of full blocks of 4 categories, and the leftover count */
00392       int Ndiv4 = numLatentVarCat / 4;
00393       int Nmod4 = numLatentVarCat - Ndiv4*4;
00394 
00395       /* initialize posterior (kept on the log scale until the last loop) to the log latent probabilities of the group */
00396       RealVector::iterator ipost = begin_posterior; // PI must therefore be convertible to RealVector::iterator
00397       RealMatrix::row_iterator iwt = logLatentProb.begin_row(group);
00398       for (i = Ndiv4; i--; ipost+=4, iwt+=4)
00399       {
00400         *ipost = *iwt;
00401         ipost[1] = iwt[1];
00402         ipost[2] = iwt[2];
00403         ipost[3] = iwt[3];
00404       }
00405       for (i = Nmod4; i--; ++ipost, ++iwt)
00406       {
00407         *ipost = *iwt;
00408       }
00409       // use element 0 of mRespProb when there is a single unique group, otherwise the element for group-1
00410       NMatrixVec<II>::iterator item = (numGroupUnique == 1) ? mRespProb[0]->begin()
00411       : mRespProb[group-1]->begin();
00412       II iitem = items_begin;
00413       typename E::response_iterator presp = examinee.responses_begin();
00414       std::vector<int>::iterator ii = itemIndices->begin();
00415       for (i = numItems; i--; ++item, ++iitem, ++ii)
00416       {
00417         Response resp = presp[*ii]; // *ii is the position of this item in the examinee's response vector
00418         if (resp != notPresentedResponse)
00419         {
00420           ipost = begin_posterior;
00421           int index = (*iitem)->ResponseIndex(resp);
00422           RealMatrix::row_iterator ir = (*item)->begin_row(index+1); // row index+1 holds the log probabilities of this response
00423           for (il=Ndiv4; il--; ipost+=4, ir+=4) // add the log response probabilities, four categories at a time
00424           {
00425             *ipost += *ir;
00426             ipost[1] += ir[1];
00427             ipost[2] += ir[2];
00428             ipost[3] += ir[3];
00429           }
00430           for (il=Nmod4; il--; ++ipost, ++ir)
00431           {
00432             *ipost += *ir;
00433           }
00434         }
00435       }
00436 
00437       /* find sum in order to standardize posterior; categories whose latent log probability is logZero are excluded */
00438       ipost = begin_posterior;
00439       iwt = logLatentProb.begin_row(group);
00440       Real sum = 0.0;
00441       for (i = numLatentVarCat; i--; ++ipost, ++iwt)
00442       {
00443         if (*iwt != logZero)
00444         sum += std::exp(*ipost);
00445         else
00446         *ipost = logZero;
00447       }
00448       // NOTE(review): if every exp() term underflows, sum is 0 and logsum below is log(0) - confirm this cannot occur
00449       /* standardize: subtract log of the sum and convert back from the log scale */
00450       ipost = begin_posterior;
00451       Real logsum = std::log(sum);
00452       for (i = numLatentVarCat; i--; ++ipost)
00453       {
00454         if (*ipost != logZero)
00455         {
00456           *ipost -= logsum;
00457           *ipost = std::exp(*ipost);
00458         }
00459         else
00460         *ipost = 0.0;
00461       }
00462       // the normalizing sum is the value returned to the caller
00463       return sum;
00464     }
00465 
00466     /*! 
00467       \brief
00468       Computes the E-step of the EM algorithm for IRT models with a
00469       discrete latent variable distribution.
00470       
00471       Returns marginal loglikelihood of examinees' responses
00472       (sum over examinees of the marginal loglikelihood of an examinee's responses)
00473       plus sum of prior likelihoods over all item parameters.
00474       This is the value of the marginal posterior density that the
00475       EM algorithm is maximizing at the values of the item parameters
00476       computed in the last M-step. The log of the priors for the item
00477       parameters are added to this value for the items for which
00478       n and r are calculated.
00479       
00480       Results of the E-step are stored in data member nGroups. The posterior
00481       distributions for examinees are stored in the examinee objects
00482       if storeExamineePosterior is true, and updated n's and r's are computed
00483       for the items given by itemsNR_begin and itemsNR_end.
00484       
00485       A duplicate of this definition is given outside the class definition
00486       for use when BOOST_MSVC6_MEMBER_TEMPLATES is not defined.
00487 
00488       \section template_args Template Parameters
00489       
00490       \param EI Iterator over pointers to examinee objects.
00491       \param II Iterator over item objects.
00492  
00493       \section function_args Function Parameters
00494       
00495       \param[in]  examinees_begin Iterator to pointer to first examinee
00496       \param[in]  examinees_end Iterator to pointer to one past last examinee
00497       \param[in]  itemsNR_begin Iterator to first item pointer for which n and r will be updated.
00498            The items for which n and r are updated can be different from the items
00499            used to compute the posterior distribution of the latent variable for
00500            each examinee. If itemsNR_end - itemsNR_begin == 0 then n and r are not
00501            updated for any items.
00502       \param[in]  itemsNR_end Iterator to one past last item pointer for which n and r will be 
00503            updated.
00504       \param[in]  computeExamineePosterior If TRUE posterior latent variable distribution
00505            for each examinee is computed. If FALSE previously stored posterior
00506            latent variable distribution for each examinee is used.
00507       \param[in]  storeExamineePosterior If TRUE posterior latent variable distribution
00508            is stored for each examinee. If 'computeExamineePosterior' is
00509            FALSE the value of the argument is not used (in that case a
00510            previously stored posterior distribution for each examinee is
00511            being used in this function).     
00512      */
00513     template <class EI> Real DoEStep(EI examinees_begin, EI examinees_end, II itemsNR_begin,
00514         II itemsNR_end, bool computeExamineePosterior, bool storeExamineePosterior)
00515     {
00516       // i and j are loop counters reused by the loops below
00517       int i, j;
00518       Real loglikelihood = 0.0;
00519       // work vector holding the posterior distribution of the examinee currently being processed
00520       RealVector posterior(numLatentVarCat);
00521       // reset the group totals (presumably assigns 0.0 to every element - confirm against RealMatrix)
00522       *nGroups = 0.0;
00523       int numItemsNR = itemsNR_end - itemsNR_begin;
00524       II iitem = itemsNR_begin;
00525       for (i=numItemsNR; i--; ++iitem)
00526       {
00527         /*
00528           Check that n and r for each item use the same number of latent variable 
00529           categories as in latent variable distribution used to compute
00530           examinee posterior distributions (does not check that points match,
00531           just that number of points match, it is assumed that points also match).
00532          */
00533         j = (*iitem)->NumLatentVarCat();
00534         if (j != numLatentVarCat)
00535         {
00536           throw RuntimeError("Mismatch in number of latent variable categories",
00537               "EStepDiscrete::DoEStep");
00538         }
00539 
00540         /* Initialize n and r for item to zero */
00541         (*iitem)->InitializeNR();
00542       }
00543       // when posteriors are recomputed, first refresh the response and latent log probabilities they are built from
00544       if (computeExamineePosterior)
00545       {
00546         // Check that number of latent variable categories for latentvar_dist
00547         // has not changed.
00548         if (numLatentVarCat != latentvar_dist.size())
00549         {
00550           throw RuntimeError("Number of latent variable categories has changed",
00551               "EStepDiscrete::DoEStep");
00552         }
00553 
00554         // Compute log probabilities of each response to each item using
00555         // current item parameter estimates
00556         CalcResponseProb();
00557 
00558         /* Set logLatentProb to log of current probabilities of latentvar_dist (logZero for a zero weight) */
00559         for (i = 1; i <= latentvar_dist.NumGroups(); ++i)
00560         {
00561           RealMatrix::row_iterator ip = logLatentProb.begin_row(i);
00562           typename D::weight_iterator iwt = latentvar_dist.begin_weights(i);
00563           for (j = numLatentVarCat; j--; ++ip, ++iwt)
00564           {
00565             *ip = (*iwt != 0.0) ? std::log(*iwt) : logZero;
00566           }
00567         }
00568       }
00569 
00570       /* For each examinee compute posterior distribution and
00571          update n and r for items the examinee responded to.
00572        */
00573       for (EI examinee_i = examinees_begin; examinee_i != examinees_end; ++examinee_i)
00574       {
00575 
00576         Real marginalLikelihood;
00577         if (computeExamineePosterior) // Compute posterior distribution for examinee.
00578 
00579         {
00580           marginalLikelihood = ExamineePosterior(**examinee_i, posterior.begin(), posterior.end());
00581           // optionally keep the posterior and the marginal likelihood in the examinee object
00582           if (storeExamineePosterior)
00583           {
00584             typename E::posterior_vector epost(numLatentVarCat);
00585             typename E::posterior_vector::iterator iep = epost.begin();
00586             RealVector::iterator ip = posterior.begin();
00587             for (i = numLatentVarCat; i--; ++iep, ++ip)
00588             *iep = *ip;
00589             (*examinee_i)->SetPosterior(epost);
00590 
00591             (*examinee_i)->SetMarginalRespLikelihood(marginalLikelihood);
00592           }
00593         }
00594         else // use examinee posterior distribution already computed
00595 
00596         {
00597           typename E::posterior_vector::iterator iep = (*examinee_i)->posterior_begin();
00598           RealVector::iterator ip = posterior.begin();
00599           for (i = numLatentVarCat; i--; ++iep, ++ip)
00600           *ip = *iep;
00601 
00602           marginalLikelihood = (*examinee_i)->GetMarginalRespLikelihood();
00603         }
00604         // NOTE(review): this term is not multiplied by casewt, although n, r and nGroups below are - confirm intended
00605         /* update marginal loglikelihood */
00606         loglikelihood += std::log(marginalLikelihood);
00607         // casewt (the examinee's Count()) scales every contribution to n, r and nGroups below
00608         typename E::response_iterator iresp = (*examinee_i)->responses_begin();
00609         Real casewt = (*examinee_i)->Count();
00610         int group = (*examinee_i)->Group();
00611         iitem = itemsNR_begin;
00612         for (i = numItemsNR; i--; ++iitem)
00613         {
00614           /* Update n and r for each item the examinee was presented */
00615           Response resp = iresp[(*iitem)->Index()];
00616           if (resp != notPresentedResponse)
00617           {
00618             typename I::r_iterator ir = (*iitem)->RVector(resp, group);
00619             typename I::n_iterator in = (*iitem)->NVector(group);
00620             RealVector::iterator ipost = posterior.begin();
00621             for (j = numLatentVarCat; j--; ++ir, ++in, ++ipost)
00622             {
00623               *ir += *ipost * casewt;
00624               *in += *ipost * casewt;
00625             }
00626           }
00627 
00628         }
00629 
00630         /* Update marginal distribution for group examinee belongs to */
00631         RealVector::iterator ipost = posterior.begin();
00632         RealMatrix::row_iterator igroup = nGroups->begin_row(group);
00633         for (j = numLatentVarCat; j--; ++ipost, ++igroup)
00634         {
00635           *igroup += *ipost * casewt;
00636         }
00637 
00638       }
00639 
00640       /* Add log of prior densities of item parameter estimates to loglikelihood */
00641       for (II ii = itemsNR_begin; ii != itemsNR_end; ++ii)
00642       {
00643         PriorVector::iterator iprior = (*ii)->PriorsIterator();
00644         RealVector::iterator iparam = (*ii)->ParametersIterator();
00645         for (i = (*ii)->NumParameters(); i--; ++iprior, ++iparam)
00646         {
00647           if (*iprior) // entries that test false are skipped (no prior contribution for that parameter)
00648           loglikelihood += (*iprior)->LogDensity(*iparam);
00649         }
00650       }
00651       // sum of log marginal likelihoods over examinees plus log prior densities over the itemsNR items
00652       return loglikelihood;
00653 
00654     }
00655 
00656     /*!
00657       \brief
00658       Version of DoEStep in which the items that are used to compute the
00659       posterior distributions for examinees are also the items for which
00660       n and r are updated, i.e., itemsNR_begin == items_begin and
00661       itemsNR_end == items_end.
00662  
00663       A duplicate of this definition is given outside the class definition
00664       for use when BOOST_MSVC6_MEMBER_TEMPLATES is not defined.
00665  
00666       \section template_args Template Parameters
00667       
00668       \param EI Iterator over pointers to examinee objects.
00669  
00670       \section function_args Function Parameters
00671       
00672       \param[in]  examinees_begin Iterator to pointer to first examinee
00673       \param[in]  examinees_end Iterator to pointer to one past last examinee
00674       \param[in]  computeExamineePosterior If TRUE posterior latent variable distribution
00675           for each examinee is computed. If FALSE previously stored posterior
00676           latent variable distribution for each examinee is used.
00677       \param[in]  storeExamineePosterior If TRUE posterior latent variable distribution
00678           is stored for each examinee. If 'computeExamineePosterior' is
00679           FALSE the value of the argument is not used (in that case a
00680           previously stored posterior distribution for each examinee is
00681           being used in this function).
00682      */
00683     template <class EI> Real DoEStep(EI examinees_begin, EI examinees_end,
00684         bool computeExamineePosterior, bool storeExamineePosterior)
00685     { // forwards to the six-argument overload, passing items_begin/items_end as the items whose n and r are updated
00686       return DoEStep(examinees_begin, examinees_end, items_begin, items_end,
00687           computeExamineePosterior, storeExamineePosterior);
00688     }
00689 #endif // BOOST_MSVC6_MEMBER_TEMPLATES
00690   };
00691 
00692   /*!
00693     \brief 
00694     Constructor: records the item range and latent variable distribution and
00695     allocates the storage used by the E-step.
00696  
00697     Allocates nGroups, one NMatrixVec for each group with unique latent
00698     variable points, and the vector of item indices. logLatentProb is filled
00699     with the logs of the weights in \a dist (logZero where a weight is zero).
00700 
00701     \section function_args Function Parameters
00702     
00703     \param[in] bitem Iterator pointing to first item.
00704     \param[in] eitem Iterator pointing to one past last item.
00705     \param[in]  &dist Address of latent variable distribution object; a reference
00706         to it is kept in latentvar_dist.
00707 
00708    */
00709   template <class E, class I, class II, class D>
00710   EStepDiscrete<E, I, II, D>::EStepDiscrete(II bitem, II eitem, D &dist) :
00711   nGroups(0), mRespProb(dist.NumGroupsUnique()), items_begin(bitem), items_end(eitem),
00712   latentvar_dist(dist), logLatentProb(dist.NumGroups(), dist.size())
00713   {
00714     // the initializers above are written in member declaration order, which is the order in which they run
00715     numItems = items_end - items_begin;
00716     numLatentVarCat = dist.size();
00717     numGroupUnique = dist.NumGroupsUnique();
00718     notPresentedResponse = I::NotPresentedResponse();
00719 
00720     nGroups = new RealMatrix(dist.NumGroups(), numLatentVarCat);
00721 
00722     // one NMatrixVec per group with unique latent variable points
00723     for (int g = 0; g < numGroupUnique; ++g)
00724     {
00725       mRespProb[g] = new NMatrixVec<II>(bitem, eitem, dist.size());
00726     }
00727 
00728     /* Store indices of items in examinee response vector */
00729     itemIndices = new std::vector<int>(numItems);
00730     II itemIter = items_begin;
00731     for (int k = 0; k < numItems; ++k, ++itemIter)
00732     {
00733       (*itemIndices)[k] = (*itemIter)->Index();
00734     }
00735 
00736     /* Initialize logLatentProb to log of probabilities in latentvar_dist */
00737     for (int grp = 1; grp <= latentvar_dist.NumGroups(); ++grp)
00738     {
00739       RealMatrix::row_iterator logProb = logLatentProb.begin_row(grp);
00740       typename D::weight_iterator weight = latentvar_dist.begin_weights(grp);
00741       for (int cat = 0; cat < numLatentVarCat; ++cat, ++logProb, ++weight)
00742       {
00743         if (*weight != 0.0)
00744           *logProb = std::log(*weight);
00745         else
00746           *logProb = logZero;
00747       }
00748     }
00749   }
00750 
00751   /*! Destructor: frees nGroups, itemIndices and the NMatrixVec objects held in mRespProb. */
00752   template <class E, class I, class II, class D>
00753   EStepDiscrete<E, I, II, D>::~EStepDiscrete()
00754   {
00755     delete nGroups;
00756     delete itemIndices;
00757     // release the NMatrixVec objects created in the constructor, in index order
00758     int g = 0;
00759     while (g < numGroupUnique) delete mRespProb[g++];
00760   }
00761 
00762   /*!
00763     \brief
00764     Fills NMatrixVec objects with log of probabilities of each response to each item
00765     for each level of the discrete latent variable.
00766  
00767     For every group with unique latent variable points, every item and every
00768     response category, ProbResp() is evaluated at each point of the latent
00769     variable distribution and its natural log is stored in mRespProb.
00770 
00771     NOTE(review): a response probability of exactly zero is stored as
00772     std::log(0) rather than logZero - confirm this is intended.
00773 
00774    */
00775   template <class E, class I, class II, class D>
00776   void EStepDiscrete<E, I, II, D>::CalcResponseProb()
00777   {
00778     for (int grp = 0; grp < numGroupUnique; ++grp)
00779     {
00780       typename NMatrixVec<II>::iterator matrix = mRespProb[grp]->begin();
00781       for (II item = items_begin; item != items_end; ++item, ++matrix)
00782       {
00783         const int numResp = (*item)->NumRespCat();
00784         for (int respIdx = 0; respIdx < numResp; ++respIdx)
00785         {
00786           // points are requested for group grp+1, i.e. with a one-offset group number
00787           typename D::point_iterator point = latentvar_dist.begin_points(grp + 1);
00788           Response resp = (*item)->IndexResponse(respIdx);
00789           // row respIdx+1 of the item's matrix receives the values for response index respIdx
00790           RealMatrix::row_iterator logProb = (**matrix).begin_row(respIdx + 1);
00791           for (int cat = 0; cat < numLatentVarCat; ++cat, ++logProb, ++point)
00792           {
00793             *logProb = std::log((*item)->ProbResp(resp, *point));
00794           }
00795         }
00796       }
00797     }
00798   }
00799 
00800   /* Definitions of member templates for compilers which can handle member template definitions
00801      outside the class declaration.
00802    */
00803 #ifndef BOOST_MSVC6_MEMBER_TEMPLATES
00804   /*!
00805     \brief
00806     Computes posterior distribution of discrete latent variable for an examinee.
00807     Returns marginal likelihood of the examinee's responses.
00808     
00809     A duplicate of this definition is given inside the class definition
00810     for use when BOOST_MSVC6_MEMBER_TEMPLATES is defined.
00811  
00812     \section template_args Template Parameters
00813     
00814     \param PI Iterator over posterior probabilities.
00815  
00816     \section function_args Function Parameters
00817     
00818     \param[in]  examinee Object holding information about an examinee's item responses
00819     \param[in]  begin_posterior Iterator pointing to first element of container holding posterior
00820         probabilities.
00821     \param[in]  end_posterior Iterator pointing to one past last element of container holding
00822         posterior probabilities. This argument is only used to make sure there
00823         is enough space in the container which will hold the posterior probabilities.
00824    */
00825   template <class E, class I, class II, class D> template <class PI> Real
00826   EStepDiscrete<E, I, II, D>::ExamineePosterior( E &examinee,
00827       PI begin_posterior, PI end_posterior)
00828   {
00829     int i, il;
00830 
00831     int group = examinee.Group();
00832 
00833     if ((end_posterior - begin_posterior) != numLatentVarCat)
00834     {
00835       throw InvalidArgument("Incorrect size of vector to hold posterior probabilities",
00836           "EStepDiscrete::ExamineePosterior");
00837     }
00838 
00839     /* Constants used for loop unrolling */
00840     int Ndiv4 = numLatentVarCat / 4;
00841     int Nmod4 = numLatentVarCat - Ndiv4*4;
00842 
00843     /* initialize posterior probabilities */
00844     RealVector::iterator ipost = begin_posterior;
00845     RealMatrix::row_iterator iwt = logLatentProb.begin_row(group);
00846     for (i = Ndiv4; i--; ipost+=4, iwt+=4)
00847     {
00848       *ipost = *iwt;
00849       ipost[1] = iwt[1];
00850       ipost[2] = iwt[2];
00851       ipost[3] = iwt[3];
00852     }
00853     for (i = Nmod4; i--; ++ipost, ++iwt)
00854     {
00855       *ipost = *iwt;
00856     }
00857 
00858     typename NMatrixVec<II>::iterator item = // "typename" keyword added. ww, 1/10/2008.
00859     (numGroupUnique == 1) ? mRespProb[0]->begin() : mRespProb[group-1]->begin();
00860     II iitem = items_begin;
00861     typename E::response_iterator presp = examinee.responses_begin();
00862     std::vector<int>::iterator ii = itemIndices->begin();
00863     for (i = numItems; i--; ++item, ++iitem, ++ii)
00864     {
00865       Response resp = presp[*ii];
00866       if (resp != notPresentedResponse)
00867       {
00868         ipost = begin_posterior;
00869         int index = (*iitem)->ResponseIndex(resp);
00870         RealMatrix::row_iterator ir = (*item)->begin_row(index+1);
00871         for (il=Ndiv4; il--; ipost+=4, ir+=4)
00872         {
00873           *ipost += *ir;
00874           ipost[1] += ir[1];
00875           ipost[2] += ir[2];
00876           ipost[3] += ir[3];
00877         }
00878         for (il=Nmod4; il--; ++ipost, ++ir)
00879         {
00880           *ipost += *ir;
00881         }
00882       }
00883     }
00884 
00885     /* find sum in order to standardize posterior */
00886     ipost = begin_posterior;
00887     iwt = logLatentProb.begin_row(group);
00888     Real sum = 0.0;
00889     for (i = numLatentVarCat; i--; ++ipost, ++iwt)
00890     {
00891       if (*iwt != logZero)
00892       sum += std::exp(*ipost);
00893       else
00894       *ipost = logZero;
00895     }
00896 
00897     /* standardize */
00898     ipost = begin_posterior;
00899     Real logsum = std::log(sum);
00900     for (i = numLatentVarCat; i--; ++ipost)
00901     {
00902       if (*ipost != logZero)
00903       {
00904         *ipost -= logsum;
00905         *ipost = std::exp(*ipost);
00906       }
00907       else
00908       *ipost = 0.0;
00909     }
00910 
00911     return sum;
00912   }
00913 
00914   /*!
00915     \brief
00916     Computes the E-step of the EM algorithm for IRT models with a
00917     discrete latent variable distribution.
00918     
00919     Returns marginal loglikelihood of examinees' responses
00920     (sum over examinees of the marginal loglikelihood of an examinee's responses)
00921     plus sum of prior likelihoods over all item parameters.
00922     This is the value of the marginal posterior density that the
00923     EM algorithm is maximizing at the values of the item parameters
00924     computed in the last M-step. The log of the priors for the item
00925     parameters are added to this value for the items for which
00926     n and r are calculated.
00927     
00928     Results of the E-step are stored in data member nGroups. The posterior
00929     distributions for examinees are stored in the examinee objects
00930     if storeExamineePosterior is true, and updated n's and r's are computed
00931     the items given by itemsNR_begin and itemsNR_end.
00932     
00933     A duplicate of this definition is given inside the class definition
00934     for use when BOOST_MSVC6_MEMBER_TEMPLATES is defined.
00935  
00936     \section template_args Template Parameters
00937     
00938     \param D  Class for discrete latent variable distribution.
00939     \param E  Examinee type.
00940     \param EI Iterator over pointers to examinee objects.
00941     \param I  Item type. 
00942     \param II Iterator over item objects.
00943  
00944     \section function_args Function Parameters
00945     
00946     \param[in]  examinees_begin Iterator to pointer to first examinee
00947     \param[in]  examinees_end Iterator to pointer to one past last examinee
00948     \param[in]  itemsNR_begin Iterator to first item pointer for which n and r will be updated.
00949         The items for which n and r are updated can be different from the items
00950         used to compute the posterior distribution of the latent variable for
00951         each examinee. If itemsNR_end - itemsNR_begin == 0 then n and r are not
00952         updated for any items.
00953     \param[in]  itemsNR_end Iterator to one past last item pointer for which n and r will be 
00954         updated.
00955     \param[in]  computeExamineePosterior If TRUE posterior latent variable distribution
00956         for each examinee is computed. If FALSE previously stored posterior
00957         latent variable distribution for each examinee is used.
00958     \param[in]  storeExamineePosterior If TRUE posterior latent variable distribution
00959         is stored for each examinee. If 'computeExamineePosterior' is
00960         FALSE the value of the argument is not used (in that case a
00961         previously stored posterior distribution for each examinee is
00962         being used in this function).
00963    */
00964   template <class E, class I, class II, class D> template <class EI> Real EStepDiscrete<E, I, II, D>::DoEStep(
00965       EI examinees_begin, EI examinees_end, II itemsNR_begin, II itemsNR_end,
00966       bool computeExamineePosterior, bool storeExamineePosterior)
00967   {
00968     int i, j;
00969     Real loglikelihood = 0.0;
00970 
00971     RealVector posterior(numLatentVarCat);
00972 
00973     *nGroups = 0.0;
00974     int numItemsNR = itemsNR_end - itemsNR_begin;
00975     II iitem = itemsNR_begin;
00976     for (i=numItemsNR; i--; ++iitem)
00977     {
00978       /* Check that n and r for each item use the same number of latent variable 
00979          categories as in latent variable distribution used to compute
00980          examinee posterior distributions (does not check that points match,
00981          just that number of points match, it is assumed 
00982          that points also match).
00983        */
00984       j = (*iitem)->NumLatentVarCat();
00985       if (j != numLatentVarCat)
00986       {
00987         throw RuntimeError("Mismatch in number of latent variable categories",
00988             "EStepDiscrete::DoEStep");
00989       }
00990 
00991       /* Initialize n and r for item to zero */
00992       (*iitem)->InitializeNR();
00993     }
00994 
00995     if (computeExamineePosterior)
00996     {
00997       // Check that number of latent variable categories for latentvar_dist
00998       // has not changed.
00999       if (numLatentVarCat != latentvar_dist.size())
01000       {
01001         throw RuntimeError("Number of latent variable categories has changed",
01002             "EStepDiscrete::DoEStep");
01003       }
01004 
01005       // Compute log probabilities of each response to each item using
01006       // current item parameter estimates
01007       CalcResponseProb();
01008 
01009       /* Set logLatentProb to log of current probabilities of latentvar_dist */
01010       for (i = 1; i <= latentvar_dist.NumGroups(); ++i)
01011       {
01012         RealMatrix::row_iterator ip = logLatentProb.begin_row(i);
01013         typename D::weight_iterator iwt = latentvar_dist.begin_weights(i);
01014         for (j = numLatentVarCat; j--; ++ip, ++iwt)
01015         {
01016           *ip = (*iwt != 0.0) ? std::log(*iwt) : logZero;
01017         }
01018       }
01019     }
01020 
01021     /* For each examinee compute posterior distribution and
01022        update n and r for items the examinee responded to.
01023      */
01024     for (EI examinee_i = examinees_begin; examinee_i != examinees_end; ++examinee_i)
01025     {
01026 
01027       Real marginalLikelihood;
01028       if (computeExamineePosterior) // Compute posterior distribution for examinee
01029 
01030       {
01031         marginalLikelihood = ExamineePosterior(**examinee_i, posterior.begin(), posterior.end());
01032 
01033         if (storeExamineePosterior)
01034         {
01035           typename E::posterior_vector epost(numLatentVarCat);
01036           typename E::posterior_vector::iterator iep = epost.begin();
01037           RealVector::iterator ip = posterior.begin();
01038           for (i = numLatentVarCat; i--; ++iep, ++ip)
01039           *iep = *ip;
01040           (*examinee_i)->SetPosterior(epost);
01041 
01042           (*examinee_i)->SetMarginalRespLikelihood(marginalLikelihood);
01043         }
01044       }
01045       else // use examinee posterior distribution already computed
01046 
01047       {
01048         typename E::posterior_vector::iterator iep = (*examinee_i)->posterior_begin();
01049         RealVector::iterator ip = posterior.begin();
01050         for (i = numLatentVarCat; i--; ++iep, ++ip)
01051         *ip = *iep;
01052 
01053         marginalLikelihood = (*examinee_i)->GetMarginalRespLikelihood();
01054       }
01055 
01056       /* update marginal loglikelihood */
01057       loglikelihood += std::log(marginalLikelihood);
01058 
01059       typename E::response_iterator iresp = (*examinee_i)->responses_begin();
01060       Real casewt = (*examinee_i)->Count();
01061       int group = (*examinee_i)->Group();
01062       iitem = itemsNR_begin;
01063       for (i = numItemsNR; i--; ++iitem)
01064       {
01065         /* Update n and r for each item */
01066         Response resp = iresp[(*iitem)->Index()];
01067         if (resp != notPresentedResponse)
01068         {
01069           typename I::r_iterator ir = (*iitem)->RVector(resp, group);
01070           typename I::n_iterator in = (*iitem)->NVector(group);
01071           RealVector::iterator ipost = posterior.begin();
01072           for (j = numLatentVarCat; j--; ++ir, ++in, ++ipost)
01073           {
01074             *ir += *ipost * casewt;
01075             *in += *ipost * casewt;
01076           }
01077         }
01078 
01079       }
01080 
01081       /* Update marginal distribution for group examinee belongs to */
01082       RealVector::iterator ipost = posterior.begin();
01083       RealMatrix::row_iterator igroup = nGroups->begin_row(group);
01084       for (j = numLatentVarCat; j--; ++ipost, ++igroup)
01085       {
01086         *igroup += *ipost * casewt;
01087       }
01088 
01089     }
01090 
01091     /* Add log of prior densities of item parameter estimates to loglikelihood */
01092     for (II ii = itemsNR_begin; ii != itemsNR_end; ++ii)
01093     {
01094       PriorVector::iterator iprior = (*ii)->PriorsIterator();
01095       RealVector::iterator iparam = (*ii)->ParametersIterator();
01096       for (i = (*ii)->NumParameters(); i--; ++iprior, ++iparam)
01097       {
01098         if (*iprior)
01099         loglikelihood += (*iprior)->LogDensity(*iparam);
01100       }
01101     }
01102 
01103     return loglikelihood;
01104 
01105   }
01106 
01107   /*!
01108     \brief
01109     Version of DoEStep in which the items that are used to compute the
01110     posterior distributions for examinees are also the items for which
01111     n and r are updated, i.e., itemsNR_begin == items_begin and
01112     itemsNR_end == items_end.
01113  
01114     A duplicate of this definition is given inside the class definition
01115     for use when BOOST_MSVC6_MEMBER_TEMPLATES is defined.
01116  
01117     \section template_args Template Parameters
01118     
01119     \param D  Class for discrete latent variable distribution.
01120     \param E  Examinee type.
01121     \param EI Iterator over pointers to examinee objects.
01122     \param I  Item type. 
01123     \param II Iterator over item objects.
01124  
01125     \section function_args Function Parameters
01126     
01127     \param[in]  examinees_begin Iterator to pointer to first examinee
01128     \param[in]  examinees_end Iterator to pointer to one past last examinee
01129     \param[in]  computeExamineePosterior If TRUE posterior latent variable distribution
01130         for each examinee is computed. If FALSE previously stored posterior
01131         latent variable distribution for each examinee is used.
01132     \param[in]  storeExamineePosterior If TRUE posterior latent variable distribution
01133         is stored for each examinee. If 'computeExamineePosterior' is
01134         FALSE the value of the argument is not used (in that case a
01135         previously stored posterior distribution for each examinee is
01136         being used in this function).
01137    */
01138   template <class E, class I, class II, class D> template <class EI> Real EStepDiscrete<E, I, II, D>::DoEStep(
01139       EI examinees_begin, EI examinees_end, bool computeExamineePosterior,
01140       bool storeExamineePosterior)
01141   {
01142     return DoEStep(examinees_begin, examinees_end, items_begin, items_end,
01143         computeExamineePosterior, storeExamineePosterior);
01144   }
01145 
01146 #endif // BOOST_MSVC6_MEMBER_TEMPLATES
01147 } // namespace etirm
01148 
01149 #endif // ETIRM_ESTEPDISCRETE_H_

Generated on Sat Mar 1 21:40:15 2008 for ETIRM by  doxygen 1.5.4