/****************************************************************************************************/ /* PROGRAM: SAIR18_Enrollment_Model.sas */ /* PURPOSE: Forecast student enrollment */ /* PROGRAMMER: Samantha Bradley */ /* DEPARTMENT: Office of Institutional Research */ /* University of North Carolina at Greensboro */ /* DATE CREATED: 10/19/16 */ /* DATE MODIFIED: 9/28/18 */ /****************************************************************************************************/ /****************************************************************************************************/ /* Setup */ /****************************************************************************************************/ libname epdata 'N:\Custom\Samantha\Enrollment Projections\data'; /* assign the library where your data is stored */ options mprint mlogic symbolgen; /* these options let us de-bug macros */ /* First, and most important, what term are we predicting enrollments for? */ /* This is the only element of the program that needs to be modified */ /*************************************/ /**/ %let projection=201801; /**/ /** CHANGE PROJECTION TERM HERE **/ /*************************************/ /* Based on the projection semester, this code creates macro variables to pull in previous semesters of data */ DATA _null_; IF substr("&projection",5,2)="01" THEN DO; /* logic for Spring terms */ semester0=PUT(&projection,6.); semester1=PUT(&projection-93,6.); semester2=PUT(semester1-7,6.); semester3=PUT(semester2-93,6.); semester4=PUT(semester3-7,6.); semester5=PUT(semester4-93,6.); semester6=PUT(semester5-7,6.); semester7=PUT(semester6-93,6.); semester8=PUT(semester7-7,6.); semester9=PUT(semester8-93,6.); semester10=PUT(semester9-7,6.); semester11=PUT(semester10-93,6.); predict_term=substr("&projection",5,2); season="Spring"; season_yr=substr("&projection",1,4); END; ELSE IF substr("&projection",5,2)="08" THEN DO; /* logic for Fall terms */ semester0=PUT(&projection,6.); semester1=PUT(&projection-7,6.); semester2=PUT(semester1-93,6.); semester3=PUT(semester2-7,6.); semester4=PUT(semester3-93,6.); semester5=PUT(semester4-7,6.); semester6=PUT(semester5-93,6.); semester7=PUT(semester6-7,6.); semester8=PUT(semester7-93,6.); semester9=PUT(semester8-7,6.); semester10=PUT(semester9-93,6.); semester11=PUT(semester10-7,6.); predict_term=substr("&projection",5,2); season="Fall"; season_yr=substr("&projection",1,4); END; CALL SYMPUT('semester0',semester0); CALL SYMPUT('semester1',semester1); CALL SYMPUT('semester2',semester2); CALL SYMPUT('semester3',semester3); CALL SYMPUT('semester4',semester4); CALL SYMPUT('semester5',semester5); CALL SYMPUT('semester6',semester6); CALL SYMPUT('semester7',semester7); CALL SYMPUT('semester8',semester8); CALL SYMPUT('semester9',semester9); CALL SYMPUT('semester10',semester10); CALL SYMPUT('semester11',semester11); CALL SYMPUT('term0',(cats("_",semester0))); CALL SYMPUT('term1',(cats("_",semester1))); CALL SYMPUT('term2',(cats("_",semester2))); CALL SYMPUT('term3',(cats("_",semester3))); CALL SYMPUT('term4',(cats("_",semester4))); CALL SYMPUT('term5',(cats("_",semester5))); CALL SYMPUT('term6',(cats("_",semester6))); CALL SYMPUT('term7',(cats("_",semester7))); CALL SYMPUT('term8',(cats("_",semester8))); CALL SYMPUT('term9',(cats("_",semester9))); CALL SYMPUT('term10',(cats("_",semester10))); CALL SYMPUT('term11',(cats("_",semester11))); CALL SYMPUT('predict_term',predict_term); CALL SYMPUT('season',season); CALL SYMPUT('season_yr',season_yr); RUN; %PUT _USER_; /****************************************************************************************************/ /* Step 1 */ /****************************************************************************************************/ /* Pull in Enrollment data for the past 11 terms - we need data for the last 5 full fiscal years, and the directly previous term. */ /* Macro &semester1 resolves to the directly previous term. */ DATA sair_sample; SET epdata.sair_sample; WHERE TERMCODE IN("&semester11","&semester10","&semester9","&semester8","&semester7","&semester6","&semester5","&semester4","&semester3","&semester2","&semester1"); RUN; /* Let's print a few observations to see what the data looks like. */ PROC PRINT DATA=sair_sample (obs=30); TITLE "First 30 records in dataset"; RUN; /* Create a 'state' variable to categorize students at every time point throughout their career */ /* This concatenates the following variables: /* DEGREE: 0-Post Baccalaureate Certificate /* 3-Bachelor's 4-Master's 5-Post Master's Certificate 8-Unclassified P-Doctoral Professional R-Doctorate /* /* ENRL: 1-New Student 2-New Transfer Student 3-Continuing Student 4-Returning Student 6-Unclassified /* /* CLASS: 1-Freshman 2-Sophomore 3-Junior 4-Senior 6-Unclassified Undergraduate 7-Graduate /* /* TIME: F-Full-time P-Part-time /**/ DATA one; SET sair_sample; state = CATS('_',DEGREE,'_',ENROLL,'_',CLASS,'_',TIME); state_desc = CATX (' ',DEGREE_DESC,ENROLL_DESC,CLASS_DESC,TIME_DESC); RUN; PROC SORT DATA=one; BY SAIR_ID; RUN; /* Let's see what our state categories look like. */ PROC SQL; SELECT DISTINCT state, state_desc FROM one; TITLE "Student 'state' categories"; QUIT; /* For a Markov Chain model, we need an NxN matrix with every possible state. */ /* We will build this in a series of dynamic steps that won't require editing each term. */ /* Run this to get all the unique states necessary to build the matrix template. */ /* This gives us a list of every unique state, and a macro variable that holds the number of distinct states. */ PROC SQL NOPRINT; CREATE TABLE states AS SELECT DISTINCT state FROM one; SELECT COUNT(DISTINCT(state)) INTO :cnt SEPARATED BY " " FROM one; QUIT; /* Now we can transpose the list to start building our matrix template. */ /* Make sure to use COPY to keep the column of states. */ /* This will make the values of the states into variables we can use. */ PROC TRANSPOSE DATA=states OUT=statematrix NAME=state; VAR state; ID state; COPY state; RUN; /* Create new variables with suffix _n that will eventually become our numeric variables */ /* Also create flags for each student level we'll be predicting at the end. */ DATA vars; SET states; state_n=trim(LEFT(state))||"_n"; IF substr(state,1,2)="_0" THEN student_cat="certificate"; ELSE IF substr(state,2,1)="3" THEN student_cat="undergrad"; ELSE IF substr(state,2,1)="4" THEN student_cat="masters"; ELSE IF substr(state,2,1)="5" THEN student_cat="specialist"; ELSE IF substr(state,2,1)="8" AND substr(state,6,1) NE "7" THEN student_cat="ug non-degr"; ELSE IF substr(state,2,1)="8" AND substr(state,6,1)="7" THEN student_cat="gr non-degr"; ELSE IF substr(state,2,1) IN("P","R") THEN student_cat="doctorate"; RUN; /* Create macro variables to capture each student level. */ PROC SQL NOPRINT; SELECT TRIM(LEFT(state)) INTO :cert SEPARATED BY ',' FROM vars WHERE student_cat="certificate"; SELECT TRIM(LEFT(state)) INTO :ugrd SEPARATED BY ',' FROM vars WHERE student_cat="undergrad"; SELECT TRIM(LEFT(state)) INTO :mstr SEPARATED BY ',' FROM vars WHERE student_cat="masters"; SELECT TRIM(LEFT(state)) INTO :spcl SEPARATED BY ',' FROM vars WHERE student_cat="specialist"; SELECT TRIM(LEFT(state)) INTO :ugnd SEPARATED BY ',' FROM vars WHERE student_cat="ug non-degr"; SELECT TRIM(LEFT(state)) INTO :grnd SEPARATED BY ',' FROM vars WHERE student_cat="gr non-degr"; SELECT TRIM(LEFT(state)) INTO :dctr SEPARATED BY ',' FROM vars WHERE student_cat="doctorate"; QUIT; %PUT _USER_; /* The code below is adapted from SAS Article '40700 - Convert all character variables to numeric and use the same variable names in the output.' */ /* Full code is publically available at support.sas.com/kb/40/700.html */ /* Now create three macro variables using INTO to create our template of all states. */ /* The first macro variable, c_list, has a list of all the character variables, separated by a space. */ /* The second macro variable, n_list, has a list of all our new variables we made in the previous DATA STEP with the suffix _n. */ /* The third macro variable, renam_list, has a list of all our new _n variables and all our old character variables separated by an equal sign (new = old). */ PROC SQL noprint; SELECT TRIM(LEFT(state)), TRIM(LEFT(state_n)), TRIM(LEFT(state_n))||'='||TRIM(LEFT(state)) INTO :c_list SEPARATED BY ' ', :n_list SEPARATED BY ' ', :renam_list SEPARATED BY ' ' FROM vars; QUIT; /* Now we start the conversion! */ /* Create the array ch by calling the macro variable &c_list, which resolves to the list of all the character variables. */ /* Create the array nu by calling the macro variable &n_list, which resolves to the list of all the numeric variables with suffix _n . */ /* Then loop through the arrays and use the INPUT function to convert the character variables to numeric, stored in our _n variables. /* Then loop through the nu array and change all the missing values to 0. */ /* Finally, rename all the new _n variables back to the original variable names by calling the macro variable &renam_list. */ DATA template; SET statematrix; ARRAY ch(*) &c_list; ARRAY nu(*) &n_list; DO i = 1 TO dim(ch); nu(i)=input(ch(i),?8.); END; DO i=1 TO dim(nu); IF nu(i)=. THEN nu(i)=0; END; DROP i &c_list; RENAME &renam_list; RUN; /** Now we have a N rows by N+1 columns matrix with each flow state and every interior value=0. Column value is N+1 because the first column contains our state values. **/ PROC PRINT DATA=template noobs; TITLE "NxN Markov Chain Matrix Template"; RUN; /* Create macro variables to represent each individual 'state'. */ DATA _null_; SET vars; count=LEFT(PUT(_n_,5.)); CALL SYMPUT('Var'||count,state); RUN; %PUT _USER_; /****************************************************************************************************/ /* Step 2 */ /****************************************************************************************************/ /* Transpose the Enrollment data so that we have one record for every student, with their state category for each term. */ PROC TRANSPOSE DATA=one OUT=two; BY SAIR_ID; VAR state; ID TERMCODE; RUN; /* Make sure the data is in chronological order. */ /* Keep &term1 through &term11. We'll be making matched pairs of Spring>Fall and Fall>Spring, up through the most recent full year. */ /* So for predicting a Fall term, we'll make matched pairs of Spring>Fall up through the most recent full year. */ /* Similarly, for predicting a Spring term, we'll make matched pairs of Fall>Spring up through the most recent full year. */ DATA two_clean; RETAIN SAIR_ID &term11 &term10 &term9 &term8 &term7 &term6 &term5 &term4 &term3 &term2 &term1; SET two; KEEP SAIR_ID &term11 &term10 &term9 &term8 &term7 &term6 &term5 &term4 &term3 &term2 &term1; RUN; /****************************************************************************************************/ /* Step 3 */ /****************************************************************************************************/ /* We need to build probability matrices for each semester pairing. */ /* To predict for a Fall semester, we look at enrollment trends for previous years, tracking enrollment as it flows from Spring into Fall. */ /* For example- To predict enrollment for Fall 2017, the pairs are: /* 1. Spring 2016 to Fall 2016 /* 2. Spring 2015 to Fall 2015 /* 3. Spring 2014 to Fall 2014 /* 4. Spring 2013 to Fall 2013 /* 5. Spring 2012 to Fall 2012 /* To predict for a Spring semester, we look at enrollment trends for previous years, tracking enrollment as it flows from Fall into Spring. */ /* For example- To predict enrollment for Spring 2018, the pairs are: /* 1. Fall 2016 to Spring 2017 /* 2. Fall 2015 to Spring 2016 /* 3. Fall 2014 to Spring 2015 /* 4. Fall 2013 to Spring 2014 /* 5. Fall 2012 to Spring 2013 /* pair1 will be the number of our dataset, based on how many years back we're going - 1 through 5 */ /* pair2 will be first term in pair */ /* pair3 will be second term in pair */ /* pair4 will be semester, equivalent to first term in pair */ %MACRO pairs(pair1, pair2, pair3, pair4); DATA year&pair1; SET two_clean; WHERE &pair2 NE ''; IF &pair2 NE '' AND &pair3='' THEN &pair3="Did not return"; KEEP SAIR_ID &pair2 &pair3; RUN; /* Run a crosstab to compare how many students kept the same status or changed from one status to another during these two terms. */ PROC FREQ DATA=year&pair1 noprint; TABLES &pair2*&pair3 / nocol nocum missing OUTPCT OUT=crosstab; RUN; DATA compare; SET crosstab; KEEP &pair2 &pair3 PCT_ROW; RUN; /* transpose the data so we can use the crosstab as a dataset */ PROC TRANSPOSE DATA=compare OUT=compare2; BY &pair2; VAR PCT_ROW; ID &pair3; RUN; /* set all missing values to 0 and make the values a percent */ DATA matrix_y&pair1; SET compare2; ARRAY CHANGE _NUMERIC_; DO OVER CHANGE; IF CHANGE=. THEN CHANGE=0; CHANGE=CHANGE*.01; END; DROP _LABEL_ _NAME_ ; RUN; /* prepare the blank matrix template */ DATA template_y&pair1; SET template; RENAME state=&pair2; RUN; PROC SORT DATA=template_y&pair1; BY &pair2; RUN; PROC SORT DATA=matrix_y&pair1; BY &pair2; RUN; /* fill in the matrix template with the data for the most recent full year */ DATA year&pair1; FORMAT state $6. start_state $8. &pair2 $8. Did_not_return 13.10; INFORMAT state $6. start_state $8. &pair2 $8. Did_not_return 13.10; LENGTH state $6. start_state $8. &pair2 $8. Did_not_return 8; MERGE template_y&pair1(in=a) matrix_y&pair1(in=b); BY &pair2; IF a; start_state=&pair2; state="&pair4."; RUN; %MEND pairs; %pairs(1,&term3,&term2,&semester3); /* &term3 resolves to _201608, &term2 resolves to _201701. so this tracks students from Fall 2016 to Spring 2017 */ %pairs(2,&term5,&term4,&semester5); %pairs(3,&term7,&term6,&semester7); %pairs(4,&term9,&term8,&semester9); %pairs(5,&term11,&term10,&semester11); /** This gives us 5 datasets containing probability matrices for student enrollment movements over the last 5 years. **/ /****************************************************************************************************/ /* Step 4 */ /****************************************************************************************************/ /* Now we have 5 transition probability matrices comparing movements between each semester. */ /* We need to calculate the average probabilities of moving into each of the states over time. */ /* Because enrollment patterns are different depending on whether it's a Fall or Spring semester, we need to model movement from Spring > Fall and Fall > Spring separately. */ /* Stack all the matrices. */ /* Drop the term-specific columns. This data is captured in the variable 'start_state'. Also drop Did_not_return, as those students fall out of the model. */ DATA prob_stack; SET year1 year2 year3 year4 year5; DROP &term3 &term5 &term7 &term9 &term11 Did_not_return; RUN; /* Sort the data by start_state, to group all start states in each term. */ PROC SORT DATA=prob_stack; BY start_state; RUN; /* Calculate the average probability of transitioning into each state. */ /* use CNT macro var for number of flow states */ %MACRO mkv_states; %DO i=1 %TO &cnt; DATA state_&i; SET prob_stack; KEEP YEAR start_state &&var&i; RUN; PROC SORT DATA=state_&i; BY start_state; RUN; PROC MEANS DATA=state_&i n nway noprint; CLASS start_state; VAR &&var&i; OUTPUT OUT=state_&i (DROP=_FREQ_ _TYPE_) mean=&&var&i; RUN; %END; %MEND mkv_states; /* Call macro. This will loop through the entire stack of probability matrices, and average the probability of movements between each flow state. */ %mkv_states; /* Now we'll merge by start_state to create an overall transition probability matrix */ DATA trans_matrix; MERGE state_1-state_&cnt; BY start_state; DROP start_state; RUN; /****************************************************************************************************/ /* Step 5 */ /****************************************************************************************************/ /* Now we need a dataset with the last actual known enrollment values - This will be our baseline population when forecasting. */ DATA current_enroll1; SET two_clean; WHERE &term1 NE ''; KEEP SAIR_ID &term1; RUN; PROC FREQ DATA=current_enroll1 noprint; TABLES &term1 / norow nocol nocum nopercent OUT=current_enroll (DROP=PERCENT); RUN; /* We need to make sure the baseline dataset includes every possible state, even if we have no students in a particular state for this particular semester. */ /* Pull list of all possible states from the Markov Chain template. */ DATA allstates; SET template; RENAME state=&term1; KEEP state; RUN; PROC SORT DATA=allstates; BY &term1; RUN; /* Merge the full list of possible states onto the baseline population. */ DATA current_enrl_matrix1; MERGE allstates(in=a) current_enroll(in=b); BY &term1; IF a; IF COUNT=. THEN COUNT=0; RUN; /* Transpose the data from tall to long. */ PROC TRANSPOSE DATA=current_enrl_matrix1 OUT=current_enrl_matrix; VAR COUNT; ID &term1; RUN; /* Clean up the transposed data. */ DATA base_pop; RETAIN TERM; SET current_enrl_matrix; DROP _NAME_ _LABEL_ ; TERM=&semester1; RUN; /** Now we have a 1xN baseline population from which to forecast. **/ /****************************************************************************************************/ /* Step 6 */ /****************************************************************************************************/ /* New entries into each state must be separately modeled. */ /* Look at the entire population from semester to semester, and flag any new or re-entries */ %MACRO entry(i,semestera,semesterb); /* start at the earliest term and work up */ DATA academicyear; SET one; WHERE termcode in("&semestera","&semesterb"); RUN; PROC SORT DATA=academicyear; BY SAIR_ID TERMCODE; RUN; DATA entry&i; SET academicyear; BY sair_id; IF TERMCODE IN("&semester10","&semester11") THEN years_past=0; ELSE IF TERMCODE IN("&semester8","&semester9") THEN years_past=1; ELSE IF TERMCODE IN("&semester6","&semester7") THEN years_past=2; ELSE IF TERMCODE IN("&semester4","&semester5") THEN years_past=3; ELSE IF TERMCODE IN("&semester2","&semester3") THEN years_past=4; ELSE IF TERMCODE IN("&semester0","&semester1") THEN years_past=5; /* if the first instance of a student ID occurs in the second semester of the pair, they are flagged as a new entry */ IF FIRST.SAIR_ID and termcode="&semesterb" THEN entry=1; /* only keep new entries */ IF entry NE 1 THEN DELETE; KEEP termcode ENROLL SAIR_ID state years_past; RUN; %MEND entry; %entry(1,&semester11,&semester10); %entry(2,&semester9,&semester8); %entry(3,&semester7,&semester6); %entry(4,&semester5,&semester4); %entry(5,&semester3,&semester2); /* Stack the new entry data. */ DATA new_students; SET entry1 entry2 entry3 entry4 entry5; RUN; PROC SORT DATA=new_students; BY termcode; RUN; /* Now run a frequency to summarize new entries per semester and output results to a new dataset. */ PROC FREQ DATA=new_students noprint; TABLES years_past*TERMCODE*state/list missing OUT=new_student_terms (DROP=PERCENT); WHERE SUBSTR(TERMCODE,5,2)="&predict_term"; /* if we're predicting Spring enrollment, we only need to summarize new entries for Spring terms (and vice versa for Fall) */ RUN; /* Create an empty dataset for the prediction term with every possible state and a blank value for count. */ /* We need to include every state for matrix calculations later. Only the states associated with new entries will be populated, that's okay. */ PROC SQL; CREATE TABLE predict_new AS SELECT state FROM states; ALTER TABLE predict_new ADD years_past NUM, count NUM, termcode CHAR; UPDATE predict_new SET years_past=5; UPDATE predict_new SET count=.; UPDATE predict_new SET termcode="&semester0"; QUIT; /* Stack the counts of new entries with the empty dataset for the prediction term. */ DATA new_states_regress; SET new_student_terms predict_new; RUN; /* Sort the data */ PROC SORT DATA=new_states_regress; BY termcode state; RUN; /* By including the prediction term with a blank count, SAS will predict the blank value using the linear regression. */ /* So we can conduct a simple linear regression where COUNT is the dependent variable (COUNT is the number of new entries for each ENRL category in each term) */ /* and years_past is the independent variable. */ %MACRO reg; %DO i=1 %TO &cnt; PROC REG DATA=new_states_regress NOPRINT; MODEL COUNT=years_past; WHERE state="&&Var&i"; OUTPUT OUT=new_&i predicted=predict_cnt residual=resid; QUIT; %END; %MEND reg; %reg; /* Let's look at an example of predicting new entries with linear regression */ PROC PRINT DATA=new_36 noobs; TITLE "Estimated new entries into state _3_4_4_P - Returning Seniors seeking Bachelor's Degree Part-Time"; RUN; /* Stack all regression results and keep only the predictions. */ DATA new_predictions; SET new_1-new_&cnt; WHERE TERMCODE="&semester0"; IF predict_cnt=. THEN predict_cnt=0; KEEP state predict_cnt; RUN; /* Transpose the data to create a 1xN vector of new entries. */ PROC TRANSPOSE DATA=new_predictions OUT=new_entries1 (DROP=_NAME_ _LABEL_); VAR predict_cnt; ID state; RUN; /* Set negative new entries to 0. */ DATA new_entries; SET new_entries1; ARRAY CHANGE _NUMERIC_; DO OVER CHANGE; IF CHANGE<0 THEN CHANGE=0; END; RUN; /****************************************************************************************************/ /* Step 7 */ /****************************************************************************************************/ /* Now use PROC IML to calculate forecast */ PROC IML; vars={&c_list}; USE trans_matrix; READ ALL INTO trans_matrix; USE base_pop; READ ALL INTO base_pop; USE new_entries; READ ALL INTO new_entries; base_pop=base_pop[1, 2:(&cnt+1)]; new_entries=new_entries[,1:&cnt]; Projection=(base_pop*trans_matrix)+new_entries; CREATE Projection FROM Projection [COLNAME=vars]; APPEND FROM Projection; QUIT; /*** We have our projections! ***/ /* Now we can use those student level macro variables we created earlier to aggregate our projections. */ DATA final_projections; FORMAT Undergraduate COMMA8.0 UG_Nondegree COMMA8.0 Certificate COMMA8.0 Masters COMMA8.0 Specialist COMMA8.0 Doctoral COMMA8.0 GR_Nondegree COMMA8.0 Total COMMA8.0; LABEL Undergraduate="Undergraduate" UG_Nondegree="Non-Degree Seeking Undergraduate" Certificate="Certificate" Masters="Masters" Specialist="Specialist" Doctoral="Doctoral" GR_Nondegree="Non-Degree Seeking Graduate" Total="Total Enrollment"; SET Projection; Certificate=ROUND(sum(&cert),1); Undergraduate=ROUND(sum(&ugrd),1); Masters=ROUND(sum(&mstr),1); Specialist=ROUND(sum(&spcl),1); UG_Nondegree=ROUND(sum(&ugnd),1); GR_Nondegree=ROUND(sum(&grnd),1); Doctoral=ROUND(sum(&dctr),1); Total=sum(Certificate,Undergraduate,Masters,Specialist,UG_nondegree,GR_nondegree,Doctoral); TERM="&semester0"; KEEP TERM Certificate Undergraduate Masters Specialist UG_Nondegree GR_Nondegree Doctoral Total; RUN; PROC PRINT DATA=final_projections noobs label; VAR Undergraduate UG_Nondegree Certificate Masters Specialist Doctoral GR_Nondegree Total; TITLE "&season &season_yr Projections with New Entries"; RUN; /* We can also compare our projections with historical enrollment data. */ PROC SORT DATA=one; BY termcode state; RUN; PROC FREQ DATA=one noprint; TABLES termcode*state /list missing OUT=historic1 (DROP=PERCENT); RUN; PROC TRANSPOSE DATA=historic1 OUT=historic2; VAR COUNT; ID state; BY termcode; RUN; DATA historic; FORMAT Undergraduate COMMA8.0 UG_Nondegree COMMA8.0 Certificate COMMA8.0 Masters COMMA8.0 Specialist COMMA8.0 Doctoral COMMA8.0 GR_Nondegree COMMA8.0 Total COMMA8.0; LABEL Undergraduate="Undergraduate" UG_Nondegree="Non-Degree Seeking Undergraduate" Certificate="Certificate" Masters="Masters" Specialist="Specialist" Doctoral="Doctoral" GR_Nondegree="Non-Degree Seeking Graduate" Total="Total Enrollment"; SET historic2; Certificate=ROUND(sum(&cert),1); Undergraduate=ROUND(sum(&ugrd),1); Masters=ROUND(sum(&mstr),1); Specialist=ROUND(sum(&spcl),1); UG_Nondegree=ROUND(sum(&ugnd),1); GR_Nondegree=ROUND(sum(&grnd),1); Doctoral=ROUND(sum(&dctr),1); Total=sum(Certificate,Undergraduate,Masters,Specialist,UG_nondegree,GR_nondegree,Doctoral); TERM=termcode; KEEP TERM Certificate Undergraduate Masters Specialist UG_Nondegree GR_Nondegree Doctoral Total; RUN; /* Now stack the historic data with the projection data. */ DATA historic_project; SET historic final_projections; WHERE substr(TERM,5,2)="&predict_term"; /* this stacks Springs together or Falls together */ RUN; /* Format the data to print nicely. */ DATA forprinting; RETAIN TERMDESC Undergraduate UG_Nondegree Certificate Masters Specialist Doctoral GR_Nondegree Total ; LABEL termdesc="Semester"; SET historic_project; termdesc=CAT("&season."," ",(substr(term,1,4))); vline=CAT("&season."," ",(&season_yr-1)); DROP term; RUN; /* Print a chart of historical enrollments compared to projections. */ PROC REPORT DATA=forprinting nowd; COLUMNS termdesc Undergraduate UG_Nondegree Certificate Masters Specialist Doctoral GR_Nondegree Total; COMPUTE termdesc; IF termdesc=("&season. &season_yr.") THEN DO; CALL DEFINE (_row_,'style','style={font_weight=bold font_style=italic}'); /* this makes the line with our predicted values bold and italicized */ END; ENDCOMP; RUN; /* Print line graphs of historical enrollment trends compared to projections, with vertical line marking where historical data ends and projections begin. */ /* Print undergraduates on their own graph, as they are a much larger population than other groups. */ PROC SGPLOT DATA=forprinting ; YAXIS LABEL="Headcount"; XAXIS DISPLAY=(NOLABEL); *SERIES X=termdesc Y=Undergraduate; SERIES X=termdesc Y=UG_Nondegree / LINEATTRS=(thickness=2); SERIES X=termdesc Y=Certificate / LINEATTRS=(thickness=2); SERIES X=termdesc Y=Masters / LINEATTRS=(thickness=2); SERIES X=termdesc Y=Specialist / LINEATTRS=(thickness=2); SERIES X=termdesc Y=Doctoral / LINEATTRS=(thickness=2); SERIES X=termdesc Y=GR_Nondegree / LINEATTRS=(thickness=2); REFLINE vline/ AXIS=X TRANSPARENCY = 0.9 LINEATTRS=(thickness=5); TITLE1 "&season Semester Projections Versus Historical Enrollment"; TITLE2 "(Undergraduates displayed separately)"; KEYLEGEND / LOCATION=OUTSIDE POSITION=top; RUN; PROC SGPLOT DATA=forprinting; YAXIS LABEL="Headcount" VALUES=(0,17000); XAXIS DISPLAY=(NOLABEL); SERIES X=termdesc Y=Undergraduate / LINEATTRS=(thickness=2); REFLINE vline/ AXIS=X TRANSPARENCY = 0.9 LINEATTRS=(thickness=5); TITLE1 "&season Semester Projections Versus Historical Enrollment"; TITLE2 "(Undergraduates only)"; KEYLEGEND / LOCATION=OUTSIDE POSITION=top; RUN; /* THE END! */