**************************************************************************; * PROGRAM NAME: TransformData.sas *; * DATE CREATED: 09/23/03 *; * LAST UPDATED: 10/27/03 *; * *; * PROGRAMMERS: NYC DOHMH Bureau of Communicable Disease *; * PURPOSE: Sample program demonstrates manipulation of dates, age *; * etc. which is useful when transforming data from one *; * format to another *; ************************************************************************; * Remember... SAS date variables represent the # of days since 1/1/1960 SAS datetime variables represent the # of seconds since 1/1/1960 SAS time variables represent the # of seconds since midnight Informats can be used to specify the exact format of raw data you are reading in Variable type (eg. character, numeric, date ...) and length are characteristics of the underlying variable Formats simply affect how the data appear when you print and view but don't affect the underlying value There are many useful SAS Functions .... date() time() datetime() mdy() substr() index() put() input() etc.; * DATE AND DATETIME FUNCTIONS AND FORMATS; data _null_; date1='01Nov03'; put "Example 1: " date1; date2='01Nov03'd; put "Example 2: " date2; date3=mdy(11,1,3); put "Example 3: " date3; put "Example 4: " date3 mmddyy8.; put "Example 5: " date3 weekdate25.; put "Example 6: " date3 weekdate15.; put "Example 7: " date3 weekday3.; put "Example 8: " date3 weekdate10.; put "Example 9: " date3 date.; date4=date(); put "Example 10: " date4 mmddyy8.; date4=date()-1; put "Example 11: " date4 mmddyy8.; run; data _null_; string="24SEP03:16:02:15"; datetime1=input(string,datetime.); put "Example 8: " datetime1; put "Example 9: " datetime1 datetime.; put "Example 10: " datetime1 datetime13.; put "Example 11: " datetime1 datetime7.; date5=datepart(datetime1); put "Example 12: " date5 mmddyy8.; time1=timepart(datetime1); put "Example 13: " time1 time.; days='20Oct03'd - '13Sep01'd; put "# of days NYC ED surveillance=" days; seconds=time()-time1; put "# of seconds since Example13=" seconds; run; * READ IN VARIOUS DATE FORMATS; data _null_; input @1 original $10. @1 date mmddyy8.; format date mmddyy8.; put @1 original @12 date; datalines; 10/01/01 1/5/3 12/1/04 5/6/2004 08/09/1998 run; data _null_; input @1 original $10.; index=index(original,'/'); length=length(original); month=substr(original,1,index-1)*1; day=substr(original,index+1,length-index)*1; year=year(date()-1); date=mdy(month,day,year); format date mmddyy8.; put @1 original @12 date; datalines; 10/01 1/5 run; data _null_; input @1 original $10. @1 date date9.; format date weekdate25.; put @1 original @12 date; datalines; 05Sep2003 1Oct01 run; data _null_; input @1 original $25. @1 mon $3. @5 da 2. @9 yr 4. @15 time time5.; mo=upcase(mon); if mo='JAN' then mm='01'; if mo='FEB' then mm='02'; if mo='MAR' then mm='03'; if mo='APR' then mm='04'; if mo='MAY' then mm='05'; if mo='JUN' then mm='06'; if mo='JUL' then mm='07'; if mo='AUG' then mm='08'; if mo='SEP' then mm='09'; if mo='OCT' then mm='10'; if mo='NOV' then mm='11'; if mo='DEC' then mm='12'; date=mdy(mm,da,yr); format date date8.; put @1 original @30 date @40 time time5.; datalines; SEP 17, 1998 12:23 SEP 19, 2003 23:23 SEP 13, 2002 14:23 run; * READ IN VARIOUS TIME FORMATS; data _null_; input @1 original $10. @1 time time8.; format time time5.; put @1 original @12 time; datalines; 10:23 1:15 23:34 11:30 run; data _null_; input @1 original $10. @1 time time8. @5 ampm $1.; format time time5.; if ampm='P' then time=time+(12*60*60); timetxt=put(hour(time),z2.) || ":" || put(minute(time),z2.); put @1 original @12 time time5. @20 timetxt; datalines; 10:23P 01:15A 11:34A 11:30P run; data _null_; input @1 original $10. @1 hour 2. @3 minute 2.; timetxt=put(hour,z2.)||":"||put(minute,z2.); time=input(timetxt,time5.); put @1 original @12 timetxt @20 time time5.; datalines; 1023 0115 2134 1830 ; run; * STANDARDIZE AGE; data _null_; input @1 original $10. @1 age $5.; age=upcase(age); if age="EXP" or age="?" then age=""; if index(age,"D") > 0 then ageyrs=0; else if index(age,"W") > 0 then do; wks=1*compress(age,'WKS'); ageyrs=int(wks/52); end; else if index(age,"M") > 0 then do; mos=1*compress(age,'MOS'); ageyrs=int(mos/12); end; else ageyrs=int(1*compress(age,'YRS ')); if 0<=ageyrs<2 then agegrp=0; else if 2<=ageyrs<5 then agegrp=1; else if 5<=ageyrs<13 then agegrp=2; else if 13<=ageyrs<18 then agegrp=3; else if 18<=ageyrs<40 then agegrp=4; else if 40<=ageyrs<65 then agegrp=5; else if 65<=ageyrs<=120 then agegrp=6; put @1 original @12 age @20 ageyrs @30 agegrp; datalines; 12Y 3M 033 98 1W 5YRS 26WKS ; run;