******************************************************************************************;
* PROGRAM NAME: RunSatScanBatchFromSAS.sas                                               *;
* DATE CREATED: 11/10/03                                                                 *;
* LAST UPDATED: 11/10/03                                                                 *;
*                                                                                        *;
* PROGRAMMERS:  NYC Department of Health and Mental Hygiene                              *;
*               Rick Heffernan, Bureau of Communicable Disease                           *;
*               Kristi Metzger, Bureau of Epidemiology                                   *;
*                                                                                        *;
* PURPOSE:      Generates input files for SatScan, runs a Space-Time-Permutation         *;
*               analysis using SatScanBatch.exe, reads SatScan output back into          *;
*               SAS, and prints results to Output and to a permanent, dated file         *;
******************************************************************************************;

/* This program requires
     the folders
        c:\SyndromicWorkshop\SATScan\PARAM
        c:\SyndromicWorkshop\SATScan\TXTFILES
        c:\SyndromicWorkshop\SATScan\OUTPUT
     and the files NYCZipCoord.txt and RunSaTScanBatch.bat
        in c:\SyndromicWorkshop\SATScan\PARAM
     and the sample data files ZipAdults.txt or ZipAdults.sas7bdat
        in c:\SyndromicWorkshop\SAS\data
     and the program SatScanBatch.exe (can download from www.satscan.org)
        in c:\Program Files\Satscan
*/

options nodate nocenter nonumber ps=5000 ls=120;

/* Macro variables naming the folders that SAS and SatScan read from and write to.
   TXTFILES, PARAM and OUTPUT end with a backslash so that a file name can be
   appended directly; FOLDER does not. */
%LET FOLDER=c:\SyndromicWorkshop\sas\data;
%LET TXTFILES =c:\SyndromicWorkshop\SatScan\TXTFILES\;
%LET PARAM =c:\SyndromicWorkshop\SatScan\PARAM\;
%LET OUTPUT =c:\SyndromicWorkshop\SatScan\OUTPUT\;

/* Read in sample data from ZipAdults.txt, an ASCII text file. */
data zipadults;
    infile "&FOLDER\ZipAdults.txt";
    input date mmddyy8. zip $ fevflu resp diar vomit asthma none total;
    format date mmddyy8.;
run;

/* Alternatively, if available, read in the sample SAS dataset directly.
   When this step runs successfully it replaces the WORK.zipadults built above. */
libname data "c:\SyndromicWorkshop\SAS\data";

data zipadults;
    set data.ZipAdults;
run;

************************** Define Macros **************************;

/* Generate SAS and text dates for use in program:
     DATADATE - last day of data to analyse, as a SAS date number
     DATETXT  - same date as mmddyy text, used in the permanent output file name
     TODAY    - run date, used in report titles
   The analysis date is fixed at 10 Oct 2003 for the sample data; the original
   authors noted date()-1 as the alternative. */
data _null_;
    datadate = mdy(10, 10, 2003);   /* four-digit year: no dependence on YEARCUTOFF */
    today    = date();
    call symput('datadate', put(datadate, 5.));   /* numeric format for a numeric value */
    call symput('datetxt',  put(datadate, mmddyy6.));
    call symput('today',    put(today, date9.));
run;

%macro MakeSatScanFiles;
    /* Creates the case file and parameter file needed for a space-time
       permutation analysis. A similar approach can be used to create case and
       control files for the Bernoulli model and case and population files for
       the Poisson model.

       Reads global macro variables: SYND, AGE, LEVEL, DATADATE, MAXGEOG,
       MAXTEMP, TXTFILES, PARAM, OUTPUT.
       Reads dataset WORK.zipadults.
       Writes &TXTFILES.cases.txt and &PARAM.param.prm, and creates the global
       macro variables MINDATE and MAXDATE via PROC SQL. */

    /* Restrict to the 30 days ending on DATADATE and copy the selected syndrome
       column into SYND so the macro can be re-used for different values of SYND.
       WORK.zipadults is used so that either data source loaded at the top of
       the program (text file or SAS dataset) feeds the analysis. */
    data DataForSatScan;
        set WORK.zipadults;
        where &DATADATE-29<=date<=&DATADATE;
        synd=&synd;
    run;

    /* SatScan case file: tab-delimited zip, count, date (yyyy/mm/dd). */
    data _null_;
        set WORK.DataForSatScan;
        file "&TXTFILES.cases.txt" delimiter='09'x DSD DROPOVER lrecl=32767;
        format zip $5. ;
        format synd best12.;
        put zip $ synd date yymmdds10.;
    run;

    /* Determine minimum and maximum date for the SatScan parameter file,
       which must correspond to the data in the case file. */
    proc sql noprint;
        select min(date), max(date)
            into :mindate, :maxdate
            from DataForSatScan;
    quit;

    /* SatScan parameter file. MaxGeographicSize and MaxTemporalSize are taken
       from MAXGEOG and MAXTEMP (rendered with six decimals) so that the values
       SatScan uses always match the values printed in the report titles. */
    data _null_;
        mindate=put(&mindate,yymmdds10.);
        maxdate=put(&maxdate,yymmdds10.);
        file "&PARAM.param.prm";
        put " [Input Files]"
          / "CaseFile=&TXTFILES.Cases.txt"
          / "ControlFile="
          / "PopulationFile="
          / "CoordinatesFile=&PARAM.NYCZipcoord.txt"
          / "GridFile="
          / "UseGridFile=n"
          / "; precision of case times (0=None, 1=Year, 2=Month, 3=day)"
          / "PrecisionCaseTimes=3"
          / "; coordinate type (0=Cartesian, 1=Lat/Long)"
          / "CoordinatesType=1"
          / " "
          / "[Analysis]"
          / "; analysis type (1=Purely Spatial, 2=Purely Temporal, 3=Retrospective Space-Time, 4=Prospective Space-Time)"
          / "AnalysisType=3"
          / "; model type (0=Poisson, 1=Bernoulli, 2=Space-Time Permutation)"
          / "ModelType=2"
          / "; scan areas (1=High, 2=Low, 3=High or Low)"
          / "ScanAreas=1"
          / "; start date (YYYY/MM/DD)"
          / "StartDate=" mindate
          / "; end date (YYYY/MM/DD)"
          / "EndDate=" maxdate
          / "; Monte Carlo reps (0, 9, 999, n999)"
          / "MonteCarloReps=999"
          / " "
          / "[Time Parameters]"
          / "; interval units (0=None, 1=Year, 2=Month, 3=Day)"
          / "IntervalUnits=3"
          / "; inteval length (positive integer)"
          / "IntervalLength=1"
          / "; prospective surveillance start date (YYYY/MM/DD)"
          / "ProspectiveStartDate=1900/12/31"
          / "; Time trend adjustment type (0=None, 1=Nonparametric, 2=LogLinear)"
          / "TimeTrendAdjustmentType=0"
          / "; time trend adjustment percentage (>-100)"
          / "TimeTrendPercentage=0.000000"
          / " "
          / "[Scanning Window]"
          / "; max geographic size (<=50%)"
          / "MaxGeographicSize=%sysfunc(putn(&MAXGEOG,8.6))"
          / "; how max spatial size should be interpretted (0=Percentage, 1=Distance)"
          / "MaxSpatialSizeInterpretation=1"
          / "; include purely temporal clusters (y/n)"
          / "IncludePurelyTemporal=n"
          / "; max temporal size (<=90%)"
          / "MaxTemporalSize=%sysfunc(putn(&MAXTEMP,8.6))"
          / "; how max temporal size should be interpretted (0=Percentage, 1=Time)"
          / "MaxTemporalSizeInterpretation=1"
          / "; include purely spatial clusters (y/n)"
          / "IncludePurelySpatial=n"
          / "; clusters to include (0=All, 1=Alive)"
          / "IncludeClusters=1"
          / " "
          / "[Output Files]"
          / "ResultsFile=&OUTPUT&SYND&AGE&LEVEL.Out.txt"
          / "; output most likely clusters in ASCII format (y/n)"
          / "MostLikelyClusterEachCentroidASCII=y"
          / "; output most likely clusters in dBase format (y/n)"
          / "MostLikelyClusterEachCentroidDBase=y"
          / "; report census areas in ASCII format (y/n)"
          / "CensusAreasReportedClustersASCII=y"
          / "; report census areas in dBase format (y/n)"
          / "CensusAreasReportedClustersDBase=y"
          / "; report Simulated Log Likelihoods Ratios in ASCII format (y/n)"
          / "SaveSimLLRsASCII=n"
          / "; report Simulated Log Likelihoods Ratios in dBase format (y/n)"
          / "SaveSimLLRsDBase=n"
          / "; report relative risks in ASCII format (y/n)"
          / "IncludeRelativeRisksCensusAreasASCII=n"
          / "; report relative risks in dBase format (y/n)"
          / "IncludeRelativeRisksCensusAreasDBase=n"
          / "; criteria for reporting secondary clusters(0=NoGeoOverlap, 1=NoCentersInOther, 2=NoCentersInMostLikely, 3=NoCentersInLessLikely, 4=NoPairsCentersEachOther, 5=NoRestrictions)"
          / "CriteriaForReportingSecondaryClusters=0"
          / " "
          / "[Elliptic Scan]"
          / "; number of ellipses (0-10)"
          / "NumberOfEllipses=0"
          / "; ellipse shapes"
          / "EllipseShapes="
          / "; ellipse angles"
          / "EllipseAngles="
          / "; Duczmal Compactness Correction (y/n)"
          / "DuczmalCompactnessCorrection=n"
          / " "
          / "[Sequential Scan]"
          / "; sequential scan (y/n)"
          / "SequentialScan=n"
          / "; max iterations for sequential scan (0-32000)"
          / "SequentialScanMaxIterations=0"
          / "; max p-Value for sequential scan (0.000-1.000)"
          / "SequentialScanMaxPValue=0.000000"
          / " "
          / "[Advanced Features]"
          / "; validate parameters (y/n)"
          / "ValidateParameters=y"
          / "; Isotonic Scan (0=Standard, 1=Monotone)"
          / "IsotonicScan=0"
          / "; p-Values for 2 Prespecified LLR's (y/n)"
          / "PValues2PrespecifiedLLRs=n"
          / "LLR1=0.000000"
          / "LLR2=0.000000"
          ;
    run;
%mend;

%macro ReadSatScanOutput;
    /* Reads the SatScan cluster (.col.txt) and area (.gis.txt) text output for
       the current SYND/AGE/LEVEL into datasets ClustersOut and AreasOut, using
       fixed column positions.
       [An alternative to reading in SatScan *.txt output files as below is to
       read in *.dbf files but this requires having SAS ACCESS PC File Formats] */

    /* One row per cluster. */
    data ClustersOut;
        infile "&OUTPUT&SYND&AGE&LEVEL.Out.col.txt" lrecl=240 pad;
        attrib Syndrome  length=$8
               Age       length=$5.  format=$5.
               Level     length=$4
               DataDate  length=8.   format=mmddyy8.
               clusterno length=8.   format=2.
               loc_id    length=$10.
               latitude  length=8.   format=10.6
               longitude length=8.   format=10.6
               radius    length=8.   format=5.2
               num_areas length=8.
               Observed  length=8.   format=5.
               Expected  length=8.   format=7.1
               p         length=8.   format=6.4
               rel_risk  length=8.   format=4.1
               tst_stat  length=8.   format=6.3
               start_dt  length=8.   format=mmddyy8.
               end_dt    length=8.   format=mmddyy8.;
        /* Tag every row with the run that produced it. */
        Syndrome="&SYND";
        Age="&AGE";
        Level="&LEVEL";
        DataDate = &DATADATE;
        format datadate MMDDYY10.;
        input @1   loc_id
              @33  ClusterNo
              @39  latitude
              @57  longitude
              @74  radius
              @89  num_areas
              @102 observed
              @115 expected
              @128 rel_risk
              @141 tst_stat
              @158 p
              @171 start_date $10.
              @189 end_date $10.
              ;
        /* Dates arrive as yyyy/m/d text where the month may be one or two
           digits: locate the slash after the month, then split around it. */
        numst=index(substr(start_date,6,3),'/');
        start_DT = mdy(substr(start_date,6,numst-1),substr(start_date,6+numst,2),substr(start_date,1,4));
        numend=index(substr(end_date,6,3),'/');
        end_DT = mdy(substr(end_date,6,numend-1),substr(end_date,6+numend,2),substr(end_date,1,4));
        drop numst numend start_date end_date;
    run;

    /* One row per area (e.g. zip code) belonging to a cluster. */
    data AreasOut;
        infile "&OUTPUT&SYND&AGE&LEVEL.Out.gis.txt" lrecl=200 pad;
        attrib Syndrome  length=$8
               Age       length=$5.  format=$5.
               Level     length=$4
               DataDate  length=8.   format=mmddyy8.
               clusterno length=8.   format=2.
               area      length=$10.
               AreaObs   length=8.   format=5.
               AreaExp   length=8.   format=7.1;
        Syndrome="&SYND";
        Age="&AGE";
        Level="&LEVEL";
        DataDate = &DATADATE;
        input @1   area
              @33  ClusterNo
              @91  AreaObs
              @104 AreaExp;
        keep syndrome age level datadate clusterno area areaobs areaexp;
    run;
%mend;

%macro ClusterReport;
    /* Helper for PrintResults: one PROC REPORT of WORK.ClustAreas with the
       standard titles. Output goes to whatever print destination is active
       when the macro is called. */
    proc report data=ClustAreas nowd missing split=',';
        column datadate Syndrome Age Level Clusterno p ClustDays Observed Expected Area AreaObs AreaExp;
        define Datadate  / group format=mmddyy8. 'Date' order=data;
        define syndrome  / group format=$synd1f. order=formatted 'Syndrome';
        define age       / group center format=$agef.;
        define level     / group format=$5.;
        define clusterno / group format=5. center 'Clust,no.';
        define clustDays / group format=7. center 'Clust,days';
        define Observed  / group format=7. center 'Clust,Obs';
        define Expected  / group format=7. center 'Clust,Exp';
        define p         / group center 'p' format=6.4;
        define Area      / display format=$6. 'Area';
        define Areaobs   / display format=6. 'Area,Obs';
        define Areaexp   / display format=6.1 'Area,Exp';
        title1 "SatScan Output on &TODAY";
        /* Syndrome comes from SYND so each run is labelled with its own syndrome. */
        title2 "ED &SYND visits, all ages, by zip code";
        title3 "Retrospective space-time permutation analysis, 30-day baseline.";
        title4 "Max Geog=&MAXGEOG km, Max Temp=&MAXTEMP days, Alive clusters only";
    run;
%mend;

%Macro PrintResults;
    /* Print SatScan output: combines ClustersOut and AreasOut, then prints the
       report twice - once to the current print destination and once to the
       permanent dated file &OUTPUT.SatScanOutput&DATETXT..txt. */

    /* Attach cluster-level results to each area row; keep only areas that
       belong to a cluster read from the cluster file and have at least one
       observed case. */
    data ClustAreas;
        merge ClustersOut (in=a) AreasOut;
        by syndrome age level datadate clusterno;
        if p <.01 then flag='p<0.01';
        else if p <.05 then flag='p<0.05';
        if a and areaobs>0;
        ClustDays=end_dt-start_dt+1;   /* inclusive length of the cluster in days */
        format clustdays 3.;
    run;

    proc format;
        value $agef   '00_12'='0-12'
                      '13_99'='13+'
                      '50_99'='50+'
                      '00_99'='All';
        value $synd1f 'Resp'  ='a-Resp  '
                      'Fevflu'='b-FevFlu'
                      'Diar'  ='c-Diar  '
                      'Vomit' ='d-Vomit '
                      'Asthma'='e-Asthma';
    run;

    options formdlim=' ';

    proc sort data=ClustAreas;
        by datadate Syndrome Age Level Clusterno;
    run;

    /* Report to the current print destination. */
    %ClusterReport;

    /* Same report to the permanent, dated file. The NEW option is not used,
       so repeated runs on the same DATETXT add to the existing file.
       &OUTPUT already ends in a backslash, so none is added here. */
    proc printto print="&OUTPUT.SatScanOutput&DATETXT..txt";
    run;

    %ClusterReport;

    /* Restore the default print destination. */
    proc printto;
    run;
%Mend;

options noxwait mprint notes;

****************** Set Parameters and Run SatScan Analysis *******************;

***** DIARRHEA *****;
%Let Synd=Diar;
%Let Age=00_99;
%Let Level=Zip;
%let startbase=15;  *Start of baseline (=number of days before simdate);
%let maxgeog=5;*20.000000;  *Maximum geographic cluster size ie. max % of cases in cluster;
%let maxtemp=7;*50.000000; *34.000000;  *Maximum temporal size, ie. 1 day/total days;
%let endbase=2;  *End of baseline (=number of days before simdate);

%MakeSatScanFiles;  *Create SatScan text files and parameter files;
x "&PARAM.RunSatScanBatch.bat";  * Run SatScanbatch.exe using Param.prm;
%ReadSatScanOutput;  *Read in output from text files generated by SatScan;
%PrintResults;  *Prints to Output Window and also to permanent, dated, file in Output;

***** Respiratory *****;
%Let Synd=Resp;
%Let Age=00_99;
%Let Level=Zip;
%let startbase=15;  *Start of baseline (=number of days before simdate);
%let maxgeog=5;*20.000000;  *Maximum geographic cluster size ie. max % of cases in cluster;
%let maxtemp=7;*50.000000; *34.000000;  *Maximum temporal size, ie. 1 day/total days;
%let endbase=2;  *End of baseline (=number of days before simdate);

%MakeSatScanFiles;  *Create SatScan text files and parameter files;
x "&PARAM.RunSatScanBatch.bat";  * Run SatScanbatch.exe using Param.prm;
%ReadSatScanOutput;  *Read in output from text files generated by SatScan;
%PrintResults;  *Prints to Output Window and also to permanent, dated, file in Output;