#!/usr/bin/perl
#
# mkPromptFiles.pl   Version 1.1
# (C) Copyright 1999
# Chr. Draxler, University of Munich
# draxler@phonetik.uni-muenchen.de
#
# mkPromptFiles.pl makes prompt files for the SpeechDat-Car
# recordings. It reads the prompt contents from text files
# according to the CCD (corpus code, the two-letter id code
# associated with each recorded item), creates a prompt
# definition file line which consists of the fields
#
#    CCD;PromptDefinitionFile;LineIndex
#
# and collects these lines in an array. Once the array is
# complete, the elements are extracted in random order
# and written to a prompt file in DOS format with no
# empty lines.
#
# NOTES:
# 1) LineIndex > 0 in the prompt file, but for array access
#    in the script, it must be decremented by 1 because
#    array indexes start with 0
# 2) the script always creates at least two prompt files, one
#    with the end digit "0", one with "1".
#
# HISTORY:
# 1.1: - added link between L2 and O3 CCDs; this link requires
#        a file with spelled items (i.e. with a blank between
#        letters); these items must be in the same order as the
#        non-spelled items in the original file.
#      - usage message is printed if number of arguments is less
#        than six (if command line arguments are used)
# 1.0: first release
#
# --- arguments needed for script to run --------------------
#
# on the Mac, they are asked for via dialogs. On other
# systems, they are passed as command line arguments.
#
# Use the version that's compatible with your machine and
# comment out the other one.
# $srcDir = MacPerl::Ask('Source directory',':ITEM:'); $targetDir = MacPerl::Ask('Target directory',':PROMPT:'); $txtExt = MacPerl::Ask('Text file extension','.TXT'); $pmtExt = '.TXT'; $lowSessNo = MacPerl::Ask('First session number','50'); $highSessNo = MacPerl::Ask('Last session number','60'); #if(int(@ARGV) < 6) { # die "Usage\n\nmkPromptFiles.pl SRCDIR TARGETDIR TXTEXT PMTEXT LOWSESSNO HIGHSESSNO\n"; #} else { # $srcDir = $ARGV[0]; # $targetDir = $ARGV[1]; # $txtExt = $ARGV[2]; # $pmtExt = $ARGV[3]; # $lowSessNo = $ARGV[4]; # $highSessNo = $ARGV[5]; #} # --- define an associative array with CCD codes and the ---- # source files from which to take the corresponding prompt # text or data %ccdSource = ( 'A1','VoiceAct', 'A2','VoiceAct', 'B1','TenDigits', 'C1','SheetNumber', 'C2','TelNumberSpon', 'C3','CreditCard', 'C4','PinCode', 'C5','TelNumber', 'C6','TelNumber', 'C7','TelNumber', 'D1','DateSpon', 'D2','DateAbs', 'D3','DateRel', 'E1','KeywordSentences', 'E2','KeywordSentences', 'I1','IsolatedDigit', 'I2','IsolatedDigit', 'I3','IsolatedDigit', 'I4','IsolatedDigit', 'L1','NameSponSpell', 'L2','CitySpell', 'L3','WordSpell', 'L4','WordSpell', 'L5','WordSpell', 'L6','WordSpell', 'L7','WordArtSpell', 'M1','Amounts', 'N1','NaturalNumber', 'O1','NameSpon', 'O2','BirthCity', 'O3','Cities', 'O4','Cities', 'O5','Companies', 'O6','Companies', 'O7','Names', 'S1','Sentences', 'S2','Sentences', 'S3','Sentences', 'S4','Sentences', 'S5','Sentences', 'S6','Sentences', 'S7','Sentences', 'S8','Sentences', 'S9','Sentences', 'T1','TimeSpon', 'T2','TimeAna', 'W1','Words', 'W2','Words', 'W3','Words', 'W4','Words', 'P1','KeywordsDep', 'P2','KeywordsDep', 'Z0','SponSentences', 'Z1','SponSentences', 'Z2','SponSentences', 'Z3','SponSentences', 'Z4','SponSentences', 'Z5','SponSentences', 'Z6','SponSentences', 'Z7','SponSentences', 'Z8','SponSentences', 'Z9','SponSentences', '00','AppWords', '01','AppWords', '02','AppWords', '03','AppWords', '04','AppWords', '05','AppWords', '06','AppWords', 
'07','AppWords', '08','AppWords', '09','AppWords', '10','AppWords', '11','AppWords', '12','AppWords', '13','AppWords', '14','AppWords', '15','AppWords', '16','AppWords', '17','AppWords', '18','AppWords', '19','AppWords', '20','AppWords', '21','AppWords', '22','AppWords', '23','AppWords', '24','AppWords', '25','AppWords', '26','AppWords', '27','AppWords', '28','AppWords', '29','AppWords', '30','AppWords', '31','AppWords', '32','AppWords', '33','AppWords', '34','AppWords', '35','AppWords', '36','AppWords', '37','AppWords', '38','AppWords', '39','AppWords', '40','AppWords', '41','AppWords', '42','AppWords', '43','AppWords', '44','AppWords', '45','AppWords', '46','AppWords', '47','AppWords', '48','AppWords', '49','AppWords', '50','AppWords', '51','AppWords', '52','AppWords', '53','AppWords', '54','AppWords', '55','AppWords', '56','AppWords', '57','AppWords', '58','AppWords', '59','AppWords', '60','AppWords', '61','AppWords', '62','AppWords', '63','AppWords', '64','AppWords', '65','AppWords', '66','AppWords', ); # --- now create prompt files --------------------------------------- $sessNo = $lowSessNo; $i=0; while($sessNo <= $highSessNo) { if($sessNo < 10) { $sessCode = '00' . $sessNo . $i; } elsif($sessNo < 100) { $sessCode = '0' . $sessNo . $i; } else { $sessCode = $sessNo . $i; } if($i == 1) { $sessNo++; $i = 0; } else { print "session: $sessNo\n"; $i = 1; } $promptFile = $targetDir . 'prmt' . $sessCode . $pmtExt; open(PROMPT,">$promptFile") || die "Could not open $promptFile"; # --- initialize counters ---------------------------------------- $oldCcd = ''; $inFile = ''; $promptCnt = 0; @prompts = (); foreach $ccd (sort(keys %ccdSource)) { if($ccdSource{$ccd} ne $oldCcd) { # --- if $inFile was open, close now ----------------------- if($inFile ne '') { close(IN); } # --- open $inFile for prompt material --------------------- $inFile = $srcDir . $ccdSource{$ccd} . 
$txtExt; open(IN,$inFile) || die "Could not open $inFile"; # --- initialize items array ------------------------------- @items = (); $promptFileCnt = 0; while() { chop; # --- remove any new line, carriage return, etc. -------- s/[\n\r\f]//g; if($_ ne '') { $items[$promptFileCnt] = $_; $promptFileCnt++; } } # --- how many items from this file are used? -------------- $itemFileCnt = 0; foreach $itemKey (keys(%ccdSource)) { if($ccdSource{$itemKey} eq $ccdSource{$ccd}) { $itemFileCnt++; } } # --- initialize $lineIndex -------------------------------- $lineIndex = 0; # --- update $oldCcd to detect next change of $ccdSource --- $oldCcd = $ccdSource{$ccd}; } if($promptFileCnt > $itemFileCnt) { $lineIndex = int(rand(@items)+1); } else { $lineIndex++; } # --- handle special cases, e.g. CCDs linked to each other ---- # L2 and O2 are linked; L2 must come before O3 in the array # @ccdSource; the file containing data for L2 and O3 # must have the same sequence of entries if ($ccd eq 'L2') { $lineIndexL2 = $lineIndex; } elsif ($ccd eq 'O3') { $lineIndex = $lineIndexL2; } # --- get prompt text, format if, and print to log output ----- $lineText = $items[$lineIndex-1]; $lineText =~ s/[\r\n\f]//g; printf "%s;%s;%d;%s\n",$ccd,$ccdSource{$ccd},$lineIndex,$lineText; # --- store prompt file line in array ------------------------- $prompts[$promptCnt]="$ccd;$ccdSource{$ccd};$lineIndex"; $promptCnt++; } # --- permutate lines of prompt array and print to prompt file --- # remove any new lines, carriage returns, etc from $promptLine # and print it $promptLine = splice(@prompts,int(rand(@prompts)),1); $promptLine =~ s/[\r\n\f]//g; printf PROMPT "%s",$promptLine; # --- then print rest of lines preceded by CR/LF ----------------- while(@prompts) { $promptLine = splice(@prompts,int(rand(@prompts)),1); # --- remove any new lines, carriage returns, etc. ------------ $promptLine =~ s/[\r\n\f]//g; printf PROMPT "\r\n%s",$promptLine; #print $promptLine; } close(PROMPT); close(IN); print "\n\n"; }