Text processing/2: Difference between revisions
Content added Content deleted
m (→{{header|REXX}}: added/changed whitespace and comments, simplified the COMMAS subroutine.) |
|||
Line 2,241: | Line 2,241: | ||
=={{header|REXX}}== |
=={{header|REXX}}== |
||
This REXX program processes the file mentioned in "text processing 1" and performs further |
This REXX program processes the file mentioned in "text processing 1" and performs further validation on the dates, flags, and data. |
||
<br><br> |
<br><br> |
||
Some of the checks performed are: |
Some of the checks performed are: |
||
* checks for duplicated date records. |
::* checks for duplicated date records. |
||
* checks for a bad date (YYYY-MM-DD) format, among: |
::* checks for a bad date (YYYY-MM-DD) format, among: |
||
* wrong length |
::* wrong length |
||
* year > current year |
::* year > current year |
||
* year < 1970 (to allow for posthumous data) |
::* year < 1970 (to allow for posthumous data) |
||
* mm < 1 or mm > 12 |
::* mm < 1 or mm > 12 |
||
* dd < 1 or dd > days for the month |
::* dd < 1 or dd > days for the month |
||
* yyyy, dd, mm isn't numeric |
::* yyyy, dd, mm isn't numeric |
||
* missing data (or flags) |
::* missing data (or flags) |
||
* flag isn't an integer |
::* flag isn't an integer |
||
* flag contains a decimal point |
::* flag contains a decimal point |
||
* data isn't numeric |
::* data isn't numeric |
||
In addition, all of the presented numbers |
In addition, all of the presented numbers may have commas inserted. |
||
<br><br> |
<br><br> |
||
The program has (negated) code to write the report to a file in addition to the console. |
The program has (negated) code to write the report to a file in addition to the console. |
||
<lang rexx>/*REXX program to process instrument data from a data file. */ |
<lang rexx>/*REXX program to process instrument data from a data file. */ |
||
numeric digits 20 /*allow for bigger numbers. */ |
numeric digits 20 /*allow for bigger numbers. */ |
||
ifid='READINGS.TXT' /*the input file. |
ifid='READINGS.TXT' /*name of the input file. */ |
||
ofid='READINGS.OUT' /* |
ofid='READINGS.OUT' /* " " " output " */ |
||
grandSum=0 /*grand sum of whole file. */ |
grandSum=0 /*grand sum of the whole file. */ |
||
grandFlg=0 /*grand number of flagged data. */ |
|||
grandOKs=0 |
grandOKs=0 |
||
Lflag=0 /*longest period of flagged data. */ |
|||
Cflag=0 /*longest continuous flagged data. */ |
|||
oldDate =0 /*placeholder of |
oldDate =0 /*placeholder of the previous date. */ |
||
w =16 /*width of fields when displayed.*/ |
w =16 /*width of fields when displayed. */ |
||
dupDates=0 /*count of duplicated timestamps.*/ |
dupDates=0 /*count of duplicated timestamps. */ |
||
badFlags=0 /*count of bad flags (not integer). */ |
|||
badDates=0 /*count of bad dates (bad format)*/ |
badDates=0 /*count of bad dates (bad format). */ |
||
badData =0 /*count of bad |
badData =0 /*count of bad data (not numeric). */ |
||
ignoredR=0 /*count of ignored records |
ignoredR=0 /*count of ignored records, bad records*/ |
||
maxInstruments=24 /*maximum number of instruments. */ |
maxInstruments=24 /*maximum number of instruments. */ |
||
yyyyCurr=right(date(),4) /*get the current year (today). */ |
yyyyCurr=right(date(),4) /*get the current year (today). */ |
||
monDD. =31 /*number of days in every month. */ |
monDD. =31 /*number of days in every month. */ |
||
/* |
/*# days in Feb. is figured on the fly.*/ |
||
monDD.4 =30 |
monDD.4 =30 |
||
monDD.6 =30 |
monDD.6 =30 |
||
Line 2,284: | Line 2,284: | ||
monDD.11=30 |
monDD.11=30 |
||
do records=1 while lines(ifid)\==0 /*read until finished. */ |
do records=1 while lines(ifid)\==0 /*read until finished. */ |
||
rec=linein(ifid) /*read the next record (line). */ |
rec=linein(ifid) /*read the next record (line). */ |
||
parse var rec datestamp Idata /*pick off the dateStamp |
parse var rec datestamp Idata /*pick off the dateStamp and data. */ |
||
if datestamp==oldDate then do |
if datestamp==oldDate then do /*found a duplicate timestamp. */ |
||
dupDates=dupDates+1 |
dupDates=dupDates+1 /*bump the dupDate counter*/ |
||
call sy datestamp copies('~',30), |
call sy datestamp copies('~',30), |
||
'is a duplicate of the', |
'is a duplicate of the', |
||
"previous datestamp." |
"previous datestamp." |
||
ignoredR=ignoredR+1 /*bump ignoredRecs.*/ |
ignoredR=ignoredR+1 /*bump # of ignoredRecs.*/ |
||
iterate |
iterate /*ignore this duplicate record. */ |
||
end |
end |
||
parse var datestamp yyyy '-' mm '-' dd /*obtain YYYY, MM, and DD. */ |
parse var datestamp yyyy '-' mm '-' dd /*obtain YYYY, MM, and the DD. */ |
||
monDD.2=28+leapyear(yyyy) /*how long is February in YYYY ? */ |
monDD.2=28+leapyear(yyyy) /*how long is February in year YYYY ? */ |
||
/*check for various bad formats. */ |
/*check for various bad formats. */ |
||
if verify(yyyy||mm||dd,1234567890)\==0 |, |
if verify(yyyy||mm||dd,1234567890)\==0 |, |
||
length(datestamp)\==10 |, |
length(datestamp)\==10 |, |
||
Line 2,306: | Line 2,306: | ||
yyyy<1970 |, |
yyyy<1970 |, |
||
yyyy>yyyyCurr |, |
yyyy>yyyyCurr |, |
||
mm=0 |
mm=0 | dd=0 |, |
||
mm>12 |
mm>12 | dd>monDD.mm then do |
||
badDates=badDates+1 |
badDates=badDates+1 |
||
call sy datestamp copies('~'), |
call sy datestamp copies('~'), |
||
'has an illegal format.' |
'has an illegal format.' |
||
ignoredR=ignoredR+1 |
ignoredR=ignoredR+1 /*bump number ignoredRecs.*/ |
||
iterate /*ignore this bad |
iterate /*ignore this bad record. */ |
||
end |
end |
||
oldDate=datestamp /*save datestamp for next read. */ |
oldDate=datestamp /*save datestamp for the next read. */ |
||
sum=0 |
sum=0 |
||
flg=0 |
flg=0 |
||
OKs=0 |
OKs=0 |
||
do j=1 until Idata='' /*process the instrument data. */ |
do j=1 until Idata='' /*process the instrument data. */ |
||
parse var Idata data.j flag.j Idata |
parse var Idata data.j flag.j Idata |
||
if pos('.',flag.j)\==0 |, /*flag have a decimal point -or-*/ |
if pos('.',flag.j)\==0 |, /*does flag have a decimal point -or- */ |
||
\datatype(flag.j,'W') then do |
\datatype(flag.j,'W') then do /* ··· is the flag not a whole number? */ |
||
badFlags=badFlags+1 /*bump badFlags counter*/ |
|||
call sy datestamp copies('~'), |
call sy datestamp copies('~'), |
||
'instrument' j "has a bad flag:", |
'instrument' j "has a bad flag:", |
||
flag.j |
flag.j |
||
iterate /*ignore it |
iterate /*ignore it and its data. */ |
||
end |
end |
||
if \datatype(data.j,'N') then do |
if \datatype(data.j,'N') then do /*is the data not numeric? */ |
||
badData=badData+1 /*bump counter.*/ |
badData=badData+1 /*bump counter.*/ |
||
call sy datestamp copies('~'), |
call sy datestamp copies('~'), |
||
'instrument' j "has bad data:", |
'instrument' j "has bad data:", |
||
data.j |
data.j |
||
iterate /*ignore it & its flag. */ |
iterate /*ignore it & its flag. */ |
||
end |
end |
||
if flag.j>0 then do |
if flag.j>0 then do /*if good data, ~~~ */ |
||
OKs=OKs+1 |
OKs=OKs+1 |
||
sum=sum+data.j |
sum=sum+data.j |
||
if |
if Cflag>Lflag then do |
||
Ldate=datestamp |
|||
Lflag=Cflag |
|||
end |
|||
Cflag=0 |
|||
end |
end |
||
else do |
else do /*flagged data ~~~ */ |
||
flg=flg+1 |
flg=flg+1 |
||
Cflag=Cflag+1 |
|||
end |
end |
||
end /*j*/ |
end /*j*/ |
||
if j>maxInstruments then do |
if j>maxInstruments then do |
||
badData=badData+1 |
badData=badData+1 /*bump the badData counter.*/ |
||
call sy datestamp copies('~'), |
call sy datestamp copies('~'), |
||
'too many instrument datum' |
'too many instrument datum' |
||
end |
end |
||
if OKs\==0 then avg=format(sum/OKs,,3) |
if OKs\==0 then avg=format(sum/OKs,,3) |
||
else avg='[n/a]' |
else avg='[n/a]' |
||
grandOKs=grandOKs+OKs |
grandOKs=grandOKs+OKs |
||
_=right( |
_=right(commas(avg),w) |
||
grandSum=grandSum+sum |
grandSum=grandSum+sum |
||
grandFlg=grandFlg+flg |
grandFlg=grandFlg+flg |
||
Line 2,370: | Line 2,370: | ||
end /*records*/ |
end /*records*/ |
||
records=records-1 /*adjust for reading |
records=records-1 /*adjust for reading the end─of─file. */ |
||
if grandOKs\==0 then grandAvg=format(grandsum/grandOKs,,3) |
if grandOKs\==0 then grandAvg=format(grandsum/grandOKs,,3) |
||
else grandAvg='[n/a]' |
else grandAvg='[n/a]' |
||
call sy |
call sy |
||
call sy copies('=',60) |
call sy copies('=',60) |
||
call sy ' records read:' right( |
call sy ' records read:' right(commas(records ),w) |
||
call sy ' records ignored:' right( |
call sy ' records ignored:' right(commas(ignoredR),w) |
||
call sy ' grand sum:' right( |
call sy ' grand sum:' right(commas(grandSum),w+4) |
||
call sy ' grand average:' right( |
call sy ' grand average:' right(commas(grandAvg),w+4) |
||
call sy ' grand OK data:' right( |
call sy ' grand OK data:' right(commas(grandOKs),w) |
||
call sy ' grand flagged:' right( |
call sy ' grand flagged:' right(commas(grandFlg),w) |
||
call sy ' duplicate dates:' right( |
call sy ' duplicate dates:' right(commas(dupDates),w) |
||
call sy ' bad dates:' right( |
call sy ' bad dates:' right(commas(badDates),w) |
||
call sy ' bad data:' right( |
call sy ' bad data:' right(commas(badData ),w) |
||
call sy ' bad flags:' right( |
call sy ' bad flags:' right(commas(badFlags),w) |
||
⚫ | |||
if longFlag\==0 then |
|||
⚫ | |||
call sy copies('=',60) |
call sy copies('=',60) |
||
⚫ | |||
call sy |
|||
/*────────────────────────────────────────────────────────────────────────────*/ |
|||
⚫ | |||
commas: procedure; parse arg _; n=_'.9'; #=123456789; b=verify(n,#,"M") |
|||
/*──────────────────────────────────LEAPYEAR subroutine─────────────────*/ |
|||
⚫ | |||
leapyear: procedure; arg y /*year could be: Y, YY, YYY, YYYY*/ |
|||
⚫ | |||
⚫ | |||
/*────────────────────────────────────────────────────────────────────────────*/ |
|||
⚫ | |||
leapyear: procedure; arg y /*year could be: Y, YY, YYY, or YYYY*/ |
|||
⚫ | |||
/*──────────────────────────────────SY subroutine───────────────────────*/ |
|||
⚫ | |||
sy: procedure; parse arg stuff; say stuff |
|||
return y//100\==0 | y//400==0 /*apply the 100 and the 400 year rule.*/ |
|||
if 1==0 then call lineout ofid,stuff |
|||
/*────────────────────────────────────────────────────────────────────────────*/ |
|||
return |
|||
sy: say arg(1); call lineout ofid,arg(1); return</lang> |
|||
/*──────────────────────────────────COMMA subroutine────────────────────*/ |
|||
'''output''' when using the default input file: |
|||
comma: procedure; parse arg _,c,p,t;arg ,cu;c=word(c ",",1) |
|||
if cu=='BLANK' then c=' ';o=word(p 3,1);p=abs(o);t=word(t 999999999,1) |
|||
if \datatype(p,'W')|\datatype(t,'W')|p==0|arg()>4 then return _;n=_'.9' |
|||
#=123456789;k=0;if o<0 then do;b=verify(_,' ');if b==0 then return _ |
|||
e=length(_)-verify(reverse(_),' ')+1;end;else do;b=verify(n,#,"M") |
|||
⚫ | |||
⚫ | |||
'''output''' |
|||
<pre style="height:35ex"> |
<pre style="height:35ex"> |
||
∙ |
∙ |