Merge and aggregate datasets: Difference between revisions

Content added Content deleted
(Harbour: simplify example using SET RELATION TO)
Line 832: Line 832:
INSERT INTO visit (patient_id, visit_date, score) VALUES (1001, ctod("2020-11-19"), 5.3)
INSERT INTO visit (patient_id, visit_date, score) VALUES (1001, ctod("2020-11-19"), 5.3)
INDEX ON patient_id TO visit_id
INDEX ON patient_id TO visit_id

&& create the result table
&& create the result table
rStruct := { {"patient_id", "n", 8, 0}, {"lastname", "c", 10, 0}, ;
rStruct := { {"patient_id", "n", 8, 0}, ;
{"n", "i", 8, 0}, {"sum_score", "n", 8, 1}, ;
{"n", "i", 8, 0}, {"sum_score", "n", 8, 1}, ;
{"avg_score", "n", 8, 1}, {"max_date", "d", 10, 0}}
{"avg_score", "n", 8, 1}, {"max_date", "d", 10, 0}}
dbCreate("report", rStruct,, .T., "report")
dbCreate("report", rStruct,, .T., "report")


SELECT visit
&& left join: for each patient
SELECT patient
DO WHILE ! Eof()
DO WHILE ! Eof()
xCurId := patient_id && grouping variable
xCurId := patient_id && grouping variable
aAgg := {0, 0.0, ctod("19000101")} && initial values
SELECT visit
DO WHILE ! Eof() .AND. xCurId == patient_id
LOCATE FOR patient_id==xCurId
aAgg := {1+aAgg[1], score+aAgg[2], max(visit_date, aAgg[3])} && update
IF found()
SKIP
&& aggregate within visits for a given patient
ENDDO
aAgg := {0, 0.0, ctod("19000101")} && initial values
INSERT INTO report (patient_id, n, sum_score, avg_score, max_date) ;
DO WHILE ! Eof() .AND. xCurId == patient_id
aAgg := {1+aAgg[1], score+aAgg[2], max(visit_date, aAgg[3])} && update
VALUES (xCurId, aAgg[1], aAgg[2], aAgg[2]/aAgg[1], aAgg[3])
SKIP
ENDDO
INSERT INTO report (patient_id, lastname, n, sum_score, avg_score, max_date) ;
VALUES (xCurId, patient->lastname, aAgg[1], aAgg[2], aAgg[2]/aAgg[1], aAgg[3])
ELSE
INSERT INTO report (patient_id, lastname) VALUES (xCurId, patient->lastname)
END IF
SELECT patient
SKIP
ENDDO
ENDDO

SELECT report
SELECT report
INDEX ON patient_id TO report_id

SELECT patient
SET RELATION TO patient_id INTO report
? "NUM", "PATIENT_ID", "LASTNAME", "N", "SUM_SCORE", "AVG_SCORE", "MAX_DATE"
? "NUM", "PATIENT_ID", "LASTNAME", "N", "SUM_SCORE", "AVG_SCORE", "MAX_DATE"
LIST patient_id, lastname, n, sum_score, avg_score, max_date
LIST patient_id, lastname, report->n, report->sum_score, report->avg_score, report->max_date


RETURN</syntaxhighlight>
RETURN</syntaxhighlight>