Averages/Mode
You are encouraged to solve this task according to the task description, using any language you may know.
Write a program to find the mode value of a vector of integers. The case where the vector is empty may be ignored. Care must be taken to handle the case where the mode is non-unique.
C
<lang c>#include <stdio.h>
- include <stdlib.h>
- include <stdbool.h>
- define INC_VAL 50
// to hold the results typedef struct stack {
double *data; size_t n; size_t cap;
} stack_t;
// to hold values and their frequencies typedef struct fav_el {
double v; size_t f;
} fav_el_t;
typedef struct freqs_and_vals_table {
fav_el_t *vf; size_t n; size_t cap;
} fav_t;</lang>
<lang c>fav_t *alloc_table() {
fav_t *r = malloc(sizeof(fav_t)); r->n = 0; r->cap = 1; r->vf = malloc(sizeof(fav_el_t)*INC_VAL); return r;
}
void free_table(fav_t *t) {
free(t->vf); free(t);
}
// compare functions for bsearch and qsort
- define CMP_HOOK(F) ((int (*)(const void *, const void *))F)
int t_cmp(double *k, double *v) {
if ( *k < *v ) return -1; else if ( *k > *v ) return 1; else return 0;
}
int s_cmp(fav_el_t *a, fav_el_t *b) {
return t_cmp(&a->v, &b->v);
}
int f_cmp(fav_el_t *a, fav_el_t *b) {
if ( a->f < b->f ) return 1; else if ( a->f > b->f ) return -1; else return 0;
}
size_t table_search(fav_t *t, double v) {
fav_el_t *el = bsearch(&v, t->vf, t->n, sizeof(fav_el_t), CMP_HOOK(s_cmp)); return (el==NULL) ? -1 : (el - t->vf);
}
void add_value(fav_t *t, double v) {
int i;
if ( (i=table_search(t, v)) < 0 ) { if ( (t->n + 1) > (t->cap * INC_VAL) ) { t->cap++; t->vf = realloc(t->vf, t->cap * INC_VAL * sizeof(fav_el_t)); } t->vf[t->n].v = v; t->vf[t->n].f = 1; t->n++; } else { t->vf[i].f++; }
}
void collect_from_array(fav_t *t, double *v, size_t n) {
size_t i; qsort(v, n, sizeof(double), CMP_HOOK(t_cmp)); for(i=0; i < n; i++) { add_value(t, v[i]); }
}
stack_t *init_stack() {
stack_t *r = malloc(sizeof(stack_t)); r->n = 0; r->cap = 1; r->data = malloc(sizeof(double)*INC_VAL); return r;
}
void free_stack(stack_t *s) {
free(s->data); free(s);
}
void push_value(stack_t *s, double v) {
if ( (s->n + 1) > (s->cap * INC_VAL) ) { s->cap++; s->data = realloc(s->data, s->cap * INC_VAL * sizeof(double)); } s->data[s->n] = v; s->n++;
}</lang>
All the codes above are utilities to keep this main function simple. First, we build a table of value-frequency, for each different value, using the utility function collect_from_array. Then we sort the value-frequency pair according to the frequency (descending, cfr. f_cmp), and push on a "stack" all the values with the biggest same frequency.
<lang c>stack_t *median(double *v, size_t n) {
size_t i, f; stack_t *r;
fav_t *t1 = alloc_table(); collect_from_array(t1, v, n); // now let's sort according to freqs qsort(t1->vf, t1->n, sizeof(fav_el_t), CMP_HOOK(f_cmp)); r = init_stack(); f = t1->vf[0].f; push_value(r, t1->vf[0].v); for(i=1; (i < t1->n) && (t1->vf[i].f == f); i++) push_value(r, t1->vf[i].v); free_table(t1); return r;
}</lang>
<lang c>double v1[] = { 1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17 }; double v2[] = { 1, 1, 2, 4, 4 };
- define PRINT_MEDIAN(V) do { size_t i; \
stack_t *r = median(V, sizeof(V)/sizeof(double)); \ for(i=0; i < r->n; i++) printf("%lf ", r->data[i]); \ free_stack(r); printf("\n"); \ } while(0);
int main() {
PRINT_MEDIAN(v1) PRINT_MEDIAN(v2);
return EXIT_SUCCESS;
}</lang>
Java
<lang java>import java.util.*;
public class Mode {
public static <T> List<T> mode(List<? extends T> coll) { Map<T, Integer> seen = new HashMap<T, Integer>(); int max = 0; List<T> maxElems = new ArrayList<T>(); for (T value : coll) { if (seen.containsKey(value)) seen.put(value, seen.get(value) + 1); else seen.put(value, 1); if (seen.get(value) > max) { max = seen.get(value); maxElems.clear(); maxElems.add(value); } else if (seen.get(value) == max) { maxElems.add(value); } } return maxElems; }
public static void main(String[] args) { System.out.println(mode(Arrays.asList(1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17))); // prints [6] System.out.println(mode(Arrays.asList(1, 1, 2, 4, 4))); // prints [1, 4] }
}</lang>
Objective-C
<lang objc>#import <Foundation/Foundation.h>
@interface NSArray (Mode) - (NSArray *)mode; @end
@implementation NSArray (Mode) - (NSArray *)mode {
NSCountedSet *seen = [NSCountedSet setWithArray:self]; int max = 0; NSMutableArray *maxElems = [NSMutableArray array]; NSEnumerator *enm = [seen objectEnumerator]; id obj; while( (obj = [enm nextObject]) ) { int count = [seen countForObject:obj]; if (count > max) { max = count; [maxElems removeAllObjects]; [maxElems addObject:obj]; } else if (count == max) { [maxElems addObject:obj]; } } return maxElems;
} @end</lang>
Octave
Of course Octave has the mode function; but it returns only the "lowest" mode if multiple modes are available.
<lang octave>function m = mode2(v)
sv = sort(v); % build two vectors, vals and c, so that % c(i) holds how many times vals(i) appears i = 1; c = []; vals = []; while (i <= numel(v) ) tc = sum(sv==sv(i)); % it would be faster to count % them "by hand", since sv is sorted... c = [c, tc]; vals = [vals, sv(i)]; i += tc; endwhile % stack vals and c building a 2-rows matrix x x = cat(1,vals,c); % sort the second row (frequencies) into t (most frequent % first) and take the "original indices" i ... [t, i] = sort(x(2,:), "descend"); % ... so that we can use them to sort columns according % to frequencies nv = x(1,i); % at last, collect into m (the result) all the values % having the same bigger frequency r = t(1); i = 1; m = []; while ( t(i) == r ) m = [m, nv(i)]; i++; endwhile
endfunction</lang>
<lang octave>a = [1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17]; mode2(a) mode(a)
a = [1, 1, 2, 4, 4]; mode2(a) % returns 1 and 4 mode(a) % returns 1 only</lang>
Perl
<lang perl>use strict; use List::Util qw(max);
sub mode {
my %c; foreach my $e ( @_ ) {
$c{$e}++;
} my $best = max(values %c); return grep { $c{$_} == $best } keys %c;
}</lang>
<lang perl>print "$_ " foreach mode(1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17); print "\n"; print "$_ " foreach mode(1, 1, 2, 4, 4); print "\n";</lang>
Python
<lang python>>>> from collections import defaultdict >>> def modes(values): count = defaultdict(int) for v in values: count[v] +=1 best = max(count.itervalues()) return [k for k,v in count.iteritems() if v == best]
>>> modes([1,3,6,6,6,6,7,7,12,12,17]) [6] >>> modes((1,1,2,4,4)) [1, 4]</lang>
Ruby
Here's two methods, the first more Ruby-ish, the second perhaps a bit more efficient. <lang ruby>def mode(ary)
seen = Hash.new(0) ary.each {|value| seen[value] += 1} max = seen.values.max seen.find_all {|key,value| value == max}.map {|key,value| key}
end
def mode_one_pass(ary)
seen = Hash.new(0) max = 0 max_elems = [] ary.each do |value| seen[value] += 1 if seen[value] > max max = seen[value] max_elems = [value] elsif seen[value] == max max_elems << value end end max_elems
end
p mode([1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17]) # => [6] p mode([1, 1, 2, 4, 4]) # => [1, 4] p mode_one_pass([1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17]) # => [6] p mode_one_pass([1, 1, 2, 4, 4]) # => [1, 4]</lang>
Smalltalk
<lang smalltalk>OrderedCollection extend [
mode [ |s| s := self asBag sortedByCount. ^ (s select: [ :k | ((s at: 1) key) = (k key) ]) collect: [:k| k value] ]
].
- ( 1 3 6 6 6 6 7 7 12 12 17 ) asOrderedCollection
mode displayNl.
- ( 1 1 2 4 4) asOrderedCollection
mode displayNl.</lang>
Tcl
<lang tcl># Can find the modal value of any vector of values proc mode {n args} {
foreach n [list $n {*}$args] { dict incr counter $n } set counts [lsort -stride 2 -index 1 -decreasing $counter] set best {} foreach {n count} $counts { if {[lindex $counts 1] == $count} { lappend best $n } else break } return $best
}
- Testing
puts [mode 1 3 6 6 6 6 7 7 12 12 17]; # --> 6 puts [mode 1 1 2 4 4]; # --> 1 4</lang>