Averages/Mode

From Rosetta Code
Revision as of 02:47, 15 June 2009 by rosettacode>Spoon! (added objective-c)
Task
Averages/Mode
You are encouraged to solve this task according to the task description, using any language you may know.

Write a program to find the mode value of a vector of integers. The case where the vector is empty may be ignored. Care must be taken to handle the case where the mode is non-unique.

See also: Mean, Median

C

<lang c>#include <stdio.h>

  1. include <stdlib.h>
  2. include <stdbool.h>
  1. define INC_VAL 50

// to hold the results typedef struct stack {

 double *data;
 size_t n;
 size_t cap;

} stack_t;

// to hold values and their frequencies typedef struct fav_el {

 double v;
 size_t f;

} fav_el_t;

typedef struct freqs_and_vals_table {

 fav_el_t *vf;
 size_t n;
 size_t cap;

} fav_t;</lang>

<lang c>fav_t *alloc_table() {

 fav_t *r = malloc(sizeof(fav_t));
 r->n = 0;
 r->cap = 1;
 r->vf = malloc(sizeof(fav_el_t)*INC_VAL);
 return r;

}

void free_table(fav_t *t) {

 free(t->vf);
 free(t);

}

// compare functions for bsearch and qsort

  1. define CMP_HOOK(F) ((int (*)(const void *, const void *))F)

int t_cmp(double *k, double *v) {

 if ( *k < *v )
   return -1;
 else if ( *k > *v )
   return 1;
 else
   return 0;

}

int s_cmp(fav_el_t *a, fav_el_t *b) {

 return t_cmp(&a->v, &b->v);

}

int f_cmp(fav_el_t *a, fav_el_t *b) {

 if ( a->f < b->f )
   return 1;
 else if ( a->f > b->f )
   return -1;
 else
   return 0;

}

size_t table_search(fav_t *t, double v) {

 fav_el_t *el = bsearch(&v, t->vf, t->n, sizeof(fav_el_t), CMP_HOOK(s_cmp));
 return (el==NULL) ? -1 : (el - t->vf);

}

void add_value(fav_t *t, double v) {

 int i;
 if ( (i=table_search(t, v)) < 0 ) {
   if ( (t->n + 1) > (t->cap * INC_VAL) ) {
     t->cap++;
     t->vf = realloc(t->vf, t->cap * INC_VAL * sizeof(fav_el_t));
   }
   t->vf[t->n].v = v;
   t->vf[t->n].f = 1;
   t->n++;
 } else {
   t->vf[i].f++;
 }

}

void collect_from_array(fav_t *t, double *v, size_t n) {

 size_t i;
 qsort(v, n, sizeof(double), CMP_HOOK(t_cmp));
 for(i=0; i < n; i++) {
   add_value(t, v[i]);
 }

}

stack_t *init_stack() {

 stack_t *r = malloc(sizeof(stack_t));
 r->n = 0;
 r->cap = 1;
 r->data = malloc(sizeof(double)*INC_VAL);
 return r;

}

void free_stack(stack_t *s) {

 free(s->data);
 free(s);

}

void push_value(stack_t *s, double v) {

 if ( (s->n + 1) > (s->cap * INC_VAL) ) {
   s->cap++;
   s->data = realloc(s->data, s->cap * INC_VAL * sizeof(double));
 }
 s->data[s->n] = v;
 s->n++;

}</lang>

All the codes above are utilities to keep this main function simple. First, we build a table of value-frequency, for each different value, using the utility function collect_from_array. Then we sort the value-frequency pair according to the frequency (descending, cfr. f_cmp), and push on a "stack" all the values with the biggest same frequency.

<lang c>stack_t *median(double *v, size_t n) {

 size_t i, f;
 stack_t *r;
 fav_t *t1 = alloc_table();
 collect_from_array(t1, v, n);
 // now let's sort according to freqs
 qsort(t1->vf, t1->n, sizeof(fav_el_t), CMP_HOOK(f_cmp));
 r = init_stack();
 f = t1->vf[0].f;
 push_value(r, t1->vf[0].v);
 for(i=1; (i < t1->n) && (t1->vf[i].f == f); i++)
   push_value(r, t1->vf[i].v);
 free_table(t1);
 return r;

}</lang>

<lang c>double v1[] = { 1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17 }; double v2[] = { 1, 1, 2, 4, 4 };

  1. define PRINT_MEDIAN(V) do { size_t i; \
   stack_t *r = median(V, sizeof(V)/sizeof(double));	\
   for(i=0; i < r->n; i++) printf("%lf ", r->data[i]); \
   free_stack(r); printf("\n");			\
 } while(0);
 

int main() {

 PRINT_MEDIAN(v1)
 PRINT_MEDIAN(v2);
 return EXIT_SUCCESS;

}</lang>

Java

<lang java>import java.util.*;

public class Mode {

   public static <T> List<T> mode(List<? extends T> coll) {
       Map<T, Integer> seen = new HashMap<T, Integer>();
       int max = 0;
       List<T> maxElems = new ArrayList<T>();
       for (T value : coll) {
           if (seen.containsKey(value))
               seen.put(value, seen.get(value) + 1);
           else
               seen.put(value, 1);
           if (seen.get(value) > max) {
               max = seen.get(value);
               maxElems.clear();
               maxElems.add(value);
           } else if (seen.get(value) == max) {
               maxElems.add(value);
           }
       }
       return maxElems;
   }
   public static void main(String[] args) {
       System.out.println(mode(Arrays.asList(1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17))); // prints [6]
       System.out.println(mode(Arrays.asList(1, 1, 2, 4, 4))); // prints [1, 4]
   }

}</lang>

Objective-C

<lang objc>#import <Foundation/Foundation.h>

@interface NSArray (Mode) - (NSArray *)mode; @end

@implementation NSArray (Mode) - (NSArray *)mode {

   NSCountedSet *seen = [NSCountedSet setWithArray:self];
   int max = 0;
   NSMutableArray *maxElems = [NSMutableArray array];
   NSEnumerator *enm = [seen objectEnumerator];
   id obj;
   while( (obj = [enm nextObject]) ) {
       int count = [seen countForObject:obj];
       if (count > max) {
           max = count;
           [maxElems removeAllObjects];
           [maxElems addObject:obj];
       } else if (count == max) {
           [maxElems addObject:obj];
       }
   }
   return maxElems;

} @end</lang>

Octave

Of course Octave has the mode function; but it returns only the "lowest" mode if multiple modes are available.

<lang octave>function m = mode2(v)

 sv = sort(v);
 % build two vectors, vals and c, so that
 % c(i) holds how many times vals(i) appears
 i = 1; c = []; vals = [];
 while (i <= numel(v) )
   tc = sum(sv==sv(i)); % it would be faster to count
                        % them "by hand", since sv is sorted...
   c = [c, tc];
   vals = [vals, sv(i)];
   i += tc;
 endwhile
 % stack vals and c building a 2-rows matrix x
 x = cat(1,vals,c);
 % sort the second row (frequencies) into t (most frequent
 % first) and take the "original indices" i ... 
 [t, i] = sort(x(2,:), "descend");
 % ... so that we can use them to sort columns according
 % to frequencies
 nv = x(1,i);
 % at last, collect into m (the result) all the values
 % having the same bigger frequency
 r = t(1); i = 1;
 m = [];
 while ( t(i) == r )
   m = [m, nv(i)];
   i++;
 endwhile

endfunction</lang>

<lang octave>a = [1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17]; mode2(a) mode(a)

a = [1, 1, 2, 4, 4]; mode2(a)  % returns 1 and 4 mode(a)  % returns 1 only</lang>

Perl

<lang perl>use strict;

sub mode {

   my %c = ();
   foreach my $e ( @_ ) {

$c{$e} = 0 if !exists($c{$e}); $c{$e}++;

   }
   my @r = ();
   my @s = sort { $c{$b} <=> $c{$a} } keys %c;
   foreach my $e ( @s ) {

last if $c{$s[0]} != $c{$e}; push @r, $e

   }
   return @r;

}</lang>

<lang perl>print "$_ " foreach mode(1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17); print "\n"; print "$_ " foreach mode(1, 1, 2, 4, 4); print "\n";</lang>

Python

<lang python>>>> from collections import defaultdict >>> def modes(values): count = defaultdict(int) for v in values: count[v] +=1 best = max(count.itervalues()) return [k for k,v in count.iteritems() if v == best]

>>> modes([1,3,6,6,6,6,7,7,12,12,17]) [6] >>> modes((1,1,2,4,4)) [1, 4]</lang>

Ruby

Here's two methods, the first more Ruby-ish, the second perhaps a bit more efficient. <lang ruby>def mode(ary)

 seen = Hash.new(0)
 ary.each {|value| seen[value] += 1}
 max = seen.values.max
 seen.find_all {|key,value| value == max}.map {|key,value| key}

end

def mode_one_pass(ary)

 seen = Hash.new(0)
 max = 0
 max_elems = []
 ary.each do |value|
   seen[value] += 1
   if seen[value] > max
     max = seen[value]
     max_elems = [value]
   elsif seen[value] == max
     max_elems << value
   end
 end
 max_elems

end

p mode([1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17]) # => [6] p mode([1, 1, 2, 4, 4]) # => [1, 4] p mode_one_pass([1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17]) # => [6] p mode_one_pass([1, 1, 2, 4, 4]) # => [1, 4]</lang>

Smalltalk

Works with: GNU Smalltalk

<lang smalltalk>OrderedCollection extend [

 mode [ |s|
    s := self asBag sortedByCount.
    ^ (s select: [ :k | ((s at: 1) key) = (k key) ]) collect: [:k| k value]
 ]

].

  1. ( 1 3 6 6 6 6 7 7 12 12 17 ) asOrderedCollection
   mode displayNl.
  1. ( 1 1 2 4 4) asOrderedCollection
   mode displayNl.</lang>

Tcl

Works with: Tcl version 8.6

<lang tcl># Can find the modal value of any vector of values proc mode {n args} {

   foreach n [list $n {*}$args] {
       dict incr counter $n
   }
   set counts [lsort -stride 2 -index 1 -decreasing $counter]
   set best {}
   foreach {n count} $counts {
       if {[lindex $counts 1] == $count} {
           lappend best $n
       } else break
   }
   return $best

}

  1. Testing

puts [mode 1 3 6 6 6 6 7 7 12 12 17]; # --> 6 puts [mode 1 1 2 4 4]; # --> 1 4</lang>