Function frequency: Difference between revisions

From Rosetta Code
Content added Content deleted
(Made program actually print top ten functions/macros by marking each struct functionInfo when printing it.)
(I fixed a huge leak, removed a useless variable, and fixed the cause of some compiler warnings by using size_t.)
Line 142: Line 142:
else {
else {
char found = 0;
char found = 0;
for (int i = 0;i < *numElements;i++) {
for (unsigned int i = 0;i < *numElements;i++) {
if (!strcmp((*list)[i].name, toAdd.name)) {
if (!strcmp((*list)[i].name, toAdd.name)) {
found = 1;
found = 1;
Line 158: Line 158:
}
}
memcpy(newList, *list, (*allocatedSize)*sizeof(struct functionInfo));
memcpy(newList, *list, (*allocatedSize)*sizeof(struct functionInfo));
free(*list);
*allocatedSize += 10;
*allocatedSize += 10;
*list = newList;
*list = newList;
Line 176: Line 177:
char maxSet = 0;
char maxSet = 0;
size_t maxIndex = 0;
size_t maxIndex = 0;
for (int i = 0;i<10;i++) {
for (unsigned int i = 0;i<10;i++) {
maxSet = 0;
maxSet = 0;
for (int j = 0;j<numElements;j++) {
for (size_t j = 0;j<numElements;j++) {
if (!maxSet || (*list)[j].timesCalled > (*list)[maxIndex].timesCalled) {
if (!maxSet || (*list)[j].timesCalled > (*list)[maxIndex].timesCalled) {
if (!(*list)[j].marked) {
if (!(*list)[j].marked) {
Line 192: Line 193:
}
}
void freeList(struct functionInfo** list, size_t numElements) {
void freeList(struct functionInfo** list, size_t numElements) {
for (int i = 0;i<numElements;i++) {
for (size_t i = 0;i<numElements;i++) {
free((*list)[i].name);
free((*list)[i].name);
}
}
Line 238: Line 239:
abort();
abort();
}
}
char buf[1025];
buf[1024] = '\0';
char comment = 0;
char comment = 0;
#define DOUBLEQUOTE 1
#define DOUBLEQUOTE 1

Revision as of 05:37, 14 June 2012

Task
Function frequency
You are encouraged to solve this task according to the task description, using any language you may know.

Display - for a program or runtime environment (whatever suites the style of your language) - the top ten most frequently occurring functions (or also identifiers or tokens, if preferred).

This is a static analysis: The question is not how often each function is actually executed at runtime, but how often it is used by the programmer.

Besides its practical usefulness, the intent of this task is to show how to do self-inspection within the language.

ACL2

This will, unfortunately, also catch variable names after an open paren, such as in (let ...) expressions. <lang Lisp>(in-package "ACL2")

(set-state-ok t)

(defun read-all-objects (limit channel state)

  (mv-let (eof obj state)
          (read-object channel state)
     (if (or eof (zp limit))
         (mv nil state)
         (mv-let (so-far state)
                 (read-all-objects (- limit 1) channel state)
            (mv (cons obj so-far) state)))))

(defun list-starters (xs)

  (cond ((endp xs) nil)
        ((consp (first xs))
         (append (if (symbolp (first (first xs)))
                     (list (first (first xs)))
                     nil)
                 (list-starters (rest (first xs)))
                 (list-starters (rest xs))))
        (t (list-starters (rest xs)))))

(defun invoked-functions (filename state)

  (mv-let (channel state)
          (open-input-channel filename :object state)
     (mv-let (code state)
             (read-all-objects 1000 channel state)
        (mv (list-starters code) state))))

(defun increment-for (key alist)

  (cond ((endp alist) (list (cons key 1)))
        ((equal key (car (first alist)))
         (cons (cons key (1+ (cdr (first alist))))
               (rest alist)))
        (t (cons (first alist)
                 (increment-for key (rest alist))))))

(defun symbol-freq-table (symbols)

  (if (endp symbols)
      nil
      (increment-for (first symbols)
                     (symbol-freq-table (rest symbols)))))

(defun insert-freq-table (pair alist)

  (cond ((endp alist)
         (list pair))
        ((> (cdr pair) (cdr (first alist)))
         (cons pair alist))
        (t (cons (first alist)
                 (insert-freq-table pair (rest alist))))))

(defun isort-freq-table (alist)

  (if (endp alist)
      nil
      (insert-freq-table (first alist)
                         (isort-freq-table (rest alist)))))

(defun main (state)

  (mv-let (fns state)
          (invoked-functions "function-freq.lisp" state)
     (mv (take 10 (isort-freq-table
                   (symbol-freq-table fns))) state)))</lang>

Output (for itself):

(((FIRST . 10)
  (REST . 8)
  (DEFUN . 8)
  (CONS . 7)
  (MV-LET . 5)
  (LIST-STARTERS . 4)
  (IF . 4)
  (MV . 4)
  (COND . 3)
  (LIST . 3))
 <state>)

C

This program treats doesn't differentiate between macros and functions. It works by looking for function calls which are not inside strings or comments. If a function call has a C style comment between the opening brace and the name of the function, this program will not recognize it as a function call. <lang C>

  1. define _POSIX_SOURCE
  2. include <ctype.h>
  3. include <stdio.h>
  4. include <stdlib.h>
  5. include <errno.h>
  6. include <string.h>
  7. include <stddef.h>
  8. include <sys/mman.h>
  9. include <sys/types.h>
  10. include <sys/stat.h>
  11. include <unistd.h>

struct functionInfo {

  char* name;
  int timesCalled;
  char marked;

}; void addToList(struct functionInfo** list, struct functionInfo toAdd, \

              size_t* numElements, size_t* allocatedSize) {
  static char* keywords[32] = {"auto", "break", "case", "char", "const", \
                               "continue", "default", "do", "double", \
                               "else", "enum", "extern", "float", "for", \
                               "goto", "if", "int", "long", "register", \
                               "return", "short", "signed", "sizeof", \
                               "static", "struct", "switch", "typedef", \
                               "union", "unsigned", "void", "volatile", \
                               "while"};
  /* If the "function" being called is actually a keyword, then ignore it */
  for (int i = 0;i < 32;i++) {
     if (!strcmp(toAdd.name, keywords[i])) {
        return;
     }
  }
  if (!*list) {
     *allocatedSize = 10;
     *list = calloc(*allocatedSize, sizeof(struct functionInfo));
     if (!list) {
        printf("Failed to allocate %d elements of %d bytes each.\n", \
               *allocatedSize, sizeof(struct functionInfo));
        abort();
     }
     (*list)[0].name = malloc(strlen(toAdd.name)+1);
     if (!(*list)[0].name) {
        printf("Failed to allocate %d bytes.\n", strlen(toAdd.name)+1);
        abort();
     }
     (*list)[0].name[strlen(toAdd.name)] = '\0';
     memcpy((*list)[0].name, toAdd.name, strlen(toAdd.name));
     (*list)[0].timesCalled = 1;
     (*list)[0].marked = 0;
     *numElements = 1;
  }
  else {
     char found = 0;
     for (unsigned int i = 0;i < *numElements;i++) {
        if (!strcmp((*list)[i].name, toAdd.name)) {
           found = 1;
           (*list)[i].timesCalled++;
           break;
        }
     }
     if (!found) {
        struct functionInfo* newList = calloc((*allocatedSize)+10, \
                                              sizeof(struct functionInfo));
        if (!newList) {
           printf("Failed to allocate %d elements of %d bytes each.\n", \
                  (*allocatedSize)+1, sizeof(struct functionInfo));
           abort();
        }
        memcpy(newList, *list, (*allocatedSize)*sizeof(struct functionInfo));
        free(*list);
        *allocatedSize += 10;
        *list = newList;
        (*list)[*numElements].name = malloc(strlen(toAdd.name)+1);
        if (!(*list)[*numElements].name) {
           printf("Failed to allocate %d bytes.\n", strlen(toAdd.name)+1);
           abort();
        }
        (*list)[*numElements].name[strlen(toAdd.name)] = '\0';
        memcpy((*list)[*numElements].name, toAdd.name, strlen(toAdd.name));
        (*list)[*numElements].timesCalled = 1;
        (*list)[*numElements].marked = 0;
        (*numElements)++;
     }
  }

} void printList(struct functionInfo** list, size_t numElements) {

  char maxSet = 0;
  size_t maxIndex = 0;
  for (unsigned int i = 0;i<10;i++) {
     maxSet = 0;
     for (size_t j = 0;j<numElements;j++) {
        if (!maxSet || (*list)[j].timesCalled > (*list)[maxIndex].timesCalled) {
           if (!(*list)[j].marked) {
              maxSet = 1;
              maxIndex = j;
           }
        }
     }
     (*list)[maxIndex].marked = 1;
     printf("%s() called %d times.\n", (*list)[maxIndex].name, \
            (*list)[maxIndex].timesCalled);
  }

} void freeList(struct functionInfo** list, size_t numElements) {

  for (size_t i = 0;i<numElements;i++) {
     free((*list)[i].name);
  }
  free(*list);

} char* extractFunctionName(char* readHead) {

  char* identifier = readHead;
  if (isalpha(*identifier) || *identifier == '_') {
     while (isalnum(*identifier) || *identifier == '_') {
        identifier++;
     }
  }
  /* Search forward for spaces and then an open parenthesis
   * but do not include this in the function name.
   */
  char* toParen = identifier;
  if (toParen == readHead) return NULL;
  while (isspace(*toParen)) {
     toParen++;
  }
  if (*toParen != '(') return NULL;
  /* Copy the found function name to the output string */
  ptrdiff_t size = (ptrdiff_t)((ptrdiff_t)identifier) \
                   - ((ptrdiff_t)readHead)+1;
  char* const name = malloc(size);
  if (!name) {
     printf("Failed to allocate %d bytes.\n", size);
     abort();
  }
  name[size-1] = '\0';
  memcpy(name, readHead, size-1);
  /* Function names can't be blank */
  if (strcmp(name, "")) {
     return name;
  }
  return NULL;

} int main(int argc, char** argv) {

  for (int i = 1;i<argc;i++) {
     errno = 0;
     FILE* file = fopen(argv[i], "r");
     if (errno || !file) {
        printf("fopen() failed with error code \"%s\"\n", \
               strerror(errno));
        abort();
     }
     char comment = 0;
     #define DOUBLEQUOTE 1
     #define SINGLEQUOTE 2
     int string = 0;
     struct functionInfo* functions = NULL;
     struct functionInfo toAdd;
     size_t numElements = 0;
     size_t allocatedSize = 0;
     struct stat metaData;
     errno = 0;
     if (fstat(fileno(file), &metaData) < 0) {
        printf("fstat() returned error \"%s\"\n", strerror(errno));
        abort();
     }
     char* mmappedSource = (char*)mmap(NULL, metaData.st_size, PROT_READ, \
                                       MAP_PRIVATE, fileno(file), 0);
     if (errno) {
        printf("mmap() failed with error \"%s\"\n", strerror(errno));
        abort();
     }
     if (!mmappedSource) {
        printf("mmap() returned NULL.\n");
        abort();
     }
     char* readHead = mmappedSource;
     while (readHead < mmappedSource + metaData.st_size) {
        while (*readHead) {
           /* Ignore comments inside strings */
           if (!string) {
              if (*readHead == '/' && !strncmp(readHead, "/*", 2)) {
                 comment = 1;
              }
              if (*readHead == '*' && !strncmp(readHead, "*/", 2)) {
                 comment = 0;
              }
           }
           /* Ignore strings inside comments */
           if (!comment) {
              if (*readHead == '"') {
                 if (!string) {
                    string = DOUBLEQUOTE;
                 }
                 else if (string == DOUBLEQUOTE) {
                    /* Only toggle string mode if the quote character
                     * is not escaped
                     */
                    if (strncmp((readHead-1), "\\\"", 2)) {
                       string = 0;
                    }
                 }
              }
              if (*readHead == '\) {
                 if (!string) {
                    string = SINGLEQUOTE;
                 }
                 else if (string == SINGLEQUOTE) {
                    if (strncmp((readHead-1), "\\\'", 2)) {
                       string = 0;
                    }
                 }
              }
           }
           /* Look for identifiers outside of any comment or string */
           if (!comment && !string) {
              char* name = extractFunctionName(readHead);
              /* Don't read part of an identifier on the next iteration */
              if (name) {
                 toAdd.name = name;
                 addToList(&functions, toAdd, &numElements, &allocatedSize);
                 readHead += strlen(name);
              }
              free(name);
           }
           readHead++;
        }
     }
     errno = 0;
     munmap(mmappedSource, metaData.st_size);
     if (errno) {
        printf("munmap() returned error \"%s\"\n", strerror(errno));
        abort();
     }
     errno = 0;
     fclose(file);
     if (errno) {
        printf("fclose() returned error \"%s\"\n", strerror(errno));
        abort();
     }
     printList(&functions, numElements);
     freeList(&functions, numElements);
  }
  return 0;

} </lang>

Go

Only crude approximation is currently easy in Go. The following parses source code, looks for function call syntax (an expression followed by an argument list) and prints the expression. <lang go>package main

import (

   "fmt"
   "go/ast"
   "go/parser"
   "go/token"
   "io/ioutil"
   "os"
   "sort"

)

func main() {

   if len(os.Args) != 2 {
       fmt.Println("usage ff <go source filename>")
       return
   }
   src, err := ioutil.ReadFile(os.Args[1])
   if err != nil {
       fmt.Println(err)
       return
   }
   fs := token.NewFileSet()
   a, err := parser.ParseFile(fs, os.Args[1], src, 0)
   if err != nil {
       fmt.Println(err)
       return
   }
   f := fs.File(a.Pos())
   m := make(map[string]int)
   ast.Inspect(a, func(n ast.Node) bool {
       if ce, ok := n.(*ast.CallExpr); ok {
           start := f.Offset(ce.Pos())
           end := f.Offset(ce.Lparen)
           m[string(src[start:end])]++
       }
       return true
   })
   cs := make(calls, 0, len(m))
   for k, v := range m {
       cs = append(cs, &call{k, v})
   }
   sort.Sort(cs)
   for i, c := range cs {
       fmt.Printf("%-20s %4d\n", c.expr, c.count)
       if i == 9 {
           break
       }
   }

}

type call struct {

   expr  string
   count int

} type calls []*call

func (c calls) Len() int { return len(c) } func (c calls) Swap(i, j int) { c[i], c[j] = c[j], c[i] } func (c calls) Less(i, j int) bool { return c[i].count > c[j].count }</lang> Output, when run on source code above:

len                     3
fmt.Println             3
f.Offset                2
make                    2
fmt.Printf              1
ioutil.ReadFile         1
a.Pos                   1
string                  1
token.NewFileSet        1
append                  1

PicoLisp

<lang PicoLisp>(let Freq NIL

  (for "L" (filter pair (extract getd (all)))
     (for "F"
        (filter atom
           (fish '((X) (or (circ? X) (getd X)))
              "L" ) )
        (accu 'Freq "F" 1) ) )
  (for X (head 10 (flip (by cdr sort Freq)))
     (tab (-7 4) (car X) (cdr X)) ) )</lang>

Output, for the system in debug mode plus the above code:

quote   310
car     236
cdr     181
setq    148
let     136
if      127
and     124
cons    110
cadr     80
or       76

If the condition in the 5th line (getd X) is replaced with (sym? X), then all symbols are counted, and the output is

X       566
quote   310
car     236
cdr     181
C       160
N       157
L       155
Lst     152
setq    148
T       144

And if it is replaced with (num? X), it is

1        71
0        38
2        27
3        17
7         9
-1        9
100       8
48        6
43        6
12        6

Tcl

<lang tcl>package require Tcl 8.6

proc examine {filename} {

   global cmds
   set RE "(?:^|\[\[\{\])\[\\w:.\]+"
   set f [open $filename]
   while {[gets $f line] >= 0} {

set line [string trim $line] if {$line eq "" || [string match "#*" $line]} { continue } foreach cmd [regexp -all -inline $RE $line] { incr cmds([string trim $cmd "\{\["]) }

   }
   close $f

}

  1. Parse each file on the command line

foreach filename $argv {

   examine $filename

}

  1. Get the command list in order of frequency

set cmdinfo [lsort -stride 2 -index 1 -integer -decreasing [array get cmds]]

  1. Print the top 10 (two list items per entry, so 0-19, not 0-9)

foreach {cmd count} [lrange $cmdinfo 0 19] {

   puts [format "%-20s%d" $cmd $count]

}</lang> Sample run (note that the commands found are all standard Tcl commands; they're just commands so it is natural to expect them to be found):

bash$ tclsh8.6 RosettaCode/cmdfreq.tcl RosettaCode/*.tcl 
set                 2374
expr                846
if                  775
puts                558
return              553
proc                549
incr                485
foreach             432
lindex              406
lappend             351