Summarize and say sequence: Difference between revisions

Content added Content deleted

Inline

Revision as of 02:43, 24 August 2011

There are several ways to generate a self-referential sequence. One very common one (the Look-and-say sequence) is to start with a positive integer, then generate the next term by concatenating enumerated groups of adjacent alike digits:

0, 10, 1110, 3110, 132110, 13122110, 111311222110 ...

The terms generated grow in length geometrically and never converge.

Another way to generate a self-referential sequence is to summarize the previous term.

Count how many times each digit occurs in the term, then concatenate the count and the digit for each distinct digit, taken in sorted order. Note that the first five terms are the same as for the previous sequence.

0, 10, 1110, 3110, 132110, 13123110, 23124110 ... see The On-Line Encyclopedia of Integer Sequences

Sort the digits largest to smallest. Do not include counts of digits that do not appear in the previous term.

Depending on the seed value, series generated this way always either converge to a stable value or to a short cyclical pattern. (For our purposes, I'll use converge to mean an element matches a previously seen element.) The sequence shown, with a seed value of 0, converges to a stable value of 1433223110 after 11 iterations. The seed value that converges most quickly is 22. It goes stable after the first element. (The next element is 22, which has been seen before.)

Task:

Find all the positive integer seed values under 1000000, for the above convergent self-referential sequence, that take the largest number of iterations before converging. Then print out the number of iterations and the sequence they return. Note that different permutations of the digits of the seed will yield the same sequence. For this task, assume leading zeros are not permitted.

Seed Value(s): 9009 9090 9900

Iterations: 21 

Sequence: (same for all three seeds except for first element)
9009
2920
192210
19222110
19323110
1923123110
1923224110
191413323110
191433125110
19151423125110
19251413226110
1916151413325110
1916251423127110
191716151413326110
191726151423128110
19181716151413327110
19182716151423129110
29181716151413328110
19281716151423228110
19281716151413427110
19182716152413228110

C

This example is incorrect. Please fix the code and remove this message.

Details: Logic errors somewhere

<lang c>#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXN 1000000

/* Node of a 10-way trie keyed by decimal digit: p[d] is the child reached by
 * digit d (missing children are created on demand by find_rec), and depth is
 * a per-node integer that starts at 0 because nodes are obtained from calloc.
 * rec_root is the global root of the trie. */
typedef struct rec_t rec_t, *rec; struct rec_t { int depth; struct rec_t * p[10]; } *rec_root;

/* Follow the digit string s down the trie starting at root, allocating any
 * child node that does not exist yet, and return the node at which s ends.
 * An empty string yields root itself. */
rec find_rec(char *s, rec root)
{
	rec node = root;

	for (; *s; s++) {
		int digit = *s - '0';

		if (node->p[digit] == NULL)
			node->p[digit] = calloc(1, sizeof(rec_t));
		node = node->p[digit];
	}
	return node;
}

/* Release root and, recursively, every node reachable through its ten
 * child pointers.  A NULL root is accepted and ignored. */
void free_rec(rec root)
{
	rec *child, *end;

	if (root != NULL) {
		end = root->p + 10;
		for (child = root->p; child != end; child++)
			free_rec(*child);
		free(root);
	}
}

/* Overwrite the digit string s, in place, with its summary: for each digit
 * from 9 down to 0 that occurs in s, the occurrence count followed by the
 * digit.  All counting is finished before the first byte is written, so
 * reusing the buffer is safe; the caller must provide room for the result,
 * which can be longer than the input. */
void next_num(char *s)
{
	int tally[10] = {0};
	const char *in;
	char *out = s;
	int digit;

	for (in = s; *in != '\0'; in++)
		tally[*in - '0'] += 1;

	digit = 10;
	while (digit-- > 0) {
		if (tally[digit] != 0)
			out += sprintf(out, "%d%c", tally[digit], '0' + digit);
	}
}

/* Return the number of distinct terms in the summarize-and-say sequence that
 * starts with the digit string s, i.e. how many terms are produced before a
 * term exactly equal to an earlier one appears ("22" -> 1, "0" -> 11,
 * "9009" -> 21).
 *
 * The earlier version cached results in a trie keyed on the *sorted* digits
 * of each term.  That treats a term that is merely a permutation of an
 * earlier term as a repeat, and it leaves wrong depths cached on the members
 * of a cycle, so the lengths it reported were unreliable.  Sequences are
 * short, so this version simply simulates the sequence and compares whole
 * terms.
 *
 * s is not modified.  Returns 0 if s contains a non-digit character.  An
 * empty string is its own summary and yields 1.  If MAX_TERMS terms are
 * generated without a repeat (not expected to happen), the number of terms
 * examined so far is returned. */
int get_len(char *s)
{
	/* A summary has at most 10 groups of "<count><digit>"; an int count is
	 * at most 10 characters, so 110 characters plus NUL always fit. */
	enum { MAX_TERMS = 64, TERM_SZ = 128 };
	char seen[MAX_TERMS][TERM_SZ];	/* terms after the seed, in order */
	const char *cur = s, *p;
	char *out;
	int n_seen = 0, cnt[10], d, i;

	while (n_seen < MAX_TERMS) {
		/* summarize cur into the next free slot */
		memset(cnt, 0, sizeof cnt);
		for (p = cur; *p; p++) {
			if (*p < '0' || *p > '9') return 0;
			cnt[*p - '0']++;
		}
		out = seen[n_seen];
		*out = '\0';
		for (d = 9; d >= 0; d--)
			if (cnt[d]) out += sprintf(out, "%d%c", cnt[d], d + '0');

		/* seed plus n_seen stored terms are distinct so far; a match
		 * with any of them ends the sequence */
		if (!strcmp(seen[n_seen], s)) return n_seen + 1;
		for (i = 0; i < n_seen; i++)
			if (!strcmp(seen[n_seen], seen[i])) return n_seen + 1;

		cur = seen[n_seen++];
	}
	return n_seen + 1;
}

int main() { rec_root = calloc(1, sizeof(rec_t));

int longest[100], n_longest = 0; int i, l, ml = 0; char buf[32];

for (i = 0; i < MAXN; i++) { sprintf(buf, "%d", i); if ((l = get_len(buf)) < ml) continue; if (l > ml) { n_longest = 0; ml = l; } longest[n_longest++] = i; }

printf("seq leng: %d\n", ml); for (i = 0; i < n_longest; i++) { sprintf(buf, "%d", longest[i]); /* print len+1 so we know repeating starts from when */ for (l = 0; l <= ml || !puts(""); next_num(buf), l++) printf("%s\n", buf); }

// free_rec(rec_root); return 0; }</lang>

Icon and Unicon

<lang Icon>link printf

procedure main()
   # For every longest sequence found, print its terms numbered from 1.
   every L := !longestselfrefseq(1000000) do
      every printf(" %i : %i\n",i := 1 to *L,L[i])
end

procedure longestselfrefseq(N)     #: find longest sequences from 1 to N
   # Returns a list holding every sequence (itself a list of terms) whose
   # length equals the maximum seen for the seeds 1 to N.
   mlen := 0
   every L := selfrefseq(n := 1 to N) do {
      if mlen <:= *L then          # strictly longer: new maximum, restart the list
         ML := [L]
      else if mlen = *L then       # ties the maximum: keep it as well
         put(ML,L)
      }
   return ML
end

procedure selfrefseq(n)      #: return list of sequence oeis:A036058 for seed n
   # S records the terms already seen; L collects them in order of appearance.
   S := set()
   L := []
   every p := seq(1) do
      if member(S,n) then return L   # ends at a repeat
      else {
         insert(S,n)
         put(L,n)
         n := nextselfrefseq(n)
         }
end

procedure nextselfrefseq(n)  #: return next element of sequence oeis:A036058
   # The term is rebuilt as count||digit for each digit 9 down to 0 that occurs.
   every (Counts := table(0))[integer(!n)] +:= 1                  # count digits
   every (n := "") ||:= (0 < Counts[i := 9 to 0 by -1]) || i      # assemble counts
   return integer(n)
end</lang>

Library: Icon Programming Library

printf.icn provides printf, sprintf, fprintf, etc.

Sample of Output:

 1 : 9009
 2 : 2920
 3 : 192210
 4 : 19222110
 5 : 19323110
 6 : 1923123110
 7 : 1923224110
 8 : 191413323110
 9 : 191433125110
 10 : 19151423125110
 11 : 19251413226110
 12 : 1916151413325110
 13 : 1916251423127110
 14 : 191716151413326110
 15 : 191726151423128110
 16 : 19181716151413327110
 17 : 19182716151423129110
 18 : 29181716151413328110
 19 : 19281716151423228110
 20 : 19281716151413427110
 21 : 19182716152413228110
 1 : 9090
 2 : 2920
 ... (manually removed, same as above)
 21 : 19182716152413228110
 1 : 9900
 2 : 2920
 ... (manually removed, same as above)
 21 : 19182716152413228110

J

Given: <lang j>require'stats'
NB. digits: number -> list of digits; its inverse joins the digits back into one number
digits=: 10&#.inv"0 :. ([: ".@; (<'x'),~":&.>)
NB. summar: summary successor of a number
summar=: (#/.~ ,@,. ~.)@\:~&.digits
NB. sequen: the complete non-repeating sequence of summary successors
sequen=: ~.@(, summar@{:)^:_
NB. values: unique seeds below 1e6 after sorting each one's digits in decreasing order
values=: ~. \:~&.digits i.1e6
NB. allvar: all digit permutations of a number that have the same number of digits
allvar=: [:(#~(=&<.&(10&^.) >./))@~.({~ perm@#)&.(digits"1)
</lang>

The values with the longest sequence are:

<lang j>   ;allvar&.> values #~ (= >./) #@sequen"0 values
9900 9090 9009

  # sequen 9900

21

  ,.sequen 9900
               9900
               2920
             192210
           19222110
           19323110
         1923123110
         1923224110
       191413323110
       191433125110
     19151423125110
     19251413226110
   1916151413325110
   1916251423127110
 191716151413326110
 191726151423128110
19181716151413327110
19182716151423129110
29181716151413328110
19281716151423228110
19281716151413427110
19182716152413228110</lang>

Notes:

digits is an invertible function that maps from a number to a sequence of digits and back where the inverse transform converts numbers to strings, concatenates them, and then back to a number.

<lang j>   digits 321
3 2 1
   digits inv 34 5
345</lang>

summar computes the summary successor.

<lang j>   summar 0 1 2
10 11 12</lang>

sequen computes the complete non-repeating sequence of summary successors

The computation for values could have been made much more efficient. Instead, though, all one million integers have their digits sorted in decreasing order, and then the unique set of them is found.

Finally, allvar finds all variations of a number which would have the same summary sequence based on the permutations of that number's digits.

Perl 6

<lang perl6>my @list;          # seeds (one per digit set) that give the longest sequence
my $longest = 0;   # longest sequence length found so far
my %seen;          # sorted-digit keys that have already been tested

# The task asks for seeds under 1000000, so the upper bound is excluded.
for 1 ..^ 1000000 -> $m {

   next unless $m ~~ /0/;         # seed must have a zero
   my $j = join '', $m.comb.sort;
   next if %seen.exists($j);      # already tested a permutation
   %seen{$j} = '';
   my @seq := converging($m);
   my %elems;
   my $count;
   # count terms up to, but not including, the first repeated one
   for @seq[] -> $value { last if ++%elems{$value} == 2; $count++; };
   if $longest == $count {
       @list.push($m);
       say "\b" x 20, "$count, $m"; # monitor progress
   }
   elsif $longest < $count {
       $longest = $count;
       @list = $m;
       say "\b" x 20, "$count, $m"; # monitor progress
   }

};

for @list -> $m {

   # every digit permutation of the seed that has no leading zero
   say "Seed Value(s): ", ~permutations($m).uniq.grep( { .substr(0,1) != 0 } );
   my @seq := converging($m);
   my %elems;
   my $count;
   for @seq[] -> $value { last if ++%elems{$value} == 2; $count++; };
   say "\nIterations: ", $count;
   say "\nSequence: (Only one shown per permutation group.)";
  .say for @seq[^$count], "\n";

}

# Lazy, unbounded sequence starting at $seed in which each later term is the
# summary of the previous one: for each distinct digit, highest first, the
# number of occurrences followed by the digit.
sub converging ($seed) {
    return $seed, -> $l {
        join '', map { $_.value.elems~$_.key }, $l.comb.classify({$^b}).sort: {-$^c.key}
    } ... *
}

# Return every ordering of the characters of $string, each prefixed with
# $sofar.  $sofar accumulates the characters chosen so far during the
# recursion; callers normally omit it.  Repeated characters give repeated
# results, so callers de-duplicate if they need to.
sub permutations ($string, $sofar = '') {

   return $sofar unless $string.chars;
   my @perms;
   for ^$string.chars -> $idx {
       # remove the character at $idx and recurse on what is left
       my $this = $string.substr(0,$idx)~$string.substr($idx+1);
       my $char = substr($string, $idx,1);
       @perms.push( permutations( $this, join '', $sofar, $char ) ) ;
   }
   return @perms;

}</lang>

Output:

Seed Value(s): 9009 9090 9900

Iterations: 21

Sequence: (Only one shown per permutation group.)
9009
2920
192210
19222110
19323110
1923123110
1923224110
191413323110
191433125110
19151423125110
19251413226110
1916151413325110
1916251423127110
191716151413326110
191726151423128110
19181716151413327110
19182716151423129110
29181716151413328110
19281716151423228110
19281716151413427110
19182716152413228110

Python

The number generation function follows that of Look-and-say with a sort. Only the first of any set of numbers with the same digits has the length of its sequence calculated in function max_A036058_length, although no timings were taken to check if the optimisation was of value.

<lang python>from itertools import groupby, permutations

def A036058(number):
    """Return the summary of ``number`` as a string.

    The digits of ``str(number)`` are sorted from largest to smallest and each
    run of equal digits is replaced by its length followed by the digit, e.g.
    9009 -> '2920'.
    """
    return ''.join( str(len(list(g))) + k
                    for k,g in groupby(sorted(str(number), reverse=True)) )

def A036058_length(numberstring='0', printit=False):
    """Return the iteration count at which the sequence from ``numberstring`` repeats.

    ``numberstring`` is any sequence of digit characters (a str or a tuple of
    single characters).  Each term is reduced to its digits sorted from largest
    to smallest and compared with the sorted forms of the previous three terms;
    the loop stops at the first match.  With ``printit`` true, every term is
    printed together with its 1-based position.

    NOTE(review): for a seed that is its own summary (e.g. '22') this returns
    2, although the sequence has only one distinct term.
    """
    iterations, last_three, queue_index = 1, ([None] * 3), 0

    def A036058(number):
        # rely on external reverse-sort of digits of number
        return ''.join( str(len(list(g))) + k
                        for k,g in groupby(number) )

    while True:
        if printit:
            print("  %2i %s" % (iterations, numberstring))
        numberstring = ''.join(sorted(numberstring, reverse=True))
        if numberstring in last_three:
            break
        assert iterations < 1000000
        # last_three is a 3-slot ring buffer of the most recent sorted terms
        last_three[queue_index], numberstring = numberstring, A036058(numberstring)
        iterations += 1
        queue_index +=1
        queue_index %=3
    return iterations

def max_A036058_length( start_range=range(11) ):
    """Find the longest sequence length over the seeds in ``start_range``.

    Seeds sharing the same digits are evaluated once, using the first such
    seed encountered.  Returns ``(length, seeds)`` where ``seeds`` lists the
    first-encountered seed of every digit set that reaches ``length``; the
    result is ``(-1, [])`` when ``start_range`` is empty.
    """
    seen_digit_sets = set()
    best_size, best_starts = -1, []
    for candidate in start_range:
        key = tuple(sorted(str(candidate), reverse=True))
        if key in seen_digit_sets:
            continue
        seen_digit_sets.add(key)
        length = A036058_length(key)
        if length > best_size:
            best_size, best_starts = length, [candidate]
        elif length == best_size:
            best_starts.append(candidate)
    return (best_size, best_starts)

lenmax, starts = max_A036058_length( range(1000000) )

Expand

allstarts = [] for n in starts:

   allstarts += [int(.join(x))
                 for x in set(k
                              for k in permutations(str(n), 4)
                              if k[0] != '0')]

allstarts = [x for x in sorted(allstarts) if x < 1000000]

print ( \ The longest length, followed by the number(s) with the longest sequence length for starting sequence numbers below 1000000 are:

 Iterations = %i and sequence-starts = %s. % (lenmax, allstarts)   )

print ( Note that only the first of any sequences with the same digits is printed below. (The others will differ only in their first term) )

for n in starts:

   print()
   A036058_length(str(n), printit=True)</lang>

Output

The longest length, followed by the number(s) with the longest sequence length
for starting sequence numbers below 1000000 are:
  Iterations = 21 and sequence-starts = [9009, 9090, 9900].

Note that only the first of any sequences with the same digits is printed below.
(The others will differ only in their first term)

   1 9009
   2 2920
   3 192210
   4 19222110
   5 19323110
   6 1923123110
   7 1923224110
   8 191413323110
   9 191433125110
  10 19151423125110
  11 19251413226110
  12 1916151413325110
  13 1916251423127110
  14 191716151413326110
  15 191726151423128110
  16 19181716151413327110
  17 19182716151423129110
  18 29181716151413328110
  19 19281716151423228110
  20 19281716151413427110
  21 19182716152413228110

Tcl

<lang tcl># Return the summary of the digit string n: for each digit from 9 down to 0
# that occurs in n, the number of occurrences followed by the digit.
proc nextterm n {
    set counts [dict create]
    foreach digit [split $n ""] {
        dict incr counts $digit
    }
    foreach digit {9 8 7 6 5 4 3 2 1 0} {
        if {[dict exists $counts $digit]} {
            append result [dict get $counts $digit] $digit
        }
    }
    return $result
}

# Local context of lambda term is just for speed
apply {limit {
    #  Build a digit cache; this adds quite a bit of speed
    set done [lrepeat [set l2 [expr {$limit * 100}]] 0]
    # Iterate over search space
    set maxlen 0
    set maxes {}
    for {set i 0} {$i < $limit} {incr i} {
        if {[lindex $done $i]} continue
        # Compute the sequence length for this value (with help from cache)
        set seq {}
        for {set seed $i} {$seed ni $seq} {set seed [nextterm $seed]} {
            if {$seed < $l2 && [lindex $done $seed]} {
                set len [expr {[llength $seq] + [lindex $done $seed]}]
                break
            }
            set len [llength [lappend seq $seed]]
        }
        # What are we going to do about it?
        if {$len > $maxlen} {
            set maxlen $len
            set maxes [list $i]
        } elseif {$len == $maxlen} {
            lappend maxes $i
        }
        # Update the cache with what we have learned
        foreach n $seq {
            if {$n < $l2} {lset done $n $len}
            incr len -1
        }
    }
    # Output code
    puts "max length: $maxlen"
    foreach c $maxes {puts $c}
    puts "Sample max-len sequence:"
    set seq {}
    # Rerun the sequence generator for printing; faster for large limits
    for {set seed [lindex $c 0]} {$seed ni $seq} {set seed [nextterm $seed]} {
        lappend seq $seed
        puts "\t$seed"
    }
}} 1000000</lang>

Output:

max length: 21
9009
9090
9900
Sample max-len sequence:
	9900
	2920
	192210
	19222110
	19323110
	1923123110
	1923224110
	191413323110
	191433125110
	19151423125110
	19251413226110
	1916151413325110
	1916251423127110
	191716151413326110
	191726151423128110
	19181716151413327110
	19182716151423129110
	29181716151413328110
	19281716151423228110
	19281716151413427110
	19182716152413228110

@@ Line 49: / Line 49: @@
 See also: [[Self-describing numbers]] and [[Look-and-say sequence]]
 =={{header|C}}==
+{{incorrect|C|Logic errors somewhere}}
 <lang c>#include <stdio.h>
 #include <stdlib.h>