Revision as of 10:56, 5 June 2015 (view source) rosettacode>Paulo Jorente No edit summary ← Older edit		Revision as of 00:14, 26 July 2015 (view source) rosettacode>Dchapes (→‎{{header\|Go}}: Add Go) Newer edit →
Line 10: * Make the board size variable. The player should decide it * It would be cool to have a trainer, so you don’t need to keep playing until the computer learns it =={{header\|Go}}== This implements '''HER''', '''H'''exapawn '''E'''ducable '''R'''obot, as specified in the paper rather than more efficiently (e.g. no attempt is made to see if a possible move can immediately win). However, unlike the paper this does ''not'' handle mirror games as the same. I.e. this implementation will need to learn how to respond to white's openning "3 6" move independently from the mirror white opening move of "1 4". <lang go>package main import ( "bytes" "errors" "flag" "fmt" "io" "io/ioutil" "log" "math/rand" "os" "time" ) // In the following `her` is H.E.R. or Hexapawn Educable Robot const ( Rows = 3 Cols = 3 ) var vlog log.Logger func main() { verbose := flag.Bool("v", false, "verbose") flag.Parse() if flag.NArg() != 0 { flag.Usage() os.Exit(2) } logOutput := ioutil.Discard if verbose { logOutput = os.Stderr } vlog = log.New(logOutput, "hexapawn: ", 0) rand.Seed(time.Now().UnixNano()) wins := make(map[spot]int, 2) for { h := New() var s herGameState for c := false; h[stateIdx] == empty; c = !c { if c { h = s.Move(h) } else { h = h.HumanMove() } } fmt.Printf("Board:\n%v is a win for %v\n", h, h[stateIdx]) s.Result(h[stateIdx]) wins[h[stateIdx]]++ fmt.Printf("Wins: Black=%d, White=%d\n", wins[black], wins[white]) fmt.Println() } } func (h Hexapawn) HumanMove() Hexapawn { fmt.Print("Board:\n", h, "\n") var from, to int for { fmt.Print("Your move: ") _, err := fmt.Scanln(&from, &to) if err != nil { fmt.Println(err) if err == io.EOF { os.Exit(0) // ick, exiting from here } continue } if err := h.doMove(white, from-1, to-1); err != nil { fmt.Println(err) continue } return h } } var herNextMove = make(map[Hexapawn][]move) type herGameState struct { // Last "unknown" move was herNextMove[h][i] h Hexapawn i int } func (s herGameState) Move(h Hexapawn) Hexapawn { known := false moves := herNextMove[h] if moves == nil { // Lazy init moves = possibleMoves(black, h) herNextMove[h] = moves } else if len(moves) == 0 { // From here all possibilities can lose vlog.Println("no good moves left to black, picking a random looser") known = true moves = possibleMoves(black, h) } vlog.Println("considering", moves) i := rand.Intn(len(moves)) if !known { s.h = h s.i = i } fmt.Println("Computer moves", moves[i]) if err := h.doMove(black, moves[i].from, moves[i].to); err != nil { panic(err) } return h } func (s herGameState) Result(winner spot) { if winner == black { return // Do nothing } // Throw out the last "unknown" move H.E.R. made moves := herNextMove[s.h] vlog.Printf("Training:\n%v will no longer do %v\n", s.h, moves[s.i]) herNextMove[s.h] = append(moves[:s.i], moves[s.i+1:]...) vlog.Println("will instead do one of:", herNextMove[s.h]) } type move struct{ from, to int } func (m move) String() string { return fmt.Sprintf("%d→%d", m.from+1, m.to+1) } var cachedMoves = []map[Hexapawn][]move{ black: make(map[Hexapawn][]move), white: make(map[Hexapawn][]move), } func possibleMoves(s spot, h Hexapawn) []move { m := cachedMoves[s][h] if m != nil { return m } //vlog.Printf("calculating possible moves for %v\n%v\n", s, h) // These are cached so no effort at optimization is made // (e.g. skipping from==to or continuing the outer loop when h[from]!=s) m = make([]move, 0) for from := 0; from < RowsCols; from++ { for to := 0; to < RowsCols; to++ { if err := h.checkMove(s, from, to); err == nil { m = append(m, move{from, to}) } } } cachedMoves[s][h] = m vlog.Printf("caclulated possible moves for %v\n%v as %v\n", s, h, m) return m } func (h Hexapawn) doMove(p spot, from, to int) error { if err := h.checkMove(p, from, to); err != nil { return err } h[from] = empty h[to] = p if (p == white && to/Rows == Rows-1) \|\| (p == black && to/Rows == 0) { h[stateIdx] = p } else if len(possibleMoves(p.Other(), h)) == 0 { h[stateIdx] = p } return nil } func (h Hexapawn) checkMove(p spot, from, to int) error { if h[from] != p { return fmt.Errorf("No %v located at spot %v", p, from+1) } if h[to] == p { return fmt.Errorf("%v already occupies spot %v", p, to+1) } Δr := from/Rows - to/Rows if (p == white && Δr != -1) \|\| (p == black && Δr != 1) { return errors.New("must move forward one row") } Δc := from%Rows - to%Rows capture := h[to] != empty if (capture \|\| Δc != 0) && (!capture \|\| (Δc != 1 && Δc != -1)) { return errors.New("ilegal move") } return nil } type Hexapawn [RowsCols + 1]spot func New() Hexapawn { // TODO for Rows,Cols != 3,3 return Hexapawn{ white, white, white, empty, empty, empty, black, black, black, } } func idx(r, c int) int { return rCols + c } // The game winner (or empty) is stored at this index const stateIdx = Rows Cols func (h Hexapawn) String() string { var b bytes.Buffer for r := Rows - 1; r >= 0; r-- { for c := 0; c < Cols; c++ { b.WriteByte(h[idx(r, c)].Byte()) } b.WriteByte('\n') } // b.String() contains an extra newline return string(b.Next(Rows*(Cols+1) - 1)) } type spot uint8 const ( empty spot = iota black white ) func (s spot) String() string { switch s { case black: return "Black" case white: return "White" } panic(s) } func (s spot) Byte() byte { switch s { case empty: return '.' case black: return 'B' case white: return 'W' } panic(s) } func (s spot) Other() spot { if s == black { return white } return black }</lang> {{out\|Sample Output}} <pre> Board: BBB ... WWW Your move: 2 5 Computer moves 9→5 Board: BB. .B. W.W Your move: 3 5 Computer moves 7→5 Board: .B. .B. W.. Your move: 1 5 Board: .B. .W. ... is a win for White Wins: Black=0, White=1 […] </pre> Since H.E.R. won't make the same mistake twice and quickly becomes unbeatable the "game" becomes how many total wins can you achieve in a single session. Note that you can get verbose output about possible moves, considered moves, and training by running with the <tt>-v</tt> command line flag.

Hexapawn: Difference between revisions

Hexapawn (view source)

Revision as of 00:14, 26 July 2015