Wordle Solver

github.com/bakert/wordle

import random
import sys

MAX_GUESSES = 6
WORD_LENGTH = 5

class Game:
    """State of one Wordle game: the secret word plus the guesses made so far.

    The feedback helpers (``fixed_letter``, ``wild_letters``, ``bad_letters``)
    are computed by comparing every recorded guess against the secret word.
    """

    def __init__(self, word=None):
        """Start a game with no guesses.

        ``word`` is the secret answer. It is optional so callers may still
        assign ``game.word`` after construction, but the attribute now always
        exists instead of only appearing once somebody sets it.
        """
        self.word = word
        self.guesses = []

    def guess(self, word):
        """Record ``word`` as a guess and return True if it equals the answer."""
        print('Guessing ' + word)
        self.guesses.append(word)
        return word == self.word

    def incomplete(self):
        """Return True while fewer than MAX_GUESSES guesses have been made."""
        return len(self.guesses) < MAX_GUESSES

    def win(self):
        """Print the winning message, revealing the answer."""
        print('You won, the word was ' + self.word)

    def lose(self):
        """Print the losing message, revealing the answer."""
        print('You lost, the word was ' + self.word)

    def num_guesses(self):
        """Return how many guesses have been made so far."""
        return len(self.guesses)

    def fixed_letter(self, n):
        """Return the answer's letter at index ``n`` if some guess matched it there.

        Returns None when no guess so far has the right letter at index ``n``.
        """
        if any(self.word[n] == guess[n] for guess in self.guesses):
            return self.word[n]
        return None

    def wild_letters(self):
        """Return every guessed letter that occurs somewhere in the answer.

        Letters appear in guess order and are repeated once per occurrence in
        the guesses.
        """
        return [
            letter
            for guess in self.guesses
            for letter in guess
            if letter in self.word
        ]

    def bad_letters(self):
        """Return every guessed letter that does not occur in the answer.

        Letters appear in guess order and are repeated once per occurrence in
        the guesses.
        """
        return [
            letter
            for guess in self.guesses
            for letter in guess
            if letter not in self.word
        ]

def play():
    """Play one automated game against a randomly chosen secret word.

    Each turn the words still consistent with the feedback so far are
    collected. Normally the guess is a dictionary word that covers the most
    frequent letters among those candidates. On the final turn, or as soon as
    a single candidate remains, a candidate itself is guessed so a turn is
    never spent exploring when the answer is already determined.
    """
    game = Game()
    dictionary = get_dictionary()
    game.word = random.choice(dictionary)
    while game.incomplete():
        potential_answers = find_potential_answers(game, dictionary)
        last_guess = game.num_guesses() == MAX_GUESSES - 1
        if last_guess or len(potential_answers) == 1:
            # Only a real candidate can win; exploratory words are pointless
            # once there is no further turn or nothing left to narrow down.
            guess = random.choice(potential_answers)
        else:
            hist = histogram(potential_answers)
            best_words = find_best_words(hist, dictionary)
            guess = random.choice(best_words)
        if game.guess(guess):
            game.win()
            return
    game.lose()


def histogram(words):
    """Count how often each letter occurs across all of ``words``.

    Returns a dict mapping letter -> number of occurrences, with repeated
    letters inside a single word counted every time.
    """
    counts = {}
    for entry in words:
        for char in entry:
            if char in counts:
                counts[char] += 1
            else:
                counts[char] = 1
    return counts

def find_potential_answers(game, dictionary):
    """Return the dictionary words still consistent with the game's feedback.

    A word is kept when it matches every known fixed letter, contains every
    letter known to be in the answer, and contains no letter known to be
    absent. Dictionary order is preserved.
    """
    return [
        candidate
        for candidate in dictionary
        if has_fixed_letters(candidate, game)
        and has_wild_letters(candidate, game)
        and not has_bad_letter(candidate, game)
    ]

def has_fixed_letters(word, game):
    """Return True if ``word`` agrees with every position the game has pinned.

    Positions for which ``game.fixed_letter`` reports nothing are ignored.
    """
    for position, letter in enumerate(word):
        known = game.fixed_letter(position)
        if known and letter != known:
            return False
    return True

def has_wild_letters(word, game):
    """Return True if ``word`` contains every letter known to be in the answer.

    Note: such a letter also cannot sit in the position(s) where it was
    guessed incorrectly, but that refinement is deliberately ignored here.
    """
    return all(required in word for required in game.wild_letters())

def has_bad_letter(word, game):
    """Return True if ``word`` contains any letter known to be absent.

    ``game.bad_letters()`` is evaluated once and turned into a set, rather
    than being rebuilt for every letter of ``word``.
    """
    bad = set(game.bad_letters())
    return any(letter in bad for letter in word)

def find_best_words(hist, dictionary):
    """Return every dictionary word whose score equals the best score.

    Words are returned in dictionary order.
    """
    top_score = find_best_score(hist, dictionary)
    return [entry for entry in dictionary if score(hist, entry) == top_score]

def find_best_score(hist, dictionary):
    """Return the highest score of any dictionary word, never less than 0."""
    top = 0
    for entry in dictionary:
        entry_score = score(hist, entry)
        if entry_score > top:
            top = entry_score
    return top

def score(hist, word):
    """Sum the histogram counts of the distinct letters in ``word``.

    Each letter is counted once no matter how often it repeats in the word;
    letters missing from ``hist`` contribute 0.
    """
    # dict.fromkeys drops repeated letters while keeping first-seen order.
    distinct_letters = dict.fromkeys(word)
    return sum(hist.get(char, 0) for char in distinct_letters)

def get_dictionary(path='/usr/share/dict/words', length=None):
    """Load the candidate words from a one-word-per-line word list.

    Args:
        path: Word list to read; defaults to the system dictionary.
        length: Required word length; defaults to WORD_LENGTH.

    Returns:
        The words, in file order, that have exactly ``length`` characters and
        consist solely of lowercase ASCII letters. This skips proper nouns
        and entries such as "can't", which have the right length but can
        never be Wordle words.
    """
    if length is None:
        length = WORD_LENGTH
    words = []
    with open(path) as f:
        for line in f:
            word = line.strip()
            if (
                len(word) == length
                and word.isascii()
                and word.isalpha()
                and word == word.lower()
            ):
                words.append(word)
    return words

def test():
    """Run every self-check in order; a failing check raises AssertionError."""
    checks = (
        test_find_best_words,
        test_find_best_score,
        test_score,
        test_get_dictionary,
    )
    for check in checks:
        check()

def test_find_best_words():
    """'baa' is the only word holding both scored letters, so it alone is best."""
    letter_counts = dict(a=10, b=1)
    words = ['a', 'baa', 'count']
    best = find_best_words(letter_counts, words)
    assert best == ['baa']

def test_find_best_score():
    """The best score is that of 'baa': 10 for 'a' plus 1 for 'b'."""
    letter_counts = dict(a=10, b=1)
    words = ['a', 'baa', 'count']
    best = find_best_score(letter_counts, words)
    assert best == 11

def test_score():
    """Repeated letters count once and unknown letters count for nothing."""
    letter_counts = dict(a=10, b=1)
    expected_by_word = (('baa', 11), ('count', 0), ('a', 10))
    for word, expected in expected_by_word:
        assert score(letter_counts, word) == expected

def test_get_dictionary():
    """The system dictionary yields plenty of lowercase words of WORD_LENGTH."""
    d = get_dictionary()
    assert len(d) > 1000
    for w in d:
        # Compare against the shared constant rather than a literal 5 so the
        # check follows the length get_dictionary() actually filters on.
        assert len(w) == WORD_LENGTH
        assert w.lower() == w

# With no command-line arguments play a game; with any argument run the tests.
entry_point = test if len(sys.argv) >= 2 else play
entry_point()

Qwirkle

A few years ago I wrote a version of the tile game Qwirkle including some rudimentary AI – github.com/bakert/qwirkle. It’s surprising how many good scoring positions you miss in the normal run of play and how little it seems to matter if you give away a Qwirkle to your opponent. The code is open source/MIT licensed.

The Dobble Algorithm

I played a few games of Dobble with my niece and got curious about the algorithm used to generate the cards. It turns out it’s not trivial and one of the best ways to interrogate it is as a finite projective plane.

I got the most useful information from www.101computing.net/the-dobble-algorithm which has actual code although the clearest explanation was on www.petercollingridge.co.uk/blog/mathematics-toys-and-games/dobble

The maximum number of possible cards is n^2 + n + 1 where n is the number of symbols per card minus one.

| Number of Symbols on Card | Max Number of Cards |
|---|---|
| 2 | 3 |
| 4 | 13 |
| 5 | 21 |
| 8 | 57 |
| 12 | 133 |

The number of symbols on the card doesn’t have to be a prime number plus one but the algorithms found online require that.

I thought it was pretty interesting that 8 symbols per card allows 57 cards but the game itself comes with 55 cards. Presumably that is because, once you put one card in the center, 54 is divisible by both 2 and 3, giving an even number of cards per player in the common cases. I wonder which two cards are missing, but not enough to look through them all!

Emit logs in JSON format from fastapi/gunicorn, for DataDog

import logging

from pythonjsonlogger import jsonlogger

def init() -> None:
    """Run the logging setup steps: JSON output first, then the status field."""
    setup_steps = (log_in_json, map_levelname_to_status)
    for step in setup_steps:
        step()

# Force all loggers to talk JSON rather than text so DataDog can parse the output.
def log_in_json() -> None:
    """Replace the handlers of the uvicorn and root loggers with JSON ones.

    Each listed logger has all of its existing handlers removed, is set to
    DEBUG, and gets a single StreamHandler that formats records as JSON.
    """
    loggers = [
        logging.getLogger("uvicorn.access"),
        logging.getLogger("uvicorn.error"),
        logging.getLogger("uvicorn"),
        logging.getLogger(),
    ]
    for logger in loggers:
        # Iterate over a copy: removeHandler() mutates logger.handlers, and
        # removing from the live list while iterating it skips handlers.
        for handler in list(logger.handlers):
            logger.removeHandler(handler)
        # Use the public setter rather than assigning the attribute, so the
        # logging module can keep its internal level bookkeeping in sync.
        logger.setLevel(logging.DEBUG)
        log_handler = logging.StreamHandler()
        formatter = jsonlogger.JsonFormatter(
            "%(asctime)s %(levelname)s %(name)s %(message)s"
        )
        log_handler.setFormatter(formatter)
        logger.addHandler(log_handler)

# DataDog is expecting 'status' for log level but the python default is 'levelname'.
def map_levelname_to_status() -> None:
    """Install a log record factory that copies ``levelname`` into ``status``.

    The factory that was active beforehand is still used to build each
    record; the wrapper only adds the extra ``status`` attribute.
    """
    old_factory = logging.getLogRecordFactory()

    # The factory receives mixed argument types (name, numeric level, line
    # number, args tuple, ...), so the pass-through parameters are typed as
    # ``object`` rather than ``str``.
    def record_factory(*args: object, **kwargs: object) -> logging.LogRecord:
        record = old_factory(*args, **kwargs)
        record.status = record.levelname  # type: ignore
        return record

    logging.setLogRecordFactory(record_factory)

That One Blog Post That Gets All the Emails

In 2003 I spent half a day figuring out a bad Microsoft error. I hadn’t been able to Google the answer. And I guess this was before the days of Stack Overflow. So I wrote it up as a blog post – https://bluebones.net/2003/07/server-did-not-recognize-http-header-soapaction/

Because I’d been extremely frustrated by the error I included at the bottom, “If you’re having a similar problem but can’t work what I’m saying here, feel free to mail me on bakert+web@gmail.com – I wouldn’t wish my four hours on anyone!”

I never get email about any of my other blog posts. But Gmail tells me I’ve had at least 269 about this one. Including within the last two weeks! If you find this phenomenon interesting and wish to discuss it further, feel free to email me at bakert@gmail.com 😉

bakert’s 4th Law

The Backlog Never Gets Smaller

Previously:

  1. All Production Code is Shit
  2. It’s more important to have a standard than what the standard is
  3. TODOs don’t get TODOne

“Perfect” Libraries

Sometimes you use a third party library and the interface is so well designed it’s just effortless. Something that would have been gnarly and murky becomes simple. The kind of library that gets ported to multiple languages because everyone wants access to it.

One slightly obscure example is feedparser, (originally) Mark Pilgrim’s python2 library for reading Atom and RSS feeds. Hiding all this nonsense:

behind a simple interface.

import feedparser

# One call fetches and parses the feed; the result can be read both with
# dictionary keys and with attribute access. Output is shown in the comments.
d = feedparser.parse('http://www.reddit.com/r/python/.rss')
print(d['feed']['title'])
# >>> Python
print(d.feed.subtitle)
# >>> news about the dynamic, interpreted, interactive, object-oriented, extensible programming language Python
print(d.headers)
# >>> {'content-length': '5393', 'content-encoding': 'gzip', 'vary': 'accept-encoding', 'server': "'; DROP TABLE servertypes; --", 'connection': 'close', 'date': 'Mon, 14 Oct 2013 09:13:34 GMT', 'content-type': 'text/xml; charset=UTF-8'}

Another library that has the same simplicity is Mustache logic-less templates. This one has been ported to literally dozens of languages. Every template I ever worked on was kind of a mess until I found Mustache. It’s actually the restrictions here that make it sing.

Hello {{name}} 
You have just won {{value}} dollars! 
{{#in_ca}} Well, {{taxed_value}} dollars, after taxes. {{/in_ca}}

Some other examples:

  • web.py – Dead simple web framework
  • BeautifulSoup – HTML/XML parser
  • requests – Python library for HTTP
  • humps – Underscore-to-camelCase converter (and vice versa) for strings and object keys in JavaScript (has been ported to Python as pyhumps).
  • Markdown – Text format with HTML representation that has taken over the web due to its simplicity and usefulness compared to actual HTML

Do you know any “perfect” libraries?

GPX to PostGIS, PostGIS to GPX

With ogr2ogr.

# Connection details shared by the import and export commands below.
export CONN_STRING="host=localhost dbname=DATABASE user=USERNAME password=PASSWORD port=5432"
# Import: append the contents of the GPX file to the database.
ogr2ogr -append -f PostgreSQL PG:"$CONN_STRING" /path/to/your.gpx
# Export: write tracks, routes and waypoints to separate GPX files.
ogr2ogr -f gpx -nlt MULTILINESTRING /path/to/output/tracks.gpx PG:"$CONN_STRING" "tracks(wkb_geometry)"
ogr2ogr -f gpx -nlt MULTILINESTRING /path/to/output/routes.gpx PG:"$CONN_STRING" "routes(wkb_geometry)"
ogr2ogr -f gpx -nlt POINT /path/to/output/waypoints.gpx PG:"$CONN_STRING" "waypoints(wkb_geometry)"

The wkb_geometry references can be replaced with full SQL statements as required.