Word Ladder and Breadth-First Searching

In [1]:
from pythonds9 import Graph, Vertex
from pythonds9.graphs.utils import viz_graph
from pythonds9 import Queue
from random import random

Build the Graph

In [2]:
def build_graph(word_file, count=None, random_select=False):
    d = {}
    g = Graph()
    current = 0
    
    # Pass in a filename, or an already open fileobject
    if isinstance(word_file, str):
        # 'word_file' is a filename, a string
        wfile = open(word_file, 'r')
    else:
        # We should tighten up this conditional check, but it works for now
        # The idea here is word_file can be passed in as an already open file object
        wfile = word_file
        
    # Create the buckets of words that differ by one letter
    for line in wfile:
        # randomly pick a word to avoid loosely connects graph
        # 25% chance of picking a word, or 75% chance of skipping a word    
        if random_select and random() >= 0.05:
            continue
            
        # If required, and count is not None, constrain our number of words used.
        if count and current > count:
            break
            
        word = line.strip()  # stripping '\n' newline character
        for i, _ in enumerate(word):
            bucket = word[:i] + "_" + word[i+1:]
            if bucket in d:
                d[bucket].append(word)
            else:
                d[bucket] = [word]

        current += 1
            
    
    total_connections = 0
        
    # add vertices and edges for words in the same bucket
    for bucket in d:  # iterate over all the keys in dictionary d
        for word1 in d[bucket]:
            for word2 in d[bucket]:
                if word1 != word2:
                    g.add_vertex(word1)
                    g.add_vertex(word2)
                    g.add_edge(word1, word2)
                    total_connections += 1
    
    print(f"Total edges in this graph: {total_connections}")
    return g
In [3]:
word = "ROPE"
i = 0
print(f"{word[:i]!r} {'_'!r} {word[i+1:]!r} => {word[:i] + '_' + word[i+1:]}")

i = 1
print(f"{word[:i]!r} {'_'!r} {word[i+1:]!r} => {word[:i] + '_' + word[i+1:]}")

i = 2
print(f"{word[:i]!r} {'_'!r} {word[i+1:]!r} => {word[:i] + '_' + word[i+1:]}")

i = 3
print(f"{word[:i]!r} {'_'!r} {word[i+1:]!r} => {word[:i] + '_' + word[i+1:]}")
'' '_' 'OPE' => _OPE
'R' '_' 'PE' => R_PE
'RO' '_' 'E' => RO_E
'ROP' '_' '' => ROP_
In [4]:
2+2
Out[4]:
4
In [5]:
print(_)
4
In [6]:
x = 2*3
In [7]:
print(_)
4

Testing the Initial Word Graph

In [8]:
import requests
from io import StringIO
In [9]:
def get_words(url):
    req = requests.get(url)
    return StringIO(req.text)
In [10]:
def main():
    url = "http://www.webplaces.com/passwords/lists/4-letter-words-2478.txt"
    words_file = get_words(url)  # large string acting as a file object using StringIO
    
    limit = 150
    word_graph = build_graph(words_file, count=limit)
    return word_graph
In [11]:
wg = main()
Total edges in this graph: 470
In [12]:
len(wg.vertices)
Out[12]:
129
In [13]:
g = viz_graph(wg)
In [14]:
g.render("word_graph")
Out[14]:
'word_graph.pdf'
In [30]:
def main2():
    words_string = """
foul
foil
fail
fall
pole
pall
poll
pool
cool
fool
pope
pale
sale
sage
page
"""
    words_file = StringIO(words_string)
    word_graph = build_graph(words_file)
    
    return word_graph
In [31]:
wg = main2()
Total edges in this graph: 38
In [32]:
len(wg.vertices)
Out[32]:
15
In [33]:
g = viz_graph(wg)
In [34]:
g
Out[34]:
nodefoul foul nodefoil foil nodefoul->nodefoil nodefool fool nodefoul->nodefool nodefoil->nodefoul nodefoil->nodefool nodefail fail nodefoil->nodefail nodefool->nodefoul nodefool->nodefoil nodepool pool nodefool->nodepool nodecool cool nodefool->nodecool nodefail->nodefoil nodefall fall nodefail->nodefall nodepool->nodefool nodepool->nodecool nodepoll poll nodepool->nodepoll nodecool->nodefool nodecool->nodepool nodefall->nodefail nodepall pall nodefall->nodepall nodepall->nodefall nodepall->nodepoll nodepale pale nodepall->nodepale nodepoll->nodepool nodepoll->nodepall nodepole pole nodepoll->nodepole nodepale->nodepall nodepale->nodepole nodesale sale nodepale->nodesale nodepage page nodepale->nodepage nodepole->nodepoll nodepole->nodepale nodepope pope nodepole->nodepope nodepope->nodepole nodesale->nodepale nodesage sage nodesale->nodesage nodepage->nodepale nodepage->nodesage nodesage->nodesale nodesage->nodepage

Breadth-First Searching for Word Ladder Solution

In [35]:
def bfs(g, start):
    start.set_distance(0)
    start.set_pred(None)
    vert_queue = Queue()
    vert_queue.enqueue(start)
    
    while vert_queue.size() > 0:
        current_vert = vert_queue.dequeue()
        
        for nbr in current_vert.get_connections():
            if nbr.get_color() == 'white':
                nbr.set_color('gray')
                nbr.set_distance(current_vert.get_distance() + 1)
                nbr.set_pred(current_vert)
                vert_queue.enqueue(nbr)
                
        current_vert.set_color('black')
In [36]:
def traverse(y):
    x = y
    while x.get_pred():
        print(x.get_id())
        x = x.get_pred()
    print(x.get_id())
In [37]:
wg
Out[37]:
<pythonds9.graphs.graph.Graph at 0x106cfcd30>
In [38]:
bfs(wg, wg.get_vertex('fool'))

Get our end word, and work our way to the start word

In [39]:
vert_sage = wg.get_vertex('sage')
print(f"{vert_sage.color!r} {vert_sage.pred!r} {vert_sage.pred.get_id()!r}")
'black' Vertex(sale) 'sale'
In [40]:
traverse(wg.get_vertex('sage'))
sage
sale
pale
pole
poll
pool
fool

Show all the possible word ladder paths using a directed graph

In [41]:
viz_graph(wg, show_pred=True)
Out[41]:
nodefoul foul nodefool fool nodefoul->nodefool nodefoil foil nodefoil->nodefool nodefail fail nodefail->nodefoil nodefall fall nodefall->nodefail nodepall pall nodepoll poll nodepall->nodepoll nodepool pool nodepoll->nodepool nodepole pole nodepole->nodepoll nodepale pale nodepale->nodepole nodepope pope nodepope->nodepole nodepool->nodefool nodecool cool nodecool->nodefool nodesale sale nodesale->nodepale nodepage page nodepage->nodepale nodesage sage nodesage->nodesale