A heap implementation in Python

One more python implementation of a data structure. This is a heap implementation that follows an architecture similar to the binary search tree we have built earlier.

from collections import Container

class Heap(Container):
    array = []

    # Allows "if item in tree:"
    def __contains__(self, item):
        return self._contains(item, 1)

    def _contains(self, item, index):
        if item > self.array[index-1]:
            return False
        elif item < self.array[index-1]:
            if len(self.array) > index * 2:
                return self._contains(item, index*2) or self._contains(item, index*2+1)
            elif len(self.array) > index * 2 + 1:
                return self._contains(item, index*2)
            else:
                return False
        else:
            return True

    def add(self, item):
        self.array.append(item)
        self._bubble(len(self.array)-1)

    def _bubble(self, index):
        # Note: we take 1-based indexes, but convert them to 0-based ones
        index = index-1
        if self.array[index] > self.array[index/2]:
            tmp = self.array[index]
            self.array[index] = self.array[index/2]
            self.array[index/2] = tmp
            self._bubble(index/2)

    # Useful for debugging. Prints the whole tree
    def __repr__(self):
        return self._repr(1)

    def _repr(self, index, indent=0):
        # Note: we use 1-based indexes here
        string = ""
        string = ('\t' * indent) + str(self.array[index-1]) + '\n'

        if len(self.array) >= index * 2:
            string += ('\t' * (indent+1)) + 'Left:\n'
            string += self._repr(index*2, indent+1)

        if len(self.array) >= index * 2 + 1:
            string += ('\t' * (indent+1)) + 'Right:\n'
            string += self._repr(index*2+1, indent+1)

        return string

You can reuse the same code as the last time to test it:

from random import randint

# Add random values
values = []
tree = Heap()
for i in range(40):
    val = randint(0,100)
    tree.add(val)
    values.append(val)

print(tree)  # uses __repr__
print(values)
print(25 in tree)  # uses __contains__
print(25 in values)

A binary search tree in Python

As part of my series on concepts you should never, ever need to reimplement in a real world scenario, here is a binary search tree in Python. This particular example contains a bit of duplicate code, but I doubt the benefits of changing this would outweigh the loss in readability, especially for a learning example.

For the sake of doing things correctly, we extend collections.Container, a Python abstract class shared by most other collections.

from collections import Container
class Node(object):
    left = None
    right = None
    value = None

class BinarySearchTree(Container):

    root = Node()

    # Allows "if item in tree:"
    def __contains__(self, item):
        return self._contains(item, self.root)

    def _contains(self, item, node):
        if item > node.value:
            if node.right:
                return self._contains(item, node.right)
            else:
                return False

        elif item < node.value:
            if node.left:
                return self._contains(item, node.left)
            else:
                return False

        else:
            return True

    def add(self, item):
        self._add(item, self.root)

    def _add(self, item, node):
        if node.value:
            if item > node.value:
                if node.right:
                    self._add(item, node.right)
                else:
                    node.right = Node()
                    node.right.value = item
            elif item < node.value:
                if node.left:
                    self._add(item, node.left)
                else:
                    node.left = Node()
                    node.left.value = item
            else:  # Node does not exist or has same value as item
                node = Node()
                node.value = item
        else:
            node.value = item

    # Useful for debugging. Prints the whole tree
    def __repr__(self):
        return self._repr(self.root)

    def _repr(self, node, indent=0):
        string = ('\t' * indent) + str(node.value) + '\n'

        if node.left:
            string += ('\t' * (indent+1)) + 'Left:\n'
            string += self._repr(node.left, indent+1)
        if node.right:
            string += ('\t' * (indent+1)) + 'Right:\n'
            string += self._repr(node.right, indent+1)

        return string

And now we can test our tree:

from random import randint

# Add random values
values = []
tree = BinarySearchTree()
for i in range(40):
    val = randint(0,100)
    tree.add(val)
    values.append(val)

print(tree)  # uses __repr__
print(values)
print(25 in tree)  # uses __contains__
print(25 in values)

A bubble sort in Python

As a second exercise in preparation for my Microsoft phone interview, I give you the Python bubble sort (or sinking sort). Once again, this is just an exercise. You should use the standard Python facilities for your sorting needs, especially given the terrible efficiency of bubble sort.

from random import shuffle

countries = ["Afghanistan","Albania","Algeria", ...]
sorted_countries = countries

shuffle(countries)

def bubblesort(items):
    dirty = True
    while dirty:
        dirty = False
        for index, item in enumerate(items[1:]):
            if items[index-1] > items[index]:
                tmp = items[index]
                items[index] = items[index-1]
                items[index-1] = tmp
                dirty = True
    return items

print bubblesort(countries)

A binary search in Python

This week, I received some exciting news by email: I will be interviewed for an internship at Microsoft! In order to keep my memory fresh, I have decided to practice my algorithms and data structures, then post the results.

To get things started, here is how you perform a binary search in Python, not that you should have to. This function will return the index of the given item in the given list.

countries = ("Afghanistan","Albania","Algeria", ...)

def bsearch(items, val, offset=0):
    index = int(len(items)/2)
    if val == items[index]:
        return index + offset
    elif(val > items[index]):
        return bsearch(items[index:], val, offset+index)
    else:
        return bsearch(items[:index], val, offset)

print bsearch(countries, "Central African Rep")

Telling if a field is required in Django templates

If you want to tell whether a field in a template is required, use form.myfield.field.required as in the example below:

{% for field in form %}
    <label for="{{ field.id_for_label }}">
        {{ field.label }}
        {% if field.field.required %}*{% endif %}
    </label>
    {{ field }}
    {{ field.errors }}
{% endfor %}

In this example, there will be an asterisk next to the required fields.

Diagnosing a bad ping

Once in a while, you will deal with an unbearably long ping time. Calling your ISP might solve the problem after a few minutes of trial and error, but it’s usually after spending a few hours on the phone. Here is a short list of things you should check before calling:

Step 1: Gathering data

First, let’s see whether it’s you or the ISP:

  1. Look at your ISP’s Twitter and status page. It’s a low effort way to see if other people are having the same issue.
  2. Reboot your modem and router and try plugging your computer to the modem directly. Sometimes, it’s the simple stuff.
  3. Check your ping immediately after rebooting your router and modem. In a recent case, the ping was fast for a minute or two after rebooting, then slowed to a crawl. A service on my server was hogging my bandwidth as soon as it had internet access.
  4. Ping from different devices. If you can get a good ping on a wired device but not on a wireless one, it might be a signal problem.
  5. Check your router’s status page. If you run DD-WRT, you can see the bandwidth usage and connection strength. In a recent case, I could see that a wired device was hogging the bandwidth.

Step 2: Common culprits

If you have figured that the problem is on your side, here are some common problems to look for.

  1. Check your torrents. If your upload rate matches that of your internet connection, it can slow the internet down to a crawl. Don’t forget to convert kilobytes to kilobits when comparing.
  2. Check your services. If you are running services (VPN, proxy, web server, SSH etc), check if they are being attacked or used without consent. I once foolishly ran a public proxy that somehow made its way onto a public proxy list and it killed my router after a few minutes. The Apache access logs, among other things, might provide some information.
  3. Check your other computers. On OS X, use the Network tab in Activity Monitor to spot bandwidth hogs. The Windows task manager offers similar functionality.

Specifying a port when connecting with SSH

Some servers run their SSH server on a different port than 22 for a variety of reasons, including security.

To specify a custom port when connecting, use the -p flag as in the example below:

ssh user@server.com -p 2222

In this example, the client would attempt to connect using port 2222.

Fix garbled file names in SMB shares

If you have files in your SMB networked folders that look like this: TRZB4~J.mp4, there is a very easy fix to get the original file names.

Open your smb.conf file (usually at /etc/samba/smb.conf) and add the following line under [global]:

mangle case = no
mangled names = no

Once this is done, enter the following command in your terminal:

sudo service samba2 restart

Names are mangled to make them compatible with older operating systems, but this absolutely shouldn’t be a problem that you will face at home.

Following redirects with the Django test client

When running unit tests with in Django, the test client’s default behaviour is to stop at the first response, even if that response is a redirect.

If you want the client to follow these redirects and return the last page, perform your requests like this:

response = c.get('/redirect_me/', follow=True)

This will also add response.redirect_chain so you can see which URLs it followed before getting to the final page. Here is a sample redirect_chain:

[(u'http://testserver/next/', 302), (u'http://testserver/final/', 302)]