Friday, September 12, 2014

Java Fun

It's Friday as I'm writing this, so for fun, here are some neat facts about Java.

  • hashCode() of strings are a hash int value of the contents of the string which can be 0 or at the integer limits.
  • Math.abs(Integer.MIN_VALUE) == Integer.MIN_VALUE
For some information about hashCode() strings, see this answer in StackOverflow.

Some examples in beanshell:

% System.out.println("actuation conjugant".hashCode());
2147483647
% System.out.println("bequirtle zorillo".hashCode());
0
% System.out.println("polygenelubricants".hashCode());
-2147483648
% System.out.println(Integer.MIN_VALUE);
-2147483648
% System.out.println(Math.abs("polygenelubricants".hashCode()));
-2147483648

I quickly hacked up a simple Java app to process OS X's /usr/share/dict/words. It's suboptimal but works for a test:

import java.io.*;
import java.util.ArrayList;

public class Test {
    public static final void main(String args[]) throws IOException {
        ArrayList words = new ArrayList();
        try(BufferedReader br = new BufferedReader(new FileReader("words"))) {
            for(String line; (line = br.readLine()) != null; ) {
                words.add(line);
            }
        }

        String s;
        for (int i = 0; i < words.size(); i++) {
            s = words.get(i);
            test(s);
            test(s.toUpperCase());
            test(s.toLowerCase());
            for (int j = 0; j < words.size(); j++) {
                s = words.get(i) + words.get(j);
                test(s);
                test(s.toUpperCase());
                test(s.toLowerCase());
                s = words.get(i) + " " + words.get(j);
                test(s);
                test(s.toUpperCase());
                test(s.toLowerCase());
            }
        }
    }

    public static final void test(String s) {
        int h = s.hashCode();
        if (h == Integer.MIN_VALUE || h == 0 || h == Integer.MAX_VALUE) {
            System.out.println("" + h + " = " + s);
        }
    }
}

Some strings with hashCode() of Integer.MAX_VALUE, 2147483647 (some of the following I found with a different version of the code):

ABASEDNESS RESEMBLANCERESEMBLANT
actuation conjugant
addlepate Burushaski
actuation conjugant
addlepate Burushaski
AIRCREWBRUTALIZATION
ANALYZATIONDIANCECHT
andrewsite sruti
apatetic libber
attached retranslate

Some strings with hashCode() of 0:

accismus toddlertoddy
acephalisthematolysis
ACETOMORPHINE ACHENE
acousmatic gymnarchidaegymnarchus
ALVEOLODENTAL WALDENSES
APODAN UNCONTENDED
auraadmonitorial
BALSAMROOT SQUARENESS

Some strings with hashCode() of Integer.MIN_VALUE, -2147483648:

accompt hypercriticism
acetothienoneruledom
achroite unenviablyunenvied
adventitiouslyoniscoidea
AFFORCEMISBAPTIZE
aggressquadrifurcate
bamboozlercastaway

I modified it to wait until the end to output and show progress, e.g.

rate = 186ms/word set. Estimated 730 min left. 1 found
Here's the code with the modification:

import java.io.*;
import java.util.ArrayList;

public class Test {
    public static final void main(String args[]) throws IOException {
        ArrayList<String> words = new ArrayList<String>();
        try(BufferedReader br = new BufferedReader(new FileReader("words"))) {
            for(String line; (line = br.readLine()) != null; ) {
                words.add(line);
            }
        }

        String s;
        int counter = 0;
        ArrayList<String> results = new ArrayList<String>();
        long startTime = System.currentTimeMillis();
        for (int i = 0; i < words.size(); i++) {
            s = words.get(i);
            test(s, results);
            test(s.toUpperCase(), results);
            test(s.toLowerCase(), results);
            for (int j = 0; j < words.size(); j++) {
                s = s + words.get(j);
                test(s.toUpperCase(), results);
                test(s.toLowerCase(), results);
                s = words.get(i) + " " + words.get(j);
                test(s.toUpperCase(), results);
                test(s.toLowerCase(), results);
            }
            counter++;
            long rate = (System.currentTimeMillis() - startTime) / counter;
            long minLeft = ((words.size() - counter) * rate) / 1000 / 60;
            System.out.println("rate = " + rate + "ms/word set. Estimated " + minLeft + " min left. " + results.size() + " found");
        }
    }

    public static final void test(String s, ArrayList<String> results) {
        int h = s.hashCode();
        if (h == Integer.MIN_VALUE || h == 0 || h == Integer.MAX_VALUE) {
            results.add("" + h + " = " + s);
        }
    }
}

Then I made it multi-threaded which helped it to go a bit faster:

rate = 86ms/word set. Estimated 337 min left. 0 found
Here's the code with the suboptimal multi-threading modifications:

import java.io.*;
import java.util.ArrayList;

public class Test {
    public static final int SPLIT = 3;
    public long startTime;
    public int counter;
    public ArrayList<String> results;

    public static final void main(String args[]) throws IOException, InterruptedException {
        (new Test()).test();
    }

    public void test() throws IOException, InterruptedException {
        processWords(readWords());
    }

    private ArrayList<String> readWords() throws IOException {
        System.out.println("Reading file 'words'");
        ArrayList<String> allWords = new ArrayList<String>();
        try(BufferedReader br = new BufferedReader(new FileReader("words"))) {
            for(String line; (line = br.readLine()) != null; ) {
                allWords.add(line);
            }
        }
        return allWords;
    }

    private void processWords(ArrayList<String> allWords) throws InterruptedException {
        System.out.println("" + SPLIT + " threads starting");
        ArrayList<ArrayList<String>> wordLists = split(allWords, SPLIT);

        this.results = new ArrayList<String>();
        this.counter = 0;
        this.startTime = System.currentTimeMillis();

        for (ArrayList<String> words : wordLists) {
            (new Thread(new WordHashCodeChecker(words, allWords, this))).start();
        }
        
        while(counter < allWords.size()) {
            Thread.sleep(1000);
            if (counter > 0) {
                long rate = (System.currentTimeMillis() - this.startTime) / counter;
                long minLeft = ((allWords.size() - counter) * rate) / 1000 / 60;
                System.out.println("rate = " + rate + "ms/word set. Estimated " + minLeft + " min left. " + this.results.size() + " found");
            }
        }

        for (String result : results) {
            System.out.println(result);
        }

    }

    public ArrayList<ArrayList<String>> split(ArrayList<String> originalList, int numLists) {
        int usualMaxSize = originalList.size() / numLists;
        ArrayList<ArrayList<String>> lists = new ArrayList<ArrayList<String>>();
        ArrayList<String> current = new ArrayList<String>();
        for (int i=0; i<originalList.size(); i++) {
            current.add(originalList.get(i));
            if (current.size() >= usualMaxSize && lists.size() < numLists) {
                lists.add(current);
                current = new ArrayList<String>();
            }
        }
        return lists;
    }

    class WordHashCodeChecker implements Runnable {
        
        ArrayList<String> words;
        ArrayList<String> allWords;
        Test test;

        public WordHashCodeChecker(ArrayList<String> words, ArrayList<String> allWords, Test test) {
            this.words = words;
            this.allWords = allWords;
            this.test = test;
        }

        public void run() {
            String s;
            for (int i = 0; i < words.size(); i++) {
                s = words.get(i);
                checkHashCode(s.toUpperCase());
                checkHashCode(s.toLowerCase());
                for (int j = 0; j < allWords.size(); j++) {
                    s = words.get(i) + allWords.get(j);
                    checkHashCode(s.toUpperCase());
                    checkHashCode(s.toLowerCase());
                    s = words.get(i) + " " + allWords.get(j);
                    checkHashCode(s.toUpperCase());
                    checkHashCode(s.toLowerCase());
                }
                this.test.counter++;
            }
        }

        public final void checkHashCode(String s) {
         int h = s.hashCode();
            if (h == Integer.MIN_VALUE || h == 0 || h == Integer.MAX_VALUE) {
                this.test.results.add("" + h + " = " + s);
            }
        }
    }
}

No comments: