DZone Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world.

Chu has posted 12 posts at DZone. View Full User Profile

Java Inflections

06.07.2007
| 4867 views |
  • submit to reddit
        
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Transforms words (from singular to plural, from camelCase to under_score, etc.).
 *
 * <p>Inflection rules are regular expressions matched case-insensitively against the word. The
 * most recently registered rule is tried first, and the first rule that matches wins. The default
 * English rules are registered once, when the class is loaded.
 *
 * <p>The rule tables are static, shared by all callers and not synchronized: register custom
 * rules before the inflector is used from several threads.
 *
 * @author chuyeow
 */
public class Inflector {

    // Compiled once up front to avoid the price of compiling the patterns on each call.
    private static final Pattern UNDERSCORE_PATTERN_1 = Pattern.compile("([A-Z]+)([A-Z][a-z])");
    private static final Pattern UNDERSCORE_PATTERN_2 = Pattern.compile("([a-z\\d])([A-Z])");

    private static final List<RuleAndReplacement> plurals = new ArrayList<RuleAndReplacement>();
    private static final List<RuleAndReplacement> singulars = new ArrayList<RuleAndReplacement>();
    private static final List<String> uncountables = new ArrayList<String>();

    // The defaults are registered during class initialization (and therefore exactly once), so
    // rules added later through plural()/singular()/irregular() always take precedence over them.
    static {
        registerDefaultRules();
    }

    // Created eagerly: class initialization is already thread-safe, so no locking is needed.
    private static final Inflector INSTANCE = new Inflector();

    private Inflector() {
        // Singleton: obtain the instance through getInstance().
    }

    /** Registers the built-in English rules, from the most general to the most specific. */
    private static void registerDefaultRules() {
        plural("$", "s");
        plural("s$", "s");
        plural("^(ax|test)is$", "$1es");
        plural("(octop|vir)us$", "$1i");
        plural("(alias|status)$", "$1es");
        plural("(bu)s$", "$1ses"); // bus -> buses
        plural("(buffal|tomat)o$", "$1oes");
        plural("([ti])um$", "$1a");
        plural("sis$", "ses");
        plural("(?:([^f])fe|([lr])f)$", "$1$2ves"); // the group that did not match expands to ""
        plural("(hive)$", "$1s");
        plural("([^aeiouy]|qu)y$", "$1ies");
        // No "...ies" rule here: a word that is already plural falls through to "s$" unchanged.
        plural("(x|ch|ss|sh)$", "$1es");
        plural("(matr|vert|ind)(?:ix|ex)$", "$1ices");
        plural("([ml])ouse$", "$1ice");
        plural("^(ox)$", "$1en"); // anchored so that "box" and "fox" keep the regular "-es"
        plural("(quiz)$", "$1zes");

        singular("s$", "");
        singular("(n)ews$", "$1ews");
        singular("([ti])a$", "$1um");
        singular("(analy|ba|diagno|parenthe|progno|synop|the)ses$", "$1sis");
        singular("([^f])ves$", "$1fe");
        singular("(hive)s$", "$1");
        singular("(tive)s$", "$1");
        singular("([lr])ves$", "$1f");
        singular("([^aeiouy]|qu)ies$", "$1y");
        singular("(s)eries$", "$1eries");
        singular("(m)ovies$", "$1ovie");
        singular("(x|ch|ss|sh)es$", "$1");
        singular("([ml])ice$", "$1ouse");
        singular("(bus)es$", "$1");
        singular("(o)es$", "$1");
        singular("(shoe)s$", "$1");
        singular("(cris|test)es$", "$1is");
        singular("^(ax)es$", "$1is"); // anchored so that "taxes" becomes "tax", not "taxis"
        singular("(octop|vir)i$", "$1us");
        singular("(alias|status)es$", "$1");
        singular("^(ox)en", "$1");
        singular("(vert|ind)ices$", "$1ex");
        singular("(matr)ices$", "$1ix");
        singular("(quiz)zes$", "$1");

        irregular("person", "people");
        irregular("man", "men");
        irregular("child", "children");
        irregular("sex", "sexes");
        irregular("move", "moves");

        uncountable("equipment", "information", "rice", "money", "species", "series", "fish", "sheep");
    }

    /**
     * Returns the shared inflector.
     *
     * @return the single instance of this class
     */
    public static Inflector getInstance() {
        return INSTANCE;
    }

    /**
     * Converts a camelCased word to lower-case under_score form, e.g. {@code "HTMLParser"} to
     * {@code "html_parser"}. Hyphens are turned into underscores as well.
     *
     * @param camelCasedWord the word to convert
     * @return the underscored, lower-cased word
     */
    public String underscore(String camelCasedWord) {
        // First split an acronym from the word that follows it ("HTMLParser" -> "HTML_Parser"),
        // then split every remaining lower-case/digit-to-upper-case boundary.
        String underscoredWord = UNDERSCORE_PATTERN_1.matcher(camelCasedWord).replaceAll("$1_$2");
        underscoredWord = UNDERSCORE_PATTERN_2.matcher(underscoredWord).replaceAll("$1_$2");

        // A fixed locale keeps the result independent of the JVM default (e.g. Turkish "I").
        return underscoredWord.replace('-', '_').toLowerCase(Locale.ENGLISH);
    }

    /**
     * Returns the plural form of a word.
     *
     * @param word the word to pluralize
     * @return the plural, or {@code word} itself when it is empty or uncountable
     */
    public String pluralize(String word) {
        if (word.length() == 0 || isUncountable(word)) {
            return word;
        }
        return replaceWithFirstRule(word, plurals);
    }

    /**
     * Returns the singular form of a word.
     *
     * @param word the word to singularize
     * @return the singular, or {@code word} itself when it is empty, uncountable or matches no rule
     */
    public String singularize(String word) {
        if (word.length() == 0 || isUncountable(word)) {
            return word;
        }
        return replaceWithFirstRule(word, singulars);
    }

    /** Tells whether the whole word, ignoring case, was registered as uncountable. */
    private static boolean isUncountable(String word) {
        return uncountables.contains(word.toLowerCase(Locale.ENGLISH));
    }

    /** Applies the first rule whose pattern occurs in the word; returns the word if none does. */
    private String replaceWithFirstRule(String word, List<RuleAndReplacement> ruleAndReplacements) {
        for (RuleAndReplacement rar : ruleAndReplacements) {
            Matcher matcher = rar.getPattern().matcher(word);
            if (matcher.find()) {
                return matcher.replaceAll(rar.getReplacement());
            }
        }
        return word;
    }

    /**
     * Derives a table name from a class name, e.g. {@code "LineItem"} to {@code "line_items"}.
     *
     * @param className the camelCased class name, without its package
     * @return the underscored, pluralized name
     */
    public String tableize(String className) {
        return pluralize(underscore(className));
    }

    /**
     * Derives a table name from a class, ignoring the package the class lives in.
     *
     * @param klass the class to derive the name from
     * @return the underscored, pluralized name
     */
    public String tableize(Class<?> klass) {
        // Cut at the last dot instead of asking for the package, which may be null.
        String qualifiedName = klass.getName();
        return tableize(qualifiedName.substring(qualifiedName.lastIndexOf('.') + 1));
    }

    /**
     * Registers a pluralization rule that takes precedence over all rules registered so far.
     *
     * @param rule regular expression, matched case-insensitively
     * @param replacement replacement text; may refer to groups of the rule as {@code $1}, ...
     */
    public static void plural(String rule, String replacement) {
        plurals.add(0, new RuleAndReplacement(rule, replacement));
    }

    /**
     * Registers a singularization rule that takes precedence over all rules registered so far.
     *
     * @param rule regular expression, matched case-insensitively
     * @param replacement replacement text; may refer to groups of the rule as {@code $1}, ...
     */
    public static void singular(String rule, String replacement) {
        singulars.add(0, new RuleAndReplacement(rule, replacement));
    }

    /**
     * Registers a word whose plural is not covered by the pattern rules. Both words are taken
     * literally and only match at the end of the inflected word, so {@code ("man", "men")} also
     * covers "woman" but leaves "manager" and "comments" alone.
     *
     * @param singular the singular form, e.g. {@code "person"}
     * @param plural the plural form, e.g. {@code "people"}
     */
    public static void irregular(String singular, String plural) {
        boolean sameInitial = singular.length() > 0 && plural.length() > 0
                && Character.toLowerCase(singular.charAt(0)) == Character.toLowerCase(plural.charAt(0));

        if (sameInitial) {
            // Capture the first letter so that its case survives: "Person" -> "People".
            plural(irregularRule(singular), "$1" + Matcher.quoteReplacement(plural.substring(1)));
            singular(irregularRule(plural), "$1" + Matcher.quoteReplacement(singular.substring(1)));
        } else {
            plural(Pattern.quote(singular) + "$", Matcher.quoteReplacement(plural));
            singular(Pattern.quote(plural) + "$", Matcher.quoteReplacement(singular));
        }
    }

    /** Builds an end-anchored literal pattern for the word, with its first letter as group 1. */
    private static String irregularRule(String word) {
        return "(" + Pattern.quote(word.substring(0, 1)) + ")" + Pattern.quote(word.substring(1)) + "$";
    }

    /**
     * Registers words that have no distinct plural form, such as "sheep".
     *
     * @param words the uncountable words; they are compared ignoring case
     */
    public static void uncountable(String... words) {
        for (String word : words) {
            // Stored lower-cased because lookups lower-case the word being inflected.
            uncountables.add(word.toLowerCase(Locale.ENGLISH));
        }
    }
}


/**
 * An inflection rule: a regular expression together with the text that replaces its matches.
 * The expression is compiled case-insensitively as soon as it is set, so that lookups do not
 * recompile it.
 */
class RuleAndReplacement {
    private String rule;
    private String replacement;
    private Pattern pattern;

    /**
     * @param rule the regular expression of the rule
     * @param replacement the replacement text; may refer to groups of the rule as {@code $1}, ...
     */
    public RuleAndReplacement(String rule, String replacement) {
        this.rule = rule;
        this.replacement = replacement;
        this.pattern = compile(rule);
    }

    public String getReplacement() {
        return replacement;
    }

    public void setReplacement(String replacement) {
        this.replacement = replacement;
    }

    public String getRule() {
        return rule;
    }

    public void setRule(String rule) {
        // Compile first: an invalid expression then leaves the current rule untouched.
        this.pattern = compile(rule);
        this.rule = rule;
    }

    /** Returns the compiled, case-insensitive form of the rule. */
    public Pattern getPattern() {
        return pattern;
    }

    private static Pattern compile(String rule) {
        return Pattern.compile(rule, Pattern.CASE_INSENSITIVE);
    }
}
    

Comments

Snippets Manager replied on Wed, 2009/10/07 - 3:57pm

One more change • uncountable() uses addall import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.springframework.stereotype.Component; /** * Transforms words (from singular to plural, from camelCase to under_score, etc.). I got bored of doing Real Work... * * @author chuyeow, djk */ @Component public class Inflector { private static final class RuleAndReplacement { private final String rule; private final String replacement; public RuleAndReplacement( final String rule, final String replacement ) { this.rule = rule; this.replacement = replacement; } public String getReplacement() { return replacement; } public String getRule() { return rule; } } private static final Pattern UNDERSCORE_PATTERN_1 = Pattern.compile( "([A-Z]+)([A-Z][a-z])" ); private static final Pattern UNDERSCORE_PATTERN_2 = Pattern.compile( "([a-z\\d])([A-Z])" ); private static List plurals = new ArrayList(); private static List singulars = new ArrayList(); private static List uncountables = new ArrayList(); static { plural( "$", "s" ); plural( "s$", "s" ); plural( "(ax|test)is$", "$1es" ); plural( "(octop|vir)us$", "$1i" ); plural( "(alias|status)$", "$1es" ); plural( "(bu)s$", "$1es" ); plural( "(buffal|tomat)o$", "$1oes" ); plural( "([ti])um$", "$1a" ); plural( "sis$", "ses" ); plural( "(?:([^f])fe|([lr])f)$", "$1$2ves" ); plural( "(hive)$", "$1s" ); plural( "([^aeiouy]|qu)y$", "$1ies" ); plural( "([^aeiouy]|qu)ies$", "$1y" ); plural( "(x|ch|ss|sh)$", "$1es" ); plural( "(matr|vert|ind)ix|ex$", "$1ices" ); plural( "([m|l])ouse$", "$1ice" ); plural( "(ox)$", "$1en" ); plural( "(quiz)$", "$1zes" ); singular( "s$", "" ); singular( "(n)ews$", "$1ews" ); singular( "([ti])a$", "$1um" ); singular( "((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$", "$1$2sis" ); singular( "(^analy)ses$", "$1sis" ); singular( "([^f])ves$", "$1fe" ); singular( "(hive)s$", "$1" ); singular( "(tive)s$", "$1" ); 
singular( "([lr])ves$", "$1f" ); singular( "([^aeiouy]|qu)ies$", "$1y" ); singular( "(s)eries$", "$1eries" ); singular( "(m)ovies$", "$1ovie" ); singular( "(x|ch|ss|sh)es$", "$1" ); singular( "([m|l])ice$", "$1ouse" ); singular( "(bus)es$", "$1" ); singular( "(o)es$", "$1" ); singular( "(shoe)s$", "$1" ); singular( "(cris|ax|test)es$", "$1is" ); singular( "([octop|vir])i$", "$1us" ); singular( "(alias|status)es$", "$1" ); singular( "^(ox)en", "$1" ); singular( "(vert|ind)ices$", "$1ex" ); singular( "(matr)ices$", "$1ix" ); singular( "(quiz)zes$", "$1" ); irregular( "person", "people" ); irregular( "man", "men" ); irregular( "child", "children" ); irregular( "sex", "sexes" ); irregular( "move", "moves" ); uncountable( new String[] { "equipment", "information", "rice", "money", "species", "series", "fish", "sheep" } ); } public static String camelCase( final String name ) { final StringBuilder builder = new StringBuilder(); for ( final String part : name.split( "_" ) ) { builder.append( Character.toTitleCase( part.charAt( 0 ) ) ) .append( part.substring( 1 ) ); } return builder.toString(); } public static void irregular( final String singular, final String plural ) { plural( singular + "$", plural ); singular( plural + "$", singular ); } public static void plural( final String rule, final String replacement ) { plurals.add( 0, new RuleAndReplacement( rule, replacement ) ); } public static void singular( final String rule, final String replacement ) { singulars.add( 0, new RuleAndReplacement( rule, replacement ) ); } public static void uncountable( final String... 
words ) { uncountables.addAll( Arrays.asList( words ) ); } public String pluralize( final String word ) { if ( uncountables.contains( word.toLowerCase() ) ) { return word; } return replaceWithFirstRule( word, plurals ); } private String replaceWithFirstRule( final String word, final List ruleAndReplacements ) { for ( final RuleAndReplacement rar : ruleAndReplacements ) { final String rule = rar.getRule(); final String replacement = rar.getReplacement(); final Matcher matcher = Pattern.compile( rule, Pattern.CASE_INSENSITIVE ) .matcher( word ); if ( matcher.find() ) { return matcher.replaceAll( replacement ); } } return word; } public String singularize( final String word ) { if ( uncountables.contains( word.toLowerCase() ) ) { return word; } return replaceWithFirstRule( word, singulars ); } public String tableize( final Class<?> klass ) { return tableize( klass.getSimpleName() ); } public String tableize( final String className ) { return pluralize( underscore( className ) ); } public String underscore( final String camelCasedWord ) { String underscoredWord = UNDERSCORE_PATTERN_1.matcher( camelCasedWord ) .replaceAll( "$1_$2" ); underscoredWord = UNDERSCORE_PATTERN_2.matcher( underscoredWord ) .replaceAll( "$1_$2" ); underscoredWord = underscoredWord.replace( '-', '_' ) .toLowerCase(); return underscoredWord; } }

Snippets Manager replied on Wed, 2009/10/07 - 3:57pm

Also moved the initialize logic into a static code block and removed the singleton code, which was not thread-safe.

Snippets Manager replied on Wed, 2009/10/07 - 3:57pm

Did a little bit of cleaning up for performance and java 5+ • Moved RuleAndReplacement into a private static class • converted camelCase(String name) to use a StringBuilder • converted tableize(Class klass) to use .getSimpleName() and take Class<?> package com.locamoda.db; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Transforms words (from singular to plural, from camelCase to under_score, etc.). I got bored of doing Real Work... * * @author chuyeow, djk */ public class Inflector { private static final class RuleAndReplacement { private final String rule; private final String replacement; public RuleAndReplacement( final String rule, final String replacement ) { this.rule = rule; this.replacement = replacement; } public String getReplacement() { return replacement; } public String getRule() { return rule; } } private static final Pattern UNDERSCORE_PATTERN_1 = Pattern.compile( "([A-Z]+)([A-Z][a-z])" ); private static final Pattern UNDERSCORE_PATTERN_2 = Pattern.compile( "([a-z\\d])([A-Z])" ); private static List plurals = new ArrayList(); private static List singulars = new ArrayList(); private static List uncountables = new ArrayList(); static { plural( "$", "s" ); plural( "s$", "s" ); plural( "(ax|test)is$", "$1es" ); plural( "(octop|vir)us$", "$1i" ); plural( "(alias|status)$", "$1es" ); plural( "(bu)s$", "$1es" ); plural( "(buffal|tomat)o$", "$1oes" ); plural( "([ti])um$", "$1a" ); plural( "sis$", "ses" ); plural( "(?:([^f])fe|([lr])f)$", "$1$2ves" ); plural( "(hive)$", "$1s" ); plural( "([^aeiouy]|qu)y$", "$1ies" ); plural( "([^aeiouy]|qu)ies$", "$1y" ); plural( "(x|ch|ss|sh)$", "$1es" ); plural( "(matr|vert|ind)ix|ex$", "$1ices" ); plural( "([m|l])ouse$", "$1ice" ); plural( "(ox)$", "$1en" ); plural( "(quiz)$", "$1zes" ); singular( "s$", "" ); singular( "(n)ews$", "$1ews" ); singular( "([ti])a$", "$1um" ); singular( "((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$", 
"$1$2sis" ); singular( "(^analy)ses$", "$1sis" ); singular( "([^f])ves$", "$1fe" ); singular( "(hive)s$", "$1" ); singular( "(tive)s$", "$1" ); singular( "([lr])ves$", "$1f" ); singular( "([^aeiouy]|qu)ies$", "$1y" ); singular( "(s)eries$", "$1eries" ); singular( "(m)ovies$", "$1ovie" ); singular( "(x|ch|ss|sh)es$", "$1" ); singular( "([m|l])ice$", "$1ouse" ); singular( "(bus)es$", "$1" ); singular( "(o)es$", "$1" ); singular( "(shoe)s$", "$1" ); singular( "(cris|ax|test)es$", "$1is" ); singular( "([octop|vir])i$", "$1us" ); singular( "(alias|status)es$", "$1" ); singular( "^(ox)en", "$1" ); singular( "(vert|ind)ices$", "$1ex" ); singular( "(matr)ices$", "$1ix" ); singular( "(quiz)zes$", "$1" ); irregular( "person", "people" ); irregular( "man", "men" ); irregular( "child", "children" ); irregular( "sex", "sexes" ); irregular( "move", "moves" ); uncountable( new String[] { "equipment", "information", "rice", "money", "species", "series", "fish", "sheep" } ); } public static String camelCase( final String name ) { final StringBuilder builder = new StringBuilder(); for ( final String part : name.split( "_" ) ) { builder.append( Character.toTitleCase( part.charAt( 0 ) ) ) .append( part.substring( 1 ) ); } return builder.toString(); } public static void irregular( final String singular, final String plural ) { plural( singular + "$", plural ); singular( plural + "$", singular ); } public static void plural( final String rule, final String replacement ) { plurals.add( 0, new RuleAndReplacement( rule, replacement ) ); } public static void singular( final String rule, final String replacement ) { singulars.add( 0, new RuleAndReplacement( rule, replacement ) ); } public static void uncountable( final String... 
words ) { for ( final String word : words ) { uncountables.add( word ); } } public String pluralize( final String word ) { if ( uncountables.contains( word.toLowerCase() ) ) { return word; } return replaceWithFirstRule( word, plurals ); } private String replaceWithFirstRule( final String word, final List ruleAndReplacements ) { for ( final RuleAndReplacement rar : ruleAndReplacements ) { final String rule = rar.getRule(); final String replacement = rar.getReplacement(); final Matcher matcher = Pattern.compile( rule, Pattern.CASE_INSENSITIVE ) .matcher( word ); if ( matcher.find() ) { return matcher.replaceAll( replacement ); } } return word; } public String singularize( final String word ) { if ( uncountables.contains( word.toLowerCase() ) ) { return word; } return replaceWithFirstRule( word, singulars ); } public String tableize( final Class<?> klass ) { return tableize( klass.getSimpleName() ); } public String tableize( final String className ) { return pluralize( underscore( className ) ); } public String underscore( final String camelCasedWord ) { String underscoredWord = UNDERSCORE_PATTERN_1.matcher( camelCasedWord ) .replaceAll( "$1_$2" ); underscoredWord = UNDERSCORE_PATTERN_2.matcher( underscoredWord ) .replaceAll( "$1_$2" ); underscoredWord = underscoredWord.replace( '-', '_' ) .toLowerCase(); return underscoredWord; } }

Snippets Manager replied on Sat, 2008/05/24 - 5:48pm

Solution for the problem: - Comments -> Commant method converting UNDERSCORE TO CAMELCASE - cammelCase import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Transforms words (from singular to plural, from camelCase to under_score, etc.). I got bored of doing Real Work... * * @author chuyeow, djk */ public class Inflector { // Pfft, can't think of a better name, but this is needed to avoid the price of initializing the pattern on each call. private static final Pattern UNDERSCORE_PATTERN_1 = Pattern.compile("([A-Z]+)([A-Z][a-z])"); private static final Pattern UNDERSCORE_PATTERN_2 = Pattern.compile("([a-z\\d])([A-Z])"); private static List plurals = new ArrayList(); private static List singulars = new ArrayList(); private static List uncountables = new ArrayList(); private static Inflector instance; // (Pseudo-)Singleton instance. private Inflector() { // Woo, you can't touch me. initialize(); } private void initialize() { plural("$", "s"); plural("s$", "s"); plural("(ax|test)is$", "$1es"); plural("(octop|vir)us$", "$1i"); plural("(alias|status)$", "$1es"); plural("(bu)s$", "$1es"); plural("(buffal|tomat)o$", "$1oes"); plural("([ti])um$", "$1a"); plural("sis$", "ses"); plural("(?:([^f])fe|([lr])f)$", "$1$2ves"); plural("(hive)$", "$1s"); plural("([^aeiouy]|qu)y$", "$1ies"); plural("([^aeiouy]|qu)ies$", "$1y"); plural("(x|ch|ss|sh)$", "$1es"); plural("(matr|vert|ind)ix|ex$", "$1ices"); plural("([m|l])ouse$", "$1ice"); plural("(ox)$", "$1en"); plural("(quiz)$", "$1zes"); singular("s$", ""); singular("(n)ews$", "$1ews"); singular("([ti])a$", "$1um"); singular("((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$", "$1$2sis"); singular("(^analy)ses$", "$1sis"); singular("([^f])ves$", "$1fe"); singular("(hive)s$", "$1"); singular("(tive)s$", "$1"); singular("([lr])ves$", "$1f"); singular("([^aeiouy]|qu)ies$", "$1y"); singular("(s)eries$", "$1eries"); singular("(m)ovies$", "$1ovie"); 
singular("(x|ch|ss|sh)es$", "$1"); singular("([m|l])ice$", "$1ouse"); singular("(bus)es$", "$1"); singular("(o)es$", "$1"); singular("(shoe)s$", "$1"); singular("(cris|ax|test)es$", "$1is"); singular("([octop|vir])i$", "$1us"); singular("(alias|status)es$", "$1"); singular("^(ox)en", "$1"); singular("(vert|ind)ices$", "$1ex"); singular("(matr)ices$", "$1ix"); singular("(quiz)zes$", "$1"); irregular("person", "people"); irregular("man", "men"); irregular("child", "children"); irregular("sex", "sexes"); irregular("move", "moves"); uncountable(new String[] {"equipment", "information", "rice", "money", "species", "series", "fish", "sheep"}); } public static Inflector getInstance() { if (instance == null) { instance = new Inflector(); } return instance; } public String underscore(String camelCasedWord) { // Regexes in Java are fucking stupid... String underscoredWord = UNDERSCORE_PATTERN_1.matcher(camelCasedWord).replaceAll("$1_$2"); underscoredWord = UNDERSCORE_PATTERN_2.matcher(underscoredWord).replaceAll("$1_$2"); underscoredWord = underscoredWord.replace('-', '_').toLowerCase(); return underscoredWord; } public String pluralize(String word) { if (uncountables.contains(word.toLowerCase())) { return word; } return replaceWithFirstRule(word, plurals); } public String singularize(String word) { if (uncountables.contains(word.toLowerCase())) { return word; } return replaceWithFirstRule(word, singulars); } private String replaceWithFirstRule(String word, List ruleAndReplacements) { for (RuleAndReplacement rar : ruleAndReplacements) { String rule = rar.getRule(); String replacement = rar.getReplacement(); // Return if we find a match. 
Matcher matcher = Pattern.compile(rule, Pattern.CASE_INSENSITIVE).matcher(word); if (matcher.find()) { //System.out.println(word + " + " + rule +"->"+ replacement); return matcher.replaceAll(replacement); } } return word; } public String tableize(String className) { return pluralize(underscore(className)); } public String tableize(Class klass) { // Strip away package name - we only want the 'base' class name. String className = klass.getName().replace(klass.getPackage().getName()+".", ""); return tableize(className); } public static void plural(String rule, String replacement) { plurals.add(0, new RuleAndReplacement(rule, replacement)); } public static void singular(String rule, String replacement) { singulars.add(0, new RuleAndReplacement(rule, replacement)); } public static void irregular(String singular, String plural) { plural(singular+"$", plural); singular(plural+"$", singular); } public static void uncountable(String... words) { for (String word : words) { uncountables.add(word); } } public static String camelCase(String name){ String parts[] = name.split("_"); String string = ""; for (String part : parts) { string += part.substring(0, 1).toUpperCase(); string += part.substring(1).toLowerCase(); } return string; } } // Ugh, no open structs in Java (not-natively at least). class RuleAndReplacement { private String rule; private String replacement; public RuleAndReplacement(String rule, String replacement) { this.rule = rule; this.replacement = replacement; } public String getReplacement() { return replacement; } public void setReplacement(String replacement) { this.replacement = replacement; } public String getRule() { return rule; } public void setRule(String rule) { this.rule = rule; } }