Source code for chardetails.core

#! /usr/bin/env python
# -*- coding: utf-8 -*-
# Unicode Character Details
# Copyright 2008-2010 Santhosh Thottingal <santhosh.thottingal@gmail.com>
# http://www.smc.org.in
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# If you find any bugs or have any suggestions email:
# santhosh.thottingal@gmail.com
# URL: http://www.smc.org.in

import os
import unicodedata

__all__ = ['CharDetails', 'getInstance']


[docs]class CharDetails:
    """
    Shows the Unicode Character Details of a given character
    """
[docs]    def getdetails(self, text):
        """
        Gives details of all charecters in the given string.

        :param text: The unicode string to be examined.
        :type text: str.
        :returns:  dictionary with details.

        ::

         >>> import chardetails.getInstance
         >>> a = getInstance()
         >>> a.getdetails(u"run")
         {'Characters': [u'r', u'u', u'n'],
         u'n': {'AlphaNumeric': 'True',
         'Alphabet': 'True',
         'Canonical Decomposition': '',
         'Code point': "u'n'",
         'Digit': 'False',
         'HTML Entity': '110',
         'Name': 'LATIN SMALL LETTER N'},
         u'r': {'AlphaNumeric': 'True',
         'Alphabet': 'True',
         'Canonical Decomposition': '',
         'Code point': "u'r'",
         'Digit': 'False',
         'HTML Entity': '114',
         'Name': 'LATIN SMALL LETTER R'},
         u'u': {'AlphaNumeric': 'True',
         'Alphabet': 'True',
         'Canonical Decomposition': '',
         'Code point': "u'u'",
         'Digit': 'False',
         'HTML Entity': '117',
         'Name': 'LATIN SMALL LETTER U'}}


        """
        chardetails = {}
        for character in text:
            chardetails[character] = {}
            chardetails[character]['Name'] = unicodedata.name(character)
            chardetails[character]['HTML Entity'] = str(ord(character))
            chardetails[character]['Code point'] = repr(character)
            try:
                chardetails[character]['Numeric Value'] = \
                        unicodedata.numeric(character)
            except:
                pass
            try:
                chardetails[character]['Decimal Value'] = \
                        unicodedata.decimal(character)
            except:
                pass
            try:
                chardetails[character]['Digit'] = unicodedata.digit(mychar)
            except:
                pass
            chardetails[character]['Alphabet'] = str(character.isalpha())
            chardetails[character]['Digit'] = str(character.isdigit())
            chardetails[character]['AlphaNumeric'] = str(character.isalnum())
            chardetails[character]['Canonical Decomposition'] = \
                    unicodedata.decomposition(character)

        chardetails['Characters'] = list(text)
        return chardetails

[docs]    def get_module_name(self):
        """Returns modules Name """
        return "Unicode Character Details"

[docs]    def get_info(self):
        """ Gives Info on the module """
        return "Shows the Unicode Character Details of a given character"


[docs]def getInstance():
    """Returns an instance of :class:`CharDetails` class."""
    return CharDetails()
Source code for chardetails.core

Related Topics

This Page

Navigation

Source code for chardetails.core

Related Topics

This Page

Quick search