# Source code for foundations.strings

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
**strings.py**

**Platform:**
	Windows, Linux, Mac Os X.

**Description:**
	Defines various strings manipulation objects.

**Others:**

"""

#**********************************************************************************************************************
#***	Future imports.
#**********************************************************************************************************************
from __future__ import unicode_literals

#**********************************************************************************************************************
#***	External imports.
#**********************************************************************************************************************
import os
import platform
import posixpath
import random
import re

#**********************************************************************************************************************
#***	Internal imports.
#**********************************************************************************************************************
import foundations.common
import foundations.verbose

#**********************************************************************************************************************
#***	Module attributes.
#**********************************************************************************************************************
__author__ = "Thomas Mansencal"
__copyright__ = "Copyright (C) 2008 - 2014 - Thomas Mansencal"
__license__ = "GPL V3.0 - http://www.gnu.org/licenses/"
__maintainer__ = "Thomas Mansencal"
__email__ = "thomas.mansencal@gmail.com"
__status__ = "Production"

# Explicit public API of this module.
__all__ = ["LOGGER",
			"ASCII_CHARACTERS",
			"toString",
			"getNiceName",
			"getVersionRank",
			"getSplitextBasename",
			"getCommonAncestor",
			"getCommonPathsAncestor",
			"getWords",
			"filterWords",
			"replace",
			"removeStrip",
			"toForwardSlashes",
			"toBackwardSlashes",
			"toPosixPath",
			"getNormalizedPath",
			"getRandomSequence",
			"isEmail",
			"isWebsite"]

# Module level logger, installed through the package's verbose helper.
LOGGER = foundations.verbose.installLogger()

# Characters used by :func:`getRandomSequence`: ascii letters, digits and underscore.
ASCII_CHARACTERS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"

#**********************************************************************************************************************
#***	Module classes and definitions.
#**********************************************************************************************************************
# Alias for :func:`foundations.verbose.toUnicode`: converts given data to a unicode string.
toString = foundations.verbose.toUnicode

def getNiceName(name):
	"""
	Converts a string to nice string: **currentLogText** -> **Current Log Text**.

	Usage::

		>>> getNiceName("getMeANiceName")
		u'Get Me A Nice Name'
		>>> getNiceName("__getMeANiceName")
		u'__Get Me A Nice Name'

	:param name: Current string to be nicified.
	:type name: unicode
	:return: Nicified string.
	:rtype: unicode
	"""

	# Insert a space before every camelCase hump, then title-case each resulting word.
	spaced = re.sub(r"(.)([A-Z][a-z]+)", r"\1 \2", name)
	spaced = re.sub(r"([a-z0-9])([A-Z])", r"\1 \2", spaced)
	words = [word.title() for word in spaced.split()]
	return " ".join(words)
def getVersionRank(version):
	"""
	Converts a version string to its rank.

	Usage::

		>>> getVersionRank("4.2.8")
		4002008000000
		>>> getVersionRank("4.0")
		4000000000000
		>>> getVersionRank("4.2.8").__class__
		<type 'int'>

	:param version: Current version to calculate rank.
	:type version: unicode
	:return: Rank.
	:rtype: int
	"""

	# Split on ".", "-" or ",", drop empty tokens and pad to 4 tokens with 0,
	# materializing the sequence so it also works with Python 3's lazy 'filter' / iterators.
	tokens = list(foundations.common.unpackDefault(
		[token for token in re.split(r"\.|-|,", version) if token], length=4, default=0))
	# Weight each token by a decreasing power of 1000 so e.g. "4.2.8" ranks below "4.3".
	rank = sum(int(1000 ** i) * int(tokens[-i]) for i in range(len(tokens), 0, -1))
	LOGGER.debug("> Rank: '{0}'.".format(rank))
	return rank
def getSplitextBasename(path):
	"""
	Gets the basename of a path without its extension.

	Usage::

		>>> getSplitextBasename("/Users/JohnDoe/Documents/Test.txt")
		u'Test'

	:param path: Path to extract the basename without extension.
	:type path: unicode
	:return: Splitext basename.
	:rtype: unicode
	"""

	# Normalize first, then strip directory and extension parts.
	normalized = os.path.normpath(path)
	basename = foundations.common.getFirstItem(os.path.splitext(os.path.basename(normalized)))
	LOGGER.debug("> Splitext basename: '{0}'.".format(basename))
	return basename
def getCommonAncestor(*args):
	"""
	Gets common ancestor of given iterables.

	Usage::

		>>> getCommonAncestor(("1", "2", "3"), ("1", "2", "0"), ("1", "2", "3", "4"))
		(u'1', u'2')
		>>> getCommonAncestor("azerty", "azetty", "azello")
		u'aze'

	:param \*args: Iterables to retrieve common ancestor from.
	:type \*args: [iterable]
	:return: Common ancestor.
	:rtype: iterable
	"""

	# Materialize so that '.index' and truth-testing behave identically on Python 3,
	# where 'map' / 'filter' return lazy iterators (a 'filter' object is always truthy).
	array = list(map(set, zip(*args)))
	divergence = list(filter(lambda i: len(i) > 1, array))
	if divergence:
		# Slice the first iterable up to the first position where the inputs diverge.
		ancestor = foundations.common.getFirstItem(args)[:array.index(foundations.common.getFirstItem(divergence))]
	else:
		ancestor = min(args)
	LOGGER.debug("> Common Ancestor: '{0}'".format(ancestor))
	return ancestor
def getCommonPathsAncestor(*args):
	"""
	Gets common paths ancestor of given paths.

	Usage::

		>>> getCommonPathsAncestor("/Users/JohnDoe/Documents", "/Users/JohnDoe/Documents/Test.txt")
		u'/Users/JohnDoe/Documents'

	:param \*args: Paths to retrieve common ancestor from.
	:type \*args: [unicode]
	:return: Common path ancestor.
	:rtype: unicode
	"""

	# Tokenize each path on the platform separator and reuse the generic ancestor search.
	tokens = [path.split(os.sep) for path in args]
	pathAncestor = os.sep.join(getCommonAncestor(*tokens))
	LOGGER.debug("> Common Paths Ancestor: '{0}'".format(pathAncestor))
	return pathAncestor
def getWords(data):
	"""
	Extracts the words from given string.

	Usage::

		>>> getWords("Users are: John Doe, Jane Doe, Z6PO.")
		[u'Users', u'are', u'John', u'Doe', u'Jane', u'Doe', u'Z6PO']

	:param data: Data to extract words from.
	:type data: unicode
	:return: Words.
	:rtype: list
	"""

	pattern = re.compile(r"\w+")
	words = pattern.findall(data)
	LOGGER.debug("> Words: '{0}'".format(", ".join(words)))
	return words
def filterWords(words, filtersIn=None, filtersOut=None, flags=0):
	"""
	Filters the words using the given filters.

	Usage::

		>>> filterWords(["Users", "are", "John", "Doe", "Jane", "Doe", "Z6PO"], filtersIn=("John", "Doe"))
		[u'John', u'Doe', u'Doe']
		>>> filterWords(["Users", "are", "John", "Doe", "Jane", "Doe", "Z6PO"], filtersIn=("\w*r",))
		[u'Users', u'are']
		>>> filterWords(["Users", "are", "John", "Doe", "Jane", "Doe", "Z6PO"], filtersOut=("\w*o",))
		[u'Users', u'are', u'Jane', u'Z6PO']

	:param words: Words to filter.
	:type words: list
	:param filtersIn: Regex filters in list.
	:type filtersIn: tuple or list
	:param filtersOut: Regex filters out list.
	:type filtersOut: tuple or list
	:param flags: Regex flags.
	:type flags: int
	:return: Filtered words.
	:rtype: list
	"""

	filteredWords = []
	for word in words:
		if filtersIn:
			# Keep the word only if at least one "in" filter matches.
			filterMatched = False
			for pattern in filtersIn:
				if not re.search(pattern, word, flags):
					LOGGER.debug("> '{0}' word skipped, filter in '{1}' not matched!".format(word, pattern))
				else:
					filterMatched = True
					break
			if not filterMatched:
				continue

		if filtersOut:
			# Discard the word as soon as one "out" filter matches.
			filterMatched = False
			for pattern in filtersOut:
				if re.search(pattern, word, flags):
					LOGGER.debug("> '{0}' word skipped, filter out '{1}' matched!".format(word, pattern))
					filterMatched = True
					break
			if filterMatched:
				continue
		filteredWords.append(word)
	LOGGER.debug("> Filtered words: '{0}'".format(", ".join(filteredWords)))
	return filteredWords
def replace(string, data):
	"""
	Replaces the data occurrences in the string.

	Usage::

		>>> replace("Users are: John Doe, Jane Doe, Z6PO.", {"John" : "Luke", "Jane" : "Anakin", "Doe" : "Skywalker", "Z6PO" : "R2D2"})
		u'Users are: Luke Skywalker, Anakin Skywalker, R2D2.'

	:param string: String to manipulate.
	:type string: unicode
	:param data: Replacement occurrences.
	:type data: dict
	:return: Manipulated string.
	:rtype: unicode
	"""

	# 'items' works on both Python 2 and Python 3, unlike the original 'iteritems'.
	for old, new in data.items():
		string = string.replace(old, new)
	return string
def removeStrip(string, pattern):
	"""
	Removes the pattern occurrences in the string and strips the result.

	Usage::

		>>> removeStrip("John Doe", "John")
		u'Doe'

	:param string: String to manipulate.
	:type string: unicode
	:param pattern: Replacement pattern.
	:type pattern: unicode
	:return: Manipulated string.
	:rtype: unicode
	"""

	stripped = string.replace(pattern, "")
	return stripped.strip()
def toForwardSlashes(data):
	"""
	Converts backward slashes to forward slashes.

	Usage::

		>>> toForwardSlashes("To\Forward\Slashes")
		u'To/Forward/Slashes'

	:param data: Data to convert.
	:type data: unicode
	:return: Converted path.
	:rtype: unicode
	"""

	converted = "/".join(data.split("\\"))
	LOGGER.debug("> Data: '{0}' to forward slashes.".format(converted))
	return converted
def toBackwardSlashes(data):
	"""
	Converts forward slashes to backward slashes.

	Usage::

		>>> toBackwardSlashes("/Users/JohnDoe/Documents")
		u'\\Users\\JohnDoe\\Documents'

	:param data: Data to convert.
	:type data: unicode
	:return: Converted path.
	:rtype: unicode
	"""

	converted = "\\".join(data.split("/"))
	LOGGER.debug("> Data: '{0}' to backward slashes.".format(converted))
	return converted
def toPosixPath(path):
	"""
	Converts Windows path to Posix path while stripping drives letters and network server slashes.

	Usage::

		>>> toPosixPath("c:\\Users\\JohnDoe\\Documents")
		u'/Users/JohnDoe/Documents'

	:param path: Windows path.
	:type path: unicode
	:return: Path converted to Posix path.
	:rtype: unicode
	"""

	# Strip the drive letter ("c:\") or UNC prefix ("\\"), then normalize as a posix path.
	normalized = os.path.normpath(path)
	stripped = re.sub(r"[a-zA-Z]:\\|\\\\", "/", normalized)
	posixPath = posixpath.normpath(toForwardSlashes(stripped))
	LOGGER.debug("> Stripped converted to Posix path: '{0}'.".format(posixPath))
	return posixPath
def getNormalizedPath(path):
	"""
	Normalizes a path, escaping slashes if needed on Windows.

	Usage::

		>>> getNormalizedPath("C:\\Users/johnDoe\\Documents")
		u'C:\\Users\\johnDoe\\Documents'

	:param path: Path to normalize.
	:type path: unicode
	:return: Normalized path.
	:rtype: unicode
	"""

	path = os.path.normpath(path)
	# On Windows, escape the backslashes so the path survives later regex / string processing.
	if platform.system() in ("Windows", "Microsoft"):
		path = path.replace("\\", "\\\\")
	LOGGER.debug("> Path: '{0}', normalized path.".format(path))
	return path
def getRandomSequence(length=8):
	"""
	Returns a random sequence.

	Usage::

		>>> getRandomSequence()
		u'N_mYO7g5'

	:param length: Length of the sequence.
	:type length: int
	:return: Random sequence.
	:rtype: unicode
	"""

	# Draw 'length' characters uniformly from the module's ascii alphabet.
	return "".join(random.choice(ASCII_CHARACTERS) for _ in range(length))
def isEmail(data):
	"""
	Checks if given data string is an email.

	Usage::

		>>> isEmail("john.doe@domain.com")
		True
		>>> isEmail("john.doe:domain.com")
		False

	:param data: Data to check.
	:type data: unicode
	:return: Is email.
	:rtype: bool
	"""

	# NOTE(review): the 2-4 characters TLD restriction rejects newer long TLDs — confirm intent.
	if re.match(r"[\w.%+-]+@[\w.]+\.[a-zA-Z]{2,4}", data):
		LOGGER.debug("> '{0}' is matched as email.".format(data))
		return True
	else:
		LOGGER.debug("> '{0}' is not matched as email.".format(data))
		return False
def isWebsite(url):
	"""
	Checks if given url string is a website.

	Usage::

		>>> isWebsite("http://www.domain.com")
		True
		>>> isWebsite("domain.com")
		False

	:param url: Url to check.
	:type url: unicode
	:return: Is website.
	:rtype: bool
	"""

	if re.match(r"(http|ftp|https)://([\w\-\.]+)/?", url):
		LOGGER.debug("> '{0}' is matched as website.".format(url))
		return True
	else:
		LOGGER.debug("> '{0}' is not matched as website.".format(url))
		return False