diff --git a/2-automate/Regex Exercises.ipynb b/2-automate/Regex Exercises.ipynb
new file mode 100644
index 0000000..2f6a8c3
--- /dev/null
+++ b/2-automate/Regex Exercises.ipynb
@@ -0,0 +1,474 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import re"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "String contains characters other than certain set of characters (in this case a-z, A-Z and 0-9)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check that a string contains only a certain set of characters (in this case a-z, A-Z and 0-9)\n",
+    "\n",
+    "pattern = r'[^a-zA-Z0-9]'\n",
+    "test_string = 'abyss,'\n",
+    "match = re.search(pattern, test_string)\n",
+    "\n",
+    "# If-statement after search() tests if it succeeded\n",
+    "if not match:\n",
+    "    print('The string contains only a certain set of characters (in this case a-z, A-Z and 0-9)')\n",
+    "else:\n",
+    "    print('String contains characters other than certain set of characters (in this case a-z, A-Z and 0-9)')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "String that has an \"a\" followed by zero or more \"b\"\n",
+      "['ab']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Match a string that has an a followed by zero or more b\n",
+    "\n",
+    "pattern = r'ab*'\n",
+    "test_string = 'abss,'\n",
+    "match = re.search(pattern, test_string)\n",
+    "\n",
+    "# If-statement after search() tests if it succeeded\n",
+    "if match:\n",
+    "    print('String that has an \"a\" followed by zero or more \"b\"')\n",
+    "    print(re.findall(pattern, test_string))\n",
+    "else:\n",
+    "    print('String that does not have an \"a\" followed by zero or more \"b\"')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "String that does not have an \"a\" followed by 1 or more \"b\"\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Match a string that has an a followed by 1 or more b\n",
+    "\n",
+    "pattern = r'ab+'\n",
+    "test_string = 'ass,'\n",
+    "match = re.search(pattern, test_string)\n",
+    "\n",
+    "# If-statement after search() tests if it succeeded\n",
+    "if match:\n",
+    "    print('String that has an \"a\" followed by 1 or more \"b\"')\n",
+    "    print(re.findall(pattern, test_string))\n",
+    "else:\n",
+    "    print('String that does not have an \"a\" followed by 1 or more \"b\"')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "String that has an \"a\" followed by 0 or 1 \"b\"\n",
+      "['a']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Match a string that has an a followed by zero or one b\n",
+    "\n",
+    "pattern = r'ab?'\n",
+    "test_string = 'ass,'\n",
+    "match = re.search(pattern, test_string)\n",
+    "\n",
+    "# If-statement after search() tests if it succeeded\n",
+    "if match:\n",
+    "    print('String that has an \"a\" followed by 0 or 1 \"b\"')\n",
+    "    print(re.findall(pattern, test_string))\n",
+    "else:\n",
+    "    print('String that does not have an \"a\" followed by 0 or 1 \"b\"')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 100,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "String that has an \"a\" followed by 3 \"b\"\n",
+      "['abbb', 'abbb']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Match a string that has an a followed by three b\n",
+    "\n",
+    "pattern = r'ab{3}'\n",
+    "test_string = 'asdabbbasdasdaswrfdhcvdhrtbbbbdasdabbbbb,'\n",
+    "match = re.search(pattern, test_string)\n",
+    "\n",
+    "# If-statement after search() tests if it succeeded\n",
+    "if match:\n",
+    "    print('String that has an \"a\" followed by 3 \"b\"')\n",
+    "    print(re.findall(pattern, test_string))\n",
+    "else:\n",
+    "    print('String that does not have an \"a\" followed by 3 \"b\"')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 104,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "String that has an \"a\" followed by 2 to 3 \"b\"\n",
+      "['abbb', 'abb', 'abbb']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Match a string that has an a followed by two to three b\n",
+    "\n",
+    "pattern = r'ab{2,3}'\n",
+    "test_string = 'asdabbbasdasdaswrfdhcvdhrtbbabbdasdabbbbb,'\n",
+    "match = re.search(pattern, test_string)\n",
+    "\n",
+    "# If-statement after search() tests if it succeeded\n",
+    "if match:\n",
+    "    print('String that has an \"a\" followed by 2 to 3 \"b\"')\n",
+    "    print(re.findall(pattern, test_string))\n",
+    "else:\n",
+    "    print('String that does not have an \"a\" followed by 2 to 3 \"b\"')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 126,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "String that has an \"a\" followed by anything, ending in \"b\"\n",
+      "['asdabbbasdasdaswrfdhcvdhrtbbabbdasdabbbbb,b']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Match a string that has an 'a' followed by anything, ending in b\n",
+    "\n",
+    "pattern = r'a.*b$'\n",
+    "test_string = 'asdabbbasdasdaswrfdhcvdhrtbbabbdasdabbbbb,b'\n",
+    "match = re.search(pattern, test_string)\n",
+    "\n",
+    "# If-statement after search() tests if it succeeded\n",
+    "if match:\n",
+    "    print('String that has an \"a\" followed by anything, ending in \"b\"')\n",
+    "    print(re.findall(pattern, test_string))\n",
+    "else:\n",
+    "    print('String that does not have an \"a\" followed by anything, ending in \"b\"')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 138,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The word matched at the beginning of a string\n",
+      "['My ']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Match a word at the beginning of a string\n",
+    "\n",
+    "pattern = r'\\AMy '\n",
+    "test_string = 'My name is Prabin!!'\n",
+    "match = re.search(pattern, test_string)\n",
+    "\n",
+    "# If-statement after search() tests if it succeeded\n",
+    "if match:\n",
+    "    print('The word matched at the beginning of a string')\n",
+    "    print(re.findall(pattern, test_string))\n",
+    "else:\n",
+    "    print('The word didnt match at the beginning of a string')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 151,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "A word containing \"z\", not at the start or end of the word\n",
+      "['z']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Match a word containing 'z', not at the start or end of the word\n",
+    "\n",
+    "pattern = r'\\Bz\\B'\n",
+    "test_string = 'zibber zabber saszsaedw!'\n",
+    "\n",
+    "match = re.search(pattern, test_string)\n",
+    "\n",
+    "# If-statement after search() tests if it succeeded\n",
+    "if match:\n",
+    "    print('A word containing \"z\", not at the start or end of the word')\n",
+    "    print(re.findall(pattern, test_string))\n",
+    "else:\n",
+    "    print('Pattern not matched')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 197,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The string ends with a number\n",
+      "['15']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check for a number at the end of a string\n",
+    "\n",
+    "pattern = r'[0-9]+$'\n",
+    "test_string = 'zibber zabber saszsaedw!a15'\n",
+    "\n",
+    "match = re.search(pattern, test_string)\n",
+    "\n",
+    "# If-statement after search() tests if it succeeded\n",
+    "if match:\n",
+    "    print('The string ends with a number')\n",
+    "    print(re.findall(pattern, test_string))\n",
+    "else:\n",
+    "    print('The string does not end with a number')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 206,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Substring is in the string\n",
+      "\"narayan\" matched from 6 to 13 position\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Find the occurrence and position of the substrings within a string.\n",
+    "\n",
+    "substring = r'narayan'\n",
+    "string = 'sajal narayan shrestha'\n",
+    "\n",
+    "match = re.search(substring, string)\n",
+    "# print(dir(match))\n",
+    "# If-statement after search() tests if it succeeded\n",
+    "if match:\n",
+    "    print('Substring is in the string')\n",
+    "    print('\"{}\" matched from {} to {} position'.format(substring, match.start(), match.end()))\n",
+    "else:\n",
+    "    print('Substring not present')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 220,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "27-02-2020\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Convert a date of yyyy-mm-dd format to dd-mm-yyyy format\n",
+    "\n",
+    "from datetime import datetime\n",
+    "\n",
+    "# print(dir(datetime))\n",
+    "yyyymmdd = datetime.strptime('2020-02-27', '%Y-%m-%d')\n",
+    "\n",
+    "print(datetime.strftime(yyyymmdd, '%d-%m-%Y'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 225,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['asdas', 'edfsdf']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Find all words starting with 'a' or 'e' in a given string\n",
+    "\n",
+    "# \\b anchors each match to the start of a word; \\w* consumes the rest of it\n",
+    "pattern = r'\\b[ae]\\w*'\n",
+    "test_string = 'zibber zabber asdas edfsdf rw3q saszsaedw!'\n",
+    "\n",
+    "words_a_e = re.findall(pattern, test_string)\n",
+    "\n",
+    "print(words_a_e)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 230,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "cotiviti_nepal\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Convert camel case string to snake case string\n",
+    "\n",
+    "camelcase = 'CotivitiNepal'\n",
+    "\n",
+    "# Pass 1 splits before each capitalised word; pass 2 splits a lowercase\n",
+    "# letter or digit from a following capital (e.g. a trailing single capital)\n",
+    "partial = re.sub(r'(.)([A-Z][a-z]+)', r'\\1_\\2', camelcase)\n",
+    "snakecase = re.sub(r'([a-z0-9])([A-Z])', r'\\1_\\2', partial).lower()\n",
+    "\n",
+    "print(snakecase)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 237,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CotivitiTechnologiesNepal\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Remove all whitespaces (spaces, tabs, newlines) from a string\n",
+    "\n",
+    "string = 'Cotiviti Technologies Nepal'\n",
+    "\n",
+    "print(re.sub(r'\\s+', '', string))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 239,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['https://stackoverflow.com/questions/6038061/regular-expression-to-find-urls-within-a-string', 'www.google.com', 'facebook.com', 'http://test.com/method?param=wasd']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Find urls in a string\n",
+    "# copied\n",
+    "\n",
+    "text = \"\"\"The link of this question: https://stackoverflow.com/questions/6038061/regular-expression-to-find-urls-within-a-string\n",
+    "Also there are some urls: www.google.com, facebook.com, http://test.com/method?param=wasd\n",
+    "The code below catches all urls in text and returns urls in list.\"\"\"\n",
+    "\n",
+    "urls = re.findall(r'(?:(?:https?|ftp)://)?[\\w/\\-?=%.]+\\.[\\w/\\-?=%.]+', text)\n",
+    "print(urls)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}