Extract URLs present in a given string
Given a string S, the task is to find and extract all the URLs from the string. If no URL is present in the string, then print “-1”.
Examples:
Input: S = “Welcome to https://www.w3wiki.net Computer Science Portal”
Output: https://www.w3wiki.net
Explanation:
The given string contains the URL ‘https://www.w3wiki.net’.
Input: S = “Welcome to https://write.w3wiki.net portal of https://www.w3wiki.net Computer Science Portal”
Output:
https://write.w3wiki.net
https://www.w3wiki.net
Explanation:
The given string contains two URLs ‘https://write.w3wiki.net’ and ‘https://www.w3wiki.net’.
Approach: The idea is to use Regular Expression to solve this problem. Follow the steps below to solve the given problem:
- Create a regular expression to extract all the URLs from the string as mentioned below:
regex = “\\b((?:https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|])”
- Create an ArrayList in Java and compile the regular expression using Pattern.compile().
- Match the given string with the regular expression. In Java, this can be done by using Pattern.matcher().
- Find the substring from the first index of match result to the last index of the match result and add this substring into the list.
- After completing the above steps, if the list is found to be empty, then print “-1” as there is no URL present in the string S. Otherwise, print all the strings stored in the list.
Below is the implementation of the above approach:
C++
#include <iostream> #include <regex> #include <vector> using namespace std; // Function to extract all the URLs from the string void extractURL(string str) { // Creating an empty vector to store URLs vector<string> url_list; // Regular Expression to extract URLs from the string string regex_str = "\\b((?:https?|ftp|file):" "\\/\\/[a-zA-Z0-9+&@#\\/%?=~_|!:,.;]*" "[a-zA-Z0-9+&@#\\/%=~_|])" ; // Compile the Regular Expression regex r(regex_str, regex_constants::icase); // Find the match between string and the regular expression sregex_iterator m(str.begin(), str.end(), r); sregex_iterator m_end; // Find and store all the URLs in the vector while (m != m_end) { url_list.push_back(m->str()); m++; } // If no URLs are found, print -1, otherwise print the URLs if (url_list.size() == 0) { cout << "-1" << endl; } else { for (string url : url_list) { cout << url << endl; } } } // Driver Code int main() { // Given String str string str = "Welcome to https://www.w3wiki.net Computer Science Portal" ; // Function Call extractURL(str); return 0; } |
Java
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ExtractURL {

    // Scheme (http, https, ftp or file), "://", then a run of URL characters.
    // '-' is listed first in both character classes so that it is taken
    // literally; without it a host such as "my-site.example.com" would be
    // cut off at the hyphen. The final class leaves out '.', ',', ';', ':',
    // '!' and '?' so trailing punctuation is not part of the match.
    private static final Pattern URL_PATTERN = Pattern.compile(
        "\\b((?:https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*"
            + "[-a-zA-Z0-9+&@#/%=~_|])",
        Pattern.CASE_INSENSITIVE);

    /**
     * Prints every URL found in the given string, one per line, in order of
     * appearance. Prints "-1" when the string contains no URL.
     *
     * @param str the text to scan for URLs
     */
    public static void extractURL(String str)
    {
        // Collect all non-overlapping matches, left to right.
        List<String> urlList = new ArrayList<>();
        Matcher matcher = URL_PATTERN.matcher(str);
        while (matcher.find()) {
            urlList.add(matcher.group());
        }

        // No URL at all is reported as -1.
        if (urlList.isEmpty()) {
            System.out.println("-1");
            return;
        }

        for (String url : urlList) {
            System.out.println(url);
        }
    }

    // Driver Code
    public static void main(String[] args)
    {
        // Given String str
        String str = "Welcome to https://www.w3wiki.net "
                     + "Computer Science Portal";

        // Function Call
        extractURL(str);
    }
}
Python3
import re


def extractURL(str):
    """Print every URL found in ``str``, one per line, in order of appearance.

    Prints "-1" when the string contains no URL.

    A URL here is one of the schemes http, https, ftp or file (matched
    case-insensitively), followed by "://" and a run of URL characters. The
    last character of a match must come from the second, narrower character
    class, so trailing punctuation such as '.' or ',' is left out.

    The parameter keeps its original name ``str`` so existing keyword callers
    still work, even though it shadows the builtin inside this function.
    """
    # Regular Expression to extract URLs from the string
    pattern = re.compile(
        r'\b((?:https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*'
        r'[-a-zA-Z0-9+&@#/%=~_|])',
        re.IGNORECASE,
    )

    # group(0) is the whole matched text, i.e. str[match.start():match.end()]
    url_list = [match.group(0) for match in pattern.finditer(str)]

    # If there is no URL present
    if not url_list:
        print("-1")
        return

    # Print all the URLs stored
    for url in url_list:
        print(url)


# Driver Code
if __name__ == '__main__':
    # Given String str
    string = "Welcome to https://www.w3wiki.net Computer Science Portal"

    # Function Call
    extractURL(string)
C#
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;

class Program
{
    // Prints every URL found in str, one per line, in order of appearance.
    // Prints "-1" when str contains no URL.
    //
    // A URL here is one of the schemes http, https, ftp or file (matched
    // case-insensitively), followed by "://" and a run of URL characters.
    // The last character of a match must come from the second, narrower
    // character class, so trailing punctuation such as '.' or ',' is left out.
    static void ExtractURL(string str)
    {
        // '-' is listed first in both character classes so that it is taken
        // literally; without it a host such as "my-site.example.com" would
        // be cut off at the hyphen.
        string regexStr =
            @"\b((?:https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|])";

        // Compile the Regular Expression
        Regex regex = new Regex(regexStr, RegexOptions.IgnoreCase);

        // Collect all non-overlapping matches, left to right.
        List<string> urlList = new List<string>();
        foreach (Match match in regex.Matches(str))
        {
            urlList.Add(match.Value);
        }

        // If there are no URLs present
        if (urlList.Count == 0)
        {
            Console.WriteLine("-1");
            return;
        }

        // Print all the URLs stored
        foreach (string url in urlList)
        {
            Console.WriteLine(url);
        }
    }

    // Driver Code
    static void Main()
    {
        // Given String str
        string str = "Welcome to https://www.w3wiki.net Computer Science Portal";

        // Function Call
        ExtractURL(str);
    }
}
Javascript
/**
 * Prints every URL found in `str`, one per line, in order of appearance.
 * Prints "-1" when the string contains no URL.
 *
 * A URL here is one of the schemes http, https, ftp or file (matched
 * case-insensitively), followed by "://" and a run of URL characters. The
 * last character of a match must come from the second, narrower character
 * class, so trailing punctuation such as '.' or ',' is left out.
 *
 * @param {string} str - The text to scan for URLs.
 */
function extractURL(str) {
    // '-' is listed first in both character classes so that it is taken
    // literally; without it a host such as "my-site.example.com" would be
    // cut off at the hyphen.
    // 'g' flag for global match, 'i' for case-insensitive
    const urlPattern =
        /\b((?:https?|ftp|file):\/\/[-a-zA-Z0-9+&@#\/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#\/%=~_|])/gi;

    // With the 'g' flag, match() returns every full match, or null if none.
    const urlList = str.match(urlPattern) || [];

    // If no URL is found, print -1
    if (urlList.length === 0) {
        console.log("-1");
        return;
    }

    // Print all the URLs stored in the array
    for (const url of urlList) {
        console.log(url);
    }
}

// Given String str
const str = "Welcome to https://www.w3wiki.net Computer Science Portal";

// Function Call
extractURL(str);
https://www.w3wiki.net
Time Complexity: O(N)
Auxiliary Space: O(N) in the worst case, since every matched URL is stored in a list before it is printed.