Cold Fusion-Style Regular Expressions for Common URL Formats

By Protocol

FILE
(file://((((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([a-zA-Z](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3}))|localhost)?/(((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[?:@&=])*)(/((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[?:@&=])*))*))
FTP
(ftp://((((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[;?&=])*)(:((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[;?&=])*))?@)?((((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([a-zA-Z](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3}))(:([[:digit:]]+))?))(/(((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[?:@&=])*)(/((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[?:@&=])*))*)(;type=[AIDaid])?)?)
GOPHER
(gopher://((((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([a-zA-Z](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3}))(:([[:digit:]]+))?)(/([-a-zA-Z0-9$_.+!*'(),;/?:@&=]|(%[[:xdigit:]]{2}))((([-a-zA-Z0-9$_.+!*'(),;/?:@&=]|(%[[:xdigit:]]{2}))*)(%09((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[;:@&=])*)(%09(([-a-zA-Z0-9$_.+!*'(),;/?:@&=]|(%[[:xdigit:]]{2}))*))?)?)?)?)
HTTP
(https?://((((([-$_.+[:alnum:]!*'(),]|(%[[:xdigit:]]{2}))|[;?&=])*)(:((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[;?&=])*))?@)?((((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([[:alpha:]](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3}))(:([[:digit:]]+))?))(/(((([-a-zA-Z0-9$_.+!*'(),~]|(%[[:xdigit:]]{2}))|[;:@&=])*)(/((([-a-zA-Z0-9$_.+!*'(),~]|(%[[:xdigit:]]{2}))|[;:@&=])*))*)(\?((([-a-zA-Z0-9$_.+!*'(),~]|(%[[:xdigit:]]{2}))|[;:@&=])*))?)?)
IMAP
(imap://((((((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[&=~])+)((;[Aa][Uu][Tt][Hh]=(\*|((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[&=~])+))))?)|((;[Aa][Uu][Tt][Hh]=(\*|((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[&=~])+)))(((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[&=~])+))?))@)?((((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([a-zA-Z](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3}))(:([[:digit:]]+))?))/(((((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[&=~:@/])+)?;[Tt][Yy][Pp][Ee]=([Ll]([Ii][Ss][Tt]|[Ss][Uu][Bb])))|(((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[&=~:@/])+)(\?((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[&=~:@/])+))?((;[Uu][Ii][Dd][Vv][Aa][Ll][Ii][Dd][Ii][Tt][Yy]=([1-9][[:digit:]]*)))?)|(((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[&=~:@/])+)((;[Uu][Ii][Dd][Vv][Aa][Ll][Ii][Dd][Ii][Tt][Yy]=([1-9][[:digit:]]*)))?(/;[Uu][Ii][Dd]=([1-9][[:digit:]]*))((/;[Ss][Ee][Cc][Tt][Ii][Oo][Nn]=((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[&=~:@/])+)))?)))?)
LDAP
(ldap://(((((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([a-zA-Z](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3}))(:([[:digit:]]+))?))?/((((((([[:alnum:]]|%(3[[:digit:]]|[46][[:xdigit:]]|[57][Aa0-9]))|(%20))+|(OID|oid)\.(([[:digit:]]+)(\.([[:digit:]]+))*))((%0[Aa])?(%20)*)=((%0[Aa])?(%20)*))?(([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))*))(((%0[Aa])?(%20)*)\+((%0[Aa])?(%20)*)((((([[:alnum:]]|%(3[[:digit:]]|[46][[:xdigit:]]|[57][Aa0-9]))|(%20))+|(OID|oid)\.(([[:digit:]]+)(\.([[:digit:]]+))*))((%0[Aa])?(%20)*)=((%0[Aa])?(%20)*))?(([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))*)))*)((((%0[Aa])?(%20)*)([;,])((%0[Aa])?(%20)*))(((((([[:alnum:]]|%(3[[:digit:]]|[46][[:xdigit:]]|[57][Aa0-9]))|(%20))+|(OID|oid)\.(([[:digit:]]+)(\.([[:digit:]]+))*))((%0[Aa])?(%20)*)=((%0[Aa])?(%20)*))?(([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))*))(((%0[Aa])?(%20)*)\+((%0[Aa])?(%20)*)((((([[:alnum:]]|%(3[[:digit:]]|[46][[:xdigit:]]|[57][Aa0-9]))|(%20))+|(OID|oid)\.(([[:digit:]]+)(\.([[:digit:]]+))*))((%0[Aa])?(%20)*)=((%0[Aa])?(%20)*))?(([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))*)))*))*(((%0[Aa])?(%20)*)([;,])((%0[Aa])?(%20)*))?)(\?(((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))+)(,(([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))+))*)?)(\?(base|one|sub)(\?((([-a-zA-Z0-9$_.+!*'(),;/?:@&=]|(%[[:xdigit:]]{2}))+)))?)?)?)
MAILTO
(mailto:(([-a-zA-Z0-9$_.+!*'(),;/?:@&=]|(%[[:xdigit:]]{2}))+))
NEWS
(news:(((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[;/?:&=])+@(((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([a-zA-Z](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3})))|([a-zA-Z]([[:alnum:]]|[_.+-])*)|\*))
NFS
(nfs:((//((((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([a-zA-Z](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3}))(:([[:digit:]]+))?)((/((((([-a-zA-Z0-9$_.!~*'(),])|(%[[:xdigit:]]{2})|[:@&=+])*)(/((([-a-zA-Z0-9$_.!~*'(),])|(%[[:xdigit:]]{2})|[:@&=+])*))*)?)))?)|(/((((([-a-zA-Z0-9$_.!~*'(),])|(%[[:xdigit:]]{2})|[:@&=+])*)(/((([-a-zA-Z0-9$_.!~*'(),])|(%[[:xdigit:]]{2})|[:@&=+])*))*)?))|((((([-a-zA-Z0-9$_.!~*'(),])|(%[[:xdigit:]]{2})|[:@&=+])*)(/((([-a-zA-Z0-9$_.!~*'(),])|(%[[:xdigit:]]{2})|[:@&=+])*))*)?)))
NNTP
(nntp://((((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([a-zA-Z](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3}))(:([[:digit:]]+))?)/([a-zA-Z]([[:alnum:]]|[_.+-])*)(/([[:digit:]]+))?)
TELNET
(telnet://((((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[;?&=])*)(:((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[;?&=])*))?@)?((((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([a-zA-Z](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3}))(:([[:digit:]]+))?))/?)
WAIS
(wais://((((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([a-zA-Z](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3}))(:([[:digit:]]+))?)/(([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))*)((/(([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))*)/(([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))*))|\?((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[;:@&=])*))?)

All-In-One

(file://((((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([a-zA-Z](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3}))|localhost)?/(((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[?:@&=])*)(/((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[?:@&=])*))*))|(ftp://((((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[;?&=])*)(:((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[;?&=])*))?@)?((((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([a-zA-Z](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3}))(:([[:digit:]]+))?))(/(((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[?:@&=])*)(/((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[?:@&=])*))*)(;type=[AIDaid])?)?)|(gopher://((((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([a-zA-Z](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3}))(:([[:digit:]]+))?)(/([-a-zA-Z0-9$_.+!*'(),;/?:@&=]|(%[[:xdigit:]]{2}))((([-a-zA-Z0-9$_.+!*'(),;/?:@&=]|(%[[:xdigit:]]{2}))*)(%09((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[;:@&=])*)(%09(([-a-zA-Z0-9$_.+!*'(),;/?:@&=]|(%[[:xdigit:]]{2}))*))?)?)?)?)|(https?://((((([-$_.+[:alnum:]!*'(),]|(%[[:xdigit:]]{2}))|[;?&=])*)(:((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[;?&=])*))?@)?((((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([[:alpha:]](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3}))(:([[:digit:]]+))?))(/(((([-a-zA-Z0-9$_.+!*'(),~]|(%[[:xdigit:]]{2}))|[;:@&=])*)(/((([-a-zA-Z0-9$_.+!*'(),~]|(%[[:xdigit:]]{2}))|[;:@&=])*))*)(\?((([-a-zA-Z0-9$_.+!*'(),~]|(%[[:xdigit:]]{2}))|[;:@&=])*))?)?)|(imap://((((((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[&=~])+)((;[Aa][Uu][Tt][Hh]=(\*|((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[&=~])+))))?)|((;[Aa][Uu][Tt][Hh]=(\*|((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[&=~])+)))(((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[&=~])+))?))@)?((((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([a-zA-Z](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3}))(:([[:digit:]]+))?))/(((((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[&=~:@/])+)?;[Tt][Yy][Pp][Ee]=([Ll]([Ii][Ss][Tt]|[Ss][Uu][Bb])))|(((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[&=~:@/])+)(\?((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[&=~:@/])+))?((;[Uu][Ii][Dd][Vv][Aa][Ll][Ii][Dd][Ii][Tt][Yy]=([1-9][[:digit:]]*)))?)|(((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[&=~:@/])+)((;[Uu][Ii][Dd][Vv][Aa][Ll][Ii][Dd][Ii][Tt][Yy]=([1-9][[:digit:]]*)))?(/;[Uu][Ii][Dd]=([1-9][[:digit:]]*))((/;[Ss][Ee][Cc][Tt][Ii][Oo][Nn]=((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[&=~:@/])+)))?)))?)|(ldap://(((((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([a-zA-Z](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3}))(:([[:digit:]]+))?))?/((((((([[:alnum:]]|%(3[[:digit:]]|[46][[:xdigit:]]|[57][Aa0-9]))|(%20))+|(OID|oid)\.(([[:digit:]]+)(\.([[:digit:]]+))*))((%0[Aa])?(%20)*)=((%0[Aa])?(%20)*))?(([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))*))(((%0[Aa])?(%20)*)\+((%0[Aa])?(%20)*)((((([[:alnum:]]|%(3[[:digit:]]|[46][[:xdigit:]]|[57][Aa0-9]))|(%20))+|(OID|oid)\.(([[:digit:]]+)(\.([[:digit:]]+))*))((%0[Aa])?(%20)*)=((%0[Aa])?(%20)*))?(([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))*)))*)((((%0[Aa])?(%20)*)([;,])((%0[Aa])?(%20)*))(((((([[:alnum:]]|%(3[[:digit:]]|[46][[:xdigit:]]|[57][Aa0-9]))|(%20))+|(OID|oid)\.(([[:digit:]]+)(\.([[:digit:]]+))*))((%0[Aa])?(%20)*)=((%0[Aa])?(%20)*))?(([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))*))(((%0[Aa])?(%20)*)\+((%0[Aa])?(%20)*)((((([[:alnum:]]|%(3[[:digit:]]|[46][[:xdigit:]]|[57][Aa0-9]))|(%20))+|(OID|oid)\.(([[:digit:]]+)(\.([[:digit:]]+))*))((%0[Aa])?(%20)*)=((%0[Aa])?(%20)*))?(([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))*)))*))*(((%0[Aa])?(%20)*)([;,])((%0[Aa])?(%20)*))?)(\?(((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))+)(,(([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))+))*)?)(\?(base|one|sub)(\?((([-a-zA-Z0-9$_.+!*'(),;/?:@&=]|(%[[:xdigit:]]{2}))+)))?)?)?)|(mailto:(([-a-zA-Z0-9$_.+!*'(),;/?:@&=]|(%[[:xdigit:]]{2}))+))|(news:(((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[;/?:&=])+@(((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([a-zA-Z](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3})))|([a-zA-Z]([[:alnum:]]|[_.+-])*)|\*))|(nfs:((//((((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([a-zA-Z](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3}))(:([[:digit:]]+))?)((/((((([-a-zA-Z0-9$_.!~*'(),])|(%[[:xdigit:]]{2})|[:@&=+])*)(/((([-a-zA-Z0-9$_.!~*'(),])|(%[[:xdigit:]]{2})|[:@&=+])*))*)?)))?)|(/((((([-a-zA-Z0-9$_.!~*'(),])|(%[[:xdigit:]]{2})|[:@&=+])*)(/((([-a-zA-Z0-9$_.!~*'(),])|(%[[:xdigit:]]{2})|[:@&=+])*))*)?))|((((([-a-zA-Z0-9$_.!~*'(),])|(%[[:xdigit:]]{2})|[:@&=+])*)(/((([-a-zA-Z0-9$_.!~*'(),])|(%[[:xdigit:]]{2})|[:@&=+])*))*)?)))|(nntp://((((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([a-zA-Z](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3}))(:([[:digit:]]+))?)/([a-zA-Z]([[:alnum:]]|[_.+-])*)(/([[:digit:]]+))?)|(telnet://((((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[;?&=])*)(:((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[;?&=])*))?@)?((((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([a-zA-Z](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3}))(:([[:digit:]]+))?))/?)|(wais://((((([[:alnum:]](([[:alnum:]]|-)*[[:alnum:]])?)\.)*([a-zA-Z](([[:alnum:]]|-)*[[:alnum:]])?))|(([[:digit:]]+)(\.([[:digit:]]+)){3}))(:([[:digit:]]+))?)/(([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))*)((/(([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))*)/(([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))*))|\?((([-a-zA-Z0-9$_.+!*'(),]|(%[[:xdigit:]]{2}))|[;:@&=])*))?)

Tests

References

Version Information

01-Aug-2000 Rick Osborne
First version, no changes.