Filename | /usr/share/perl5/URI/Escape.pm |
Statements | Executed 279 statements in 972µs |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 14µs | 14µs | CORE:regcomp (opcode) | URI::Escape::
1 | 1 | 1 | 12µs | 27µs | uri_escape | URI::Escape::
1 | 1 | 1 | 11µs | 25µs | BEGIN@2 | URI::Escape::
1 | 1 | 1 | 7µs | 8µs | uri_unescape | URI::Escape::
1 | 1 | 1 | 5µs | 5µs | BEGIN@145 | URI::Escape::
2 | 1 | 1 | 3µs | 3µs | CORE:qr (opcode) | URI::Escape::
2 | 2 | 1 | 2µs | 2µs | CORE:subst (opcode) | URI::Escape::
0 | 0 | 0 | 0s | 0s | _fail_hi | URI::Escape::
0 | 0 | 0 | 0s | 0s | escape_char | URI::Escape::
0 | 0 | 0 | 0s | 0s | uri_escape_utf8 | URI::Escape::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | package URI::Escape; | ||||
2 | 2 | 153µs | 2 | 38µs | # spent 25µs (11+14) within URI::Escape::BEGIN@2 which was called:
# once (11µs+14µs) by URI::BEGIN@22 at line 2 # spent 25µs making 1 call to URI::Escape::BEGIN@2
# spent 14µs making 1 call to strict::import |
3 | |||||
4 | =head1 NAME | ||||
5 | |||||
6 | URI::Escape - Percent-encode and percent-decode unsafe characters | ||||
7 | |||||
8 | =head1 SYNOPSIS | ||||
9 | |||||
10 | use URI::Escape; | ||||
11 | $safe = uri_escape("10% is enough\n"); | ||||
12 | $verysafe = uri_escape("foo", "\0-\377"); | ||||
13 | $str = uri_unescape($safe); | ||||
14 | |||||
15 | =head1 DESCRIPTION | ||||
16 | |||||
17 | This module provides functions to percent-encode and percent-decode URI strings as | ||||
18 | defined by RFC 3986. Percent-encoding URI's is informally called "URI escaping". | ||||
19 | This is the terminology used by this module, which predates the formalization of the | ||||
20 | terms by the RFC by several years. | ||||
21 | |||||
22 | A URI consists of a restricted set of characters. The restricted set | ||||
23 | of characters consists of digits, letters, and a few graphic symbols | ||||
24 | chosen from those common to most of the character encodings and input | ||||
25 | facilities available to Internet users. They are made up of the | ||||
26 | "unreserved" and "reserved" character sets as defined in RFC 3986. | ||||
27 | |||||
28 | unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" | ||||
29 | reserved = ":" / "/" / "?" / "#" / "[" / "]" / "@" | ||||
30 | "!" / "$" / "&" / "'" / "(" / ")" | ||||
31 | / "*" / "+" / "," / ";" / "=" | ||||
32 | |||||
33 | In addition, any byte (octet) can be represented in a URI by an escape | ||||
34 | sequence: a triplet consisting of the character "%" followed by two | ||||
35 | hexadecimal digits. A byte can also be represented directly by a | ||||
36 | character, using the US-ASCII character for that octet. | ||||
37 | |||||
38 | Some of the characters are I<reserved> for use as delimiters or as | ||||
39 | part of certain URI components. These must be escaped if they are to | ||||
40 | be treated as ordinary data. Read RFC 3986 for further details. | ||||
41 | |||||
42 | The functions provided (and exported by default) from this module are: | ||||
43 | |||||
44 | =over 4 | ||||
45 | |||||
46 | =item uri_escape( $string ) | ||||
47 | |||||
48 | =item uri_escape( $string, $unsafe ) | ||||
49 | |||||
50 | Replaces each unsafe character in the $string with the corresponding | ||||
51 | escape sequence and returns the result. The $string argument should | ||||
52 | be a string of bytes. The uri_escape() function will croak if given a | ||||
53 | characters with code above 255. Use uri_escape_utf8() if you know you | ||||
54 | have such chars or/and want chars in the 128 .. 255 range treated as | ||||
55 | UTF-8. | ||||
56 | |||||
57 | The uri_escape() function takes an optional second argument that | ||||
58 | overrides the set of characters that are to be escaped. The set is | ||||
59 | specified as a string that can be used in a regular expression | ||||
60 | character class (between [ ]). E.g.: | ||||
61 | |||||
62 | "\x00-\x1f\x7f-\xff" # all control and hi-bit characters | ||||
63 | "a-z" # all lower case characters | ||||
64 | "^A-Za-z" # everything not a letter | ||||
65 | |||||
66 | The default set of characters to be escaped is all those which are | ||||
67 | I<not> part of the C<unreserved> character class shown above as well | ||||
68 | as the reserved characters. I.e. the default is: | ||||
69 | |||||
70 | "^A-Za-z0-9\-\._~" | ||||
71 | |||||
72 | =item uri_escape_utf8( $string ) | ||||
73 | |||||
74 | =item uri_escape_utf8( $string, $unsafe ) | ||||
75 | |||||
76 | Works like uri_escape(), but will encode chars as UTF-8 before | ||||
77 | escaping them. This makes this function able to deal with characters | ||||
78 | with code above 255 in $string. Note that chars in the 128 .. 255 | ||||
79 | range will be escaped differently by this function compared to what | ||||
80 | uri_escape() would. For chars in the 0 .. 127 range there is no | ||||
81 | difference. | ||||
82 | |||||
83 | Equivalent to: | ||||
84 | |||||
85 | utf8::encode($string); | ||||
86 | my $uri = uri_escape($string); | ||||
87 | |||||
88 | Note: JavaScript has a function called escape() that produces the | ||||
89 | sequence "%uXXXX" for chars in the 256 .. 65535 range. This function | ||||
90 | has really nothing to do with URI escaping but some folks got confused | ||||
91 | since it "does the right thing" in the 0 .. 255 range. Because of | ||||
92 | this you sometimes see "URIs" with these kind of escapes. The | ||||
93 | JavaScript encodeURIComponent() function is similar to uri_escape_utf8(). | ||||
94 | |||||
95 | =item uri_unescape($string,...) | ||||
96 | |||||
97 | Returns a string with each %XX sequence replaced with the actual byte | ||||
98 | (octet). | ||||
99 | |||||
100 | This does the same as: | ||||
101 | |||||
102 | $string =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; | ||||
103 | |||||
104 | but does not modify the string in-place as this RE would. Using the | ||||
105 | uri_unescape() function instead of the RE might make the code look | ||||
106 | cleaner and is a few characters less to type. | ||||
107 | |||||
108 | In a simple benchmark test I did, | ||||
109 | calling the function (instead of the inline RE above) if a few chars | ||||
110 | were unescaped was something like 40% slower, and something like 700% slower if none were. If | ||||
111 | you are going to unescape a lot of times it might be a good idea to | ||||
112 | inline the RE. | ||||
113 | |||||
114 | If the uri_unescape() function is passed multiple strings, then each | ||||
115 | one is returned unescaped. | ||||
116 | |||||
117 | =back | ||||
118 | |||||
119 | The module can also export the C<%escapes> hash, which contains the | ||||
120 | mapping from all 256 bytes to the corresponding escape codes. Lookup | ||||
121 | in this hash is faster than evaluating C<sprintf("%%%02X", ord($byte))> | ||||
122 | each time. | ||||
123 | |||||
124 | =head1 SEE ALSO | ||||
125 | |||||
126 | L<URI> | ||||
127 | |||||
128 | |||||
129 | =head1 COPYRIGHT | ||||
130 | |||||
131 | Copyright 1995-2004 Gisle Aas. | ||||
132 | |||||
133 | This program is free software; you can redistribute it and/or modify | ||||
134 | it under the same terms as Perl itself. | ||||
135 | |||||
136 | =cut | ||||
137 | |||||
138 | 1 | 700ns | require Exporter; | ||
139 | 1 | 10µs | our @ISA = qw(Exporter); | ||
140 | 1 | 200ns | our %escapes; | ||
141 | 1 | 800ns | our @EXPORT = qw(uri_escape uri_unescape uri_escape_utf8); | ||
142 | 1 | 300ns | our @EXPORT_OK = qw(%escapes); | ||
143 | 1 | 300ns | our $VERSION = "3.31"; | ||
144 | |||||
145 | 2 | 493µs | 1 | 5µs | # spent 5µs within URI::Escape::BEGIN@145 which was called:
# once (5µs+0s) by URI::BEGIN@22 at line 145 # spent 5µs making 1 call to URI::Escape::BEGIN@145 |
146 | |||||
147 | # Build a char->hex map | ||||
148 | 1 | 1µs | for (0..255) { | ||
149 | 256 | 253µs | $escapes{chr($_)} = sprintf("%%%02X", $_); | ||
150 | } | ||||
151 | |||||
152 | 1 | 200ns | my %subst; # compiled patterns | ||
153 | |||||
154 | 1 | 14µs | 2 | 3µs | my %Unsafe = ( # spent 3µs making 2 calls to URI::Escape::CORE:qr, avg 2µs/call |
155 | RFC2732 => qr/[^A-Za-z0-9\-_.!~*'()]/, | ||||
156 | RFC3986 => qr/[^A-Za-z0-9\-\._~]/, | ||||
157 | ); | ||||
158 | |||||
159 | # spent 27µs (12+15) within URI::Escape::uri_escape which was called:
# once (12µs+15µs) by Search::Elasticsearch::Util::API::Path::path_handler at line 32 of /opt/flows/lib/lib/perl5/Search/Elasticsearch/Util/API/Path.pm | ||||
160 | 1 | 500ns | my($text, $patn) = @_; | ||
161 | 1 | 300ns | return undef unless defined $text; | ||
162 | 1 | 800ns | if (defined $patn){ | ||
163 | unless (exists $subst{$patn}) { | ||||
164 | # Because we can't compile the regex we fake it with a cached sub | ||||
165 | (my $tmp = $patn) =~ s,/,\\/,g; | ||||
166 | eval "\$subst{\$patn} = sub {\$_[0] =~ s/([$tmp])/\$escapes{\$1} || _fail_hi(\$1)/ge; }"; | ||||
167 | Carp::croak("uri_escape: $@") if $@; | ||||
168 | } | ||||
169 | &{$subst{$patn}}($text); | ||||
170 | } else { | ||||
171 | 1 | 23µs | 2 | 15µs | $text =~ s/($Unsafe{RFC3986})/$escapes{$1} || _fail_hi($1)/ge; # spent 14µs making 1 call to URI::Escape::CORE:regcomp
# spent 2µs making 1 call to URI::Escape::CORE:subst |
172 | } | ||||
173 | 1 | 3µs | $text; | ||
174 | } | ||||
175 | |||||
176 | sub _fail_hi { | ||||
177 | my $chr = shift; | ||||
178 | Carp::croak(sprintf "Can't escape \\x{%04X}, try uri_escape_utf8() instead", ord($chr)); | ||||
179 | } | ||||
180 | |||||
181 | sub uri_escape_utf8 { | ||||
182 | my $text = shift; | ||||
183 | utf8::encode($text); | ||||
184 | return uri_escape($text, @_); | ||||
185 | } | ||||
186 | |||||
187 | # spent 8µs (7+300ns) within URI::Escape::uri_unescape which was called:
# once (7µs+300ns) by URI::_server::host at line 93 of URI/_server.pm | ||||
188 | # Note from RFC1630: "Sequences which start with a percent sign | ||||
189 | # but are not followed by two hexadecimal characters are reserved | ||||
190 | # for future extension" | ||||
191 | 1 | 800ns | my $str = shift; | ||
192 | 1 | 500ns | if (@_ && wantarray) { | ||
193 | # not executed for the common case of a single argument | ||||
194 | my @str = ($str, @_); # need to copy | ||||
195 | for (@str) { | ||||
196 | s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; | ||||
197 | } | ||||
198 | return @str; | ||||
199 | } | ||||
200 | 1 | 4µs | 1 | 300ns | $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str; # spent 300ns making 1 call to URI::Escape::CORE:subst |
201 | 1 | 3µs | $str; | ||
202 | } | ||||
203 | |||||
204 | sub escape_char { | ||||
205 | return join '', @URI::Escape::escapes{$_[0] =~ /(\C)/g}; | ||||
206 | } | ||||
207 | |||||
208 | 1 | 9µs | 1; | ||
# spent 3µs within URI::Escape::CORE:qr which was called 2 times, avg 2µs/call:
# 2 times (3µs+0s) by URI::BEGIN@22 at line 154, avg 2µs/call | |||||
# spent 14µs within URI::Escape::CORE:regcomp which was called:
# once (14µs+0s) by URI::Escape::uri_escape at line 171 | |||||
sub URI::Escape::CORE:subst; # opcode |