| <?php |
| /** |
| * Serve static files in a multiversion-friendly way. |
| * |
| * See https://wikitech.wikimedia.org/wiki/MediaWiki_at_WMF#Static_files for |
| * usage documentation. |
| * |
| * Design requirements: |
| * - Support changes within the life of a deployed branch. https://phabricator.wikimedia.org/T99096 |
| * - Support caching beyond the life of a deployed branch. https://phabricator.wikimedia.org/T99096 |
| * - Avoid cache poisoning. https://phabricator.wikimedia.org/T47877, https://phabricator.wikimedia.org/T117587 |
| * |
| * Overview: |
| * |
| * - multiversion requires the MediaWiki script directory on disk (/w) to be shared across |
| * all domains. Files in /w are generic and proxy to a real MediaWiki entry point |
| * in the current wiki's MediaWiki version, as determined by the request host name. |
| * - MediaWiki configuration sets $wgResourceBasePath to "/w". |
| * - Apache configuration rewrites "/w/skins/*", "/w/resources/*", "/w/extension/*", |
| * "/w/COPYING", "/w/CREDITS", and "/ontology/ontology.owl" (mediawiki.org only) |
| * to /w/static.php (this file). |
| * Here we stream the file from the appropriate MediaWiki branch directory. |
| * - For performance and to address race conditions around deployment, |
| * Varnish routes static.php requests in a hostname-agnostic way. |
| * Therefore static.php MUST respond in a deterministic way for all requests |
| * regardless of which wiki made the request. (Compliance is enforced via VCL by |
| * hardcoding "en.wikipedia.org", via static_host in Puppet.) |
| * |
| * In addition to the above, when responding to a request with a version hash we consider |
| * both the latest and previous MediaWiki branches as the source to serve the file. |
| * This is to support references from cached HTML in ParserCache and CDN for 30 days. |
| * While responses for those static assets may themselves also be cached, these caches |
| * are logically separated and generally last much shorter. As such, we must be able to |
| * respond to requests for older resources as well. |
| * |
| * Another reason is to avoid cache poisoning during deployments where otherwise |
| * a new URI could be routed to a server with the previous version and end up |
| * populating the old object under the new URI (T47877, T117587). |
| * |
| * StatsD metrics: |
| * |
| * - wmfstatic.success.<responseType (nohash, verified)> |
| * - wmfstatic.notfound |
| * - wmfstatic.mismatch |
| */ |
| |
// Responses from this entry point must not vary on request cookies or any
// other session detail. Declaring MW_NO_SESSION enforces that constraint
// with respect to session use.
define( 'MW_NO_SESSION', 1 );

// Lets MediaWiki extensions detect that they are running under this entry point (T374286)
define( 'MW_ENTRY_POINT', 'static' );

require_once __DIR__ . '/../multiversion/MWMultiVersion.php';
require MWMultiVersion::getMediaWiki( 'includes/WebStart.php' );

// Durations used below as s-maxage/max-age values in Cache-Control headers
define( 'WMF_STATIC_1MIN', 60 );
define( 'WMF_STATIC_1Y', 365 * 24 * 60 * 60 );
| |
/**
 * Emit a short plain-text error response.
 *
 * Problems with the requested URL should always be reported as 404, without
 * revealing why the URL was considered invalid (T204186).
 *
 * The response is given a one-minute shared-cache lifetime (s-maxage) and
 * max-age=0 with must-revalidate.
 *
 * @param string $message Text to print as the response body (a newline is appended)
 * @param int $status HTTP status code (this file passes 400 and 404)
 */
function wmfStaticShowError( $message, $status ) {
	HttpStatus::header( $status );

	$cachePolicy = 's-maxage=' . WMF_STATIC_1MIN . ', must-revalidate, max-age=0';
	header( 'Cache-Control: ' . $cachePolicy );
	header( 'Content-Type: text/plain; charset=utf-8' );

	echo $message, "\n";
}
| |
/**
 * Send a file from disk as the web response, including caching headers.
 *
 * Modelled on MediaWiki's StreamFile::stream().
 *
 * If the content type cannot be determined (other than for the COPYING and
 * CREDITS files, which are served as text/plain) or the file cannot be
 * stat'ed, a 404 error response is produced instead.
 *
 * @param string $filePath File to stream
 * @param string $responseType Selects the Cache-Control policy.
 *  For details about these modes, and how they are used,
 *  see <https://wikitech.wikimedia.org/wiki/MediaWiki_at_WMF#Static_files>.
 *
 *  - "verified": 1-year cache, marked immutable. These are versioned URLs under /w/,
 *    as generated by ResourceLoader. This represents most frontend traffic for static.php.
 *    Any value other than "nohash" or "mismatch" is handled this way.
 *
 *  - "nohash": 1-year cache with must-revalidate. These are unversioned URLs under /w/,
 *    and URLs under /w/ with unknown query parameters. These misformatted URLs are
 *    typically hotlinks, bots, or proxies that set some kind of garbage query string.
 *
 *  - "mismatch": 1-minute cache with must-revalidate. Quick debounce when a new hash
 *    isn't yet found on this server.
 */
function wmfStaticStreamFile( $filePath, $responseType = 'nohash' ) {
	// Passing `false` skips the retroactive upload prevention checks
	$contentType = StreamFile::contentTypeFromPath( $filePath, false );
	$typeIsKnown = $contentType && $contentType !== 'unknown/unknown';
	if ( !$typeIsKnown ) {
		// Directory, extension-less file or unknown extension.
		// Only the well-known plain-text files are let through.
		if ( !in_array( basename( $filePath ), [ 'COPYING', 'CREDITS' ] ) ) {
			wmfStaticShowError( 'Unknown file path', 404 );
			return;
		}
		$contentType = 'text/plain';
	}

	$fileInfo = stat( $filePath );
	if ( !$fileInfo ) {
		wmfStaticShowError( 'Unknown file path', 404 );
		return;
	}
	$modifiedTime = $fileInfo['mtime'];

	// Match puppet:///mediawiki/apache/expires.conf
	$allowAnyOrigin = preg_match( '/\.(gif|jpe?g|png|css|js|json|woff|woff2|svg|eot|ttf|ico)$/', $filePath );
	if ( $allowAnyOrigin ) {
		header( 'Access-Control-Allow-Origin: *' );
	}
	header( 'Last-Modified: ' . wfTimestamp( TS_RFC2822, $modifiedTime ) );
	header( "Content-Type: $contentType" );

	// Pick the header template and lifetime for the requested policy
	if ( $responseType === 'mismatch' ) {
		// Fallback
		$cacheFormat = 'Cache-Control: public, s-maxage=%d, max-age=%d, must-revalidate';
		$lifetime = WMF_STATIC_1MIN;
	} elseif ( $responseType === 'nohash' ) {
		// Unversioned
		$cacheFormat = 'Cache-Control: public, s-maxage=%d, max-age=%d, must-revalidate';
		$lifetime = WMF_STATIC_1Y;
	} else {
		// Verified files are considered immutable.
		// For the CDN, and clients not supporting "immutable", allow re-use for 1 year.
		$cacheFormat = 'Cache-Control: public, s-maxage=%d, max-age=%d, immutable';
		$lifetime = WMF_STATIC_1Y;
	}
	header( sprintf( $cacheFormat, $lifetime, $lifetime ) );

	// Conditional request: answer 304 without a body when the file has not
	// been modified after the time the client supplied.
	if ( !empty( $_SERVER['HTTP_IF_MODIFIED_SINCE'] ) ) {
		// Drop a trailing ";..." suffix before parsing the date
		$dateText = preg_replace( '/;.*$/', '', $_SERVER['HTTP_IF_MODIFIED_SINCE'] );
		$fileTime = wfTimestamp( TS_UNIX, $modifiedTime );
		if ( strtotime( $dateText ) >= $fileTime ) {
			ini_set( 'zlib.output_compression', 0 );
			header( 'HTTP/1.1 304 Not Modified' );
			return;
		}
	}

	header( 'Content-Length: ' . $fileInfo['size'] );
	readfile( $filePath );
}
| |
/**
 * Extract the path after the script path prefix ($wgScriptPath).
 *
 * The query string is discarded first. As a special case (T359643), the path
 * "/ontology/ontology.owl" on mediawiki.org / www.mediawiki.org maps to
 * "/docs/ontology.owl".
 *
 * The prefix is only accepted when it ends on a path-segment boundary, so
 * with a prefix of "/w" the path "/w/foo.js" yields "/foo.js" whereas
 * "/wiki/Foo" and "/w.git/config" are treated as not having the prefix.
 *
 * @param string $uri Full Request URI
 * @return string|false Request path stripped of the prefix, or false if the
 *  URI has no parsable path or the prefix was not found.
 */
function wmfStaticParsePath( $uri ) {
	global $wgScriptPath;

	// Strip query parameters
	$uriPath = parse_url( $uri, PHP_URL_PATH );
	if ( !is_string( $uriPath ) ) {
		// parse_url() returns false for seriously malformed URLs and null when
		// there is no path component. Neither can carry the prefix, and
		// passing them on to string functions relies on implicit coercion.
		return false;
	}

	// T359643
	$serverName = $_SERVER['SERVER_NAME'] ?? null;
	if ( $uriPath === '/ontology/ontology.owl'
		&& in_array( $serverName, [ 'www.mediawiki.org', 'mediawiki.org' ], true )
	) {
		return '/docs/ontology.owl';
	}

	$urlPrefix = $wgScriptPath;
	$prefixLength = strlen( $urlPrefix );

	if ( strncmp( $uriPath, $urlPrefix, $prefixLength ) !== 0 ) {
		// Prefix not found.
		return false;
	}

	// Strip the prefix
	$remainder = (string)substr( $uriPath, $prefixLength );

	// A plain string-prefix match is not enough: the prefix must end where a
	// path segment ends. That is the case when the prefix is empty or itself
	// ends in a slash, when nothing follows it, or when a slash follows it.
	$endsOnBoundary = $urlPrefix === ''
		|| substr( $urlPrefix, -1 ) === '/'
		|| $remainder === ''
		|| $remainder[0] === '/';

	return $endsOnBoundary ? $remainder : false;
}
| |
/**
 * Handle the current request: map the request URI to a file in one of the
 * available MediaWiki branch directories and stream it, or respond with an
 * error.
 *
 * Reads SERVER_NAME, REQUEST_URI, SCRIPT_NAME and QUERY_STRING from $_SERVER.
 * Once a request has passed the initial URL checks, its outcome (success,
 * notfound or mismatch) is counted via the 'wmfstatic_response_total' metric.
 */
function wmfStaticRespond() {
	if ( !isset( $_SERVER['SERVER_NAME'], $_SERVER['REQUEST_URI'], $_SERVER['SCRIPT_NAME'] ) ) {
		wmfStaticShowError( 'Bad request', 400 );
		return;
	}

	// Reject direct requests (eg. "/w/static.php" or "/w/static.php/test")
	// Use strpos() to tolerate trailing pathinfo or query string
	if ( strpos( $_SERVER['REQUEST_URI'], $_SERVER['SCRIPT_NAME'] ) === 0 ) {
		wmfStaticShowError( 'Unknown file path', 404 );
		return;
	}

	// Strip query parameters and the prefix (e.g. "/w/foo/bar.js" -> "/foo/bar.js")
	// To be interpreted relative to one of the /srv/mediawiki/php-.../ directories
	$uriPath = wmfStaticParsePath( $_SERVER['REQUEST_URI'] );
	if ( !$uriPath ) {
		wmfStaticShowError( 'Unknown file path', 404 );
		return;
	}
	// Reject access to dot files and dot directories.
	// The first segment is checked explicitly as well, because it is not
	// preceded by a slash when the stripped path does not start with one.
	if ( $uriPath[0] === '.' || strpos( $uriPath, '/.' ) !== false ) {
		wmfStaticShowError( 'Unknown file path', 404 );
		return;
	}

	// Get branch dirs and sort with newest first
	$branchDirs = MWWikiversions::getAvailableBranchDirs();
	usort( $branchDirs, static function ( $a, $b ) {
		return version_compare( $b, $a );
	} );
	// Validated real path of the file in the newest branch that has it
	$fallbackPath = null;

	// Validation hash
	//
	// If the request has a garbage query string that isn't a valid 5-char hex hash,
	// treat it the same as if there was no hash (serve latest available version).
	//
	// This prevents extra backend hits from broken URLs, and yet is compatible
	// and keeps expected behavior for extensions that embed libraries that may
	// append query strings for a different URL versioning/cache-busting scheme.
	$queryStr = $_SERVER['QUERY_STRING'] ?? '';
	$validHash = ( preg_match( '/^[a-fA-F0-9]{5}$/', $queryStr ) ? $queryStr : false );

	$responseMetric = MediaWiki\MediaWikiServices::getInstance()->getStatsFactory()
		->getCounter( 'wmfstatic_response_total' )
		->setLabel( 'status', 'unknown' )
		->setLabel( 'responseType', 'na' );

	// Try each version in descending order
	//
	// - Requests with validation hash get the first matching file.
	//   If none found, fall back to the latest available version.
	//   In the fallback case, we shorten the expiry to avoid cache poisoning and
	//   to ensure eventual-consistency by letting the caches self-correct
	//   (see T47877).
	//
	// - Requests without a validation hash simply get the latest version.
	//   If the file no longer exists in the latest version, we correctly
	//   fall back to the last available version.
	//
	foreach ( $branchDirs as $branchDir ) {
		// Use realpath() to prevent path escalation through e.g. "../"
		$filePath = realpath( "$branchDir/$uriPath" );
		if ( !$filePath ) {
			continue;
		}

		// The resolved path must lie inside this branch directory. Compare
		// against the directory plus a trailing separator, so that a sibling
		// directory whose name merely starts with the same characters
		// (e.g. "php-1.44.0-wmf.10" vs "php-1.44.0-wmf.1") does not pass.
		if ( strpos( $filePath, rtrim( $branchDir, '/' ) . '/' ) !== 0 ) {
			wmfStaticShowError( 'Unknown file path', 404 );
			return;
		}

		if ( $fallbackPath === null ) {
			// Remember what we found, in case we need a fallback
			$fallbackPath = $filePath;
		}

		if ( $validHash ) {
			// Match OutputPage::transformFilePath()
			$fileHash = substr( md5_file( $filePath ), 0, 5 );
			if ( $fileHash !== $validHash ) {
				// Hash mismatch, continue search in older branches
				continue;
			}
			// Cache hash-validated responses for long
			$responseType = 'verified';
		} else {
			$responseType = 'nohash';
		}

		wmfStaticStreamFile( $filePath, $responseType );

		$responseMetric->setLabel( 'status', 'success' )
			->setLabel( 'responseType', $responseType )
			->copyToStatsdAt( "wmfstatic.success.$responseType" )
			->increment();
		return;
	}

	if ( $fallbackPath === null ) {
		wmfStaticShowError( 'Unknown file path', 404 );

		$responseMetric->setLabel( 'status', 'notfound' )
			->copyToStatsdAt( 'wmfstatic.notfound' )
			->increment();
		return;
	}

	// Serve fallback with short TTL if version looks like a valid hash
	// but we don't (yet) have a matching file.
	// Stream the path that was resolved and validated in the loop above,
	// rather than rebuilding it from the raw request path.
	wmfStaticStreamFile( $fallbackPath, 'mismatch' );
	$responseMetric->setLabel( 'status', 'mismatch' )
		->copyToStatsdAt( 'wmfstatic.mismatch' )
		->increment();
}
| |
// Reset output buffering, then produce the response for this request
wfResetOutputBuffers();
wmfStaticRespond();

// Hand over to MediaWiki's post-output shutdown handling
( new MediaWiki() )->doPostOutputShutdown();