blob: c64f7f45b3232e91fa4868a88c389b9cb1bc3d64 [file] [log] [blame]
<?php
/**
* Serve static files in a multiversion-friendly way.
*
* See https://wikitech.wikimedia.org/wiki/MediaWiki_at_WMF#Static_files for
* usage documentation.
*
* Design requirements:
* - Support changes within the life of a deployed branch. https://phabricator.wikimedia.org/T99096
* - Support caching beyond the life of a deployed branch. https://phabricator.wikimedia.org/T99096
* - Avoid cache poisoning. https://phabricator.wikimedia.org/T47877, https://phabricator.wikimedia.org/T117587
*
* Overview:
*
* - multiversion requires the MediaWiki script directory on disk (/w) to be shared across
* all domains. Files in /w are generic and proxy to a real MediaWiki entry point
* in the current wiki's MediaWiki version, as determined by the request host name.
* - MediaWiki configuration sets $wgResourceBasePath to "/w".
* - Apache configuration rewrites "/w/skins/*", "/w/resources/*", "/w/extension/*",
* "/w/COPYING", "/w/CREDITS", and "/ontology/ontology.owl" (mediawiki.org only)
* to /w/static.php (this file).
* Here we stream the file from the appropriate MediaWiki branch directory.
* - For performance and to address race conditions around deployment,
* Varnish routes static.php requests in a hostname-agnostic way.
* Therefore static.php MUST respond in a deterministic way for all requests
* regardless of which wiki made the request. (Compliance is enforced via VCL by
* hardcoding "en.wikipedia.org", via static_host in Puppet.)
*
* In addition to the above, when responding to a request with a version hash we consider
* both the latest and previous MediaWiki branches as the source to serve the file.
* This is to support references from cached HTML in ParserCache and CDN for 30 days.
* While responses for those static assets may themselves also be cached, these caches
* are logically separated and generally last much shorter. As such, we must be able to
* respond to requests for older resources as well.
*
* Another reason is to avoid cache poisoning during deployments where otherwise
* a new URI could be routed to a server with the previous version and end up
* populating the old object under the new URI (T47877, T117587).
*
* StatsD metrics:
*
* - wmfstatic.success.<responseType (nohash, verified)>
* - wmfstatic.notfound
* - wmfstatic.mismatch
*/
// This endpoint is supposed to be independent of request cookies and other
// details of the session. Enforce this constraint with respect to session use.
define( 'MW_NO_SESSION', 1 );
// Allow MediaWiki extensions to detect when they're called from this entry point (T374286)
define( 'MW_ENTRY_POINT', 'static' );
// Both constants above are defined before WebStart.php is included below.
// Load the multiversion helper, then include the WebStart.php path that
// MWMultiVersion::getMediaWiki() returns (presumably the copy belonging to the
// MediaWiki branch selected for this request; confirm in MWMultiVersion).
require_once __DIR__ . '/../multiversion/MWMultiVersion.php';
require MWMultiVersion::getMediaWiki( 'includes/WebStart.php' );
// Cache lifetimes in seconds, used in the Cache-Control headers sent by this file.
// One minute.
define( 'WMF_STATIC_1MIN', 60 );
// One year (365 days).
define( 'WMF_STATIC_1Y', 31536000 );
/**
 * Emit a short plain-text error response.
 *
 * Problems with the URL should always be reported as 404, without
 * revealing why the URL was rejected (T204186).
 *
 * The response may be kept by shared caches for one minute (s-maxage), while
 * clients get max-age=0 together with must-revalidate.
 *
 * @param string $message Text for the response body; a newline is appended
 * @param int $status HTTP status code to send (this file uses 400 and 404)
 */
function wmfStaticShowError( $message, $status ) {
	HttpStatus::header( $status );

	$cacheControl = sprintf( 's-maxage=%d, must-revalidate, max-age=0', WMF_STATIC_1MIN );
	header( "Cache-Control: $cacheControl" );
	header( 'Content-Type: text/plain; charset=utf-8' );

	echo $message, "\n";
}
/**
 * Stream file from disk to web response.
 *
 * Based on MediaWiki's StreamFile::stream().
 *
 * A 404 error is sent instead when the content type cannot be determined
 * (other than for COPYING and CREDITS, which are served as text/plain), or
 * when the path is not an existing regular file.
 *
 * If the request carries an If-Modified-Since header that is not older than
 * the file's modification time, a 304 response without body is sent.
 *
 * @param string $filePath File to stream
 * @param string $responseType Cache control
 *  For details about these modes, and how they are used,
 *  see <https://wikitech.wikimedia.org/wiki/MediaWiki_at_WMF#Static_files>.
 *
 *  - "verified": Immutable or 1-year cache. These are versioned URLs under /w/,
 *    as generated by ResourceLoader. This represents most frontend traffic for static.php.
 *
 *  - "nohash": 1-year cache. These are unversioned URLs under /w/, and URLs under /w/ with unknown
 *    query parameters. These misformatted URLs are typically hotlinks, bots, or proxies that set
 *    some kind of garbage query string.
 *
 *  - "mismatch": 1-minute cache. Quick debounce when a new hash isn't yet found on this server.
 */
function wmfStaticStreamFile( $filePath, $responseType = 'nohash' ) {
	// `false` below means not to do retroactive upload prevention checks
	$ctype = StreamFile::contentTypeFromPath( $filePath, false );
	if ( !$ctype || $ctype === 'unknown/unknown' ) {
		// Directory, extension-less file or unknown extension
		if ( in_array( basename( $filePath ), [ 'COPYING', 'CREDITS' ], true ) ) {
			$ctype = 'text/plain';
		} else {
			wmfStaticShowError( 'Unknown file path', 404 );
			return;
		}
	}

	// Only regular files can be streamed. A directory whose name ends in a known
	// extension (e.g. "foo.js/") would otherwise pass the content type check and
	// stat(), and be answered with success headers followed by a failed readfile().
	// Checking is_file() first also keeps stat() from warning about a missing path.
	$stat = is_file( $filePath ) ? stat( $filePath ) : false;
	if ( !$stat ) {
		wmfStaticShowError( 'Unknown file path', 404 );
		return;
	}

	// Match puppet:///mediawiki/apache/expires.conf
	if ( preg_match( '/\.(gif|jpe?g|png|css|js|json|woff|woff2|svg|eot|ttf|ico)$/', $filePath ) ) {
		header( 'Access-Control-Allow-Origin: *' );
	}
	header( 'Last-Modified: ' . wfTimestamp( TS_RFC2822, $stat['mtime'] ) );
	header( "Content-Type: $ctype" );

	if ( $responseType === 'nohash' ) {
		// Unversioned
		header(
			sprintf( 'Cache-Control: public, s-maxage=%d, max-age=%d, must-revalidate',
				WMF_STATIC_1Y, WMF_STATIC_1Y
			)
		);
	} elseif ( $responseType === 'mismatch' ) {
		// Fallback
		header(
			sprintf( 'Cache-Control: public, s-maxage=%d, max-age=%d, must-revalidate',
				WMF_STATIC_1MIN, WMF_STATIC_1MIN
			)
		);
	} else {
		// Verified files are considered immutable
		// For the CDN, and clients not supporting "immutable", allow re-use for 1 year.
		header(
			sprintf( 'Cache-Control: public, s-maxage=%d, max-age=%d, immutable',
				WMF_STATIC_1Y, WMF_STATIC_1Y
			)
		);
	}

	if ( !empty( $_SERVER['HTTP_IF_MODIFIED_SINCE'] ) ) {
		// Drop anything after a semicolon before parsing the date
		$ims = preg_replace( '/;.*$/', '', $_SERVER['HTTP_IF_MODIFIED_SINCE'] );
		$imsTime = strtotime( $ims );
		// An unparsable date never qualifies for a 304
		if ( $imsTime !== false && wfTimestamp( TS_UNIX, $stat['mtime'] ) <= $imsTime ) {
			ini_set( 'zlib.output_compression', 0 );
			header( 'HTTP/1.1 304 Not Modified' );
			return;
		}
	}

	header( 'Content-Length: ' . $stat['size'] );
	readfile( $filePath );
}
/**
 * Extract the path after the prefix
 *
 * The query string is dropped first. A URI from which parse_url() cannot
 * extract a path is treated the same as a URI without the prefix.
 *
 * On mediawiki.org, "/ontology/ontology.owl" is mapped to "/docs/ontology.owl"
 * regardless of the prefix (T359643).
 *
 * @param string $uri Full Request URI
 * @return string|false Request path stripped of the prefix, or false if prefix not found.
 */
function wmfStaticParsePath( $uri ) {
	global $wgScriptPath;

	// Strip query parameters
	$uriPath = parse_url( $uri, PHP_URL_PATH );
	if ( !is_string( $uriPath ) ) {
		// parse_url() gives false for a seriously malformed URL and null when there
		// is no path component. Neither may be handed to the string functions below.
		return false;
	}

	// T359643
	$serverName = $_SERVER['SERVER_NAME'] ?? '';
	if ( $uriPath === '/ontology/ontology.owl'
		&& in_array( $serverName, [ 'www.mediawiki.org', 'mediawiki.org' ], true )
	) {
		return '/docs/ontology.owl';
	}

	$urlPrefix = $wgScriptPath;
	if ( strpos( $uriPath, $urlPrefix ) !== 0 ) {
		// Prefix not found.
		return false;
	}

	// Strip the prefix
	return substr( $uriPath, strlen( $urlPrefix ) );
}
/**
 * Handle the current web request.
 *
 * Validates the request URI, looks for the requested file in the available
 * MediaWiki branch directories (newest first), and streams the first suitable
 * match via wmfStaticStreamFile(). When nothing suitable is found, a plain-text
 * error is sent via wmfStaticShowError().
 *
 * The outcome (success, notfound or mismatch) is counted in the
 * 'wmfstatic_response_total' metric, and copied to the matching StatsD key.
 */
function wmfStaticRespond() {
	if ( !isset( $_SERVER['SERVER_NAME'] ) || !isset( $_SERVER['REQUEST_URI'] ) || !isset( $_SERVER['SCRIPT_NAME'] ) ) {
		wmfStaticShowError( 'Bad request', 400 );
		return;
	}

	// Reject direct requests (eg. "/w/static.php" or "/w/static.php/test")
	// Use strpos() to tolerate trailing pathinfo or query string
	if ( strpos( $_SERVER['REQUEST_URI'], $_SERVER['SCRIPT_NAME'] ) === 0 ) {
		wmfStaticShowError( 'Unknown file path', 404 );
		return;
	}

	// Strip query parameters and the prefix (e.g. "/w/foo/bar.js" -> "foo/bar.js")
	// To be interpreted relative to one of the /srv/mediawiki/php-.../ directories
	$uriPath = wmfStaticParsePath( $_SERVER['REQUEST_URI'] );
	if ( !$uriPath ) {
		wmfStaticShowError( 'Unknown file path', 404 );
		return;
	}

	// Reject access to dot files and dot directories
	if ( strpos( $uriPath, '/.' ) !== false ) {
		wmfStaticShowError( 'Unknown file path', 404 );
		return;
	}

	// Get branch dirs and sort with newest first
	$branchDirs = MWWikiversions::getAvailableBranchDirs();
	usort( $branchDirs, static function ( $a, $b ) {
		return version_compare( $b, $a );
	} );
	$newestFoundDir = null;

	// Validation hash
	//
	// If the request has a garbage query string that isn't a valid 5-char hex hash,
	// treat it the same as if there was no hash (serve latest available version).
	//
	// This prevents extra backend hits from broken URLs, and yet is compatible
	// and keeps expected behavior for extensions that embed libraries that may
	// append query strings for a different URL versioning/cache-busting scheme.
	$queryStr = $_SERVER['QUERY_STRING'] ?? '';
	$validHash = ( preg_match( '/^[a-fA-F0-9]{5}$/', $queryStr ) ? $queryStr : false );

	$responseMetric = MediaWiki\MediaWikiServices::getInstance()->getStatsFactory()
		->getCounter( 'wmfstatic_response_total' )
		->setLabel( 'status', 'unknown' )
		->setLabel( 'responseType', 'na' );

	// Try each version in descending order
	//
	// - Requests with validation hash get the first matching file.
	//   If none found, fall back to the latest available version.
	//   In the fallback case, we shorten the expiry to avoid cache poisoning and
	//   to ensure eventual-consistency by letting the caches self-correct
	//   (see T47877).
	//
	// - Requests without a validation hash simply get the latest version.
	//   If the file no longer exists in the latest version, we correctly
	//   fall back to the last available version.
	//
	foreach ( $branchDirs as $branchDir ) {
		// Use realpath() to prevent path escalation through e.g. "../"
		$filePath = realpath( "$branchDir/$uriPath" );
		if ( !$filePath ) {
			continue;
		}

		// The resolved path must be inside this branch directory. Compare against the
		// directory followed by a separator, so that a sibling whose name merely starts
		// with the same characters (e.g. "php-1.2.0-wmf.10" versus "php-1.2.0-wmf.1")
		// is not mistaken for being inside it.
		$branchRoot = rtrim( $branchDir, '/' ) . '/';
		if ( strpos( $filePath, $branchRoot ) !== 0 ) {
			wmfStaticShowError( 'Unknown file path', 404 );
			return;
		}

		if ( !$newestFoundDir ) {
			// Remember what we found, in case we need a fallback
			$newestFoundDir = $branchDir;
		}

		if ( $validHash ) {
			// Match OutputPage::transformFilePath()
			$fileHash = substr( md5_file( $filePath ), 0, 5 );
			if ( $fileHash !== $validHash ) {
				// Hash mismatch, continue search in older branches
				continue;
			}
			// Cache hash-validated responses for long
			$responseType = 'verified';
		} else {
			$responseType = 'nohash';
		}

		wmfStaticStreamFile( $filePath, $responseType );
		$responseMetric->setLabel( 'status', 'success' )
			->setLabel( 'responseType', $responseType )
			->copyToStatsdAt( "wmfstatic.success.$responseType" )
			->increment();
		return;
	}

	if ( !$newestFoundDir ) {
		wmfStaticShowError( 'Unknown file path', 404 );
		$responseMetric->setLabel( 'status', 'notfound' )
			->copyToStatsdAt( 'wmfstatic.notfound' )
			->increment();
		return;
	}

	// Serve fallback with short TTL if version looks like a valid hash
	// but we don't (yet) have a matching file.
	wmfStaticStreamFile( "$newestFoundDir/$uriPath", 'mismatch' );
	$responseMetric->setLabel( 'status', 'mismatch' )
		->copyToStatsdAt( 'wmfstatic.mismatch' )
		->increment();
}
// Entry point. wfResetOutputBuffers() is a MediaWiki helper; presumably it discards
// any active output buffers so the response below is sent unbuffered (confirm upstream).
wfResetOutputBuffers();
// Validate the request and send either the requested file or a plain-text error.
wmfStaticRespond();
// Hand over to MediaWiki's post-output shutdown step after the response was written
// (presumably deferred updates and similar; confirm in MediaWiki::doPostOutputShutdown).
$mediawiki = new MediaWiki();
$mediawiki->doPostOutputShutdown();