Campuses:
This shows you the differences between two versions of the page.
groups:osg:cms:install_notes [2013/03/26 16:27] – created nick | groups:osg:cms:install_notes [2016/02/03 17:21] (current) – removed allan | ||
---|---|---|---|
Line 1: | Line 1: | ||
- | ====== OSG Installation Notes ====== | ||
- | Overview of a basic 'Tier 3' installation configured for use by CMS. Initial planning was based on [[https:// | ||
- | |||
- | ===== Compute Element (CE) ===== | ||
- | |||
- | **Hostname**: | ||
- | |||
- | **Services**: | ||
- | |||
- | **Primary packages**: osg-ca-certs osg-gridftp-hdfs osg-ce-condor globus-gram-job-manager-managedfork | ||
- | |||
- | **Installation docs**: [[https:// | ||
- | |||
- | ===== Storage Element (SE) ===== | ||
- | |||
- | **Hostname**: | ||
- | |||
- | **Services**: | ||
- | |||
- | **Primary packages**: osg-ca-certs osg-gridftp-hdfs bestman2-server | ||
- | |||
- | **Installation docs**: [[https:// | ||
- | ==== BeStMan GridFTP Plugin ==== | ||
- | |||
- | To allow BeStMan to direct requests to specific GridFTP servers based on pathnames, a protocol selection plugin is used. The plugin is based on the [[https:// | ||
- | |||
- | === Source === | ||
- | |||
- | <code java TPolicyPathBased.java> | ||
- | package policy; | ||
- | |||
- | import gov.lbl.srm.util.TSRMLog; | ||
- | import java.io.IOException; | ||
- | import java.util.*; | ||
- | |||
- | public class TPolicyPathBased implements gov.lbl.srm.policy.ISRMSelectionPolicy { | ||
- | Object[] _itemArray = null; | ||
- | Boolean _configProcessed = false; | ||
- | HashMap< | ||
- | |||
- | private static Properties getConfigProperties(String configFileName) throws java.io.IOException { | ||
- | Properties prop = new Properties(); | ||
- | java.io.FileInputStream configFile = null; | ||
- | try { | ||
- | configFile = new java.io.FileInputStream(configFileName); | ||
- | prop.load(configFile); | ||
- | return prop; | ||
- | } catch (java.io.IOException e) { | ||
- | System.err.println(" | ||
- | throw new RuntimeException(" | ||
- | } finally { | ||
- | if (configFile != null) { | ||
- | configFile.close(); | ||
- | } | ||
- | } | ||
- | } | ||
- | |||
- | public Object getNext(Object hint) { | ||
- | TSRMLog.debug(this.getClass(), | ||
- | if (_itemArray.length > 0 && _configProcessed == false) { | ||
- | TSRMLog.debug(this.getClass(), | ||
- | TSRMLog.debug(this.getClass(), | ||
- | try { | ||
- | Properties prop = getConfigProperties(gov.lbl.srm.server.Config._configFileNameLoaded); | ||
- | TSRMLog.debug(this.getClass(), | ||
- | String mappingPolicy = prop.getProperty(" | ||
- | String[] pathMaps = mappingPolicy.split(";" | ||
- | _pathMaps = new HashMap< | ||
- | for (String pathMap : pathMaps) { | ||
- | TSRMLog.debug(this.getClass(), | ||
- | Object mapHost = null; | ||
- | String[] mapParts = pathMap.split(" | ||
- | if (mapParts.length != 2) { | ||
- | System.err.println(" | ||
- | } | ||
- | else { | ||
- | TSRMLog.debug(this.getClass(), | ||
- | for (Object host : _itemArray) { | ||
- | if (host.toString().equals(mapParts[1])) { | ||
- | mapHost = mapParts[1]; | ||
- | } | ||
- | } | ||
- | if (mapHost == null) { | ||
- | TSRMLog.debug(this.getClass(), | ||
- | mapHost = _itemArray[0]; | ||
- | } | ||
- | TSRMLog.debug(this.getClass(), | ||
- | _pathMaps.put(mapParts[0], | ||
- | } | ||
- | } | ||
- | _configProcessed = true; | ||
- | } | ||
- | catch (IOException e) { | ||
- | System.err.println(" | ||
- | } | ||
- | } | ||
- | if (_pathMaps != null) { | ||
- | for (Map.Entry< | ||
- | TSRMLog.debug(this.getClass(), | ||
- | if (hint.toString().startsWith(pathMap.getKey())) { | ||
- | TSRMLog.debug(this.getClass(), | ||
- | return pathMap.getValue(); | ||
- | } | ||
- | } | ||
- | } | ||
- | TSRMLog.debug(this.getClass(), | ||
- | return getNext(); | ||
- | } | ||
- | |||
- | public Object getNext() { | ||
- | Object result = null; | ||
- | if (_itemArray != null) { | ||
- | result = _itemArray[0]; | ||
- | } | ||
- | return result; | ||
- | } | ||
- | |||
- | public void setItems(Object[] col) { | ||
- | _itemArray = col; | ||
- | _configProcessed = false; | ||
- | TSRMLog.debug(this.getClass(), | ||
- | } | ||
- | |||
- | public String[] displayContents() { | ||
- | String[] contents = new String[_itemArray.length]; | ||
- | for (int i = 0; i < _itemArray.length; | ||
- | contents[i] = _itemArray[i].toString(); | ||
- | } | ||
- | TSRMLog.debug(this.getClass(), | ||
- | return contents; | ||
- | } | ||
- | } | ||
- | </ | ||
- | |||
- | === Compiling and packaging === | ||
- | |||
- | < | ||
- | [1051]nick@gc1-se:/ | ||
- | </ | ||
- | |||
- | ==== Validation ==== | ||
- | |||
- | **Obtain VOMS proxy** | ||
- | [1005]nick@gc1-ce: | ||
- | < | ||
- | Enter GRID pass phrase: | ||
- | Your identity: / | ||
- | Creating temporary proxy ........................................ Done | ||
- | Contacting | ||
- | Creating proxy .......................................... Done | ||
- | |||
- | Your proxy is valid until Fri Oct 26 05:30:29 2012 | ||
- | </ | ||
- | |||
- | **Validate proxy info** | ||
- | [1006]nick@gc1-ce: | ||
- | < | ||
- | subject | ||
- | issuer | ||
- | identity | ||
- | type : proxy | ||
- | strength | ||
- | path : / | ||
- | timeleft | ||
- | key usage : Digital Signature, Key Encipherment, | ||
- | === VO cms extension information === | ||
- | VO : cms | ||
- | subject | ||
- | issuer | ||
- | attribute : / | ||
- | attribute : / | ||
- | attribute : / | ||
- | timeleft | ||
- | uri : lcg-voms.cern.ch: | ||
- | </ | ||
- | |||
- | **Transfer a file via GridFTP** | ||
- | < | ||
- | [1018]nick@gc1-ce:/ | ||
- | [1019]nick@gc1-ce:/ | ||
- | test | ||
- | [1020]nick@gc1-ce:/ | ||
- | [1021]nick@gc1-ce:/ | ||
- | [1022]nick@gc1-ce:/ | ||
- | test | ||
- | [1023]nick@gc1-ce:/ | ||
- | Found 1 items | ||
- | -rw-r--r-- | ||
- | </ | ||
- | |||
- | **Ping BeStMan SRM** | ||
- | [1013]nick@gc1-ce:/ | ||
- | < | ||
- | srm-ping | ||
- | BeStMan and SRM-Clients Copyright(c) 2007-2012, | ||
- | Lawrence Berkeley National Laboratory. All rights reserved. | ||
- | Support at SRM@LBL.GOV and documents at http:// | ||
- | |||
- | | ||
- | Built on dm.lbl.gov 128.3.30.104 at 05/09/2012 09:49:24 PDT | ||
- | |||
- | Built on ${myhost.NAME}.${myhost.DOMAIN} ${myhost.ADDR4} at 07/25/2012 16:05:58 CDT | ||
- | |||
- | SRM-CLIENT: Connecting to serviceurl httpg:// | ||
- | |||
- | SRM-PING: Thu Oct 25 17:55:00 CDT 2012 Calling SrmPing Request... | ||
- | versionInfo=v2.2 | ||
- | |||
- | Extra information (Key=Value) | ||
- | backend_type=BeStMan | ||
- | backend_version=2.2.2.2.0 | ||
- | backend_build_date=2012-07-25T21: | ||
- | gsiftpTxfServers[0]=gsiftp:// | ||
- | GatewayMode=Enabled | ||
- | clientDN=/ | ||
- | gumsIDMapped=cmsuser | ||
- | </ | ||
- | |||
- | **Transfer a file via BeStMan SRM** | ||
- | [1025]nick@gc1-ce:/ | ||
- | < | ||
- | srm-copy | ||
- | BeStMan and SRM-Clients Copyright(c) 2007-2012, | ||
- | Lawrence Berkeley National Laboratory. All rights reserved. | ||
- | Support at SRM@LBL.GOV and documents at http:// | ||
- | |||
- | | ||
- | Built on dm.lbl.gov 128.3.30.104 at 05/09/2012 09:49:24 PDT | ||
- | |||
- | Built on ${myhost.NAME}.${myhost.DOMAIN} ${myhost.ADDR4} at 07/25/2012 16:05:58 CDT | ||
- | |||
- | SRM-CLIENT: Thu Oct 25 18:07:00 CDT 2012 Connecting to httpg:// | ||
- | |||
- | SRM-CLIENT: Thu Oct 25 18:07:01 CDT 2012 Calling SrmPrepareToPutRequest now ... | ||
- | request.token= put:1 | ||
- | Request.status=SRM_SUCCESS | ||
- | explanation=null | ||
- | |||
- | SRM-CLIENT: RequestFileStatus for SURL=file:/// | ||
- | SRM-CLIENT: received TURL=gsiftp:// | ||
- | |||
- | SRM-CLIENT: Thu Oct 25 18:07:09 CDT 2012 start file transfer | ||
- | SRM-CLIENT: | ||
- | SRM-CLIENT: | ||
- | |||
- | SRM-CLIENT: Thu Oct 25 18:07:13 CDT 2012 end file transfer for file:/// | ||
- | |||
- | SRM-CLIENT: Thu Oct 25 18:07:13 CDT 2012 Calling putDone for srm:// | ||
- | Result.status=SRM_SUCCESS | ||
- | Result.Explanation=null | ||
- | |||
- | SRM-CLIENT: Request completed with success | ||
- | |||
- | SRM-CLIENT: Printing text report now ... | ||
- | |||
- | SRM-CLIENT*REQUESTTYPE=put | ||
- | SRM-CLIENT*TOTALFILES=1 | ||
- | SRM-CLIENT*TOTAL_SUCCESS=1 | ||
- | SRM-CLIENT*TOTAL_FAILED=0 | ||
- | SRM-CLIENT*REQUEST_TOKEN=put: | ||
- | SRM-CLIENT*REQUEST_STATUS=SRM_SUCCESS | ||
- | SRM-CLIENT*SOURCEURL[0]=file:/// | ||
- | SRM-CLIENT*TARGETURL[0]=srm:// | ||
- | SRM-CLIENT*TRANSFERURL[0]=gsiftp:// | ||
- | SRM-CLIENT*ACTUALSIZE[0]=5 | ||
- | SRM-CLIENT*FILE_STATUS[0]=SRM_SPACE_AVAILABLE | ||
- | SRM-CLIENT*EXPLANATION[0]=SRM-CLIENT: | ||
- | </ | ||
- | [1026]nick@gc1-ce:/ | ||
- | < | ||
- | srm-copy | ||
- | BeStMan and SRM-Clients Copyright(c) 2007-2012, | ||
- | Lawrence Berkeley National Laboratory. All rights reserved. | ||
- | Support at SRM@LBL.GOV and documents at http:// | ||
- | |||
- | | ||
- | Built on dm.lbl.gov 128.3.30.104 at 05/09/2012 09:49:24 PDT | ||
- | |||
- | Built on ${myhost.NAME}.${myhost.DOMAIN} ${myhost.ADDR4} at 07/25/2012 16:05:58 CDT | ||
- | |||
- | SRM-CLIENT: Thu Oct 25 18:07:34 CDT 2012 Connecting to httpg:// | ||
- | |||
- | SRM-CLIENT: Thu Oct 25 18:07:35 CDT 2012 Calling SrmPrepareToGet Request now ... | ||
- | request.token= get:2 | ||
- | |||
- | Request.status=SRM_SUCCESS | ||
- | Request.explanation=null | ||
- | |||
- | SRM-CLIENT: RequestFileStatus for SURL=srm:// | ||
- | SRM-CLIENT: received TURL=gsiftp:// | ||
- | |||
- | SRM-CLIENT: Thu Oct 25 18:07:44 CDT 2012 start file transfer | ||
- | SRM-CLIENT: | ||
- | SRM-CLIENT: | ||
- | |||
- | SRM-CLIENT: Thu Oct 25 18:07:48 CDT 2012 end file transfer for srm:// | ||
- | |||
- | SRM-CLIENT: Thu Oct 25 18:07:48 CDT 2012 Calling releaseFile | ||
- | |||
- | SRM-CLIENT: | ||
- | status=SRM_SUCCESS | ||
- | explanation=null | ||
- | status=SRM_SUCCESS | ||
- | explanation=null | ||
- | |||
- | SRM-CLIENT: Request completed with success | ||
- | |||
- | SRM-CLIENT: Printing text report now ... | ||
- | |||
- | SRM-CLIENT*REQUESTTYPE=get | ||
- | SRM-CLIENT*TOTALFILES=1 | ||
- | SRM-CLIENT*TOTAL_SUCCESS=1 | ||
- | SRM-CLIENT*TOTAL_FAILED=0 | ||
- | SRM-CLIENT*REQUEST_TOKEN=get: | ||
- | SRM-CLIENT*REQUEST_STATUS=SRM_SUCCESS | ||
- | SRM-CLIENT*SOURCEURL[0]=srm:// | ||
- | SRM-CLIENT*TARGETURL[0]=file:/// | ||
- | SRM-CLIENT*TRANSFERURL[0]=gsiftp:// | ||
- | SRM-CLIENT*ACTUALSIZE[0]=5 | ||
- | SRM-CLIENT*FILE_STATUS[0]=SRM_FILE_PINNED | ||
- | </ | ||
- | [1027]nick@gc1-ce:/ | ||
- | < | ||
- | test | ||
- | </ | ||
- | |||
- | ==== Troubleshooting ==== | ||
- | |||
- | === BeStMan fails to start === | ||
- | |||
- | BeStMan requires the host certificate private key to be in RSA format (the key should start '' | ||
- | |||
- | openssl rsa -in hostkey.pem -out hostkey.pem | ||
- | |||
- | === globus-url-copy fails with permission denied === | ||
- | |||
- | |||
- | '' | ||
- | |||
- | The client issuing the '' | ||
- | < | ||
- | [1048]nick@gc1-se: | ||
- | |||
- | error: globus_ftp_client: | ||
- | 500 500-Command failed. : System error in Failed to open checksum file (host=gc1-se.spa.umn.edu, | ||
- | 500-A system call failed: Permission denied | ||
- | 500 End. | ||
- | </ | ||
- | |||
- | The destination GridFTP server will show the following in ''/ | ||
- | |||
- | < | ||
- | [25293] Thu Oct 25 17:39:51 2012 :: Configuration read from / | ||
- | [25293] Thu Oct 25 17:39:51 2012 :: Server started in inetd mode. | ||
- | [25293] Thu Oct 25 17:39:51 2012 :: New connection from: gc1-se.spa.umn.edu: | ||
- | [25293] Thu Oct 25 17:39:52 2012 :: Max memory buffer count: 200. | ||
- | [25293] Thu Oct 25 17:39:52 2012 :: Max file buffer count: 1500. | ||
- | [25293] Thu Oct 25 17:39:52 2012 :: Checking current load on the server. | ||
- | [25293] Thu Oct 25 17:39:52 2012 :: Start gridftp server; hadoop nameserver hadoop-name, | ||
- | [25293] Thu Oct 25 17:39:53 2012 :: Checksum algorithms in use: MD5, | ||
- | [25293] Thu Oct 25 17:39:53 2012 :: Cannot set rlimits due to Unknown error 18446744073709551615. | ||
- | [25293] Thu Oct 25 17:39:53 2012 :: DN / | ||
- | [25293] Thu Oct 25 17:39:53 2012 :: User cmsuser successfully authorized. | ||
- | [25293] Thu Oct 25 17:39:53 2012 :: Going to do stat on file / | ||
- | [25293] Thu Oct 25 17:39:53 2012 :: We are going to open file / | ||
- | [25293] Thu Oct 25 17:39:53 2012 :: Open file / | ||
- | [25293] Thu Oct 25 17:39:53 2012 :: Successfully opened file / | ||
- | [25293] Thu Oct 25 17:39:53 2012 :: Starting to transfer "/ | ||
- | [25293] Thu Oct 25 17:39:53 2012 :: receive 1 blocks of size 5 bytes | ||
- | [25293] Thu Oct 25 17:39:53 2012 :: Trying to close file in HDFS; zero outstanding blocks. | ||
- | [25293] Thu Oct 25 17:39:55 2012 :: receive 1 blocks of size 0 bytes | ||
- | [25293] Thu Oct 25 17:39:55 2012 :: Checksum CKSUM: 935282863 | ||
- | [25293] Thu Oct 25 17:39:55 2012 :: Checksum ADLER32: 062801cb | ||
- | [25293] Thu Oct 25 17:39:55 2012 :: Checksum MD5: d8e8fca2dc0f896fd7cb4cb0031ba249 | ||
- | [25293] Thu Oct 25 17:39:55 2012 :: Checksum CRC32: 1001993670 | ||
- | [25293] Thu Oct 25 17:39:55 2012 :: Failed to open checksum file (host=gc1-se.spa.umn.edu, | ||
- | [25293] Thu Oct 25 17:39:55 2012 :: Failure attempting to transfer "/ | ||
- | [25293] Thu Oct 25 17:39:55 2012 :: Transfer failure: | ||
- | System error in Failed to open checksum file (host=gc1-se.spa.umn.edu, | ||
- | A system call failed: Permission denied | ||
- | </ | ||
- | |||
- | For a remote Hadoop filesystem, the following commands remedy the problem: | ||
- | |||
- | hadoop fs -mkdir /cksums | ||
- | hadoop fs -chown root /cksums | ||
- | |||
- | === RSV probes fail with ' | ||
- | |||
- | Condor-G jobs fail with the error ' | ||
- | |||
- | < | ||
- | Running metric org.osg.gratia.hadoop-transfer (9 of 18) | ||
- | |||
- | metricName: org.osg.gratia.hadoop-transfer | ||
- | metricType: status | ||
- | timestamp: 2012-10-26 19:36:15 CDT | ||
- | metricStatus: | ||
- | serviceType: | ||
- | serviceURI: gc1-ce.spa.umn.edu | ||
- | gatheredAt: gc1-hn.spa.umn.edu | ||
- | summaryData: | ||
- | detailsData: | ||
- | |||
- | Condor log file: | ||
- | 000 (144.000.000) 10/26 19:36:05 Job submitted from host: < | ||
- | ... | ||
- | 018 (144.000.000) 10/26 19:36:10 Globus job submission failed! | ||
- | Reason: 73 the job manager failed to open stdout | ||
- | ... | ||
- | |||
- | EOT | ||
- | </ | ||
- | ===== Head Node (HN) ===== | ||
- | |||
- | **Hostname**: | ||
- | |||
- | **Services**: | ||
- | |||
- | **Primary packages**: osg-ca-certs condor osg-gums fetch-crl rsv | ||
- | |||
- | **Installation docs**: [[https:// 